From 76e2999fa48eabef9e6eac7d220852984fdb55e8 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 18 Sep 2023 16:15:54 -0700 Subject: [PATCH 01/34] prov/bgq: Remove provider Provider only supported by 1.x series Signed-off-by: Sean Hefty --- .travis.yml | 1 - Makefile.am | 1 - README.md | 30 - configure.ac | 3 +- contrib/intel/jenkins/common.py | 1 - include/ofi_prov.h | 11 - man/fi_bgq.7.md | 126 - man/fi_getinfo.3.md | 4 - man/man7/fi_bgq.7 | 133 -- prov/bgq/.gitignore | 1 - prov/bgq/Makefile.include | 111 - prov/bgq/configure.m4 | 135 -- prov/bgq/include/rdma/bgq/fi_bgq.h | 266 --- prov/bgq/include/rdma/bgq/fi_bgq_compiler.h | 96 - .../include/rdma/bgq/fi_bgq_flight_recorder.h | 138 -- prov/bgq/include/rdma/bgq/fi_bgq_hwi.h | 67 - prov/bgq/include/rdma/bgq/fi_bgq_l2atomic.h | 760 ------ prov/bgq/include/rdma/bgq/fi_bgq_mu.h | 816 ------- prov/bgq/include/rdma/bgq/fi_bgq_node.h | 85 - prov/bgq/include/rdma/bgq/fi_bgq_progress.h | 77 - prov/bgq/include/rdma/bgq/fi_bgq_rx.h | 1572 ------------- prov/bgq/include/rdma/bgq/fi_bgq_spi.h | 194 -- prov/bgq/include/rdma/fi_direct.h.in | 378 --- prov/bgq/include/rdma/fi_direct_atomic.h | 997 -------- prov/bgq/include/rdma/fi_direct_atomic_def.h | 80 - prov/bgq/include/rdma/fi_direct_cm.h | 76 - prov/bgq/include/rdma/fi_direct_domain.h | 368 --- prov/bgq/include/rdma/fi_direct_endpoint.h | 1205 ---------- prov/bgq/include/rdma/fi_direct_eq.h | 698 ------ prov/bgq/include/rdma/fi_direct_rma.h | 1017 -------- prov/bgq/include/rdma/fi_direct_tagged.h | 244 -- prov/bgq/include/rdma/fi_direct_trigger.h | 56 - prov/bgq/provider_FABRIC_1.0.map | 35 - prov/bgq/src/fi_bgq_agent.c | 44 - prov/bgq/src/fi_bgq_atomic.c | 856 ------- prov/bgq/src/fi_bgq_av.c | 494 ---- prov/bgq/src/fi_bgq_cm.c | 111 - prov/bgq/src/fi_bgq_cntr.c | 283 --- prov/bgq/src/fi_bgq_cq.c | 596 ----- prov/bgq/src/fi_bgq_domain.c | 586 ----- prov/bgq/src/fi_bgq_ep.c | 2053 ----------------- prov/bgq/src/fi_bgq_fabric.c | 152 -- prov/bgq/src/fi_bgq_info.c | 141 -- 
prov/bgq/src/fi_bgq_init.c | 365 --- prov/bgq/src/fi_bgq_mr.c | 207 -- prov/bgq/src/fi_bgq_msg.c | 176 -- prov/bgq/src/fi_bgq_node.c | 486 ---- prov/bgq/src/fi_bgq_pmi.c | 364 --- prov/bgq/src/fi_bgq_progress.c | 384 --- prov/bgq/src/fi_bgq_rma.c | 257 --- prov/bgq/src/fi_bgq_sep.c | 469 ---- prov/bgq/src/fi_bgq_spi.c | 211 -- prov/bgq/src/fi_bgq_tagged.c | 199 -- prov/bgq/src/test/Makefile.include | 139 -- prov/bgq/src/test/cq_agent_init.c | 114 - prov/bgq/src/test/cq_mfifo_init.c | 80 - prov/bgq/src/test/cq_mfifo_multithreaded.c | 109 - .../src/test/cq_mfifo_multithreaded_perf.c | 125 - prov/bgq/src/test/cq_mfifo_overflow.c | 92 - prov/bgq/src/test/l2alloc_func.c | 91 - prov/bgq/src/test/l2alloc_simple.c | 59 - prov/bgq/src/test/l2alloc_single.c | 67 - prov/bgq/src/test/l2atomic_fifo_perf.c | 237 -- prov/bgq/src/test/l2barrier_func.c | 83 - prov/bgq/src/test/l2lock_func.c | 97 - prov/bgq/src/test/l2lock_init.c | 58 - prov/bgq/src/test/spi_pingpong.c | 476 ---- prov/psm3/configure.ac | 2 - src/common.c | 6 - src/fabric.c | 3 +- src/fi_tostr.c | 1 - util/info.c | 1 - 72 files changed, 2 insertions(+), 20054 deletions(-) delete mode 100644 man/fi_bgq.7.md delete mode 100644 man/man7/fi_bgq.7 delete mode 100644 prov/bgq/.gitignore delete mode 100644 prov/bgq/Makefile.include delete mode 100644 prov/bgq/configure.m4 delete mode 100644 prov/bgq/include/rdma/bgq/fi_bgq.h delete mode 100644 prov/bgq/include/rdma/bgq/fi_bgq_compiler.h delete mode 100644 prov/bgq/include/rdma/bgq/fi_bgq_flight_recorder.h delete mode 100644 prov/bgq/include/rdma/bgq/fi_bgq_hwi.h delete mode 100644 prov/bgq/include/rdma/bgq/fi_bgq_l2atomic.h delete mode 100644 prov/bgq/include/rdma/bgq/fi_bgq_mu.h delete mode 100644 prov/bgq/include/rdma/bgq/fi_bgq_node.h delete mode 100644 prov/bgq/include/rdma/bgq/fi_bgq_progress.h delete mode 100644 prov/bgq/include/rdma/bgq/fi_bgq_rx.h delete mode 100644 prov/bgq/include/rdma/bgq/fi_bgq_spi.h delete mode 100644 prov/bgq/include/rdma/fi_direct.h.in delete 
mode 100644 prov/bgq/include/rdma/fi_direct_atomic.h delete mode 100644 prov/bgq/include/rdma/fi_direct_atomic_def.h delete mode 100644 prov/bgq/include/rdma/fi_direct_cm.h delete mode 100644 prov/bgq/include/rdma/fi_direct_domain.h delete mode 100644 prov/bgq/include/rdma/fi_direct_endpoint.h delete mode 100644 prov/bgq/include/rdma/fi_direct_eq.h delete mode 100644 prov/bgq/include/rdma/fi_direct_rma.h delete mode 100644 prov/bgq/include/rdma/fi_direct_tagged.h delete mode 100644 prov/bgq/include/rdma/fi_direct_trigger.h delete mode 100644 prov/bgq/provider_FABRIC_1.0.map delete mode 100644 prov/bgq/src/fi_bgq_agent.c delete mode 100644 prov/bgq/src/fi_bgq_atomic.c delete mode 100644 prov/bgq/src/fi_bgq_av.c delete mode 100644 prov/bgq/src/fi_bgq_cm.c delete mode 100644 prov/bgq/src/fi_bgq_cntr.c delete mode 100644 prov/bgq/src/fi_bgq_cq.c delete mode 100644 prov/bgq/src/fi_bgq_domain.c delete mode 100644 prov/bgq/src/fi_bgq_ep.c delete mode 100644 prov/bgq/src/fi_bgq_fabric.c delete mode 100644 prov/bgq/src/fi_bgq_info.c delete mode 100644 prov/bgq/src/fi_bgq_init.c delete mode 100644 prov/bgq/src/fi_bgq_mr.c delete mode 100644 prov/bgq/src/fi_bgq_msg.c delete mode 100644 prov/bgq/src/fi_bgq_node.c delete mode 100644 prov/bgq/src/fi_bgq_pmi.c delete mode 100644 prov/bgq/src/fi_bgq_progress.c delete mode 100644 prov/bgq/src/fi_bgq_rma.c delete mode 100644 prov/bgq/src/fi_bgq_sep.c delete mode 100644 prov/bgq/src/fi_bgq_spi.c delete mode 100644 prov/bgq/src/fi_bgq_tagged.c delete mode 100644 prov/bgq/src/test/Makefile.include delete mode 100644 prov/bgq/src/test/cq_agent_init.c delete mode 100644 prov/bgq/src/test/cq_mfifo_init.c delete mode 100644 prov/bgq/src/test/cq_mfifo_multithreaded.c delete mode 100644 prov/bgq/src/test/cq_mfifo_multithreaded_perf.c delete mode 100644 prov/bgq/src/test/cq_mfifo_overflow.c delete mode 100644 prov/bgq/src/test/l2alloc_func.c delete mode 100644 prov/bgq/src/test/l2alloc_simple.c delete mode 100644 
prov/bgq/src/test/l2alloc_single.c delete mode 100644 prov/bgq/src/test/l2atomic_fifo_perf.c delete mode 100644 prov/bgq/src/test/l2barrier_func.c delete mode 100644 prov/bgq/src/test/l2lock_func.c delete mode 100644 prov/bgq/src/test/l2lock_init.c delete mode 100644 prov/bgq/src/test/spi_pingpong.c diff --git a/.travis.yml b/.travis.yml index ed7308431ac..2089160f468 100644 --- a/.travis.yml +++ b/.travis.yml @@ -87,7 +87,6 @@ install: # Test loadable library option # List of providers current as of Jan 2020 - ./configure --prefix=$PREFIX --enable-tcp=dl - --disable-bgq --disable-efa --disable-gni --disable-hook_debug diff --git a/Makefile.am b/Makefile.am index b34ad01e0d4..d916654bd58 100644 --- a/Makefile.am +++ b/Makefile.am @@ -459,7 +459,6 @@ include prov/gni/Makefile.include include prov/rxm/Makefile.include include prov/mrail/Makefile.include include prov/rxd/Makefile.include -include prov/bgq/Makefile.include include prov/opx/Makefile.include include prov/shm/Makefile.include include prov/sm2/Makefile.include diff --git a/README.md b/README.md index 9d14319576f..7378f3ad552 100644 --- a/README.md +++ b/README.md @@ -291,36 +291,6 @@ See the `fi_verbs(7)` man page for more details. If the libraries and header files are not in default paths, specify them in CFLAGS, LDFLAGS and LD_LIBRARY_PATH environment variables. -### bgq - -*** - -The `bgq` provider is a native provider that directly utilizes the hardware -interfaces of the Blue Gene/Q system to implement aspects of the libfabric -interface to fully support MPICH3 CH4. - -See the `fi_bgq(7)` man page for more details. - -#### Dependencies - -- The `bgq` provider depends on the system programming interfaces (SPI) and - the hardware interfaces (HWI) located in the Blue Gene/Q driver installation. - Additionally, the open source Blue Gene/Q system files are required. 
- -#### Configure options - -``` ---with-bgq-progress=(auto|manual) -``` - -If specified, set the progress mode enabled in FABRIC_DIRECT (default is FI_PROGRESS_MANUAL). - -``` ---with-bgq-mr=(basic|scalable) -``` - -If specified, set the memory registration mode (default is FI_MR_BASIC). - ### Network Direct *** diff --git a/configure.ac b/configure.ac index d1a77159d50..a4cca43be4e 100644 --- a/configure.ac +++ b/configure.ac @@ -413,7 +413,7 @@ AC_ARG_ENABLE([xpmem], [Enable xpmem (gni and shm providers) @<:@default=yes@:>@ (yes: enable xpmem; no: disable xpmem; PATH: enable xpmem and use xpmem installed under PATH)])], - ) + ) FI_CHECK_PACKAGE([xpmem], [xpmem.h], @@ -959,7 +959,6 @@ FI_PROVIDER_SETUP([tcp]) FI_PROVIDER_SETUP([rxm]) FI_PROVIDER_SETUP([mrail]) FI_PROVIDER_SETUP([rxd]) -FI_PROVIDER_SETUP([bgq]) FI_PROVIDER_SETUP([shm]) FI_PROVIDER_SETUP([sm2]) FI_PROVIDER_SETUP([rstream]) diff --git a/contrib/intel/jenkins/common.py b/contrib/intel/jenkins/common.py index 88732504691..b4c1918498c 100755 --- a/contrib/intel/jenkins/common.py +++ b/contrib/intel/jenkins/common.py @@ -134,7 +134,6 @@ def run(self): 'perf', 'rstream', 'hook_debug', - 'bgq', 'mrail', 'opx' ] diff --git a/include/ofi_prov.h b/include/ofi_prov.h index 8e444ba65ef..cdfb50794ae 100644 --- a/include/ofi_prov.h +++ b/include/ofi_prov.h @@ -178,17 +178,6 @@ RXD_INI ; # define RXD_INIT NULL #endif -#if (HAVE_BGQ) && (HAVE_BGQ_DL) -# define BGQ_INI FI_EXT_INI -# define BGQ_INIT NULL -#elif (HAVE_BGQ) -# define BGQ_INI INI_SIG(fi_bgq_ini) -# define BGQ_INIT fi_bgq_ini() -BGQ_INI ; -#else -# define BGQ_INIT NULL -#endif - #ifdef _WIN32 #if (HAVE_NETDIR) && (HAVE_NETDIR_DL) # define NETDIR_INI FI_EXT_INI diff --git a/man/fi_bgq.7.md b/man/fi_bgq.7.md deleted file mode 100644 index 4513f2b8bf6..00000000000 --- a/man/fi_bgq.7.md +++ /dev/null @@ -1,126 +0,0 @@ ---- -layout: page -title: fi_bgq(7) -tagline: Libfabric Programmer's Manual ---- -{% include JB/setup %} - -# NAME - -fi_bgq \- The Blue 
Gene/Q Fabric Provider - -# OVERVIEW - -The bgq provider is a native implementation of the libfabric interfaces -that makes direct use of the unique hardware features such as the -Messaging Unit (MU), Base Address Table (BAT), and L2 Atomics. - -The purpose of this provider is to demonstrate the scalability and -performance of libfabric, providing an "extreme scale" -development environment for applications and middleware using the -libfabric API, and to support a functional and performant version of -MPI3 on Blue Gene/Q via MPICH CH4. - -# SUPPORTED FEATURES - -The bgq provider supports most features defined for the libfabric API. -Key features include: - -*Endpoint types* -: The Blue Gene/Q hardware is connectionless and reliable. Therefore, the - bgq provider only supports the *FI_EP_RDM* endpoint type. - -*Capabilities* -: Supported capabilities include *FI_MSG*, *FI_RMA*, *FI_TAGGED*, - *FI_ATOMIC*, *FI_NAMED_RX_CTX*, *FI_READ*, *FI_WRITE*, *FI_SEND*, *FI_RECV*, - *FI_REMOTE_READ*, *FI_REMOTE_WRITE*, *FI_MULTI_RECV*, *FI_DIRECTED_RECV*, - *FI_SOURCE* and *FI_FENCE*. - -Notes on FI_DIRECTED_RECV capability: -The immediate data which is sent within the *senddata* call to support -FI_DIRECTED_RECV for BGQ must be exactly 4 bytes, which BGQ uses to -completely identify the source address to an exascale-level number of ranks -for tag matching on the recv and can be managed within the MU packet. -Therefore the domain attribute cq_data_size is set to 4 which is the OFI -standard minimum. - -*Modes* -: The bgq provider requires *FI_CONTEXT* and *FI_ASYNC_IOV* - -*Memory registration modes* -: Both FI_MR_SCALABLE and FI_MR_BASIC are supported, specified at configuration - time with the "--with-bgq-mr" configure option. The base address table - utilized by FI_MR_SCALABLE for rdma transfers is completely software emulated, - supporting FI_ATOMIC, FI_READ, FI_WRITE, FI_REMOTE_READ, and FI_REMOTE_WRITE - capabilities. 
With FI_MR_BASIC the FI_WRITE is completely hardware - accelerated, the other rdma transfers are still software emulated but the - use of a base address table is no longer required as the offset is now the - virtual address of the memory from the application and the key is the delta - from which the physical address can be computed if necessary. - -*Additional features* -: Supported additional features include *FABRIC_DIRECT*, *scalable endpoints*, - and *counters*. - -*Progress* -: Both progress modes, *FI_PROGRESS_AUTO* and *FI_PROGRESS_MANUAL*, are - supported. The progress mode may be specified via the "--with-bgq-progress" - configure option. - -*Address vector* -: Only the *FI_AV_MAP* address vector format is supported. - -# UNSUPPORTED FEATURES - -*Endpoint types* -: Unsupported endpoint types include *FI_EP_DGRAM* and *FI_EP_MSG* - -*Capabilities* -: The bgq provider does not support the *FI_RMA_EVENT*, and - *FI_TRIGGER* capabilities. - -*Address vector* -: The bgq provider does not support the *FI_AV_TABLE* address vector format. - Support for *FI_AV_TABLE* may be added in the future. - -# LIMITATIONS - -The bgq provider only supports *FABRIC_DIRECT*. The size of the fi_context -structure for *FI_CONTEXT* is too small to be useful. In the 'direct' mode the -bgq provider can re-define the struct fi_context to a larger size - currently -64 bytes which is the L1 cache size. - -The fi_context structure for *FI_CONTEXT* must be aligned to 8 bytes. This requirement is because -the bgq provider will use MU network atomics to track completions and the memory -used with MU atomic operations must be aligned to 8 bytes. Unfortunately, the libfabric API -has no mechanism for applications to programmatically determine these alignment -requirements. Because unaligned MU atomics operations are a fatal error, the -bgq provider will assert on the alignment for "debug" builds (i.e., the '-DNDEBUG' -pre-processor flag is not specified). 
- -The progress thread used for *FI_PROGRESS_AUTO* effectively limits the maximum -number of ranks-per-node to 32. However for FI_PROGRESS_MANUAL the maximum is 64. - -For FI_MR_SCALABLE mr mode the memory region key size (mr_key_size) is 2 *bytes*; Valid key values are -0..2^16-1. - -It is invalid to register memory at the base virtual address "0" with a -length of "UINTPTR_MAX" (or equivalent). The Blue Gene/Q hardware operates on -37-bit physical addresses and all virtual addresses specified in the libfabric -API, such as the location of source/destination data and remote memory locations, -must be converted to a physical address before use. A 64-bit virtual address -space will not fit into a 37-bit physical address space. - -fi_trecvmsg() fnd fi_recvmsg() unctions do not support non-contiguous receives -and the iovec count must be 1. The fi_trecvv() and fi_recvv() functions are -currently not supported. - -# RUNTIME PARAMETERS - -No runtime parameters are currently defined. - -# SEE ALSO - -[`fabric`(7)](fabric.7.html), -[`fi_provider`(7)](fi_provider.7.html), -[`fi_getinfo`(3)](fi_getinfo.3.html) diff --git a/man/fi_getinfo.3.md b/man/fi_getinfo.3.md index 7f3c78fec55..75be2058590 100644 --- a/man/fi_getinfo.3.md +++ b/man/fi_getinfo.3.md @@ -625,10 +625,6 @@ formats. In some cases, a selected addressing format may need to be translated or mapped into an address which is native to the fabric. See [`fi_av`(3)](fi_av.3.html). -*FI_ADDR_BGQ* -: Address is an IBM proprietary format that is used with their Blue Gene Q - systems. - *FI_ADDR_EFA* : Address is an Amazon Elastic Fabric Adapter (EFA) proprietary format. 
diff --git a/man/man7/fi_bgq.7 b/man/man7/fi_bgq.7 deleted file mode 100644 index ab9d9148ebf..00000000000 --- a/man/man7/fi_bgq.7 +++ /dev/null @@ -1,133 +0,0 @@ -.\" Automatically generated by Pandoc 2.9.2.1 -.\" -.TH "fi_bgq" "7" "2022\-12\-09" "Libfabric Programmer\[cq]s Manual" "#VERSION#" -.hy -.SH NAME -.PP -fi_bgq - The Blue Gene/Q Fabric Provider -.SH OVERVIEW -.PP -The bgq provider is a native implementation of the libfabric interfaces -that makes direct use of the unique hardware features such as the -Messaging Unit (MU), Base Address Table (BAT), and L2 Atomics. -.PP -The purpose of this provider is to demonstrate the scalability and -performance of libfabric, providing an \[lq]extreme scale\[rq] -development environment for applications and middleware using the -libfabric API, and to support a functional and performant version of -MPI3 on Blue Gene/Q via MPICH CH4. -.SH SUPPORTED FEATURES -.PP -The bgq provider supports most features defined for the libfabric API. -Key features include: -.TP -\f[I]Endpoint types\f[R] -The Blue Gene/Q hardware is connectionless and reliable. -Therefore, the bgq provider only supports the \f[I]FI_EP_RDM\f[R] -endpoint type. -.TP -\f[I]Capabilities\f[R] -Supported capabilities include \f[I]FI_MSG\f[R], \f[I]FI_RMA\f[R], -\f[I]FI_TAGGED\f[R], \f[I]FI_ATOMIC\f[R], \f[I]FI_NAMED_RX_CTX\f[R], -\f[I]FI_READ\f[R], \f[I]FI_WRITE\f[R], \f[I]FI_SEND\f[R], -\f[I]FI_RECV\f[R], \f[I]FI_REMOTE_READ\f[R], \f[I]FI_REMOTE_WRITE\f[R], -\f[I]FI_MULTI_RECV\f[R], \f[I]FI_DIRECTED_RECV\f[R], \f[I]FI_SOURCE\f[R] -and \f[I]FI_FENCE\f[R]. -.PP -Notes on FI_DIRECTED_RECV capability: The immediate data which is sent -within the \f[I]senddata\f[R] call to support FI_DIRECTED_RECV for BGQ -must be exactly 4 bytes, which BGQ uses to completely identify the -source address to an exascale-level number of ranks for tag matching on -the recv and can be managed within the MU packet. 
-Therefore the domain attribute cq_data_size is set to 4 which is the OFI -standard minimum. -.TP -\f[I]Modes\f[R] -The bgq provider requires \f[I]FI_CONTEXT\f[R] and -\f[I]FI_ASYNC_IOV\f[R] -.TP -\f[I]Memory registration modes\f[R] -Both FI_MR_SCALABLE and FI_MR_BASIC are supported, specified at -configuration time with the \[lq]\[en]with-bgq-mr\[rq] configure option. -The base address table utilized by FI_MR_SCALABLE for rdma transfers is -completely software emulated, supporting FI_ATOMIC, FI_READ, FI_WRITE, -FI_REMOTE_READ, and FI_REMOTE_WRITE capabilities. -With FI_MR_BASIC the FI_WRITE is completely hardware accelerated, the -other rdma transfers are still software emulated but the use of a base -address table is no longer required as the offset is now the virtual -address of the memory from the application and the key is the delta from -which the physical address can be computed if necessary. -.TP -\f[I]Additional features\f[R] -Supported additional features include \f[I]FABRIC_DIRECT\f[R], -\f[I]scalable endpoints\f[R], and \f[I]counters\f[R]. -.TP -\f[I]Progress\f[R] -Both progress modes, \f[I]FI_PROGRESS_AUTO\f[R] and -\f[I]FI_PROGRESS_MANUAL\f[R], are supported. -The progress mode may be specified via the -\[lq]\[en]with-bgq-progress\[rq] configure option. -.TP -\f[I]Address vector\f[R] -Only the \f[I]FI_AV_MAP\f[R] address vector format is supported. -.SH UNSUPPORTED FEATURES -.TP -\f[I]Endpoint types\f[R] -Unsupported endpoint types include \f[I]FI_EP_DGRAM\f[R] and -\f[I]FI_EP_MSG\f[R] -.TP -\f[I]Capabilities\f[R] -The bgq provider does not support the \f[I]FI_RMA_EVENT\f[R], and -\f[I]FI_TRIGGER\f[R] capabilities. -.TP -\f[I]Address vector\f[R] -The bgq provider does not support the \f[I]FI_AV_TABLE\f[R] address -vector format. -Support for \f[I]FI_AV_TABLE\f[R] may be added in the future. -.SH LIMITATIONS -.PP -The bgq provider only supports \f[I]FABRIC_DIRECT\f[R]. 
-The size of the fi_context structure for \f[I]FI_CONTEXT\f[R] is too -small to be useful. -In the `direct' mode the bgq provider can re-define the struct -fi_context to a larger size - currently 64 bytes which is the L1 cache -size. -.PP -The fi_context structure for \f[I]FI_CONTEXT\f[R] must be aligned to 8 -bytes. -This requirement is because the bgq provider will use MU network atomics -to track completions and the memory used with MU atomic operations must -be aligned to 8 bytes. -Unfortunately, the libfabric API has no mechanism for applications to -programmatically determine these alignment requirements. -Because unaligned MU atomics operations are a fatal error, the bgq -provider will assert on the alignment for \[lq]debug\[rq] builds (i.e., -the `-DNDEBUG' pre-processor flag is not specified). -.PP -The progress thread used for \f[I]FI_PROGRESS_AUTO\f[R] effectively -limits the maximum number of ranks-per-node to 32. -However for FI_PROGRESS_MANUAL the maximum is 64. -.PP -For FI_MR_SCALABLE mr mode the memory region key size (mr_key_size) is 2 -\f[I]bytes\f[R]; Valid key values are 0..2\[ha]16-1. -.PP -It is invalid to register memory at the base virtual address \[lq]0\[rq] -with a length of \[lq]UINTPTR_MAX\[rq] (or equivalent). -The Blue Gene/Q hardware operates on 37-bit physical addresses and all -virtual addresses specified in the libfabric API, such as the location -of source/destination data and remote memory locations, must be -converted to a physical address before use. -A 64-bit virtual address space will not fit into a 37-bit physical -address space. -.PP -fi_trecvmsg() fnd fi_recvmsg() unctions do not support non-contiguous -receives and the iovec count must be 1. -The fi_trecvv() and fi_recvv() functions are currently not supported. -.SH RUNTIME PARAMETERS -.PP -No runtime parameters are currently defined. -.SH SEE ALSO -.PP -\f[C]fabric\f[R](7), \f[C]fi_provider\f[R](7), \f[C]fi_getinfo\f[R](3) -.SH AUTHORS -OpenFabrics. 
diff --git a/prov/bgq/.gitignore b/prov/bgq/.gitignore deleted file mode 100644 index f31f507082d..00000000000 --- a/prov/bgq/.gitignore +++ /dev/null @@ -1 +0,0 @@ -NOTES diff --git a/prov/bgq/Makefile.include b/prov/bgq/Makefile.include deleted file mode 100644 index f85dc48f93f..00000000000 --- a/prov/bgq/Makefile.include +++ /dev/null @@ -1,111 +0,0 @@ -# -# Copyright (C) 2016 by Argonne National Laboratory. -# -# This software is available to you under a choice of one of two -# licenses. You may choose to be licensed under the terms of the GNU -# General Public License (GPL) Version 2, available from the file -# COPYING in the main directory of this source tree, or the -# BSD license below: -# -# Redistribution and use in source and binary forms, with or -# without modification, are permitted provided that the following -# conditions are met: -# -# - Redistributions of source code must retain the above -# copyright notice, this list of conditions and the following -# disclaimer. -# -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials -# provided with the distribution. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
-# -if HAVE_BGQ -bgq_files = prov/bgq/src/fi_bgq_atomic.c \ - prov/bgq/src/fi_bgq_av.c \ - prov/bgq/src/fi_bgq_spi.c \ - prov/bgq/src/fi_bgq_cm.c \ - prov/bgq/src/fi_bgq_cntr.c \ - prov/bgq/src/fi_bgq_cq.c \ - prov/bgq/src/fi_bgq_domain.c \ - prov/bgq/src/fi_bgq_ep.c \ - prov/bgq/src/fi_bgq_fabric.c \ - prov/bgq/src/fi_bgq_info.c \ - prov/bgq/src/fi_bgq_init.c \ - prov/bgq/src/fi_bgq_mr.c \ - prov/bgq/src/fi_bgq_msg.c \ - prov/bgq/src/fi_bgq_rma.c \ - prov/bgq/src/fi_bgq_sep.c \ - prov/bgq/src/fi_bgq_tagged.c \ - prov/bgq/src/fi_bgq_node.c \ - prov/bgq/src/fi_bgq_progress.c \ - prov/bgq/src/fi_bgq_pmi.c - -bgq_files_nodist = prov/bgq/external/memory_impl.c - -bgq_CPPFLAGS = -I@bgq_driver@ -I@bgq_driver@/spi/include/kernel/cnk -I$(builddir)/prov/bgq/include -I$(srcdir)/prov/bgq/include -bgq_LDFLAGS = -static - -prov/bgq/external/memory_impl.c: @bgq_external_source@/spi/src/kernel/cnk/memory_impl.c - @${MKDIR_P} $(@D) - @cp $< $@ - -if HAVE_BGQ_DL -pkglib_LTLIBRARIES += libbgq-fi.la -libbgq_fi_la_SOURCES = $(bgq_files) $(common_srcs) -nodist_libbgq_fi_la_SOURCES = $(bgq_files_nodist) -libbgq_fi_la_CPPFLAGS = $(AM_CPPFLAGS) $(bgq_CPPFLAGS) -libbgq_fi_la_LDFLAGS = \ - -module -avoid-version -export-dynamic $(bgq_LDFLAGS) -libbgq_fi_la_LIBADD = $(linkback) $(bgq_LIBS) -libbgq_fi_la_DEPENDENCIES = $(linkback) -else -src_libfabric_la_SOURCES += $(bgq_files) -nodist_src_libfabric_la_SOURCES += $(bgq_files_nodist) -src_libfabric_la_CPPFLAGS += $(bgq_CPPFLAGS) -src_libfabric_la_LDFLAGS += $(bgq_LDFLAGS) -src_libfabric_la_LIBADD += $(bgq_LIBS) -endif - -sbin_PROGRAMS = fi_bgq_agent0 fi_bgq_agent1 -fi_bgq_agent1_SOURCES = prov/bgq/src/fi_bgq_agent.c -fi_bgq_agent1_CPPFLAGS = $(AM_CPPFLAGS) $(bgq_CPPFLAGS) -fi_bgq_agent1_LDFLAGS = -all-static $(bgq_LDFLAGS) -Wl,--script=@bgq_driver@/cnk/tools/AppAgent1.lds -fi_bgq_agent0_SOURCES = $(fi_bgq_agent1_SOURCES) -fi_bgq_agent0_CPPFLAGS = $(fi_bgq_agent1_CPPFLAGS) -fi_bgq_agent0_LDFLAGS = -all-static $(bgq_LDFLAGS) 
-Wl,--script=@bgq_driver@/cnk/tools/AppAgent0.lds - -include $(top_srcdir)/prov/bgq/src/test/Makefile.include - -rdma_bgqincludedir = $(includedir)/rdma/bgq - -rdma_bgqinclude_HEADERS = - -# internal utility functions shared by in-tree providers: -if HAVE_DIRECT -nodist_rdma_bgqinclude_HEADERS = \ - $(top_srcdir)/prov/$(PROVIDER_DIRECT)/include/rdma/bgq/fi_bgq.h \ - $(top_srcdir)/prov/$(PROVIDER_DIRECT)/include/rdma/bgq/fi_bgq_compiler.h \ - $(top_srcdir)/prov/$(PROVIDER_DIRECT)/include/rdma/bgq/fi_bgq_hwi.h \ - $(top_srcdir)/prov/$(PROVIDER_DIRECT)/include/rdma/bgq/fi_bgq_mu.h \ - $(top_srcdir)/prov/$(PROVIDER_DIRECT)/include/rdma/bgq/fi_bgq_spi.h \ - $(top_srcdir)/prov/$(PROVIDER_DIRECT)/include/rdma/bgq/fi_bgq_node.h \ - $(top_srcdir)/prov/$(PROVIDER_DIRECT)/include/rdma/bgq/fi_bgq_progress.h \ - $(top_srcdir)/prov/$(PROVIDER_DIRECT)/include/rdma/bgq/fi_bgq_flight_recorder.h \ - $(top_srcdir)/prov/$(PROVIDER_DIRECT)/include/rdma/bgq/fi_bgq_l2atomic.h -endif HAVE_DIRECT - -prov_install_man_pages += man/man7/fi_bgq.7 - -endif #HAVE_BGQ - -prov_dist_man_pages += man/man7/fi_bgq.7 diff --git a/prov/bgq/configure.m4 b/prov/bgq/configure.m4 deleted file mode 100644 index 5f88db683f5..00000000000 --- a/prov/bgq/configure.m4 +++ /dev/null @@ -1,135 +0,0 @@ -dnl -dnl Copyright (C) 2016 by Argonne National Laboratory. -dnl -dnl This software is available to you under a choice of one of two -dnl licenses. You may choose to be licensed under the terms of the GNU -dnl General Public License (GPL) Version 2, available from the file -dnl COPYING in the main directory of this source tree, or the -dnl BSD license below: -dnl -dnl Redistribution and use in source and binary forms, with or -dnl without modification, are permitted provided that the following -dnl conditions are met: -dnl -dnl - Redistributions of source code must retain the above -dnl copyright notice, this list of conditions and the following -dnl disclaimer. 
-dnl -dnl - Redistributions in binary form must reproduce the above -dnl copyright notice, this list of conditions and the following -dnl disclaimer in the documentation and/or other materials -dnl provided with the distribution. -dnl -dnl THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -dnl EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -dnl MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -dnl NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -dnl BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -dnl ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -dnl CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -dnl SOFTWARE. -dnl -dnl Configury specific to the libfabrics BGQ provider - -dnl Called to configure this provider -dnl -dnl Arguments: -dnl -dnl $1: action if configured successfully -dnl $2: action if not configured successfully -dnl -AC_DEFUN([FI_BGQ_CONFIGURE],[ - # Determine if we can support the bgq provider - bgq_happy=0 - bgq_direct=0 - - AS_IF([test x"$enable_bgq" != x"no"],[ - AC_MSG_CHECKING([for direct bgq provider]) - AS_IF([test x"$enable_direct" != x"bgq"], - [AC_MSG_RESULT([no])], - [AC_MSG_RESULT([yes]) - - bgq_driver=/bgsys/drivers/ppcfloor - AC_SUBST(bgq_driver) - AC_ARG_WITH([bgq-driver], - [AS_HELP_STRING([--with-bgq-driver=@<:@BGQ driver installation path@:>@], - [Provide path to where BGQ system headers are installed]) - ], - [bgq_driver=$with_bgq_driver]) - - bgq_driver_CPPFLAGS="-I$bgq_driver -I$bgq_driver/spi/include/kernel/cnk" - CPPFLAGS="$bgq_driver_CPPFLAGS $CPPFLAGS" - - AC_CHECK_HEADER(hwi/include/bqc/MU_Descriptor.h, - [bgq_happy=1], - [bgq_happy=0]) - - bgq_external_source=auto - AC_SUBST(bgq_external_source) - AC_ARG_WITH([bgq-src], - [AS_HELP_STRING([--with-bgq-src(=DIR)], - [bgq opensource distribution @<:@default=auto@:>@]) - ], - [bgq_external_source=$with_bgq_src]) - - AS_IF([test x"$bgq_external_source" = x"auto"], [ 
- for bgq_dir in `ls -r /bgsys/source`; do - AC_MSG_CHECKING([for bgq opensource distribution]) - AS_IF([test -f /bgsys/source/$bgq_dir/spi/src/kernel/cnk/memory_impl.c], - bgq_external_source="/bgsys/source/$bgq_dir" - AC_MSG_RESULT([$bgq_external_source]) - break) - done - AS_IF([test x"$bgq_external_source" = x"auto"], [ - bgq_happy=0 - AC_MSG_RESULT([no])]) - ]) - - AS_IF([test ! -f $bgq_external_source/spi/src/kernel/cnk/memory_impl.c], [ - AC_MSG_ERROR([unable to locate the bgq opensource distribution])]) - - AC_ARG_WITH([bgq-progress], - [AS_HELP_STRING([--with-bgq-progress(=auto|manual|runtime)], - [Specify the bgq FABRIC_DIRECT progess mode @<:@default=manual@:>@]) - ]) - - AS_CASE([$with_bgq_progress], - [auto], [BGQ_FABRIC_DIRECT_PROGRESS=FI_PROGRESS_AUTO], - [manual], [BGQ_FABRIC_DIRECT_PROGRESS=FI_PROGRESS_MANUAL], - [runtime], [BGQ_FABRIC_DIRECT_PROGRESS=FI_PROGRESS_UNSPEC], - [BGQ_FABRIC_DIRECT_PROGRESS=FI_PROGRESS_MANUAL]) - - AC_SUBST(bgq_fabric_direct_progress, [$BGQ_FABRIC_DIRECT_PROGRESS]) - - dnl Only FI_AV_MAP is supported by the bgq provider - BGQ_FABRIC_DIRECT_AV=FI_AV_MAP - AC_SUBST(bgq_fabric_direct_av, [$BGQ_FABRIC_DIRECT_AV]) - - AC_ARG_WITH([bgq-mr], - [AS_HELP_STRING([--with-bgq-mr(=scalable|basic)], - [Specify the bgq FABRIC_DIRECT mr mode @<:@default=scalable@:>@]) - ]) - - AS_CASE([$with_bgq_mr], - [scalable], [BGQ_FABRIC_DIRECT_MR=FI_MR_SCALABLE], - [basic], [BGQ_FABRIC_DIRECT_MR=FI_MR_BASIC], - [BGQ_FABRIC_DIRECT_MR=FI_MR_SCALABLE]) - - AC_SUBST(bgq_fabric_direct_mr, [$BGQ_FABRIC_DIRECT_MR]) - - dnl Only FI_THREAD_ENDPOINT is supported by the bgq provider - BGQ_FABRIC_DIRECT_THREAD=FI_THREAD_ENDPOINT - AC_SUBST(bgq_fabric_direct_thread, [$BGQ_FABRIC_DIRECT_THREAD]) - - - AC_CONFIG_FILES([prov/bgq/include/rdma/fi_direct.h]) - ]) - ]) - - AS_IF([test $bgq_happy -eq 1], [$1], [$2]) -]) - -dnl A separate macro for AM CONDITIONALS, since they cannot be invoked -dnl conditionally -AC_DEFUN([FI_BGQ_CONDITIONALS],[ -]) diff --git 
a/prov/bgq/include/rdma/bgq/fi_bgq.h b/prov/bgq/include/rdma/bgq/fi_bgq.h deleted file mode 100644 index 04a8fe585f0..00000000000 --- a/prov/bgq/include/rdma/bgq/fi_bgq.h +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef _FI_PROV_BGQ_INTERNAL_H_ -#define _FI_PROV_BGQ_INTERNAL_H_ - -#include - -#include -#include - -#include "rdma/bgq/fi_bgq_node.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "fi_bgq_hwi.h" - - -#define FI_BGQ_PFX "bgq" - -// #define FI_BGQ_TRACE 1 - -/* --- Will be exposed by fabric.h */ -#define FI_BGQ_PROTOCOL 0x0008 -#define FI_BGQ_PROTOCOL_VERSION (1) -/* --- end */ - -#define FI_BGQ_PROVIDER_NAME "bgq" -#define FI_BGQ_PROVIDER_VERSION (1) -#define FI_BGQ_DEVICE_MAX_PATH_NAME (32) -#define FI_BGQ_FABRIC_NAME "BGQ" - -#define FI_BGQ_CACHE_LINE_SIZE (L2_CACHE_LINE_SIZE) - -#define FI_BGQ_MAX_STRLEN (64) - -#define EXIT_FAILURE 1 - -struct fi_bgq_global_data { - struct fi_info *info; - struct fi_domain_attr *default_domain_attr; - struct fi_ep_attr *default_ep_attr; - struct fi_tx_attr *default_tx_attr; - struct fi_rx_attr *default_rx_attr; - struct fi_provider *prov; -}; - -extern struct fi_bgq_global_data fi_bgq_global; - -static inline void always_assert(bool val, char *msg) -{ - if (!val) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_FABRIC, - "%s\n", msg); - exit(EXIT_FAILURE); - } -} - -static inline void fi_bgq_ref_init(struct fi_bgq_node *node, - struct l2atomic_counter *ref, char *name) -{ - int ret __attribute__ ((unused)); - ret = fi_bgq_node_counter_allocate(node, ref); - assert(ret == 0); - - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_FABRIC, - "initializing ref count for (%s) to (%d)\n", - name, 0); - - return; -} - -static inline void fi_bgq_ref_inc(struct l2atomic_counter *ref, char *name) -{ - l2atomic_counter_add(ref, 1); - return; -} - -static inline int fi_bgq_ref_dec(struct l2atomic_counter *ref, char *name) -{ - int64_t value = -1; - value = (int64_t) l2atomic_counter_decrement(ref); - - if ((value-1) < 0) { - - FI_WARN(fi_bgq_global.prov, FI_LOG_FABRIC, - "decrement 
ref for (%s) (ref_cnt %d < 0)\n", - name, (value-1)); - - errno = FI_EOTHER; - return -errno; - } - return 0; -} - -static inline int fi_bgq_ref_finalize(struct l2atomic_counter *ref, char *name) -{ - int64_t value = -1; - value = (int64_t) l2atomic_counter_get(ref); - - if (value != 0) { - FI_WARN(fi_bgq_global.prov, FI_LOG_FABRIC, - "error ref for (%s) (ref_cnt %d != 0)\n", - name, value); - errno = FI_EBUSY; - return -errno; - } - return 0; -} - -static inline int fi_bgq_fid_check(fid_t fid, int fid_class, char *name) -{ - if (!fid) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_FABRIC, - "NULL %s object", name); - errno = FI_EINVAL; - return -errno; - } - if (fid->fclass != fid_class) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_FABRIC, - "wrong type of object (%s) expected (%d), got (%d)\n", - name, fid_class, fid->fclass); - errno = FI_EINVAL; - return -errno; - } - return 0; -} - -struct fi_bgq_fabric { - struct fid_fabric fabric_fid; - struct fi_bgq_node node; - struct l2atomic_counter ref_cnt; -}; - - -int fi_bgq_set_default_info(void); - -int fi_bgq_check_info(const struct fi_info *info); - -int fi_bgq_fabric(struct fi_fabric_attr *attr, - struct fid_fabric **fabric, void *context); - -int fi_bgq_check_fabric_attr(const struct fi_fabric_attr *attr); - -int fi_bgq_domain(struct fid_fabric *fabric, - struct fi_info *info, - struct fid_domain **dom, void *context); - -int fi_bgq_check_domain_attr(const struct fi_domain_attr *attr); -int fi_bgq_choose_domain(uint64_t caps, - struct fi_domain_attr *domain_attr, - const struct fi_domain_attr *hints); - -int fi_bgq_alloc_default_domain_attr(struct fi_domain_attr **domain_attr); - -int fi_bgq_stx_context(struct fid_domain *domain, struct fi_tx_attr *attr, - struct fid_stx **stx, void *context); - -int fi_bgq_av_open(struct fid_domain *dom, - struct fi_av_attr *attr, struct fid_av **av, - void *context); - -int fi_bgq_cq_open(struct fid_domain *dom, - struct fi_cq_attr *attr, - struct fid_cq **eq, void 
*context); - -int fi_bgq_endpoint(struct fid_domain *dom, struct fi_info *info, - struct fid_ep **ep, void *context); - -int fi_bgq_alloc_default_ep_attr(struct fi_ep_attr **ep_attr); - -int fi_bgq_check_ep_attr(const struct fi_ep_attr *attr); - -int fi_bgq_alloc_default_tx_attr(struct fi_tx_attr **tx_attr); -int fi_bgq_check_tx_attr(const struct fi_tx_attr *attr); - -int fi_bgq_alloc_default_rx_attr(struct fi_rx_attr **rx_attr); -int fi_bgq_check_rx_attr(const struct fi_rx_attr *attr); - -int fi_bgq_scalable_ep(struct fid_domain *dom, struct fi_info *info, - struct fid_ep **ep, void *context); - -int fi_bgq_cntr_open(struct fid_domain *domain, - struct fi_cntr_attr *attr, - struct fid_cntr **cntr, void *context); - -int fi_bgq_init_mr_ops(struct fi_bgq_domain *bgq_domain, struct fi_info *info); -int fi_bgq_finalize_mr_ops(struct fi_bgq_domain *bgq_domain); - -int fi_bgq_init_rma_ops(struct fi_bgq_ep *bgq_ep, struct fi_info *info); -int fi_bgq_enable_rma_ops(struct fi_bgq_ep *bgq_ep); -int fi_bgq_finalize_rma_ops(struct fi_bgq_ep *bgq_ep); - -int fi_bgq_init_msg_ops(struct fi_bgq_ep *bgq_ep, struct fi_info *info); -int fi_bgq_enable_msg_ops(struct fi_bgq_ep *bgq_ep); -int fi_bgq_finalize_msg_ops(struct fi_bgq_ep *bgq_ep); - -int fi_bgq_init_atomic_ops(struct fi_bgq_ep *bgq_ep, struct fi_info *info); -int fi_bgq_enable_atomic_ops(struct fi_bgq_ep *bgq_ep); -int fi_bgq_finalize_atomic_ops(struct fi_bgq_ep *bgq_ep); - -int fi_bgq_init_tagged_ops(struct fi_bgq_ep *bgq_ep, struct fi_info *info); -int fi_bgq_enable_tagged_ops(struct fi_bgq_ep *bgq_ep); -int fi_bgq_finalize_tagged_ops(struct fi_bgq_ep *bgq_ep); - -int fi_bgq_init_cm_ops(struct fid_ep *ep_fid, struct fi_info *info); -int fi_bgq_finalize_cm_ops(struct fi_bgq_ep *bgq_ep); - -int fi_bgq_bind_ep_stx(struct fi_bgq_ep *ep, - struct fi_bgq_stx *stx, uint64_t flags); -int fi_bgq_bind_ep_cq(struct fi_bgq_ep *bgq_ep, - struct fi_bgq_cq *bgq_cq, uint64_t flags); -int fi_bgq_bind_ep_cntr(struct fi_bgq_ep *bgq_ep, - 
struct fi_bgq_cntr *bgq_cntr, uint64_t flags); -int fi_bgq_bind_ep_mr(struct fi_bgq_ep *bgq_ep, - struct fi_bgq_mr *bgq_mr, uint64_t flags); -int fi_bgq_bind_ep_av(struct fi_bgq_ep *bgq_ep, - struct fi_bgq_av *bgq_av, uint64_t flags); - -#endif /* _FI_PROV_BGQ_INTERNAL_H_ */ diff --git a/prov/bgq/include/rdma/bgq/fi_bgq_compiler.h b/prov/bgq/include/rdma/bgq/fi_bgq_compiler.h deleted file mode 100644 index 05b5626b6b1..00000000000 --- a/prov/bgq/include/rdma/bgq/fi_bgq_compiler.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef _FI_PROV_BGQ_COMPILER_H_ -#define _FI_PROV_BGQ_COMPILER_H_ - - -#if (defined(__xlc__) || defined(__xlC__)) && !defined(__OPTIMIZE__) -#undef VECTOR_LOAD_NU -#undef VECTOR_STORE_NU -#define VECTOR_LOAD_NU(si,sb,f0) \ - do { \ - asm volatile("qvlfdx %0,%1,%2" : "=v" (f0) : "b" (si), "r" (sb)); \ - } while(0) -#define VECTOR_STORE_NU(si,sb,f0) \ - do { \ - asm volatile("qvstfdx %0,%1,%2" :: "v" (f0), "b" (si), "r" (sb) :"memory"); \ - } while(0) -#endif - -#if defined(__xlc__) || defined(__xlC__) -#ifdef __OPTIMIZE__ -#define DECLARE_FP_REGISTER(n) register double f##n asm("f" #n) -#else -#define DECLARE_FP_REGISTER(n) register vector4double f##n asm("v" #n) -#endif /* __OPTIMIZE__ */ -#else /* GNU */ -#define DECLARE_FP_REGISTER(n) register double f##n asm("fr" #n) -#endif - - -static inline -void qpx_memcpy64(void *dst, const void *src) { - - assert(dst); - assert(src); - assert(((uintptr_t)dst&0x1Full) == 0); - assert(((uintptr_t)src&0x1Full) == 0); - -#if defined(__VECTOR4DOUBLE__) - const vector4double v0 = vec_ld(0, (double*)src); - const vector4double v1 = vec_ld(32, (double*)src); - vec_st(v0, 0, (double*)dst); - vec_st(v1, 32, (double*)dst); -#elif defined(__OPTIMIZE__) -# if (defined(__xlc__) || defined(__xlC__)) -# error "xlc but not vector4double" -# else - DECLARE_FP_REGISTER(0); - DECLARE_FP_REGISTER(1); - VECTOR_LOAD_NU((void*)src, 0, f0); - VECTOR_LOAD_NU((void*)src, 32, f1); - VECTOR_STORE_NU(dst, 0, f0); - VECTOR_STORE_NU(dst, 32, f1); -# endif -#else - ((double*)dst)[0] = ((const double*)src)[0]; - ((double*)dst)[1] = ((const double*)src)[1]; - ((double*)dst)[2] = ((const double*)src)[2]; - ((double*)dst)[3] = ((const double*)src)[3]; - ((double*)dst)[4] = ((const double*)src)[4]; - ((double*)dst)[5] = ((const double*)src)[5]; - ((double*)dst)[6] = ((const double*)src)[6]; - ((double*)dst)[7] = ((const double*)src)[7]; -#endif -} - -#endif /* _FI_PROV_BGQ_COMPILER_H_ */ diff --git a/prov/bgq/include/rdma/bgq/fi_bgq_flight_recorder.h 
b/prov/bgq/include/rdma/bgq/fi_bgq_flight_recorder.h deleted file mode 100644 index 3f7db49d381..00000000000 --- a/prov/bgq/include/rdma/bgq/fi_bgq_flight_recorder.h +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - - -#ifndef PROV_BGQ_INCLUDE_FLIGHT_RECORDER_H -#define PROV_BGQ_INCLUDE_FLIGHT_RECORDER_H - -#include "rdma/bgq/fi_bgq_spi.h" - -#define FLIGHT_RECORDER_ENTRY_STRLEN (256-8-2) -#define FLIGHT_RECORDER_ENTRY_COUNT (1024) -#define FLIGHT_RECORDER_INTERVAL (1024*1024*1024) - -#ifdef FLIGHT_RECORDER_ENABLE -struct flight_recorder_entry { - uint64_t timebase; - uint16_t event; - //uint16_t line; - char str[FLIGHT_RECORDER_ENTRY_STRLEN]; -}; - -struct flight_recorder { - uint64_t last_dump; - unsigned count; - uint32_t rank; - int thread; - uint8_t pad[64-8-4-4-4]; - struct flight_recorder_entry entry[FLIGHT_RECORDER_ENTRY_COUNT]; -}; - -#define FLIGHT_RECORDER(fr, event_id, format, ...) \ -{ \ - unsigned count = (fr)->count; \ - struct flight_recorder_entry * next = &(fr)->entry[count]; \ - next->timebase = GetTimeBase(); \ - next->event = event_id; \ - snprintf((char *)next->str, FLIGHT_RECORDER_ENTRY_STRLEN, format, __VA_ARGS__); \ - next->str[FLIGHT_RECORDER_ENTRY_STRLEN-1] = 0x0; \ - (fr)->count = count+1; \ - if (count+1 == FLIGHT_RECORDER_ENTRY_COUNT) \ - flight_recorder_dump((fr)); \ -} - -static inline -void flight_recorder_init (struct flight_recorder * fr) { - fr->rank = Kernel_GetRank(); - fr->thread = Kernel_ProcessorID(); - fr->count = 0; - fr->last_dump = GetTimeBase(); -} - -static inline -void flight_recorder_dump (struct flight_recorder * fr) { - const unsigned count = fr->count; - if (count == 0) { - fr->last_dump = GetTimeBase(); - return; - } - - const unsigned rank = fr->rank; - const unsigned thread = fr->thread; - - - fprintf(stderr, "#FLIGHT_RECORDER |----timebase---| -rank -p- -event \"str...\"\n"); - - struct flight_recorder_entry * entry = &fr->entry[0]; - unsigned i; - for (i=0; icount = 0; - fr->last_dump = GetTimeBase(); -} - - -static inline -void flight_recorder_write (struct flight_recorder * fr, uint16_t event, uint16_t line, const char * str) { - const unsigned count = fr->count; - struct flight_recorder_entry * next = 
&fr->entry[count]; - - next->timebase = GetTimeBase(); - next->event = event; - if (str) { - strncpy(next->str, str, FLIGHT_RECORDER_ENTRY_STRLEN); - next->str[FLIGHT_RECORDER_ENTRY_STRLEN-1] = 0x0; - } else { - next->str[0] = 0x0; - } - - fr->count = count+1; - if (count+1 == FLIGHT_RECORDER_ENTRY_COUNT) { - flight_recorder_dump(fr); - } -} - -static inline -void flight_recorder_poll (struct flight_recorder * fr) { - if ((GetTimeBase() - fr->last_dump) > FLIGHT_RECORDER_INTERVAL) - flight_recorder_dump(fr); -} -#else -#define FLIGHT_RECORDER(fr, event_id, format, ...) -#endif - -#endif /* PROV_BGQ_INCLUDE_FLIGHT_RECORDER_H */ diff --git a/prov/bgq/include/rdma/bgq/fi_bgq_hwi.h b/prov/bgq/include/rdma/bgq/fi_bgq_hwi.h deleted file mode 100644 index 723ea1a236f..00000000000 --- a/prov/bgq/include/rdma/bgq/fi_bgq_hwi.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef _FI_PROV_BGQ_HWI_H_ -#define _FI_PROV_BGQ_HWI_H_ - -/* - * The bgq system software, specifically 'hwi/include/common/compiler_support.h', - * will define the __INLINE__ macro if it is not already defined to the following: - * - * #define __INLINE__ extern inline __attribute__((always_inline)) - * - * This is the non-portable "gnu 89" style which easily results in undefined - * symbols or multiple defined symbols when used by software coded to a more - * recent C standard. - * - * As a workaround the __INLINE__ macro will be defined to the more appropriate - * 'static inline' style only for the bgq system software includes and then - * undefined at the end of this file. This seems to fix the problem without - * requiring any changes to the installed bgq system software files. - */ -#ifdef __INLINE__ -#error __INLINE__ already defined! -#else -#define __INLINE__ static inline -#endif - -#include -#include -#include -#include -#include -#include -#include -#include - -#undef __INLINE__ - -#endif /* _FI_PROV_BGQ_HWI_H_ */ diff --git a/prov/bgq/include/rdma/bgq/fi_bgq_l2atomic.h b/prov/bgq/include/rdma/bgq/fi_bgq_l2atomic.h deleted file mode 100644 index 6544f7ca3df..00000000000 --- a/prov/bgq/include/rdma/bgq/fi_bgq_l2atomic.h +++ /dev/null @@ -1,760 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _FI_PROV_BGQ_L2ATOMIC_H_ -#define _FI_PROV_BGQ_L2ATOMIC_H_ - - -#include - -#include "rdma/bgq/fi_bgq_hwi.h" -#include "rdma/bgq/fi_bgq_spi.h" - -/* - * l2atomic lock access structure - */ -struct l2atomic_lock { - uintptr_t ticket_l2vaddr; - uintptr_t serving_l2vaddr; -}; - -/* - * l2atomic lock data structure - */ -struct l2atomic_lock_data { - volatile uint64_t ticket; - volatile uint64_t serving; -} __attribute((aligned(32))); - -static inline -void l2atomic_lock_initialize (struct l2atomic_lock * lock, struct l2atomic_lock_data * data) { - - uint32_t cnk_rc __attribute__ ((unused)); - cnk_rc = Kernel_L2AtomicsAllocate((void*)data, sizeof(struct l2atomic_lock_data)); - assert(0==cnk_rc); - - lock->ticket_l2vaddr = (uintptr_t)&data->ticket; - lock->serving_l2vaddr = (uintptr_t)&data->serving; - - L2_AtomicStore((volatile uint64_t *)lock->ticket_l2vaddr, 0); - L2_AtomicStore((volatile uint64_t *)lock->serving_l2vaddr, 0); -}; - -static inline -uint64_t l2atomic_lock_acquire (struct l2atomic_lock * lock) { - - const uint64_t ticket = L2_AtomicLoadIncrement((volatile uint64_t *)lock->ticket_l2vaddr); - while (L2_AtomicLoad((volatile uint64_t *)lock->serving_l2vaddr) != ticket); - - return ticket; -}; - -static inline -void l2atomic_lock_release (struct l2atomic_lock * lock) { - L2_AtomicStoreAdd((volatile uint64_t *)lock->serving_l2vaddr, 1); -}; - -static inline -uint64_t l2atomic_lock_depth (struct l2atomic_lock * lock) { - return L2_AtomicLoad((volatile uint64_t *)lock->ticket_l2vaddr) - - L2_AtomicLoad((volatile uint64_t *)lock->serving_l2vaddr); -}; - -static inline -uint64_t l2atomic_lock_isbusy (struct l2atomic_lock * lock) { - return (l2atomic_lock_depth(lock) != 0); -}; - - -#define L2ATOMIC_FIFO_CONSUMER_CONTENTION_LEVEL_NONE (0) -#define L2ATOMIC_FIFO_CONSUMER_CONTENTION_LEVEL_LOW (1) -#define L2ATOMIC_FIFO_CONSUMER_CONTENTION_LEVEL_HIGH (2) - -#ifndef L2ATOMIC_FIFO_CONSUMER_CONTENTION_LEVEL -#define 
L2ATOMIC_FIFO_CONSUMER_CONTENTION_LEVEL (L2ATOMIC_FIFO_CONSUMER_CONTENTION_LEVEL_HIGH) -#endif - -#define L2ATOMIC_FIFO_CONSUMER_ALGORITHM_L2BOUNDED (0) -#define L2ATOMIC_FIFO_CONSUMER_ALGORITHM_STBOUNDED (1) - -#ifndef L2ATOMIC_FIFO_CONSUMER_ALGORITHM -#define L2ATOMIC_FIFO_CONSUMER_ALGORITHM (L2ATOMIC_FIFO_CONSUMER_ALGORITHM_L2BOUNDED) -#endif - -// Change this default to L2ATOMIC_FIFO_CONSUMER_MULTIPLE ?? -#ifndef L2ATOMIC_FIFO_CONSUMER_SINGLE -#ifndef L2ATOMIC_FIFO_CONSUMER_MULTIPLE -#define L2ATOMIC_FIFO_CONSUMER_SINGLE -#endif -#endif - -#define L2ATOMIC_FIFO_PRODUCER_ALGORITHM_L2BOUNDED (0) -#define L2ATOMIC_FIFO_PRODUCER_ALGORITHM_RESERVE (1) - -#ifndef L2ATOMIC_FIFO_PRODUCER_ALGORITHM -#define L2ATOMIC_FIFO_PRODUCER_ALGORITHM (L2ATOMIC_FIFO_PRODUCER_ALGORITHM_L2BOUNDED) -#endif - -#define L2_CACHE_LINE_COUNT_UINT64 (L2_CACHE_LINE_SIZE >> 2) - -// Change this default to L2ATOMIC_FIFO_MSYNC_PRODUCER ?? -#ifndef L2ATOMIC_FIFO_MSYNC_CONSUMER -#ifndef L2ATOMIC_FIFO_MSYNC_PRODUCER -#define L2ATOMIC_FIFO_MSYNC_PRODUCER -#endif -#endif - -// Change this default to L2ATOMIC_FIFO_PRODUCER_STORE_FAST ?? 
-#ifndef L2ATOMIC_FIFO_PRODUCER_STORE_FAST -#ifndef L2ATOMIC_FIFO_PRODUCER_STORE_ATOMIC -#define L2ATOMIC_FIFO_PRODUCER_STORE_ATOMIC -#endif -#endif - -#ifndef L2ATOMIC_FIFO_CONSUMER_CLEAR_FAST -#ifndef L2ATOMIC_FIFO_CONSUMER_CLEAR_ATOMIC -#define L2ATOMIC_FIFO_CONSUMER_CLEAR_ATOMIC -#endif -#endif - -#ifndef L2ATOMIC_FIFO_BOUNDS_FAST -#ifndef L2ATOMIC_FIFO_BOUNDS_ATOMIC -#define L2ATOMIC_FIFO_BOUNDS_ATOMIC -#endif -#endif - - -/* - * l2atomic fifo access structures - */ -struct l2atomic_fifo_consumer { - uint64_t head; - uintptr_t bounds_l2vaddr; - uint64_t mask; - uintptr_t packet_base_l2vaddr; -}; -struct l2atomic_fifo_producer { - uint64_t mask; - uintptr_t packet_base_l2vaddr; - uintptr_t tail_l2vaddr; -#if L2ATOMIC_FIFO_PRODUCER_ALGORITHM == L2ATOMIC_FIFO_PRODUCER_ALGORITHM_RESERVE - uintptr_t bounds_l2vaddr; -#ifdef L2ATOMIC_FIFO_PRODUCER_ALGORITHM_RESERVE_CACHEBOUNDS - uint64_t local_bounds; -#endif -#endif -} __attribute__((__aligned__(L2_CACHE_LINE_SIZE))); -struct l2atomic_fifo { - struct l2atomic_fifo_consumer consumer; - struct l2atomic_fifo_producer producer; -}; - -/* - * l2atomic fifo data structure - */ -#if L2ATOMIC_FIFO_PRODUCER_ALGORITHM == L2ATOMIC_FIFO_PRODUCER_ALGORITHM_L2BOUNDED -struct l2atomic_fifo_data { - volatile uint64_t tail; - volatile uint64_t bounds; - - volatile uint64_t packet[0]; -} __attribute((aligned(32))); -#elif L2ATOMIC_FIFO_PRODUCER_ALGORITHM == L2ATOMIC_FIFO_PRODUCER_ALGORITHM_RESERVE -struct l2atomic_fifo_data { - volatile uint64_t tail __attribute__((__aligned__(L2_CACHE_LINE_SIZE))); /* producer rw, consumer na */ - uint64_t pad0[(L2_CACHE_LINE_SIZE-sizeof(uint64_t))/sizeof(uint64_t)]; - - volatile uint64_t bounds __attribute__((__aligned__(L2_CACHE_LINE_SIZE))); /* producer ro, consumer rw */ - uint64_t pad1[(L2_CACHE_LINE_SIZE-sizeof(uint64_t))/sizeof(uint64_t)]; - - volatile uint64_t packet[0]; /* producer wo, consumer rw */ - - uint64_t pad2[L2_CACHE_LINE_COUNT_UINT64<<1]; -} 
__attribute__((__aligned__(L2_CACHE_LINE_SIZE))); -#endif - - - -static inline -void l2atomic_fifo_enable (struct l2atomic_fifo_consumer * consumer, - struct l2atomic_fifo_producer * producer) { - - uint64_t npackets = producer->mask+1; - unsigned n; - for (n = 0; n < npackets; ++n) - L2_AtomicStore(&((uint64_t*)producer->packet_base_l2vaddr)[n], 0); - - consumer->head = 0; - L2_AtomicStore((void *)producer->tail_l2vaddr, 0); - L2_AtomicStore((void *)consumer->bounds_l2vaddr, npackets-L2_CACHE_LINE_COUNT_UINT64-1); -} - -static inline -void l2atomic_fifo_disable (struct l2atomic_fifo_consumer * consumer, - struct l2atomic_fifo_producer * producer) { - - L2_AtomicStore((void *)consumer->bounds_l2vaddr, 0); - -} - - -static inline -void l2atomic_fifo_initialize (struct l2atomic_fifo_consumer * consumer, - struct l2atomic_fifo_producer * producer, - struct l2atomic_fifo_data * data, - uint64_t npackets) { - - assert(consumer); - assert(producer); - assert(data); - assert(((uintptr_t)data & 0x01F) == 0); /* 32 byte aligned */ -#ifdef L2ATOMIC_FIFO_NPACKETS - npackets = L2ATOMIC_FIFO_NPACKETS; -#endif - assert(npackets >= (1 << 8)); /* 256 -- see consume16() */ - - assert((npackets == (1 << 3)) || /* 8 */ - (npackets == (1 << 4)) || /* 16 */ - (npackets == (1 << 5)) || /* 32 */ - (npackets == (1 << 6)) || /* 64 */ - (npackets == (1 << 7)) || /* 128 */ - (npackets == (1 << 8)) || /* 256 */ - (npackets == (1 << 9)) || /* 512 */ - (npackets == (1 << 10)) || /* 1k */ - (npackets == (1 << 11)) || /* 2k */ - (npackets == (1 << 12)) || /* 4k */ - (npackets == (1 << 13)) || /* 8k */ - (npackets == (1 << 14)) || /* 16k */ - (npackets == (1 << 15)) || /* 32k */ - (npackets == (1 << 16)) || /* 64k */ - (npackets == (1 << 17)) || /* 128k */ - (npackets == (1 << 18)) || /* 256k */ - (npackets == (1 << 19))); /* 512k */ - - consumer->mask = npackets-1; - consumer->bounds_l2vaddr = (uintptr_t)&data->bounds; - consumer->packet_base_l2vaddr = (uintptr_t)&data->packet[0]; - - 
producer->mask = npackets-1; - producer->tail_l2vaddr = (uintptr_t)&data->tail; - producer->packet_base_l2vaddr = (uintptr_t)&data->packet[0]; -#if L2ATOMIC_FIFO_PRODUCER_ALGORITHM == L2ATOMIC_FIFO_PRODUCER_ALGORITHM_RESERVE - producer->bounds_l2vaddr = (uintptr_t)&data->bounds; -#ifdef L2ATOMIC_FIFO_PRODUCER_ALGORITHM_RESERVE_CACHEBOUNDS - producer->local_bounds = npackets-L2_CACHE_LINE_COUNT_UINT64-1; -#endif -#endif - - uint32_t cnk_rc __attribute__ ((unused)); - cnk_rc = Kernel_L2AtomicsAllocate((void*)data, sizeof(struct l2atomic_fifo_data) + sizeof(uint64_t) * npackets); - assert(0==cnk_rc); - - l2atomic_fifo_enable(consumer, producer); - - return; -} - -static inline -int l2atomic_fifo_produce (struct l2atomic_fifo_producer * fifo, const uint64_t data) { - -#if L2ATOMIC_FIFO_PRODUCER_ALGORITHM != L2ATOMIC_FIFO_PRODUCER_ALGORITHM_L2BOUNDED - assert(0); -#endif - - const uint64_t tail = L2_AtomicLoadIncrementBounded((volatile uint64_t *)fifo->tail_l2vaddr); - if (tail != 0x8000000000000000ull) { -#ifdef L2ATOMIC_FIFO_NPACKETS - const uint64_t mask = L2ATOMIC_FIFO_NPACKETS-1; -#else - const uint64_t mask = fifo->mask; -#endif - const uint64_t offset = (tail & mask) << 0x03ull; -#ifdef L2ATOMIC_FIFO_PRODUCER_STORE_FAST - volatile uint64_t *pkt = (volatile uint64_t *)(fifo->packet_base_l2vaddr + offset); - *pkt = 0x8000000000000000ull | data; -#endif - { /* this "l1p flush" hack is only needed to flush *writes* from a processor cache to the memory system */ - volatile uint64_t *mu_register = - (volatile uint64_t *)(BGQ_MU_STATUS_CONTROL_REGS_START_OFFSET(0, 0) + - 0x030 - PHYMAP_PRIVILEGEDOFFSET); - *mu_register = 0; - } -#ifdef L2ATOMIC_FIFO_MSYNC_PRODUCER - ppc_msync(); -#endif -#ifdef L2ATOMIC_FIFO_PRODUCER_STORE_ATOMIC - L2_AtomicStore((volatile uint64_t *)(fifo->packet_base_l2vaddr + offset), - 0x8000000000000000ull | data); -#endif - return 0; - } - - return -1; -} - -static inline -int l2atomic_fifo_produce_wait (struct l2atomic_fifo_producer * fifo, const 
uint64_t data) { - -#if L2ATOMIC_FIFO_PRODUCER_ALGORITHM == L2ATOMIC_FIFO_PRODUCER_ALGORITHM_L2BOUNDED - while (0 != l2atomic_fifo_produce(fifo, data)); - return 0; - - -#elif L2ATOMIC_FIFO_PRODUCER_ALGORITHM == L2ATOMIC_FIFO_PRODUCER_ALGORITHM_RESERVE - const uint64_t tail = L2_AtomicLoadIncrement((volatile uint64_t *)fifo->tail_l2vaddr); - uint64_t bounds = 0; - volatile uint64_t * const bounds_l2vaddr = (volatile uint64_t * const)fifo->bounds_l2vaddr; -#ifdef L2ATOMIC_FIFO_NPACKETS - const uint64_t mask = L2ATOMIC_FIFO_NPACKETS-1; -#else - const uint64_t mask = fifo->mask; -#endif -#ifdef L2ATOMIC_FIFO_PRODUCER_ALGORITHM_RESERVE_CACHEBOUNDS - const uint64_t local_bounds = fifo->local_bounds; - if (local_bounds < tail) { - const uint64_t offset = (tail & mask) << 0x03ull; -#ifdef L2ATOMIC_FIFO_PRODUCER_STORE_FAST - uint64_t *pkt = (volatile uint64_t *)(fifo->packet_base_l2vaddr + offset); - *pkt = 0x8000000000000000ull | data; -#endif - { /* this "l1p flush" hack is only needed to flush *writes* from a processor cache to the memory system */ - volatile uint64_t *mu_register = - (volatile uint64_t *)(BGQ_MU_STATUS_CONTROL_REGS_START_OFFSET(0, 0) + - 0x030 - PHYMAP_PRIVILEGEDOFFSET); - *mu_register = 0; - } -#ifdef L2ATOMIC_FIFO_MSYNC_PRODUCER - ppc_msync(); -#endif -#ifdef L2ATOMIC_FIFO_PRODUCER_STORE_ATOMIC - L2_AtomicStore((volatile uint64_t *)(fifo->packet_base_l2vaddr + offset), -#endif 0x8000000000000000ull | data); - } else { - -#endif - -#ifdef L2ATOMIC_FIFO_PRODUCER_CHECK_BOUNDS_FAST - while ((bounds = *bounds_l2vaddr) < tail); -#endif -#ifdef L2ATOMIC_FIFO_PRODUCER_CHECK_BOUNDS_ATOMIC - while ((bounds = L2_AtomicLoad((volatile uint64_t *)bounds_l2vaddr)) < tail); -#endif - - const uint64_t offset = (tail & mask) << 0x03ull; -#ifdef L2ATOMIC_FIFO_PRODUCER_STORE_FAST - uint64_t *pkt = (volatile uint64_t *)(fifo->packet_base_l2vaddr + offset); - *pkt = 0x8000000000000000ull | data; -#endif - { /* this "l1p flush" hack is only needed to flush *writes* from a 
processor cache to the memory system */ - volatile uint64_t *mu_register = - (volatile uint64_t *)(BGQ_MU_STATUS_CONTROL_REGS_START_OFFSET(0, 0) + - 0x030 - PHYMAP_PRIVILEGEDOFFSET); - *mu_register = 0; - } -#ifdef L2ATOMIC_FIFO_MSYNC_PRODUCER - ppc_msync(); -#endif -#ifdef L2ATOMIC_FIFO_PRODUCER_STORE_ATOMIC - L2_AtomicStore((volatile uint64_t *)(fifo->packet_base_l2vaddr + offset), - 0x8000000000000000ull | data); -#endif -#ifdef L2ATOMIC_FIFO_PRODUCER_ALGORITHM_RESERVE_CACHEBOUNDS - fifo->local_bounds = bounds; - } -#endif - return 0; -#endif -} - -static inline -int l2atomic_fifo_consume (struct l2atomic_fifo_consumer * fifo, uint64_t * data) { - -#if L2ATOMIC_FIFO_CONSUMER_ALGORITHM == L2ATOMIC_FIFO_CONSUMER_ALGORITHM_STBOUNDED - uint64_t * bounds_l2vaddr = (uint64_t *)fifo->bounds_l2vaddr; - const uint64_t bounds = *bounds_l2vaddr; -#endif - const uint64_t head = fifo->head; -#ifdef L2ATOMIC_FIFO_NPACKETS - const uint64_t mask = L2ATOMIC_FIFO_NPACKETS-1; -#else - const uint64_t mask = fifo->mask; -#endif - const uint64_t offset = (head & mask) << 0x03ull; -#ifdef L2ATOMIC_FIFO_CONSUMER_CLEAR_FAST - volatile uint64_t *pkt = (volatile uint64_t *)(fifo->packet_base_l2vaddr + offset); - const uint64_t value = *pkt; -#else -#ifdef L2ATOMIC_FIFO_CONSUMER_CLEAR_ATOMIC - const uint64_t value = L2_AtomicLoadClear((volatile uint64_t *)(fifo->packet_base_l2vaddr + offset)); -#endif -#endif - if (value & 0x8000000000000000ull) { -#ifdef L2ATOMIC_FIFO_CONSUMER_CLEAR_FAST - *pkt = 0; -#endif - *data = value & (~0x8000000000000000ull); - fifo->head = head + 1; -#if L2ATOMIC_FIFO_CONSUMER_ALGORITHM == L2ATOMIC_FIFO_CONSUMER_ALGORITHM_L2BOUNDED - L2_AtomicStoreAdd((volatile uint64_t *)fifo->bounds_l2vaddr, 1); -#elif L2ATOMIC_FIFO_CONSUMER_ALGORITHM == L2ATOMIC_FIFO_CONSUMER_ALGORITHM_STBOUNDED - *bounds_l2vaddr = bounds + 1; -#endif -#ifdef L2ATOMIC_FIFO_MSYNC_CONSUMER - ppc_msync(); -#endif - return 0; - } - - return -1; -} - -static inline -unsigned l2atomic_fifo_consume16 
(struct l2atomic_fifo_consumer * fifo, uint64_t * data) { - -#ifdef L2ATOMIC_FIFO_CONSUMER_SINGLE - return (0 == l2atomic_fifo_consume(fifo, data)); - -#else -#ifdef L2ATOMIC_FIFO_CONSUMER_MULTIPLE - const uint64_t head_counter = fifo->head; -#ifdef L2ATOMIC_FIFO_NPACKETS - const uint64_t fifo_size = L2ATOMIC_FIFO_NPACKETS; - const uint64_t mask = L2ATOMIC_FIFO_NPACKETS-1; -#else - const uint64_t mask = fifo->mask; -#endif - const uint64_t head_offset = head_counter & mask; - const uint64_t end_offset = head_offset + L2_CACHE_LINE_COUNT_UINT64; - const uint64_t count = L2_CACHE_LINE_COUNT_UINT64 - (((~mask) & end_offset) * (end_offset - fifo_size)); - - volatile uint64_t *ptr = (volatile uint64_t *)(fifo->packet_base_l2vaddr + (head_offset << 0x03ull)); - - uint64_t i, num_processed = 0; -#ifdef DO_CACHE - uint64_t cache[L2_CACHE_LINE_COUNT_UINT64*2]; - ppc_msync(); - for (i = 0; i < count; ++i) { - cache[i] = L2_AtomicLoad(ptr + i); - } -#endif - for (i = 0; i < count; ++i) { -#ifdef DO_CACHE - if (cache[i] & 0x8000000000000000ull) { - data[i] = cache[i] & (~0x8000000000000000ull); - ++num_processed; - L2_AtomicStore(ptr + i, 0); - } else { - break; - } -#else - const uint64_t value = ptr[i]; - //const uint64_t value = L2_AtomicLoadClear(ptr + i); - if (value & 0x8000000000000000ull) { - data[i] = value & (~0x8000000000000000ull); - ++num_processed; - ptr[i] = 0; - } else { - break; - } -#endif - - } - - fifo->head += num_processed; -#ifdef L2ATOMIC_FIFO_BOUNDS_ATOMIC - L2_AtomicStoreAdd((volatile uint64_t *)fifo->bounds_l2vaddr, num_processed); -#else -#ifdef L2ATOMIC_FIFO_BOUNDS_FAST - uint64_t *bounds = (uint64_t *)fifo->bounds_l2vaddr; - ++(*bounds); -#endif -#endif -#ifdef L2ATOMIC_FIFO_MSYNC_CONSUMER - ppc_msync(); -#endif - - return num_processed; -#endif -#endif -} - -static inline -int l2atomic_fifo_drain (struct l2atomic_fifo_consumer * consumer, - struct l2atomic_fifo_producer * producer, uint64_t * data) { - - /* The fifo must be disabled before it can 
be drained */ - assert(0 == L2_AtomicLoad((volatile uint64_t *)consumer->bounds_l2vaddr)); - - const uint64_t head = consumer->head; - const uint64_t tail = L2_AtomicLoad((void *)producer->tail_l2vaddr); - - if (head == tail) { - /* The fifo is empty */ - return -1; - } - -#ifdef L2ATOMIC_FIFO_NPACKETS - const uint64_t mask = L2ATOMIC_FIFO_NPACKETS-1; -#else - const uint64_t mask = consumer->mask; -#endif - const uint64_t offset = (head & mask) << 0x03ull; - - /* Spin until the next packet is ready */ - uint64_t value = 0; - volatile uint64_t *ptr = (volatile uint64_t *)(consumer->packet_base_l2vaddr + offset); - while (0 == (0x8000000000000000ull & (value = L2_AtomicLoadClear(ptr)))); - - *data = value & (~0x8000000000000000ull); - consumer->head = head + 1; - -#ifdef L2ATOMIC_FIFO_MSYNC_CONSUMER - ppc_msync(); -#endif - return 0; -} - - - -static inline -int l2atomic_fifo_peek (struct l2atomic_fifo_consumer * fifo, uint64_t * data) { - - const uint64_t head = fifo->head; - const uint64_t mask = fifo->mask; - const uint64_t offset = (head & mask) << 0x03ull; - const uint64_t value = L2_AtomicLoad((volatile uint64_t *)(fifo->packet_base_l2vaddr + offset)); - if (value & 0x8000000000000000ull) { - *data = value & (~0x8000000000000000ull); - return 0; - } - - return -1; -} - -static inline -void l2atomic_fifo_advance (struct l2atomic_fifo_consumer * fifo) { - - const uint64_t head = fifo->head; - const uint64_t mask = fifo->mask; - const uint64_t offset = (head & mask) << 0x03ull; - L2_AtomicLoadClear((volatile uint64_t *)(fifo->packet_base_l2vaddr + offset)); - fifo->head = head + 1; - L2_AtomicStoreAdd((volatile uint64_t *)fifo->bounds_l2vaddr, 1); - - return; -} - -static inline -unsigned l2atomic_fifo_isempty (struct l2atomic_fifo_consumer * fifo) { - - const uint64_t head = fifo->head; - const uint64_t mask = fifo->mask; - const uint64_t offset = (head & mask) << 0x03ull; - const uint64_t value = L2_AtomicLoad((volatile uint64_t *)(fifo->packet_base_l2vaddr + 
offset)); - - return (value & 0x8000000000000000ull) == 0; -} - - -/* - * l2atomic counter data structure - */ -struct l2atomic_counter_data { - volatile uint64_t value; -} __attribute((aligned(8))); - -/* - * l2atomic counter access structure - */ -struct l2atomic_counter { - uintptr_t value_l2vaddr; -}; - -static inline -void l2atomic_counter_initialize (struct l2atomic_counter * counter, - struct l2atomic_counter_data * data) { - - assert(counter); - assert(data); - assert(((uintptr_t)data & 0x07) == 0); /* 8 byte aligned */ - - uint32_t cnk_rc __attribute__ ((unused)); - cnk_rc = Kernel_L2AtomicsAllocate((void*)data, sizeof(struct l2atomic_counter_data)); - assert(0==cnk_rc); - - counter->value_l2vaddr = (uintptr_t)&data->value; - - L2_AtomicStore(&data->value, 0); -}; - -static inline -uint64_t l2atomic_counter_increment (struct l2atomic_counter * counter) { - return L2_AtomicLoadIncrement((volatile uint64_t *)(counter->value_l2vaddr)); -}; - -static inline -uint64_t l2atomic_counter_decrement (struct l2atomic_counter * counter) { - return L2_AtomicLoadDecrement((volatile uint64_t *)(counter->value_l2vaddr)); -}; - -static inline -uint64_t l2atomic_counter_get (struct l2atomic_counter * counter) { - return L2_AtomicLoad((volatile uint64_t *)(counter->value_l2vaddr)); -} - -static inline -void l2atomic_counter_set (struct l2atomic_counter * counter, uint64_t new_value) { - L2_AtomicStore((volatile uint64_t *)(counter->value_l2vaddr), new_value); -}; - -static inline -void l2atomic_counter_add (struct l2atomic_counter * counter, uint64_t add_value) { - L2_AtomicStoreAdd((volatile uint64_t *)(counter->value_l2vaddr), add_value); -}; - -/* - * l2atomic bounded counter data structure - */ -struct l2atomic_boundedcounter_data { - volatile uint64_t value; - volatile uint64_t bounds; -} __attribute((aligned(32))); - -/* - * l2atomic bounded counter access structure - */ -struct l2atomic_boundedcounter { - uintptr_t value_l2vaddr; - uintptr_t bounds_l2vaddr; -}; - 
-static inline -void l2atomic_boundedcounter_initialize (struct l2atomic_boundedcounter * counter, - struct l2atomic_boundedcounter_data * data, - uint64_t initial_bounds) { - - assert(counter); - assert(data); - assert(((uintptr_t)data & 0x01F) == 0); /* 32 byte aligned */ - - uint32_t cnk_rc __attribute__ ((unused)); - cnk_rc = Kernel_L2AtomicsAllocate((void*)data, sizeof(struct l2atomic_boundedcounter_data)); - assert(0==cnk_rc); - - counter->value_l2vaddr = (uintptr_t)&data->value; - counter->bounds_l2vaddr = (uintptr_t)&data->bounds; - - L2_AtomicStore(&data->value, 0); - L2_AtomicStore(&data->bounds, initial_bounds); -}; - -static inline -uint64_t l2atomic_boundedcounter_increment_value (struct l2atomic_boundedcounter * counter) { - return L2_AtomicLoadIncrementBounded((volatile uint64_t *)(counter->value_l2vaddr)); -}; - -static inline -void l2atomic_boundedcounter_add_bounds (struct l2atomic_boundedcounter * counter, uint64_t add_value) { - L2_AtomicStoreAdd((volatile uint64_t *)(counter->bounds_l2vaddr), add_value); -}; - -/* - * l2atomic barrier data structure - */ -struct l2atomic_barrier_data { - volatile __attribute__((aligned(L1D_CACHE_LINE_SIZE))) uint64_t start; - uint64_t participants; - volatile __attribute__((aligned(L1D_CACHE_LINE_SIZE))) uint64_t count; -} __attribute__((aligned(L1D_CACHE_LINE_SIZE))); - -/* - * l2atomic barrier access structure - */ -struct l2atomic_barrier { - uintptr_t start_l2vaddr; - uintptr_t count_l2vaddr; - uint64_t participants; -}; - -static inline -void l2atomic_barrier_initialize (struct l2atomic_barrier * barrier, - struct l2atomic_barrier_data * data, - uint64_t participants) { - - assert(barrier); - assert(data); - assert(((uintptr_t)data & (L1D_CACHE_LINE_SIZE-1)) == 0); - - uint32_t cnk_rc __attribute__ ((unused)); - cnk_rc = Kernel_L2AtomicsAllocate((void*)data, sizeof(struct l2atomic_barrier_data)); - assert(0==cnk_rc); - - barrier->start_l2vaddr = (uintptr_t)&data->start; - barrier->count_l2vaddr = 
(uintptr_t)&data->count; - barrier->participants = participants; - - data->participants = participants; - L2_AtomicStore(&data->start, 0); - L2_AtomicStore(&data->count, 0); -} - -static inline -void l2atomic_barrier_clone (struct l2atomic_barrier * barrier, - struct l2atomic_barrier_data * data) { - - assert(barrier); - assert(data); - assert(((uintptr_t)data & (L1D_CACHE_LINE_SIZE-1)) == 0); - - barrier->start_l2vaddr = (uintptr_t)&data->start; - barrier->count_l2vaddr = (uintptr_t)&data->count; - barrier->participants = data->participants; -} - -static inline -void l2atomic_barrier_enter (struct l2atomic_barrier * barrier) { - - volatile uint64_t * start_l2vaddr = (volatile uint64_t *)(barrier->start_l2vaddr); - - const uint64_t start = L2_AtomicLoad(start_l2vaddr); - const uint64_t current = L2_AtomicLoadIncrement((volatile uint64_t *)(barrier->count_l2vaddr)) + 1; - const uint64_t target = start + barrier->participants; - - if (current == target) { - L2_AtomicStoreAdd(start_l2vaddr, barrier->participants); - } else { - while (L2_AtomicLoad(start_l2vaddr) < current); - } -} - - -#endif /* _FI_PROV_BGQ_L2ATOMIC_H_ */ diff --git a/prov/bgq/include/rdma/bgq/fi_bgq_mu.h b/prov/bgq/include/rdma/bgq/fi_bgq_mu.h deleted file mode 100644 index c75c59252b4..00000000000 --- a/prov/bgq/include/rdma/bgq/fi_bgq_mu.h +++ /dev/null @@ -1,816 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef _FI_PROV_BGQ_MU_H_ -#define _FI_PROV_BGQ_MU_H_ - -#include -#include - -#include "rdma/bgq/fi_bgq_hwi.h" -#include "rdma/bgq/fi_bgq_spi.h" - -#include "rdma/fi_errno.h" // only for FI_* errno return codes -#include "rdma/fabric.h" // only for 'fi_addr_t' ... 
which is a typedef to uint64_t - -#include "rdma/bgq/fi_bgq_l2atomic.h" - -#define FI_BGQ_MU_RECFIFO_BYTES (0x01 << 20) /* 1 MB == 32K entries */ -#define FI_BGQ_MU_RECFIFO_TAGGED_BYTES (0x01 << 20) /* 1 MB == 32K entries */ -#define FI_BGQ_MU_RECFIFO_OTHER_BYTES (0x01 << 15) /* 32 KB == 1K entries */ - -#define FI_BGQ_MU_BAT_SUBGROUP_GLOBAL (65) -#define FI_BGQ_MU_BAT_ID_GLOBAL (FI_BGQ_MU_BAT_SUBGROUP_GLOBAL * BGQ_MU_NUM_DATA_COUNTERS_PER_SUBGROUP) -#define FI_BGQ_MU_BAT_ID_COUNTER (FI_BGQ_MU_BAT_ID_GLOBAL+1) -#define FI_BGQ_MU_BAT_ID_ZERO (FI_BGQ_MU_BAT_ID_COUNTER+1) -#define FI_BGQ_MU_BAT_ID_ONE (FI_BGQ_MU_BAT_ID_ZERO+1) -#define FI_BGQ_MU_BAT_ID_BLACKHOLE (FI_BGQ_MU_BAT_ID_ONE+1) - -#define FI_BGQ_MUHWI_DESTINATION_MASK (0x073CF3C1ul) - -// #define FI_BGQ_TRACE 1 - - -typedef uint32_t fi_bgq_uid_t; - -union fi_bgq_uid { - fi_bgq_uid_t fi; - uint32_t raw32b; - uint16_t raw16b[2]; - uint8_t raw8b[4]; - MUHWI_Destination_t muhwi; /* see fi_bgq_uid_get_destination() */ - struct { - uint32_t rx_msb : 4; /* see fi_bgq_uid_get_rx(); see NOTE_MU_RECFIFO */ - uint32_t a : 4; /* 4 bits needed for the A dimention of the torus on Mira and Sequoia */ - uint32_t unused_1: 2; - uint32_t b : 4; /* 4 bits needed for the B dimention of the torus on Mira and Sequoia */ - uint32_t unused_2: 2; - uint32_t c : 4; /* 4 bits needed for the C dimention of the torus on Mira and Sequoia */ - uint32_t unused_3: 2; - uint32_t d : 4; /* 4 bits needed for the D dimention of the torus on Mira and Sequoia */ - uint32_t rx_lsb : 5; /* see fi_bgq_uid_get_rx(); see NOTE_MU_RECFIFO */ - uint32_t e : 1; /* 1 bit needed for the E dimention of the torus on all BG/Q systems */ - } __attribute__((__packed__)); -} __attribute__((__packed__)); - -static inline void -fi_bgq_uid_dump (char * prefix, const fi_bgq_uid_t * const uid) { - - const union fi_bgq_uid tmp = {.fi=*uid}; - uint32_t * ptr = (uint32_t *)uid; - fprintf(stderr, "%s [%p]: %08x\n", prefix, ptr, *(ptr)); - fprintf(stderr, "%s fi_bgq_uid_t 
dump at %p\n", prefix, (void*)uid); - - fprintf(stderr, "%s .rx_msb .................................... %u\n", prefix, tmp.rx_msb); - - fprintf(stderr, "%s .a ......................................... %u\n", prefix, tmp.a); - fprintf(stderr, "%s .unused_1 .................................. %u\n", prefix, tmp.unused_1); - fprintf(stderr, "%s .b ......................................... %u\n", prefix, tmp.b); - fprintf(stderr, "%s .unused_2 .................................. %u\n", prefix, tmp.unused_2); - fprintf(stderr, "%s .c ......................................... %u\n", prefix, tmp.c); - fprintf(stderr, "%s .unused_3 .................................. %u\n", prefix, tmp.unused_3); - fprintf(stderr, "%s .d ......................................... %u\n", prefix, tmp.d); - fprintf(stderr, "%s .rx_lsb .................................... %u\n", prefix, tmp.rx_lsb); - fprintf(stderr, "%s .e ......................................... %u\n", prefix, tmp.e); - - fflush(stderr); -} - -#define FI_BGQ_UID_DUMP(uid) \ -({ \ - char prefix[1024]; \ - snprintf(prefix, 1023, "%s:%s():%d", __FILE__, __func__, __LINE__); \ - fi_bgq_uid_dump(prefix, (uid)); \ -}) - -static inline -fi_bgq_uid_t fi_bgq_uid_set_rx (const fi_bgq_uid_t uid, const uint32_t rx) { - return (uid & 0x0FFFFFC1u) | /* clear rx_msb and rx_lsb */ - ((rx << 23) & 0xF0000000u) | /* set rx_msb */ - ((rx << 1) & 0x0000003Eu); /* set rx_lsb */ -} - -static inline -uint32_t fi_bgq_uid_get_rx (const fi_bgq_uid_t uid) { - return ((uid & 0xF0000000u) >> 23) | ((uid & 0x0000003Eu) >> 1); -} - -static inline -fi_bgq_uid_t fi_bgq_uid_set_destination (const fi_bgq_uid_t uid, const MUHWI_Destination_t destination) { - const union fi_bgq_uid tmp = {.muhwi=destination}; - return (uid & 0xF0C30C3Eu) | tmp.fi; /* clear torus fields (a,b,c,d,e); then set */ -} - -static inline -MUHWI_Destination_t fi_bgq_uid_get_destination (const fi_bgq_uid_t uid) { - /* clear all bits except the torus coordinates */ - const union fi_bgq_uid 
tmp = {.fi=(uid & 0x0F3CF3C1ul)}; - return tmp.muhwi; -} - -static inline -fi_bgq_uid_t fi_bgq_uid_create (const MUHWI_Destination_t destination, const uint32_t rx) { - const union fi_bgq_uid tmp = {.muhwi=destination}; - return fi_bgq_uid_set_rx(tmp.fi, rx); -} - - -union fi_bgq_addr { - fi_addr_t fi; - uint64_t raw64b; - struct { - union fi_bgq_uid uid; - uint16_t unused_0; - uint16_t fifo_map; /* only the 12 msb are used */ - } __attribute__((__packed__)); -} __attribute__((__packed__)); - -static inline void -fi_bgq_addr_dump (char * prefix, fi_addr_t * addr) { - - const union fi_bgq_addr tmp = {.fi=*addr}; - uint32_t * ptr = (uint32_t *)addr; - fprintf(stderr, "%s [%p]: %08x %08x\n", prefix, ptr, *(ptr), *(ptr+1)); - fprintf(stderr, "%s bgq addr dump at %p\n", prefix, (void*)addr); - - fprintf(stderr, "%s .uid.rx_msb .................................... %u\n", prefix, tmp.uid.rx_msb); - fprintf(stderr, "%s .uid.a ......................................... %u\n", prefix, tmp.uid.a); - fprintf(stderr, "%s .uid.unused_1 .................................. %u\n", prefix, tmp.uid.unused_1); - fprintf(stderr, "%s .uid.b ......................................... %u\n", prefix, tmp.uid.b); - fprintf(stderr, "%s .uid.unused_2 .................................. %u\n", prefix, tmp.uid.unused_2); - fprintf(stderr, "%s .uid.c ......................................... %u\n", prefix, tmp.uid.c); - fprintf(stderr, "%s .uid.unused_3 .................................. %u\n", prefix, tmp.uid.unused_3); - fprintf(stderr, "%s .uid.d ......................................... %u\n", prefix, tmp.uid.d); - fprintf(stderr, "%s .uid.rx_lsb .................................... %u\n", prefix, tmp.uid.rx_lsb); - fprintf(stderr, "%s .uid.e ......................................... %u\n", prefix, tmp.uid.e); - - fprintf(stderr, "%s .unused_0 ................................... %u\n", prefix, tmp.unused_0); - fprintf(stderr, "%s .fifo_map ................................... 
%u\n", prefix, tmp.fifo_map); - - fflush(stderr); -} - -#define FI_BGQ_ADDR_DUMP(addr) \ -({ \ - char prefix[1024]; \ - snprintf(prefix, 1023, "%s:%s():%d", __FILE__, __func__, __LINE__); \ - fi_bgq_addr_dump(prefix, (addr)); \ -}) - -static inline -fi_bgq_uid_t fi_bgq_addr_uid (const fi_addr_t addr) { - return ((union fi_bgq_addr*)&addr)->uid.fi; -} - - -static inline -uint64_t fi_bgq_addr_rec_fifo_id (const fi_addr_t addr) { - return (uint64_t) fi_bgq_uid_get_rx(fi_bgq_addr_uid(addr)); -} - -static inline -uint64_t fi_bgq_addr_get_fifo_map (const fi_addr_t addr) { - return addr & 0x000000000000FFFFu; -} - -static inline -uint64_t fi_bgq_addr_is_local (const fi_addr_t addr) { - return (addr & (MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL0 | MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL1)) != 0; -} - - -static inline -uint32_t fi_bgq_addr_calculate_base_rx (const uint32_t process_id, const uint32_t processes_per_node) { - - /* only one domain per process is currently supported */ - const uint32_t domain_id = 0; - const uint32_t domains_per_process = 1; - - /* only one endpoint per domain is currently supported */ - const uint32_t endpoint_id = 0; - const uint32_t endpoints_per_domain = 1; - - /* each rx uses one mu reception fifo; See NOTE_MU_RECFIFO */ - const uint32_t rx_per_node = - ((BGQ_MU_NUM_REC_FIFO_GROUPS-1) * BGQ_MU_NUM_REC_FIFOS_PER_GROUP); - - const uint32_t rx_per_process = rx_per_node / processes_per_node; - const uint32_t rx_per_domain = rx_per_process / domains_per_process; - const uint32_t rx_per_endpoint = rx_per_domain / endpoints_per_domain; - - return (rx_per_process * process_id) + (rx_per_domain * domain_id) + (rx_per_endpoint * endpoint_id); -} - -static inline -fi_addr_t fi_bgq_addr_create (const MUHWI_Destination_t destination, - const uint64_t fifo_map, const uint32_t rx) { - - union fi_bgq_addr tmp; - tmp.uid = (union fi_bgq_uid) fi_bgq_uid_create(destination, rx); - tmp.unused_0=0; - tmp.fifo_map=fifo_map; - return tmp.fi; -} - - - -#define 
FI_BGQ_MU_PACKET_TYPE_TAG (0x01ul<<1) -#define FI_BGQ_MU_PACKET_TYPE_UNUSED (0x01ul<<2) -#define FI_BGQ_MU_PACKET_TYPE_EAGER (0x01ul<<3) -#define FI_BGQ_MU_PACKET_TYPE_RENDEZVOUS (0x01ul<<4) -#define FI_BGQ_MU_PACKET_TYPE_RMA (0x01ul<<5) -#define FI_BGQ_MU_PACKET_TYPE_ATOMIC (0x01ul<<6) -#define FI_BGQ_MU_PACKET_TYPE_ACK (0x01ul<<7) - -/** - * \brief MU packet header - * - * The MU packet header is consumed in many places and sometimes overloaded - * for cache and memory allocation reasons. - */ -union fi_bgq_mu_packet_hdr { - - /* The torus packet header is 32 bytes. see: hwi/include/bqc/MU_PacketHeader.h */ - MUHWI_PacketHeader_t muhwi; - - struct { - /* The point-to-point header occupies bytes 0-11 of the packet header - * see: MUHWI_Pt2PtNetworkHeader_t in hwi/include/bqc/MU_Pt2PtNetworkHeader.h */ - uint64_t reserved_0; - uint32_t reserved_1; - - /* The message unit header occupies bytes 12-31 of the packet header - * see: MUHWI_MessageUnitHeader_t in hwi/include/bqc/MU_MessageUnitHeader.h */ - uint16_t reserved_2 : 10; - uint16_t unused_0 : 6; - uint8_t unused_1[18]; - } __attribute__((__packed__)) raw; - - struct { - uint64_t reserved_0; - uint32_t reserved_1; - uint16_t reserved_2 : 10; - uint16_t unused_0 : 6; - - uint8_t unused_1; - uint8_t packet_type; /* FI_BGQ_MU_PACKET_TYPE_*; all 8 bits are needed */ - uint64_t unused_2[2]; - } __attribute__((__packed__)) common; - - struct { - uint64_t reserved_0; - uint64_t reserved_1 : 32; - uint64_t reserved_2 : 10; - uint64_t unused_0 : 14; - uint64_t reserved_3 : 8; /* a.k.a. 
common::packet_type */ - - MUHWI_Destination_t origin; - uint32_t cntr_paddr_rsh3b; /* 34b paddr, 8 byte aligned; See: NOTE_MU_PADDR */ - uint64_t is_local; /* only 1 bit is needed */ - } __attribute__((__packed__)) completion; - - struct { - uint64_t reserved_0; - uint32_t reserved_1; - - union { - struct { - uint32_t reserved_2 : 10; - uint32_t is_local : 1; /* used to specify fifo map; only needed for FI_BGQ_REMOTE_COMPLETION */ - uint32_t unused_0 : 3; - uint32_t message_length : 10; /* 0..512 bytes of payload data */ - uint32_t reserved_3 : 8; /* a.k.a. common::packet_type */ - } __attribute__((__packed__)) send; - - struct { - uint16_t reserved_2 : 10; - uint16_t is_local : 1; /* used to specify fifo map */ - uint16_t niov_minus_1 : 5; /* 1..31 mu iov elements in payload data */ - uint8_t rget_inj_fifo_id; /* 0..255 */ - uint8_t reserved_3; /* a.k.a. common::packet_type */ - } __attribute__((__packed__)) rendezvous; - }; - - union fi_bgq_uid uid; - uint32_t immediate_data; - uint64_t ofi_tag; - - } __attribute__((__packed__)) pt2pt; - - struct { - uint64_t reserved_0; - uint32_t reserved_1; - uint16_t reserved_2 : 10; - uint16_t unused_0 : 6; - - uint8_t unused_1; - uint8_t reserved_3; /* a.k.a. common::packet_type (FI_BGQ_MU_PACKET_TYPE_ACK) */ - uint64_t unused_2; - uintptr_t context; - } __attribute__((__packed__)) ack; - - struct { - uint64_t reserved_0; - uint32_t reserved_1; - uint16_t reserved_2 : 10; - uint16_t unused_0 : 6; - - uint8_t ndesc; /* 0..8 descriptors */ - uint8_t reserved_3; /* a.k.a. 
common::packet_type (FI_BGQ_MU_PACKET_TYPE_RMA) */ - uint64_t nbytes : 16; /* 0..512 bytes */ - uint64_t unused_2 : 11; - uint64_t offset : 37; /* FI_MR_BASIC uses virtual address as the offset */ - uint64_t key; /* only 16 bits needed for FI_MR_SCALABLE but need up to 34 for FI_MR_BASIC vaddr-paddr delta */ - } __attribute__((__packed__)) rma; - - struct { - uint64_t reserved_0; - uint32_t reserved_1; - uint32_t reserved_2 : 10; - uint32_t unused_0 : 5; - uint32_t cntr_bat_id : 9; - uint32_t reserved_3 : 8; /* a.k.a. common::packet_type (FI_BGQ_MU_PACKET_TYPE_ATOMIC) */ - union { - uint32_t origin_raw; - MUHWI_Destination_t origin; - struct { - uint32_t is_fetch : 1; - uint32_t dt : 4; /* enum fi_datatype */ - uint32_t a : 3; /* only 3 bits are needed for Mira */ - uint32_t is_local : 1; - uint32_t do_cntr : 1; - uint32_t b : 4; /* only 4 bits are needed for Mira */ - uint32_t unused_1 : 2; - uint32_t c : 4; /* only 4 bits are needed for Mira */ - uint32_t unused_2 : 2; - uint32_t d : 4; /* only 4 bits are needed for Mira */ - uint32_t op : 5; /* enum fi_op */ - uint32_t e : 1; /* only 1 bit is needed for Mira */ - } __attribute__((__packed__)); - }; - uint16_t nbytes_minus_1; /* only 9 bits needed */ - uint16_t key; /* only 16 bits needed for FI_MR_SCALABLE and not used for FI_MR_BASIC */ - uint64_t offset; /* FI_MR_BASIC needs 34 bits */ - } __attribute__((__packed__)) atomic; - -} __attribute__((__aligned__(32))); - -struct fi_bgq_mu_iov { - uint64_t message_length; - uint64_t src_paddr; -}; - -struct fi_bgq_mu_fetch_metadata { - uint64_t dst_paddr; - uint64_t cq_paddr; - uint64_t fifo_map; - uint64_t unused; -}; - -union fi_bgq_mu_packet_payload { - uint8_t byte[512]; - struct { - uint32_t unused; - uint32_t cntr_paddr_rsh3b; /* 34b paddr, 8 byte aligned; See: NOTE_MU_PADDR */ - uint64_t fifo_map; - struct fi_bgq_mu_iov mu_iov[31]; - } rendezvous; - struct { - struct fi_bgq_mu_fetch_metadata metadata; - uint8_t data[512-sizeof(struct 
fi_bgq_mu_fetch_metadata)]; - } atomic_fetch; -} __attribute__((__aligned__(32))); - -struct fi_bgq_mu_packet { - union { - struct fi_bgq_mu_packet *next; /* first 8 bytes of the header is unused */ - union fi_bgq_mu_packet_hdr hdr; - }; - union fi_bgq_mu_packet_payload payload; -} __attribute__((__aligned__(32))); - - -static inline uint64_t -fi_bgq_mu_packet_type_get (struct fi_bgq_mu_packet * pkt) { - return pkt->hdr.common.packet_type; -} - -static inline void -fi_bgq_mu_packet_type_set (union fi_bgq_mu_packet_hdr * hdr, const uint64_t packet_type) { - hdr->common.packet_type = (uint8_t)packet_type; -} - -static inline uint64_t -fi_bgq_mu_packet_get_fifo_map (struct fi_bgq_mu_packet * pkt) { - return pkt->payload.rendezvous.fifo_map; -} - -static inline void -fi_bgq_mu_packet_set_fifo_map (struct fi_bgq_mu_packet * pkt, const uint64_t fifo_map) { - pkt->payload.rendezvous.fifo_map = fifo_map; - return; -} - -#define FI_BGQ_MU_DESCRIPTOR_UPDATE_BAT_TYPE_NONE (0) -#define FI_BGQ_MU_DESCRIPTOR_UPDATE_BAT_TYPE_DST (1) -#define FI_BGQ_MU_DESCRIPTOR_UPDATE_BAT_TYPE_SRC (2) - -union fi_bgq_mu_descriptor { - - /* The mu descriptor is 64 bytes. see: hwi/include/bqc/MU_Descriptor.h */ - MUHWI_Descriptor_t muhwi_descriptor; - - struct { - uint16_t key_msb; - uint8_t update_type; /* FI_BGQ_MU_DESCRIPTOR_UPDATE_BAT_TYPE_* */ - uint8_t unused_0 : 7; - uint8_t reserved_0 : 1; - - uint32_t unused_1 : 31; - uint32_t reserved_1 : 1; - - uint64_t Pa_Payload; /* 37 lsb are used */ - uint64_t Message_Length; /* 37 lsb (really, 35) are used */ - uint64_t key_lsb : 48; - uint64_t reserved_2 : 16; /* a.k.a. 
Torus_FIFO_Map */ - - union { - MUHWI_Pt2PtNetworkHeader_t muhwi_pt2ptnetworkheader; - uint32_t reserved_3[3]; - }; - - union { - MUHWI_MessageUnitHeader_t muhwi_messageunitheader; - struct { - uint64_t rec_payload_base_address_id : 10; - uint64_t reserved_4 : 1; - uint64_t put_offset : 37; - uint64_t unused_2 : 6; - uint64_t rec_counter_base_address_id : 10; - uint32_t reserved_5[3]; - } __attribute__((__packed__)) dput; - }; - - } __attribute__((__packed__)) rma; - -} __attribute__((__aligned__(64))); - - -static inline void -dump_descriptor (char * prefix, MUHWI_Descriptor_t * desc) { - - uint32_t * ptr = (uint32_t *)desc; - fprintf(stderr, "%s [%p]: %08x %08x %08x %08x\n", prefix, ptr, *(ptr), *(ptr+1), *(ptr+2), *(ptr+3)); ptr+=4; - fprintf(stderr, "%s [%p]: %08x %08x %08x %08x\n", prefix, ptr, *(ptr), *(ptr+1), *(ptr+2), *(ptr+3)); ptr+=4; - fprintf(stderr, "%s [%p]: %08x %08x %08x %08x\n", prefix, ptr, *(ptr), *(ptr+1), *(ptr+2), *(ptr+3)); ptr+=4; - fprintf(stderr, "%s [%p]: %08x %08x %08x %08x\n", prefix, ptr, *(ptr), *(ptr+1), *(ptr+2), *(ptr+3)); ptr+=4; - - fprintf(stderr, "%s descriptor dump at %p\n", prefix, (void*)desc); - fprintf(stderr, "%s .Half_Word0.Prefetch_Only .................. %d\n", prefix, desc->Half_Word0.Prefetch_Only); - fprintf(stderr, "%s .Half_Word1.Interrupt ...................... %d\n", prefix, desc->Half_Word1.Interrupt); - fprintf(stderr, "%s .Pa_Payload ................................ 0x%016lx\n", prefix, desc->Pa_Payload); - fprintf(stderr, "%s .Message_Length ............................ %lu\n", prefix, desc->Message_Length); - fprintf(stderr, "%s .Torus_FIFO_Map ............................ 0x%016lx\n", prefix, desc->Torus_FIFO_Map); - fprintf(stderr, "%s .PacketHeader.NetworkHeader.pt2pt\n", prefix); - fprintf(stderr, "%s .Data_Packet_Type ........................ 0x%02x\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type); - fprintf(stderr, "%s .Hints ................................... 
0x%02x\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Hints); - fprintf(stderr, "%s .Byte2.Hint_E_plus ....................... %hhu\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Byte2.Hint_E_plus); - fprintf(stderr, "%s .Byte2.Hint_E_minus ...................... %hhu\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Byte2.Hint_E_minus); - fprintf(stderr, "%s .Byte2.Route_To_IO_Node .................. %hhu\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Byte2.Route_To_IO_Node); - fprintf(stderr, "%s .Byte2.Return_From_IO_Node ............... %hhu\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Byte2.Return_From_IO_Node); - fprintf(stderr, "%s .Byte2.Dynamic ........................... %hhu\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Byte2.Dynamic); - fprintf(stderr, "%s .Byte2.Deposit ........................... %hhu\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Byte2.Deposit); - fprintf(stderr, "%s .Byte2.Interrupt ......................... %hhu\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Byte2.Interrupt); - fprintf(stderr, "%s .Byte3.Virtual_channel ................... %hhu\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Virtual_channel); - fprintf(stderr, "%s .Byte3.Zone_Routing_Id ................... %hhu\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Zone_Routing_Id); - fprintf(stderr, "%s .Byte3.Stay_On_Bubble .................... %hhu\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Stay_On_Bubble); - fprintf(stderr, "%s .Destination.Destination.Reserved2 ....... %u\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Destination.Destination.Reserved2); - fprintf(stderr, "%s .Destination.Destination.A_Destination ... %u\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Destination.Destination.A_Destination); - fprintf(stderr, "%s .Destination.Destination.B_Destination ... 
%u\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Destination.Destination.B_Destination); - fprintf(stderr, "%s .Destination.Destination.C_Destination ... %u\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Destination.Destination.C_Destination); - fprintf(stderr, "%s .Destination.Destination.D_Destination ... %u\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Destination.Destination.D_Destination); - fprintf(stderr, "%s .Destination.Destination.E_Destination ... %u\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Destination.Destination.E_Destination); - fprintf(stderr, "%s .Byte8.Packet_Type ....................... %hhu\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Packet_Type); - fprintf(stderr, "%s .Byte8.Reserved3 ......................... %hhu\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Reserved3); - fprintf(stderr, "%s .Byte8.Size .............................. %hhu\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Size); - fprintf(stderr, "%s .Injection_Info.Reserved4 ................ %hu\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Injection_Info.Reserved4); - fprintf(stderr, "%s .Injection_Info.Skip ..................... %hhu\n", prefix, desc->PacketHeader.NetworkHeader.pt2pt.Injection_Info.Skip); - if (desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Packet_Type == 0) { - fprintf(stderr, "%s .PacketHeader.messageUnitHeader.Packet_Types\n", prefix); - fprintf(stderr, "%s .Memory_FIFO.Reserved1 ................... %hu\n", prefix, desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Reserved1); - fprintf(stderr, "%s .Memory_FIFO.Rec_FIFO_Id ................. %hu\n", prefix, desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id); - fprintf(stderr, "%s .Memory_FIFO.Unused1 ..................... %hu\n", prefix, desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Unused1); - fprintf(stderr, "%s .Memory_FIFO.Put_Offset_MSB .............. 
0x%08hx\n", prefix, desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Put_Offset_MSB); - fprintf(stderr, "%s .Memory_FIFO.Put_Offset_LSB .............. 0x%08x\n", prefix, desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Put_Offset_LSB); - } else if (desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Packet_Type == 1) { - fprintf(stderr, "%s .PacketHeader.messageUnitHeader.Packet_Types\n", prefix); - fprintf(stderr, "%s .Direct_Put.Rec_Payload_Base_Address_Id .. %hu\n", prefix, desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Payload_Base_Address_Id); - fprintf(stderr, "%s .Direct_Put.Pacing ....................... %hu\n", prefix, desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Pacing); - fprintf(stderr, "%s .Direct_Put.Put_Offset_MSB ............... 0x%08hx\n", prefix, desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Put_Offset_MSB); - fprintf(stderr, "%s .Direct_Put.Put_Offset_LSB ............... 0x%08x\n", prefix, desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Put_Offset_LSB); - fprintf(stderr, "%s .Direct_Put.Unused1 ...................... %hu\n", prefix, desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Unused1); - fprintf(stderr, "%s .Direct_Put.Rec_Counter_Base_Address_Id .. %hu\n", prefix, desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Counter_Base_Address_Id); - fprintf(stderr, "%s .Direct_Put.Counter_Offset ............... 0x%016lx\n", prefix, desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Counter_Offset); - } else if (desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Packet_Type == 2) { - fprintf(stderr, "%s .PacketHeader.messageUnitHeader.Packet_Types\n", prefix); - fprintf(stderr, "%s .Remote_Get.Rget_Inj_FIFO_Id ............. 
%hu\n", prefix, desc->PacketHeader.messageUnitHeader.Packet_Types.Remote_Get.Rget_Inj_FIFO_Id); - } - fflush(stderr); -} - -#define DUMP_DESCRIPTOR(desc) \ -({ \ - char prefix[1024]; \ - snprintf(prefix, 1023, "%s:%s():%d", __FILE__, __func__, __LINE__); \ - dump_descriptor(prefix, (desc)); \ -}) - - - -#define FI_BGQ_MU_TORUS_INJFIFO_COUNT (10) -#define FI_BGQ_MU_LOCAL_INJFIFO_COUNT (6) - - - - -/* expensive .. not for critical path! */ -static inline -uint32_t fi_bgq_mu_calculate_fifo_map(BG_CoordinateMapping_t local, - BG_CoordinateMapping_t remote, Personality_t * personality, - uint64_t dcr_value) { - - /* calculate the signed coordinate difference between the source and - * destination torus coordinates - */ - ssize_t dA = (ssize_t)remote.a - (ssize_t)local.a; - ssize_t dB = (ssize_t)remote.b - (ssize_t)local.b; - ssize_t dC = (ssize_t)remote.c - (ssize_t)local.c; - ssize_t dD = (ssize_t)remote.d - (ssize_t)local.d; - ssize_t dE = (ssize_t)remote.e - (ssize_t)local.e; - - /* select the fifo based on the t coordinate only if local */ - if ((dA | dB | dC | dD | dE) == 0) { - return (remote.t & 0x01) ? MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL0 : MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL1; - } - - /* select either A- or A+ if communicating only along the A dimension */ - if ((dB | dC | dD | dE) == 0) { - if (ND_ENABLE_TORUS_DIM_A & personality->Network_Config.NetFlags) { - uint64_t cutoff; - if (dA > 0) { - cutoff = ND_500_DCR__CTRL_CUTOFFS__A_PLUS_get(dcr_value); - return (remote.a > cutoff) ? MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_AM : MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_AP; - } else { - cutoff = ND_500_DCR__CTRL_CUTOFFS__A_MINUS_get(dcr_value); - return (remote.a < cutoff) ? MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_AP : MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_AM; - } - } else { - return (dA > 0) ? 
MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_AP : MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_AM; - } - } - - /* select either B- or B+ if communicating only along the B dimension */ - if ((dA | dC | dD | dE) == 0) { - if (ND_ENABLE_TORUS_DIM_B & personality->Network_Config.NetFlags) { - uint64_t cutoff; - if (dB > 0) { - cutoff = ND_500_DCR__CTRL_CUTOFFS__B_PLUS_get(dcr_value); - return (remote.b > cutoff) ? MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_BM : MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_BP; - } else { - cutoff = ND_500_DCR__CTRL_CUTOFFS__B_MINUS_get(dcr_value); - return (remote.b < cutoff) ? MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_BP : MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_BM; - } - } else { - return (dB > 0) ? MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_BP : MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_BM; - } - } - - /* select either C- or C+ if communicating only along the C dimension */ - if ((dA | dB | dD | dE) == 0) { - if (ND_ENABLE_TORUS_DIM_C & personality->Network_Config.NetFlags) { - uint64_t cutoff; - if (dC > 0) { - cutoff = ND_500_DCR__CTRL_CUTOFFS__C_PLUS_get(dcr_value); - return (remote.c > cutoff) ? MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_CM : MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_CP; - } else { - cutoff = ND_500_DCR__CTRL_CUTOFFS__C_MINUS_get(dcr_value); - return (remote.c < cutoff) ? MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_CP : MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_CM; - } - } else { - return (dC > 0) ? MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_CP : MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_CM; - } - } - - /* select either D- or D+ if communicating only along the D dimension */ - if ((dA | dB | dC | dE) == 0) { - if (ND_ENABLE_TORUS_DIM_D & personality->Network_Config.NetFlags) { - uint64_t cutoff; - if (dD > 0) { - cutoff = ND_500_DCR__CTRL_CUTOFFS__D_PLUS_get(dcr_value); - return (remote.d > cutoff) ? MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_DM : MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_DP; - } else { - cutoff = ND_500_DCR__CTRL_CUTOFFS__D_MINUS_get(dcr_value); - return (remote.d < cutoff) ? 
MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_DP : MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_DM; - } - } else { - return (dD > 0) ? MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_DP : MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_DM; - } - } - - /* select either E- or E+ if communicating only along the E dimension */ - if ((dA | dB | dC | dD) == 0) { - /* the maximum 'e' dimension size is 2 - and is a torus */ - return (remote.t & 0x01) ? MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_EP : MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_EM; - } - - /* communicating along diagonal */ - /* TODO - OPTIMIZE - round-robin the fifo picking based on destination */ - if (dA > 0) { - return MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_AP; - } else if (dA < 0) - return MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_AM; - - if (dB > 0) { - return MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_BP; - } else if (dB < 0) - return MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_BM; - - if (dC > 0) { - return MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_CP; - } else if (dC < 0) - return MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_CM; - - if (dD > 0) { - return MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_DP; - } else if(dD < 0) - return MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_DM; - - assert(0); - return 0xFFFFu; -} - -static inline -uint32_t fi_bgq_mu_calculate_fifo_map_single (BG_CoordinateMapping_t local, BG_CoordinateMapping_t remote) { - - Personality_t personality; - int rc = Kernel_GetPersonality(&personality, sizeof(Personality_t)); - if (rc) return 0; /* error!? 
*/ - - uint64_t dcr_value = DCRReadUser(ND_500_DCR(CTRL_CUTOFFS)); - - return fi_bgq_mu_calculate_fifo_map(local, remote, &personality, dcr_value); -} - - -#define FI_BGQ_DEBUG_MEMORY() \ -({ \ - fi_bgq_debug_memory(__FILE__, __func__, __LINE__); \ -}) - -static inline -void fi_bgq_debug_memory (char * file, const char * func, int line) -{ - uint64_t shared, persist, heapavail, stackavail, stack, heap, guard, mmap; - - Kernel_GetMemorySize(KERNEL_MEMSIZE_SHARED, &shared); - Kernel_GetMemorySize(KERNEL_MEMSIZE_PERSIST, &persist); - Kernel_GetMemorySize(KERNEL_MEMSIZE_HEAPAVAIL, &heapavail); - Kernel_GetMemorySize(KERNEL_MEMSIZE_STACKAVAIL, &stackavail); - Kernel_GetMemorySize(KERNEL_MEMSIZE_STACK, &stack); - Kernel_GetMemorySize(KERNEL_MEMSIZE_HEAP, &heap); - Kernel_GetMemorySize(KERNEL_MEMSIZE_GUARD, &guard); - Kernel_GetMemorySize(KERNEL_MEMSIZE_MMAP, &mmap); - - fprintf(stderr, "%s:%s():%d Allocated heap: %.2f MB, avail. heap: %.2f MB\n", file, func, line, (double)heap/(1024*1024),(double)heapavail/(1024*1024)); - fprintf(stderr, "%s:%s():%d Allocated stack: %.2f MB, avail. 
stack: %.2f MB\n", file, func, line, (double)stack/(1024*1024), (double)stackavail/(1024*1024)); - fprintf(stderr, "%s:%s():%d Memory: shared: %.2f MB, persist: %.2f MB, guard: %.2f MB, mmap: %.2f MB\n", file, func, line, (double)shared/(1024*1024), (double)persist/(1024*1024), (double)guard/(1024*1024), (double)mmap/(1024*1024)); - - return; - } - -static inline int fi_bgq_lock_if_required (struct l2atomic_lock *lock, - int required) -{ - if (required) l2atomic_lock_acquire(lock); - return 0; -} - -static inline int fi_bgq_unlock_if_required (struct l2atomic_lock *lock, - int required) -{ - if (required) l2atomic_lock_release(lock); - return 0; -} - -static inline uint64_t fi_bgq_cnk_vaddr2paddr(const void * vaddr, size_t len, uint64_t * paddr) -{ - Kernel_MemoryRegion_t cnk_mr; - uint32_t cnk_rc; - cnk_rc = Kernel_CreateMemoryRegion(&cnk_mr, (void *)vaddr, len); - if (cnk_rc) return cnk_rc; - - *paddr = (uint64_t)cnk_mr.BasePa + ((uint64_t)vaddr - (uint64_t)cnk_mr.BaseVa); - return 0; -} - -enum fi_bgq_msync_type { - FI_BGQ_MSYNC_TYPE_RW, - FI_BGQ_MSYNC_TYPE_RO, - FI_BGQ_MSYNC_TYPE_WO, - FI_BGQ_MSYNC_TYPE_LAST -}; - -static inline void fi_bgq_msync(const enum fi_bgq_msync_type type) -{ - if (type == FI_BGQ_MSYNC_TYPE_RW || type == FI_BGQ_MSYNC_TYPE_WO) { - /* this "l1p flush" hack is only needed to flush *writes* - * from a processor cache to the memory system */ - volatile uint64_t *mu_register = - (volatile uint64_t *)(BGQ_MU_STATUS_CONTROL_REGS_START_OFFSET(0, 0) + - 0x030 - PHYMAP_PRIVILEGEDOFFSET); - *mu_register = 0; - } - ppc_msync(); -} - -static inline void fi_bgq_mu_checks () -{ - assert(sizeof(union fi_bgq_mu_packet_hdr) == sizeof(MUHWI_PacketHeader_t)); - assert(sizeof(union fi_bgq_addr) == sizeof(fi_addr_t)); - assert(sizeof(union fi_bgq_mu_descriptor) == sizeof(MUHWI_Descriptor_t)); -} - -/* ************************************************************************** * - * - * NOTE_MU_PADDR - The MU HWI documentation for MU descriptors states that - * 
the physical address used for MU operations is 37 bits. However, the MSB - * of this 37 bit physical address is used to indicate an atomic address and - * will always be zero for normal physical addresses, and the maximum - * physical address space depends on the amount of DRAM installed on the - * compute nodes - which is only 16 GB. The physical addresses for main memory - * begin at 0x0, and are contiguous until 64 GB, which means that the two - * MSBs of the 36 bit physical address will always be zero. - * - * Unaligned non-atomic physical addresses can be safely specified using - * only 34 bits in MU operations. - * - * Atomic physical addresses must be 8-byte-aligned which means that the - * corresponding non-atomic physical address will always have the three - * LSBs set to zero. A non-atomic physical address to be used for an atomic - * physical address can be right-shifted 3 bits and can be safely specified - * using only 31 bits when transferred as metadata. For MU operations the - * physical address will be expanded to 37 bits as expected by the hardware. - * - * - MUHWI_Descriptor_t (hwi/include/bqc/MU_Descriptor.h) - * - MUHWI_MessageUnitHeader_t (hwi/include/bqc/MU_MessageUnitHeader.h) - * - MUHWI_ATOMIC_ADDRESS_INDICATOR (hwi/include/bqc/MU_Addressing.h) - * - PHYMAP_MAXADDR_MAINMEMORY (hwi/include/bqc/PhysicalMap.h) - * - * ************************************************************************** */ - -/* ************************************************************************** * - * - * NOTE_MU_RECFIFO - There are 16 "user" MU groups (not including the 17th MU - * group which is normally used by cnk and agents) and there are 16 MU - * reception fifos in each group (BGQ_MU_NUM_REC_FIFOS_PER_GROUP). There is - * one MU reception fifo allocated to each ofi receive context. There are a - * maximum of 256 ofi receive contexts on a compute node which must be - * allocated between all processes, domains, and endpoints. 
- * - * When configured to support "scalable endpoints" multiple ofi receive - * contexts will be associated with each endpoint. - * - * The ofi address contains 9 bits for the receive context (a.k.a. mu reception - * fifo id) for the application agent (17th core) reception fifos. - * - * ************************************************************************** */ - -#endif /* _FI_PROV_BGQ_MU_H_ */ diff --git a/prov/bgq/include/rdma/bgq/fi_bgq_node.h b/prov/bgq/include/rdma/bgq/fi_bgq_node.h deleted file mode 100644 index 7a1cd7e8e3f..00000000000 --- a/prov/bgq/include/rdma/bgq/fi_bgq_node.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef _FI_PROV_BGQ_NODE_H_ -#define _FI_PROV_BGQ_NODE_H_ - -#include "rdma/bgq/fi_bgq_l2atomic.h" - -#include "rdma/bgq/fi_bgq_spi.h" - -#define FI_BGQ_NODE_NUM_USER_SUBGROUPS (BGQ_MU_NUM_FIFO_SUBGROUPS_PER_NODE-2) /* subgroups 66 and 67 are privileged */ -#define FI_BGQ_NODE_BAT_SIZE (FI_BGQ_NODE_NUM_USER_SUBGROUPS * BGQ_MU_NUM_DATA_COUNTERS_PER_SUBGROUP) -#define FI_BGQ_NODE_APPLICATION_BAT_SIZE ((BGQ_MU_NUM_FIFO_GROUPS-1) * BGQ_MU_NUM_DATA_COUNTERS_PER_GROUP) /* cnk and agents use group 16 */ - -struct fi_bgq_node { - void *shm_ptr; - void *abs_ptr; - struct { - struct l2atomic_counter allocator; - } counter; - struct { - struct l2atomic_counter allocator; - } lock; - struct l2atomic_barrier barrier; - uint32_t leader_tcoord; - uint32_t is_leader; - struct { - volatile uint64_t *shadow; /* in shared memory */ - volatile uint64_t l2_cntr_paddr[FI_BGQ_NODE_APPLICATION_BAT_SIZE]; - MUSPI_BaseAddressTableSubGroup_t subgroup[FI_BGQ_NODE_BAT_SIZE]; - } bat; -}; - -int fi_bgq_node_init (struct fi_bgq_node * node); - -int fi_bgq_node_mu_lock_init (struct fi_bgq_node * node, struct l2atomic_lock * lock); - -int fi_bgq_node_counter_allocate (struct fi_bgq_node * node, struct l2atomic_counter * counter); - -int fi_bgq_node_lock_allocate (struct fi_bgq_node * node, struct l2atomic_lock * lock); - -uint64_t fi_bgq_node_bat_allocate (struct fi_bgq_node * node, struct l2atomic_lock * lock); - -void fi_bgq_node_bat_free (struct fi_bgq_node * node, struct l2atomic_lock * lock, uint64_t index); - -void fi_bgq_node_bat_write (struct fi_bgq_node * node, struct l2atomic_lock * lock, uint64_t index, uint64_t offset); - -void fi_bgq_node_bat_clear (struct fi_bgq_node * node, struct l2atomic_lock * lock, 
uint64_t index); - -static inline -uint64_t fi_bgq_node_bat_read (struct fi_bgq_node * node, uint64_t index) { - - assert(index < FI_BGQ_NODE_BAT_SIZE); - return node->bat.shadow[index]; -} - -#endif /* _FI_PROV_BGQ_NODE_H_ */ diff --git a/prov/bgq/include/rdma/bgq/fi_bgq_progress.h b/prov/bgq/include/rdma/bgq/fi_bgq_progress.h deleted file mode 100644 index 505784a709e..00000000000 --- a/prov/bgq/include/rdma/bgq/fi_bgq_progress.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef _FI_PROV_BGQ_PROGRESS_H_ -#define _FI_PROV_BGQ_PROGRESS_H_ - - -#include "rdma/bgq/fi_bgq_l2atomic.h" - -#define MAX_ENDPOINTS (128) /* TODO - get this value from somewhere else */ - -struct fi_bgq_ep; -struct fi_bgq_domain; -union fi_bgq_context; - -struct fi_bgq_progress { - - uint64_t tag_ep_count; - uint64_t msg_ep_count; - uint64_t all_ep_count; - volatile uint64_t enabled; - struct l2atomic_fifo_consumer consumer; - uint64_t pad_0[8]; - - /* == L2 CACHE LINE == */ - - struct fi_bgq_ep *tag_ep[MAX_ENDPOINTS]; - struct fi_bgq_ep *msg_ep[MAX_ENDPOINTS]; - struct fi_bgq_ep *all_ep[MAX_ENDPOINTS]; - - /* == L2 CACHE LINE == */ - - volatile uint64_t active; - struct l2atomic_fifo_producer producer; - struct fi_bgq_domain *bgq_domain; - pthread_t pthread; - uint64_t pad_1[10]; - -} __attribute__((__aligned__(L2_CACHE_LINE_SIZE))); - -int fi_bgq_progress_init (struct fi_bgq_domain *bgq_domain, const uint64_t max_threads); -int fi_bgq_progress_enable (struct fi_bgq_domain *bgq_domain, const unsigned id); -int fi_bgq_progress_disable (struct fi_bgq_domain *bgq_domain, const unsigned id); -int fi_bgq_progress_fini (struct fi_bgq_domain *bgq_domain); - -int fi_bgq_progress_ep_enable (struct fi_bgq_progress *thread, struct fi_bgq_ep *bgq_ep); -int fi_bgq_progress_ep_disable (struct fi_bgq_ep *bgq_ep); - -#endif /* _FI_PROV_BGQ_PROGRESS_H_ */ diff --git a/prov/bgq/include/rdma/bgq/fi_bgq_rx.h b/prov/bgq/include/rdma/bgq/fi_bgq_rx.h deleted file mode 100644 index efeba7f8532..00000000000 --- a/prov/bgq/include/rdma/bgq/fi_bgq_rx.h +++ /dev/null @@ -1,1572 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef _FI_PROV_BGQ_RX_H_ -#define _FI_PROV_BGQ_RX_H_ - -#define FI_BGQ_UEPKT_BLOCKSIZE (1024) -#define PROCESS_RFIFO_MAX 64 - -// #define FI_BGQ_TRACE 1 - -/* forward declaration - see: prov/bgq/src/fi_bgq_atomic.c */ -void fi_bgq_rx_atomic_dispatch (void * buf, void * addr, size_t nbytes, - enum fi_datatype dt, enum fi_op op); - -static inline -void dump_uepkt_queue (struct rx_operation * rx) { - - fprintf(stderr, "%s:%s():%d rx=%p, head=%p, tail=%p\n", __FILE__, __func__, __LINE__, rx, rx->ue.head, rx->ue.tail); - struct fi_bgq_mu_packet * pkt = rx->ue.head; - while (pkt) { - fprintf(stderr, "%s:%s():%d --> %p\n", __FILE__, __func__, __LINE__, pkt); - pkt = pkt->next; - } -} - -static inline -void complete_atomic_operation (struct fi_bgq_ep * bgq_ep, struct fi_bgq_mu_packet * pkt) { - - const uint32_t dt = pkt->hdr.atomic.dt; - const uint32_t op = pkt->hdr.atomic.op; - - const uint64_t is_fetch = pkt->hdr.atomic.is_fetch; - const uint64_t do_cntr = pkt->hdr.atomic.do_cntr; - const uint64_t cntr_bat_id = pkt->hdr.atomic.cntr_bat_id; - - const uint16_t nbytes = pkt->hdr.atomic.nbytes_minus_1 + 1; - - void * addr; - if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_BASIC) { - addr = (void*) pkt->hdr.atomic.offset; - } else if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_SCALABLE) { - const uint16_t key = pkt->hdr.atomic.key; - const uint64_t offset = pkt->hdr.atomic.offset; - const uintptr_t base = (uintptr_t)fi_bgq_domain_bat_read_vaddr(bgq_ep->rx.poll.bat, key); - addr = (void*)(base + offset); - } - else { - assert(0); - } - - const uint32_t origin = pkt->hdr.atomic.origin_raw & FI_BGQ_MUHWI_DESTINATION_MASK; - - if (is_fetch || (op == FI_ATOMIC_READ)) { - - const uint64_t dst_paddr = pkt->payload.atomic_fetch.metadata.dst_paddr; - const uint64_t cq_paddr = pkt->payload.atomic_fetch.metadata.cq_paddr; - const uint64_t fifo_map = pkt->payload.atomic_fetch.metadata.fifo_map; - - MUHWI_Descriptor_t * desc = - fi_bgq_spi_injfifo_tail_wait(&bgq_ep->rx.poll.injfifo); - - 
qpx_memcpy64((void*)desc, (const void *)&bgq_ep->rx.poll.atomic_dput_model); - - desc->PacketHeader.NetworkHeader.pt2pt.Destination.Destination.Destination = origin; - desc->Torus_FIFO_Map = fifo_map; - - /* locate the payload lookaside slot */ - uint64_t payload_paddr = 0; - void * payload_vaddr = - fi_bgq_spi_injfifo_immediate_payload(&bgq_ep->rx.poll.injfifo, - desc, &payload_paddr); - desc->Pa_Payload = payload_paddr; - - /* copy the target data into the injection lookaside buffer */ - memcpy(payload_vaddr, (const void*) addr, nbytes); - desc->Message_Length = nbytes; - - MUSPI_SetRecPayloadBaseAddressInfo(desc, FI_BGQ_MU_BAT_ID_GLOBAL, dst_paddr); - if (cq_paddr != 0) { /* unlikely */ - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Counter_Offset = - MUSPI_GetAtomicAddress(cq_paddr, MUHWI_ATOMIC_OPCODE_STORE_ADD); - } - - fi_bgq_rx_atomic_dispatch((void*)&pkt->payload.atomic_fetch.data[0], addr, nbytes, dt, op); - - MUSPI_InjFifoAdvanceDesc(bgq_ep->rx.poll.injfifo.muspi_injfifo); - - } else { - - fi_bgq_rx_atomic_dispatch(&pkt->payload.byte[0], addr, nbytes, dt, op); - - /* - * cq completions (unlikely) are accomplished via a fence - * operation for non-fetch atomic operations - */ - } - - if (do_cntr) { /* likely -- TODO: change to *always* do a counter update?? */ - - const uint64_t is_local = pkt->hdr.atomic.is_local; - - MUHWI_Descriptor_t * desc = - fi_bgq_spi_injfifo_tail_wait(&bgq_ep->rx.poll.injfifo); - - qpx_memcpy64((void*)desc, (const void*)&bgq_ep->rx.poll.atomic_cntr_update_model[is_local]); - - desc->PacketHeader.NetworkHeader.pt2pt.Destination.Destination.Destination = origin; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Payload_Base_Address_Id = cntr_bat_id; - - MUSPI_InjFifoAdvanceDesc(bgq_ep->rx.poll.injfifo.muspi_injfifo); - } -} - - -/* The 'set_desc_payload_paddr' function sets an mu desc payload addr - * in one of two ways based on the mr mode. 
- * For FI_MR_SCALABLE is assumes that the base+offset is a - * virtual address, which then must be converted into a physical address. - * - * For FI_MR_BASIC will set offset-key as the physical address. - */ -static inline -void set_desc_payload_paddr (union fi_bgq_mu_descriptor * fi_mu_desc, struct fi_bgq_bat_entry * bat) { - - - const uint8_t rma_update_type = fi_mu_desc->rma.update_type; - - if (rma_update_type == FI_BGQ_MU_DESCRIPTOR_UPDATE_BAT_TYPE_DST) { - const uint64_t key_msb = fi_mu_desc->rma.key_msb; - const uint64_t key_lsb = fi_mu_desc->rma.key_lsb; - const uint64_t key = (key_msb << 48) | key_lsb; - uint64_t paddr = 0; - const uint64_t offset = fi_mu_desc->rma.dput.put_offset; - - if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_BASIC) { - paddr = offset-key; - } else if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_SCALABLE) { - - const uintptr_t base = (uintptr_t) fi_bgq_domain_bat_read_vaddr(bat, key); - fi_bgq_cnk_vaddr2paddr((const void *)(base+offset), 1, &paddr); - } - else { - assert(0); - } - -#ifdef FI_BGQ_TRACE -fprintf(stderr,"set_desc_payload_paddr rma_update_type == FI_BGQ_MU_DESCRIPTOR_UPDATE_BAT_TYPE_DST paddr is 0x%016lx\n",paddr); -#endif - MUSPI_SetRecPayloadBaseAddressInfo((MUHWI_Descriptor_t *)fi_mu_desc, - FI_BGQ_MU_BAT_ID_GLOBAL, paddr); - - } else if (rma_update_type == FI_BGQ_MU_DESCRIPTOR_UPDATE_BAT_TYPE_SRC) { - const uint64_t key_msb = fi_mu_desc->rma.key_msb; - const uint64_t key_lsb = fi_mu_desc->rma.key_lsb; - const uint64_t key = (key_msb << 48) | key_lsb; - const uint64_t offset = fi_mu_desc->rma.Pa_Payload; - if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_BASIC) { - fi_mu_desc->rma.Pa_Payload = offset-key; -#ifdef FI_BGQ_TRACE -fprintf(stderr,"set_desc_payload_paddr rma_update_type == FI_BGQ_MU_DESCRIPTOR_UPDATE_BAT_TYPE_SRC for FI_MR_BASIC fi_mu_desc->rma.Pa_Payload set to paddr 0x%016lx\n",(offset-key)); -fflush(stderr); -#endif - } else if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_SCALABLE) { - - const uintptr_t base = (uintptr_t) 
fi_bgq_domain_bat_read_vaddr(bat, key); - fi_bgq_cnk_vaddr2paddr((const void *)(base+offset), 1, &fi_mu_desc->rma.Pa_Payload); -#ifdef FI_BGQ_TRACE -fprintf(stderr,"set_desc_payload_paddr rma_update_type == FI_BGQ_MU_DESCRIPTOR_UPDATE_BAT_TYPE_SRC for FI_MR_SCALABLE fi_mu_desc->rma.Pa_Payload set to paddr 0x%016lx\n",fi_mu_desc->rma.Pa_Payload); -fflush(stderr); -#endif - } - else { - assert(0); - } - } else { - /* no update requested */ - } -} - -static inline -void complete_rma_operation (struct fi_bgq_ep * bgq_ep, struct fi_bgq_mu_packet * pkt) { - - struct fi_bgq_bat_entry * bat = bgq_ep->rx.poll.bat; - const uint64_t nbytes = pkt->hdr.rma.nbytes; - const uint64_t ndesc = pkt->hdr.rma.ndesc; - MUHWI_Descriptor_t * payload = (MUHWI_Descriptor_t *) &pkt->payload; - -#ifdef FI_BGQ_TRACE -fprintf(stderr,"complete_rma_operation starting - nbytes is %lu ndesc is %lu\n",nbytes,ndesc); -fflush(stderr); -#endif - if (nbytes > 0) { /* only for direct-put emulation */ - const uint64_t payload_offset = ndesc << BGQ_MU_DESCRIPTOR_SIZE_IN_POWER_OF_2; - if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_BASIC) { - uintptr_t vaddr = (uintptr_t) pkt->hdr.rma.offset; - memcpy((void*)vaddr, (void *)((uintptr_t)payload + payload_offset), nbytes); -#ifdef FI_BGQ_TRACE -fprintf(stderr,"direct-put emulation memcpy vaddr is 0x%016lx nbytes is %lu\n",vaddr,nbytes); -#endif - - } else if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_SCALABLE) { - - uintptr_t vaddr = (uintptr_t)fi_bgq_domain_bat_read_vaddr(bat, pkt->hdr.rma.key); - vaddr += pkt->hdr.rma.offset; -#ifdef FI_BGQ_TRACE -fprintf(stderr,"direct-put emulation memcpy vaddr is 0x%016lx nbytes is %lu\n",vaddr,nbytes); -#endif - - memcpy((void*)vaddr, (void *)((uintptr_t)payload + payload_offset), nbytes); - } - else { - assert(0); - } - } - - unsigned i; - for (i = 0; i < ndesc; ++i) { - - /* - * busy-wait until a fifo slot is available .. 
- */ - MUHWI_Descriptor_t * desc = - fi_bgq_spi_injfifo_tail_wait(&bgq_ep->rx.poll.injfifo); - - qpx_memcpy64((void*)desc, (const void*)&payload[i]); - - if (desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Packet_Type == 2) { /* rget descriptor */ - -#ifdef FI_BGQ_TRACE -fprintf(stderr,"complete_rma_operation - processing rgat desc %d\n",i); -fflush(stderr); -#endif - /* locate the payload lookaside slot */ - uint64_t payload_paddr = 0; - void * payload_vaddr = - fi_bgq_spi_injfifo_immediate_payload(&bgq_ep->rx.poll.injfifo, - desc, &payload_paddr); - desc->Pa_Payload = payload_paddr; - - /* copy the rget payload descriptors into the injection lookaside buffer */ - union fi_bgq_mu_descriptor * rget_payload = (union fi_bgq_mu_descriptor *) payload_vaddr; - qpx_memcpy64((void*)rget_payload, (const void*)&payload[i+1]); - - const uint64_t rget_ndesc = desc->Message_Length >> BGQ_MU_DESCRIPTOR_SIZE_IN_POWER_OF_2; - i += rget_ndesc; - - unsigned j; - for (j = 0; j < rget_ndesc; ++j) { - set_desc_payload_paddr(&rget_payload[j], bat); - } - - } else { - -#ifdef FI_BGQ_TRACE -fprintf(stderr,"complete_rma_operation - processing fifo desc %d\n",i); -fflush(stderr); -#endif - set_desc_payload_paddr((union fi_bgq_mu_descriptor *)desc, bat); - - } - MUSPI_InjFifoAdvanceDesc(bgq_ep->rx.poll.injfifo.muspi_injfifo); - } -#ifdef FI_BGQ_TRACE -fprintf(stderr,"complete_rma_operation complete\n"); -fflush(stderr); -#endif -} - - -static inline -void inject_eager_completion (struct fi_bgq_ep * bgq_ep, - struct fi_bgq_mu_packet * pkt) { - - const uint64_t is_local = pkt->hdr.completion.is_local; - const uint64_t cntr_paddr = ((uint64_t)pkt->hdr.completion.cntr_paddr_rsh3b) << 3; - - MUHWI_Descriptor_t * desc = - fi_bgq_spi_injfifo_tail_wait(&bgq_ep->rx.poll.injfifo); - - qpx_memcpy64((void*)desc, (const void*)&bgq_ep->rx.poll.ack_model[is_local]); - - MUSPI_SetRecPayloadBaseAddressInfo(desc, FI_BGQ_MU_BAT_ID_GLOBAL, cntr_paddr); - desc->PacketHeader.NetworkHeader.pt2pt.Destination = 
pkt->hdr.completion.origin; - - MUSPI_InjFifoAdvanceDesc(bgq_ep->rx.poll.injfifo.muspi_injfifo); - - return; -} - - -/** - * \brief Complete a receive operation that has matched the packet header with - * the match information - * - * \param[in] bgq_ep Edpoint associated with the receive - * \param[in] hdr MU packet header that matched - * \param[in,out] entry Completion entry - */ -static inline -void complete_receive_operation (struct fi_bgq_ep * bgq_ep, - struct fi_bgq_mu_packet * pkt, - const uint64_t origin_tag, - union fi_bgq_context * context, - const unsigned is_context_ext, - const unsigned is_multi_receive, - const unsigned is_manual_progress) { - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"complete_receive_operation starting\n"); -#endif - const uint64_t recv_len = context->len; - void * recv_buf = context->buf; - const uint64_t packet_type = fi_bgq_mu_packet_type_get(pkt); - - const uint64_t immediate_data = pkt->hdr.pt2pt.immediate_data; - - if (packet_type & FI_BGQ_MU_PACKET_TYPE_EAGER) { -#ifdef FI_BGQ_TRACE - fprintf(stderr,"complete_receive_operation - packet_type & FI_BGQ_MU_PACKET_TYPE_EAGER\n"); -#endif - - const uint64_t send_len = pkt->hdr.pt2pt.send.message_length; - - if (is_multi_receive) { /* branch should compile out */ - if (send_len) memcpy(recv_buf, (void*)&pkt->payload.byte[0], send_len); - - union fi_bgq_context * original_multi_recv_context = context; - context = (union fi_bgq_context *)((uintptr_t)recv_buf - sizeof(union fi_bgq_context)); - assert((((uintptr_t)context) & 0x07) == 0); - - context->flags = FI_RECV | FI_MSG | FI_BGQ_CQ_CONTEXT_MULTIRECV; - context->buf = recv_buf; - context->len = send_len; - context->data = immediate_data; - context->tag = 0; /* tag is not valid for multi-receives */ - context->multi_recv_context = original_multi_recv_context; - context->byte_counter = 0; - - /* the next 'fi_bgq_context' must be 8-byte aligned */ - uint64_t bytes_consumed = ((send_len + 8) & (~0x07ull)) + sizeof(union fi_bgq_context); - 
original_multi_recv_context->len -= bytes_consumed; - original_multi_recv_context->buf = (void*)((uintptr_t)(original_multi_recv_context->buf) + bytes_consumed); -#ifdef FI_BGQ_TRACE - fprintf(stderr,"complete_receive_operation - is_multi_receive - enqueue cq for child context %p of parent context %p\n",context,original_multi_recv_context); -#endif - - - /* post a completion event for the individual receive */ - fi_bgq_cq_enqueue_completed(bgq_ep->recv_cq, context, 0); /* TODO - IS lock required? */ - - } else if (send_len <= recv_len) { - if (send_len) memcpy(recv_buf, (void*)&pkt->payload.byte[0], send_len); -#ifdef FI_BGQ_TRACE - fprintf(stderr,"EAGER complete_receive_operation send_len %lu <= recv_len %lu calling fi_bgq_cq_enqueue_completed\n",send_len,recv_len); -#endif - - context->buf = NULL; - context->len = send_len; - context->data = immediate_data; - context->tag = origin_tag; - context->byte_counter = 0; - - /* post a completion event for the individual receive */ - fi_bgq_cq_enqueue_completed(bgq_ep->recv_cq, context, 0); /* TODO - IS lock required? 
*/ - - } else { /* truncation - unlikely */ -#ifdef FI_BGQ_TRACE - fprintf(stderr,"EAGER complete_receive_operation truncation - send_len %lu > recv_len %lu posting error\n",send_len,recv_len); - -#endif - - struct fi_bgq_context_ext * ext; - if (is_context_ext) { - ext = (struct fi_bgq_context_ext *)context; - ext->err_entry.op_context = ext->msg.op_context; - } else { - posix_memalign((void**)&ext, 32, sizeof(struct fi_bgq_context_ext)); - ext->bgq_context.flags = FI_BGQ_CQ_CONTEXT_EXT; - ext->err_entry.op_context = context; - } - - ext->err_entry.flags = context->flags; - ext->err_entry.len = recv_len; - ext->err_entry.buf = recv_buf; - ext->err_entry.data = immediate_data; - ext->err_entry.tag = origin_tag; - ext->err_entry.olen = send_len - recv_len; - ext->err_entry.err = FI_ETRUNC; - ext->err_entry.prov_errno = 0; - ext->err_entry.err_data = NULL; - - ext->bgq_context.byte_counter = 0; - - fi_bgq_cq_enqueue_err (bgq_ep->recv_cq, ext,0); - } - - return; - - } else { /* rendezvous packet */ - - uint64_t niov = pkt->hdr.pt2pt.rendezvous.niov_minus_1 + 1; - assert(niov <= (7-is_multi_receive)); - uint64_t xfer_len = pkt->payload.rendezvous.mu_iov[0].message_length; - { - uint64_t i; - for (i=1; ipayload.rendezvous.mu_iov[i].message_length; - } - - uint64_t byte_counter_vaddr = 0; - - if (is_multi_receive) { /* branch should compile out */ - - /* This code functionaliy is unverified - exit with an error mesg for now - * when we have an mpich case for this we will then verify. 
- */ - - fprintf(stderr,"BGQ Provider does not support FI_MULTI_RECV and RENDEZVOUS protocol\n"); - fflush(stderr); - exit(1); - - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"rendezvous multirecv\n"); -#endif - - union fi_bgq_context * multi_recv_context = - (union fi_bgq_context *)((uintptr_t)recv_buf - sizeof(union fi_bgq_context)); - assert((((uintptr_t)multi_recv_context) & 0x07) == 0); - - multi_recv_context->flags = FI_RECV | FI_MSG | FI_BGQ_CQ_CONTEXT_MULTIRECV; - multi_recv_context->buf = recv_buf; - multi_recv_context->len = xfer_len; - multi_recv_context->data = immediate_data; - multi_recv_context->tag = 0; /* tag is not valid for multi-receives */ - multi_recv_context->multi_recv_context = context; - multi_recv_context->byte_counter = xfer_len; - - /* the next 'fi_bgq_context' must be 8-byte aligned */ - uint64_t bytes_consumed = ((xfer_len + 8) & (~0x07ull)) + sizeof(union fi_bgq_context); - context->len -= bytes_consumed; - context->buf = (void*)((uintptr_t)(context->buf) + bytes_consumed); - - byte_counter_vaddr = (uint64_t)&multi_recv_context->byte_counter; - - /* the original multi-receive context actually uses an - * operation counter - not a byte counter - but nevertheless - * the same field in the context structure is used */ - context->byte_counter += 1; - - /* post a completion event for the individual receive */ - fi_bgq_cq_enqueue_pending(bgq_ep->recv_cq, context, 0); /* TODO - IS lock required? */ - - } else if (xfer_len <= recv_len) { - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"rendezvous complete_receive_operation xfer_len %lu <= recv_len %lu calling fi_bgq_cq_enqueue_pending\n",xfer_len,recv_len); -#endif - context->len = xfer_len; - context->data = immediate_data; - context->tag = origin_tag; - context->byte_counter = xfer_len; - - byte_counter_vaddr = (uint64_t)&context->byte_counter; - - /* post a completion event for the individual receive */ - fi_bgq_cq_enqueue_pending(bgq_ep->recv_cq, context, 0); /* TODO - IS lock required? 
*/ - - } else { -#ifdef FI_BGQ_TRACE - fprintf(stderr,"rendezvous truncation xfer_len %lu > recv_len %lu posting error\n",xfer_len,recv_len); -#endif - - /* truncation */ - struct fi_bgq_context_ext * ext; - if (is_context_ext) { - ext = (struct fi_bgq_context_ext *)context; - ext->err_entry.op_context = ext->msg.op_context; - } else { - posix_memalign((void**)&ext, 32, sizeof(struct fi_bgq_context_ext)); - ext->bgq_context.flags = FI_BGQ_CQ_CONTEXT_EXT; - ext->err_entry.op_context = context; - } - - ext->err_entry.flags = context->flags; - ext->err_entry.len = recv_len; - ext->err_entry.buf = recv_buf; - ext->err_entry.data = immediate_data; - ext->err_entry.tag = origin_tag; - ext->err_entry.olen = xfer_len - recv_len; - ext->err_entry.err = FI_ETRUNC; - ext->err_entry.prov_errno = 0; - ext->err_entry.err_data = NULL; - - ext->bgq_context.byte_counter = 0; - - byte_counter_vaddr = (uint64_t)&ext->bgq_context.byte_counter; - - fi_bgq_cq_enqueue_err (bgq_ep->recv_cq, ext,0); - - xfer_len = 0; - niov = 0; - } - - /* determine the physical address of the byte counter memory */ - uint64_t byte_counter_paddr = 0; - { - Kernel_MemoryRegion_t mr; - Kernel_CreateMemoryRegion(&mr, (void*)byte_counter_vaddr, sizeof(uint64_t)); - byte_counter_paddr = (uint64_t)mr.BasePa + (byte_counter_vaddr - (uint64_t)mr.BaseVa); - } - - /* determine the physical address of the destination buffer */ - uint64_t dst_paddr = 0; - { - Kernel_MemoryRegion_t mr; - Kernel_CreateMemoryRegion(&mr, (void*)recv_buf, recv_len); - dst_paddr = (uint64_t)mr.BasePa + ((uint64_t)recv_buf - (uint64_t)mr.BaseVa); - } - - const uint64_t fifo_map = fi_bgq_mu_packet_get_fifo_map(pkt); - const uint64_t is_local = (fifo_map & (MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL0 | MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL1)) != 0; - - /* - * inject a "remote get" descriptor - the payload is composed - * of two descriptors: - * - * the first is a "direct put" descriptor that will rdma - * transfer the source data from the origin 
and will - * decrement a reception counter on the target as it - * completes - * - * the second is a "direct put" descriptor that will clear - * the byte counter for the send completion entry on the - * origin - */ - - /* busy-wait until a fifo slot is available .. */ - MUHWI_Descriptor_t * rget_desc = - fi_bgq_spi_injfifo_tail_wait(&bgq_ep->rx.poll.injfifo); - - assert(rget_desc); - assert((((uintptr_t)rget_desc)&0x1F) == 0); - - /* locate the payload lookaside slot */ - uint64_t payload_paddr = 0; - MUHWI_Descriptor_t * payload = - (MUHWI_Descriptor_t *)fi_bgq_spi_injfifo_immediate_payload(&bgq_ep->rx.poll.injfifo, - rget_desc, &payload_paddr); - - /* initialize the remote-get descriptor in the injection fifo */ - qpx_memcpy64((void*)rget_desc, (const void*)&bgq_ep->rx.poll.rzv.rget_model[is_local]); - - rget_desc->Pa_Payload = payload_paddr; - rget_desc->PacketHeader.messageUnitHeader.Packet_Types.Remote_Get.Rget_Inj_FIFO_Id = - pkt->hdr.pt2pt.rendezvous.rget_inj_fifo_id; /* TODO - different rget inj fifos for tag vs msg operations? 
*/ - - rget_desc->PacketHeader.NetworkHeader.pt2pt.Destination = fi_bgq_uid_get_destination(pkt->hdr.pt2pt.uid.fi); - - /* initialize the direct-put ("data transfer") descriptor(s) in the rget payload */ - unsigned i; - for (i=0; irx.poll.rzv.dput_model[is_local]); - - xfer_desc->Pa_Payload = pkt->payload.rendezvous.mu_iov[i].src_paddr; - const uint64_t message_length = pkt->payload.rendezvous.mu_iov[i].message_length; - xfer_desc->Message_Length = message_length; - MUSPI_SetRecPayloadBaseAddressInfo(xfer_desc, FI_BGQ_MU_BAT_ID_GLOBAL, dst_paddr); - dst_paddr += message_length; - xfer_desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Counter_Offset = - MUSPI_GetAtomicAddress(byte_counter_paddr, MUHWI_ATOMIC_OPCODE_STORE_ADD); - xfer_desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Counter_Base_Address_Id = - FI_BGQ_MU_BAT_ID_GLOBAL; - - rget_desc->Message_Length += sizeof(MUHWI_Descriptor_t); - - if (is_multi_receive) { /* branch should compile out */ - xfer_desc->Torus_FIFO_Map = fifo_map; - } - } - - /* initialize the direct-put ("origin completion") descriptor in the rget payload */ - { - MUHWI_Descriptor_t * dput_desc = payload; - qpx_memcpy64((void*)dput_desc, (const void*)&bgq_ep->rx.poll.rzv.dput_completion_model); - - const uint64_t counter_paddr = ((uint64_t) pkt->payload.rendezvous.cntr_paddr_rsh3b) << 3; - dput_desc->Pa_Payload = - MUSPI_GetAtomicAddress(counter_paddr, - MUHWI_ATOMIC_OPCODE_LOAD_CLEAR); - } - - /* initialize the memory-fifo ("rendezvous ack") descriptor in the rget payload for multi-receives */ - if (is_multi_receive) { /* branch should compile out */ - MUHWI_Descriptor_t * ack_desc = ++payload; - qpx_memcpy64((void*)ack_desc, (const void*)&bgq_ep->rx.poll.rzv.multi_recv_ack_model); - - ack_desc->Torus_FIFO_Map = fifo_map; - rget_desc->Torus_FIFO_Map = fifo_map; - rget_desc->Message_Length += sizeof(MUHWI_Descriptor_t); - - union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) 
&ack_desc->PacketHeader; - hdr->ack.context = (uintptr_t) context; - } - - /* - * inject the descriptor - */ - MUSPI_InjFifoAdvanceDesc(bgq_ep->rx.poll.injfifo.muspi_injfifo); - } - return; -} - -static inline -unsigned is_match(struct fi_bgq_mu_packet *pkt, union fi_bgq_context * context, const unsigned poll_msg) -{ - const uint64_t origin_tag = pkt->hdr.pt2pt.ofi_tag; - const fi_bgq_uid_t origin_uid = pkt->hdr.pt2pt.uid.fi; - const fi_bgq_uid_t target_uid = fi_bgq_addr_uid(context->src_addr); - const uint64_t ignore = context->ignore; - const uint64_t target_tag = context->tag; - const uint64_t target_tag_and_not_ignore = target_tag & ~ignore; - const uint64_t origin_tag_and_not_ignore = origin_tag & ~ignore; - -#ifdef FI_BGQ_TRACE - fprintf(stderr, "%s:%s():%d context %p origin_uid=0x%08x target_uid=0x%08x origin_tag=0x%016lx target_tag=0x%016lx ignore=0x%016lx any_source is %u returning %u\n", __FILE__, __func__, __LINE__, context,origin_uid, target_uid, origin_tag, target_tag, ignore, (context->src_addr == FI_ADDR_UNSPEC),((origin_tag_and_not_ignore == target_tag_and_not_ignore) && ((context->src_addr == FI_ADDR_UNSPEC) || (origin_uid == target_uid)))); - fflush(stderr); -#endif - - return ((origin_tag_and_not_ignore == target_tag_and_not_ignore) && ((context->src_addr == FI_ADDR_UNSPEC) || (origin_uid == target_uid))); -} - -static inline -void process_rfifo_packet_optimized (struct fi_bgq_ep * bgq_ep, struct fi_bgq_mu_packet * pkt, const unsigned poll_msg, const unsigned is_manual_progress) -{ - const uint64_t packet_type = fi_bgq_mu_packet_type_get(pkt); - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"process_rfifo_packet_optimized - poll_msg is %u mq addr is %p\n",poll_msg,&(bgq_ep->rx.poll.rfifo[poll_msg].mq)); - fflush(stderr); -#endif - if (poll_msg) { - if (packet_type == FI_BGQ_MU_PACKET_TYPE_ACK) { /* branch should compile out */ - - union fi_bgq_context * context = (union fi_bgq_context *) pkt->hdr.ack.context; - context->byte_counter -= 1; - /* TODO - 
msync? */ - return; - } - - if (packet_type == FI_BGQ_MU_PACKET_TYPE_RMA) { - complete_rma_operation(bgq_ep, pkt); - return; - } - - if (packet_type == FI_BGQ_MU_PACKET_TYPE_ATOMIC) { - complete_atomic_operation(bgq_ep, pkt); - return; - } - } - - if ((packet_type & (FI_BGQ_MU_PACKET_TYPE_ACK|FI_BGQ_MU_PACKET_TYPE_EAGER)) == - (FI_BGQ_MU_PACKET_TYPE_ACK|FI_BGQ_MU_PACKET_TYPE_EAGER)) { /* unlikely? */ - inject_eager_completion(bgq_ep, pkt); - return; - } - - assert(packet_type & (FI_BGQ_MU_PACKET_TYPE_EAGER | FI_BGQ_MU_PACKET_TYPE_RENDEZVOUS)); - - /* search the match queue */ - union fi_bgq_context * head = bgq_ep->rx.poll.rfifo[poll_msg].mq.head; - union fi_bgq_context * context = head; - union fi_bgq_context * prev = NULL; -#ifdef FI_BGQ_TRACE - fprintf(stderr,"searching mq - head is %p\n",bgq_ep->rx.poll.rfifo[poll_msg].mq.head); -#endif - while (context) { - - const uint64_t rx_op_flags = context->flags; -#ifdef FI_BGQ_TRACE - fprintf(stderr,"is_match calling with context %p prev is %p next is %p\n",context,p,context->next); - fflush(stderr); -#endif - if (is_match(pkt, context, poll_msg)) { - - if (!poll_msg || ((rx_op_flags | FI_MULTI_RECV) == 0)) { /* branch should compile out for tagged receives */ - - union fi_bgq_context * next = context->next; - - /* remove the context from the match queue */ - if (prev) prev->next = next; - else bgq_ep->rx.poll.rfifo[poll_msg].mq.head = next; - - if (!next) bgq_ep->rx.poll.rfifo[poll_msg].mq.tail = prev; - - const uint64_t is_context_ext = rx_op_flags & FI_BGQ_CQ_CONTEXT_EXT; - - /* branch will compile out */ - if (poll_msg) - complete_receive_operation(bgq_ep, pkt, - 0, context, is_context_ext, 0, is_manual_progress); - else - complete_receive_operation(bgq_ep, pkt, - pkt->hdr.pt2pt.ofi_tag, context, is_context_ext, 0, is_manual_progress); - - return; - - } else { /* FI_MULTI_RECV - unlikely */ - - /* verify that there is enough space available in - * the multi-receive buffer for the incoming data */ - const uint64_t 
recv_len = context->len; - uint64_t send_len = 0; - - if (packet_type & FI_BGQ_MU_PACKET_TYPE_EAGER) { - send_len = pkt->hdr.pt2pt.send.message_length; - } else /* FI_BGQ_MU_PACKET_TYPE_RENDEZVOUS */ { - - /* This code functionaliy is unverified - exit with an error mesg for now - * when we have an mpich case for this we will then verify. - */ - - fprintf(stderr,"BGQ Provider does not support FI_MULTI_RECV and RENDEZVOUS protocol\n"); - fflush(stderr); - exit(1); - - const uint64_t niov = pkt->hdr.pt2pt.rendezvous.niov_minus_1 + 1; - send_len = pkt->payload.rendezvous.mu_iov[0].message_length; - uint64_t i; - for (i=1; ipayload.rendezvous.mu_iov[i].message_length; - } - - if (send_len > recv_len) { - - /* To keep ordering need to complete this multirecv context now and remove - * from match queue and the next multirecv context should have enough room. - */ - - union fi_bgq_context * next = context->next; - - /* remove the context from the match queue */ - if (prev) prev->next = next; - else bgq_ep->rx.poll.rfifo[poll_msg].mq.head = next; - - if (!next) bgq_ep->rx.poll.rfifo[poll_msg].mq.tail = prev; - - context->byte_counter = 0; - fi_bgq_cq_enqueue_completed(bgq_ep->recv_cq, context, 0); - - } else { - - complete_receive_operation(bgq_ep, pkt, - 0, context, 0, 1, is_manual_progress); - - if (context->len < bgq_ep->rx.poll.min_multi_recv) { - /* after processing this message there is not - * enough space available in the multi-receive - * buffer to receive the next message; post a - * 'FI_MULTI_RECV' event to the completion - * queue and return. */ - - union fi_bgq_context * next = context->next; - - /* remove the context from the match queue */ - if (prev) prev->next = next; - else bgq_ep->rx.poll.rfifo[poll_msg].mq.head = next; - - if (!next) bgq_ep->rx.poll.rfifo[poll_msg].mq.tail = prev; - - /* post a completion event for the multi-receive */ - context->byte_counter = 0; - fi_bgq_cq_enqueue_completed(bgq_ep->recv_cq, context, 0); /* TODO - IS lock required? 
*/ - - } - } - return; - } - - } else { - prev = context; - context = context->next; - } - } - - /* did not find a match .. add this packet to the unexpected queue */ - -#ifdef FI_BGQ_TRACE - fprintf(stderr, "process_rfifo_packet_optimized - did not find a match .. add this packet to the unexpected queue \n"); - fflush(stderr); -#endif - if (bgq_ep->rx.poll.rfifo[poll_msg].ue.free == NULL) { /* unlikely */ - struct fi_bgq_mu_packet * block = NULL; - int rc __attribute__ ((unused)); - rc = posix_memalign((void **)&block, - 32, sizeof(struct fi_bgq_mu_packet)*FI_BGQ_UEPKT_BLOCKSIZE); - assert(rc==0); - unsigned i; - for (i=0; i<(FI_BGQ_UEPKT_BLOCKSIZE-1); ++i) block[i].next = &block[i+1]; - block[FI_BGQ_UEPKT_BLOCKSIZE-1].next = NULL; - bgq_ep->rx.poll.rfifo[poll_msg].ue.free = block; - } - - /* pop the free list */ - struct fi_bgq_mu_packet * uepkt = bgq_ep->rx.poll.rfifo[poll_msg].ue.free; - bgq_ep->rx.poll.rfifo[poll_msg].ue.free = uepkt->next; - - /* copy the packet and append to the ue queue */ - size_t bytes_to_copy = (pkt->hdr.muhwi.NetworkHeader.pt2pt.Byte8.Size + 1) * 32; - memcpy((void*)uepkt, (const void *)pkt, bytes_to_copy); - uepkt->next = NULL; - if (bgq_ep->rx.poll.rfifo[poll_msg].ue.head == NULL) { - bgq_ep->rx.poll.rfifo[poll_msg].ue.head = uepkt; - } else { - bgq_ep->rx.poll.rfifo[poll_msg].ue.tail->next = uepkt; - } - bgq_ep->rx.poll.rfifo[poll_msg].ue.tail = uepkt; - - return; -} - -static inline -void process_rfifo_packet (struct fi_bgq_ep * bgq_ep, struct fi_bgq_mu_packet * pkt, const unsigned poll_msg, const unsigned is_manual_progress) -{ - process_rfifo_packet_optimized(bgq_ep, pkt, poll_msg, is_manual_progress); -} - -static inline -int poll_rfifo (struct fi_bgq_ep * bgq_ep, const unsigned is_manual_progress) { - - /* - * The mu reception fifo is consumed by software at the 'head' and - * produced by hardware at the 'tail'. 
- */ - MUSPI_Fifo_t * fifo_ptr = &bgq_ep->rx.poll.muspi_recfifo->_fifo; - assert(fifo_ptr); - volatile uint64_t pa_tail = MUSPI_getHwTail(fifo_ptr); - const uintptr_t pa_start = MUSPI_getStartPa(fifo_ptr); - const uintptr_t offset_tail = pa_tail - pa_start; - - const uintptr_t va_head = (uintptr_t) MUSPI_getHeadVa(fifo_ptr); - const uintptr_t va_start = (uintptr_t) MUSPI_getStartVa(fifo_ptr); - const uintptr_t offset_head = va_head - va_start; - - MUHWI_PacketHeader_t * hdr = (MUHWI_PacketHeader_t *) va_head; - - if (offset_head < offset_tail) { /* likely */ - - muspi_dcbt(va_head, 0); - _bgq_msync(); - - const uintptr_t stop = va_head + offset_tail - offset_head; - int process_rfifo_iter = 0; - while (((uintptr_t)hdr < stop) && (process_rfifo_iter < PROCESS_RFIFO_MAX)) { - - process_rfifo_iter++; - struct fi_bgq_mu_packet *pkt = (struct fi_bgq_mu_packet *) hdr; - const uint64_t packet_type = fi_bgq_mu_packet_type_get(pkt); - - if (packet_type & FI_BGQ_MU_PACKET_TYPE_TAG) { /* likely */ - process_rfifo_packet(bgq_ep, pkt, 0, is_manual_progress); - } else { - process_rfifo_packet(bgq_ep, pkt, 1, is_manual_progress); - } - - hdr += hdr->NetworkHeader.pt2pt.Byte8.Size + 1; - muspi_dcbt(hdr, 0); - } - - MUSPI_setHeadVa(fifo_ptr, (void*)hdr); - MUSPI_setHwHead(fifo_ptr, (uintptr_t)hdr-va_start); - - - } else if (offset_head > offset_tail) { /* unlikely ? 
*/ - - /* check if the head packet wraps */ - const uintptr_t va_end = (uintptr_t) fifo_ptr->va_end; - if ((va_head + 544) < va_end) { /* likely */ - - /* head packet does not wrap */ - muspi_dcbt(va_head, 0); - _bgq_msync(); - - const uintptr_t stop = va_end - 544; - int process_rfifo_iter = 0; - while (((uintptr_t)hdr < stop) && (process_rfifo_iter < PROCESS_RFIFO_MAX)) { - - process_rfifo_iter++; - struct fi_bgq_mu_packet *pkt = (struct fi_bgq_mu_packet *) hdr; - const uint64_t packet_type = fi_bgq_mu_packet_type_get(pkt); - - if (packet_type & FI_BGQ_MU_PACKET_TYPE_TAG) { /* likely */ - process_rfifo_packet(bgq_ep, pkt, 0, is_manual_progress); - } else { - process_rfifo_packet(bgq_ep, pkt, 1, is_manual_progress); - } - - hdr += hdr->NetworkHeader.pt2pt.Byte8.Size + 1; - muspi_dcbt(hdr, 0); - } - - MUSPI_setHeadVa(fifo_ptr, (void*)hdr); - MUSPI_setHwHead(fifo_ptr, (uintptr_t)hdr-va_start); - - } else { /* unlikely */ - - /* head packet may wrap */ - muspi_dcbt(va_head, 0); - _bgq_msync(); - - uint32_t packet_bytes = ((uint32_t)hdr->NetworkHeader.pt2pt.Byte8.Size + 1) << 5; - const uintptr_t bytes_before_wrap = va_end - va_head; - if (packet_bytes < bytes_before_wrap) { - struct fi_bgq_mu_packet *pkt = (struct fi_bgq_mu_packet *) hdr; - const uint64_t packet_type = fi_bgq_mu_packet_type_get(pkt); - - if (packet_type & FI_BGQ_MU_PACKET_TYPE_TAG) { /* likely */ - process_rfifo_packet(bgq_ep, pkt, 0, is_manual_progress); - } else { - process_rfifo_packet(bgq_ep, pkt, 1, is_manual_progress); - } - - const uintptr_t new_offset_head = offset_head + packet_bytes; - MUSPI_setHeadVa(fifo_ptr, (void*)(va_start + new_offset_head)); - MUSPI_setHwHead(fifo_ptr, new_offset_head); - - } else if (packet_bytes == bytes_before_wrap) { - struct fi_bgq_mu_packet *pkt = (struct fi_bgq_mu_packet *) hdr; - const uint64_t packet_type = fi_bgq_mu_packet_type_get(pkt); - - if (packet_type & FI_BGQ_MU_PACKET_TYPE_TAG) { /* likely */ - process_rfifo_packet(bgq_ep, pkt, 0, 
is_manual_progress); - } else { - process_rfifo_packet(bgq_ep, pkt, 1, is_manual_progress); - } - - MUSPI_setHeadVa(fifo_ptr, (void*)(va_start)); - MUSPI_setHwHead(fifo_ptr, 0); - - } else { - uint8_t tmp_pkt[544] __attribute__((__aligned__(32))); - - memcpy((void*)&tmp_pkt[0], (void*)va_head, bytes_before_wrap); - const uintptr_t bytes_after_wrap = packet_bytes - bytes_before_wrap; - memcpy((void*)&tmp_pkt[bytes_before_wrap], (void*)va_start, bytes_after_wrap); - - hdr = (MUHWI_PacketHeader_t *)&tmp_pkt[0]; - struct fi_bgq_mu_packet *pkt = (struct fi_bgq_mu_packet *) hdr; - const uint64_t packet_type = fi_bgq_mu_packet_type_get(pkt); - - if (packet_type & FI_BGQ_MU_PACKET_TYPE_TAG) { /* likely */ - process_rfifo_packet(bgq_ep, pkt, 0, is_manual_progress); - } else { - process_rfifo_packet(bgq_ep, pkt, 1, is_manual_progress); - } - - MUSPI_setHeadVa(fifo_ptr, (void*)(va_start + bytes_after_wrap)); - MUSPI_setHwHead(fifo_ptr, bytes_after_wrap); - } - } - } - - - return 0; -} - - -/* rx_op_flags is only checked for FI_PEEK | FI_CLAIM | FI_MULTI_RECV - * rx_op_flags is only used if FI_PEEK | FI_CLAIM | cancel_context - * is_context_ext is only used if FI_PEEK | cancel_context | iovec - * - * The "normal" data movement functions, such as fi_[t]recv(), can safely - * specify '0' for cancel_context, rx_op_flags, and is_context_ext, in - * order to reduce code path. - */ -static inline -int process_mfifo_context (struct fi_bgq_ep * bgq_ep, const unsigned poll_msg, - const uint64_t cancel_context, union fi_bgq_context * context, - const uint64_t rx_op_flags, const uint64_t is_context_ext, - const unsigned is_manual_progress) { -#ifdef FI_BGQ_TRACE - fprintf(stderr,"process_mfifo_context starting - context->tag is %d\n",context->tag); - if (rx_op_flags & FI_PEEK) - fprintf(stderr,"just peeking\n"); - fflush(stderr); -#endif - if (cancel_context) { /* branch should compile out */ - const uint64_t compare_context = is_context_ext ? 
- (uint64_t)(((struct fi_bgq_context_ext *)context)->msg.op_context) : - (uint64_t)context; - - if (compare_context == cancel_context) { - - struct fi_bgq_context_ext * ext; - if (is_context_ext) { - ext = (struct fi_bgq_context_ext *)context; - } else { - posix_memalign((void**)&ext, 32, sizeof(struct fi_bgq_context_ext)); - ext->bgq_context.flags = FI_BGQ_CQ_CONTEXT_EXT; - } - - ext->bgq_context.byte_counter = 0; - ext->err_entry.op_context = (void *)cancel_context; - ext->err_entry.flags = rx_op_flags; - ext->err_entry.len = 0; - ext->err_entry.buf = 0; - ext->err_entry.data = 0; - ext->err_entry.tag = context->tag; - ext->err_entry.olen = 0; - ext->err_entry.err = FI_ECANCELED; - ext->err_entry.prov_errno = 0; - ext->err_entry.err_data = NULL; - - fi_bgq_cq_enqueue_err (bgq_ep->recv_cq, ext,0); - - return FI_ECANCELED; - } - } - - if ((rx_op_flags & (FI_PEEK | FI_CLAIM | FI_MULTI_RECV)) == 0) { /* likely */ - - /* search the unexpected packet queue */ - struct fi_bgq_mu_packet * head = bgq_ep->rx.poll.rfifo[poll_msg].ue.head; - struct fi_bgq_mu_packet * tail = bgq_ep->rx.poll.rfifo[poll_msg].ue.tail; - struct fi_bgq_mu_packet * prev = NULL; - struct fi_bgq_mu_packet * uepkt = head; - - unsigned found_match = 0; - while (uepkt != NULL) { - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"process_mfifo_context - searching unexpected queue\n"); - fflush(stderr); -#endif - if (is_match(uepkt, context, poll_msg)) { -#ifdef FI_BGQ_TRACE - fprintf(stderr,"process_mfifo_context - found match on unexpected queue\n"); - fflush(stderr); -#endif - - /* branch will compile out */ - if (poll_msg) - complete_receive_operation(bgq_ep, uepkt, - 0, context, 0, 0, is_manual_progress); - else - complete_receive_operation(bgq_ep, uepkt, - uepkt->hdr.pt2pt.ofi_tag, context, 0, 0, is_manual_progress); - - /* remove the uepkt from the ue queue */ - if (head == tail) { - bgq_ep->rx.poll.rfifo[poll_msg].ue.head = NULL; - bgq_ep->rx.poll.rfifo[poll_msg].ue.tail = NULL; - } else if (prev == NULL) { 
- bgq_ep->rx.poll.rfifo[poll_msg].ue.head = uepkt->next; - } else if (tail == uepkt) { - bgq_ep->rx.poll.rfifo[poll_msg].ue.tail = prev; - prev->next = NULL; - } else { - prev->next = uepkt->next; - } - - /* ... and prepend the uehdr to the ue free list. */ - uepkt->next = bgq_ep->rx.poll.rfifo[poll_msg].ue.free; - bgq_ep->rx.poll.rfifo[poll_msg].ue.free = uepkt; - - /* found a match; break from the loop */ - uepkt = NULL; - found_match = 1; - - } else { - - /* a match was not found; advance to the next ue header */ - prev = uepkt; - uepkt = uepkt->next; - } - } - - if (!found_match) { - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"process_mfifo_context - nothing found on unexpected queue adding to match queue for poll_msg %u context->tag is %d context is %p mq addr is %p\n",poll_msg,context->tag,context,&(bgq_ep->rx.poll.rfifo[poll_msg].mq)); - fflush(stderr); -#endif - /* - * no unexpected headers were matched; add this match - * information to the appropriate match queue - */ - - union fi_bgq_context * tail = bgq_ep->rx.poll.rfifo[poll_msg].mq.tail; - - context->next = NULL; - if (tail == NULL) { - bgq_ep->rx.poll.rfifo[poll_msg].mq.head = context; - } else { - tail->next = context; - } - bgq_ep->rx.poll.rfifo[poll_msg].mq.tail = context; - } - - } else if (rx_op_flags & FI_PEEK) { /* unlikely */ - - /* search the unexpected packet queue */ - struct fi_bgq_mu_packet * head = bgq_ep->rx.poll.rfifo[poll_msg].ue.head; - struct fi_bgq_mu_packet * tail = bgq_ep->rx.poll.rfifo[poll_msg].ue.tail; - struct fi_bgq_mu_packet * prev = NULL; - struct fi_bgq_mu_packet * uepkt = head; - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"process_mfifo_context - rx_op_flags & FI_PEEK searching unexpected queue\n"); - if (uepkt == NULL) - fprintf(stderr,"uepkt == NULL\n"); - else - fprintf(stderr,"uepkt != NULL\n"); - - fflush(stderr); -#endif - unsigned found_match = 0; - while (uepkt != NULL) { - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"process_mfifo_context uepkt != NULL - rx_op_flags & FI_PEEK 
searching unexpected queue\n"); - fflush(stderr); -#endif - if (is_match(uepkt, context, poll_msg)) { - - const uint64_t packet_type = fi_bgq_mu_packet_type_get(uepkt); - if (packet_type & FI_BGQ_MU_PACKET_TYPE_RENDEZVOUS) { - const uint64_t niov = uepkt->hdr.pt2pt.rendezvous.niov_minus_1 + 1; - uint64_t len = 0; - unsigned i; - for (i=0; ipayload.rendezvous.mu_iov[i].message_length; - context->len = len; - } else { /* "eager" or "eager with completion" packet type */ - context->len = uepkt->hdr.pt2pt.send.message_length; - } - context->tag = poll_msg ? 0 : uepkt->hdr.pt2pt.ofi_tag; - context->byte_counter = 0; - - if (rx_op_flags & FI_CLAIM) { /* both FI_PEEK and FI_CLAIM were specified */ - assert((rx_op_flags & FI_BGQ_CQ_CONTEXT_EXT) == 0); - - context->claim = uepkt; - - /* remove the uepkt from the ue queue */ - if (head == tail) { - bgq_ep->rx.poll.rfifo[poll_msg].ue.head = NULL; - bgq_ep->rx.poll.rfifo[poll_msg].ue.tail = NULL; - } else if (prev == NULL) { - bgq_ep->rx.poll.rfifo[poll_msg].ue.head = uepkt->next; - } else if (tail == uepkt) { - bgq_ep->rx.poll.rfifo[poll_msg].ue.tail = prev; - prev->next = NULL; - } else { - prev->next = uepkt->next; - } - } - /* tranfer immediate data from pkt to context for matching FI_PEEK */ - context->data = uepkt->hdr.pt2pt.immediate_data; - - /* post a completion event for the receive */ - fi_bgq_cq_enqueue_completed(bgq_ep->recv_cq, context, 0); /* TODO - IS lock required? 
*/ - - found_match = 1; - uepkt = NULL; - - } else { - - /* a match was not found; advance to the next ue header */ - prev = uepkt; - uepkt = uepkt->next; - } - } - - if (!found_match) { - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"didn't find a match for this FI_PEEK\n"); - fflush(stderr); -#endif - /* did not find a match for this "peek" */ - - - struct fi_bgq_context_ext * ext; - uint64_t mfifo_value; - if (is_context_ext) { - ext = (struct fi_bgq_context_ext *)context; - mfifo_value = (uint64_t)context >> 3; - } else { - posix_memalign((void**)&ext, 32, sizeof(struct fi_bgq_context_ext)); - ext->bgq_context.flags = rx_op_flags | FI_BGQ_CQ_CONTEXT_EXT; - - mfifo_value = (uint64_t)ext >> 3; - } - - ext->err_entry.op_context = context; - ext->err_entry.flags = rx_op_flags; - ext->err_entry.len = 0; - ext->err_entry.buf = 0; - ext->err_entry.data = 0; - ext->err_entry.tag = 0; - ext->err_entry.olen = 0; - ext->err_entry.err = FI_ENOMSG; - ext->err_entry.prov_errno = 0; - ext->err_entry.err_data = NULL; - ext->bgq_context.byte_counter = 0; - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"process_mfifo_context - no match found on unexpected queue posting error\n"); - fflush(stderr); -#endif - fi_bgq_cq_enqueue_err (bgq_ep->recv_cq, ext,0); - - } - - } else if (rx_op_flags & FI_CLAIM) { /* unlikely */ - assert((rx_op_flags & FI_BGQ_CQ_CONTEXT_EXT) == 0); -#ifdef FI_BGQ_TRACE - fprintf(stderr,"process_mfifo_context - rx_op_flags & FI_CLAIM complete receive operation\n"); -#endif - - /* only FI_CLAIM was specified - * - * this occurs after a previous FI_PEEK + FI_CLAIM - * operation has removed an unexpected packet from - * the queue and saved a pointer to it in the context - * - * complete the receive for this "claimed" message ... 
*/ - struct fi_bgq_mu_packet * claimed_pkt = context->claim; - if (poll_msg) - complete_receive_operation(bgq_ep, claimed_pkt, - 0, context, 0, 0, is_manual_progress); - else - complete_receive_operation(bgq_ep, claimed_pkt, - claimed_pkt->hdr.pt2pt.ofi_tag, context, 0, 0, is_manual_progress); - - /* ... and prepend the uehdr to the ue free list. */ - claimed_pkt->next = bgq_ep->rx.poll.rfifo[poll_msg].ue.free; - bgq_ep->rx.poll.rfifo[poll_msg].ue.free = claimed_pkt; - - } else if (poll_msg && (rx_op_flags & FI_MULTI_RECV)) { /* unlikely - branch should compile out for tagged receives */ - /* search the unexpected packet queue */ - struct fi_bgq_mu_packet * head = bgq_ep->rx.poll.rfifo[poll_msg].ue.head; - struct fi_bgq_mu_packet * tail = bgq_ep->rx.poll.rfifo[poll_msg].ue.tail; - struct fi_bgq_mu_packet * prev = NULL; - struct fi_bgq_mu_packet * uepkt = head; - - unsigned full_multirecv_buffer = 0; - while (uepkt != NULL) { - - if (is_match(uepkt, context, poll_msg)) { - - /* verify that there is enough space available in - * the multi-receive buffer for the incoming data */ - const uint64_t recv_len = context->len; - const uint64_t packet_type = fi_bgq_mu_packet_type_get(uepkt); - uint64_t send_len = 0; - - if (packet_type & FI_BGQ_MU_PACKET_TYPE_EAGER) { - send_len = uepkt->hdr.pt2pt.send.message_length; - } else if (packet_type & FI_BGQ_MU_PACKET_TYPE_RENDEZVOUS) { - - /* This code functionaliy is unverified - exit with an error mesg for now - * when we have an mpich case for this we will then verify. - */ - - fprintf(stderr,"BGQ Provider does not support FI_MULTI_RECV and RENDEZVOUS protocol\n"); - fflush(stderr); - exit(1); - - const uint64_t niov = uepkt->hdr.pt2pt.rendezvous.niov_minus_1 + 1; - send_len = uepkt->payload.rendezvous.mu_iov[0].message_length; - uint64_t i; - for (i=1; ipayload.rendezvous.mu_iov[i].message_length; - } - - if (send_len > recv_len) { - /* There is not enough room for the next subcontext multirec. 
- * to preserver the ordering just break off here with whatever - * matches are in the buffer and hopefully the next multirecv - * has space. - */ - - uepkt = NULL; - full_multirecv_buffer = 1; - context->byte_counter = 0; - fi_bgq_cq_enqueue_completed(bgq_ep->recv_cq, context, 0); - - } else { - complete_receive_operation(bgq_ep, uepkt, - 0, context, 0, 1, is_manual_progress); - - /* remove the uepkt from the ue queue */ - if (head == tail) { - bgq_ep->rx.poll.rfifo[poll_msg].ue.head = NULL; - bgq_ep->rx.poll.rfifo[poll_msg].ue.tail = NULL; - } else if (prev == NULL) { - bgq_ep->rx.poll.rfifo[poll_msg].ue.head = uepkt->next; - } else if (tail == uepkt) { - bgq_ep->rx.poll.rfifo[poll_msg].ue.tail = prev; - prev->next = NULL; - } else { - prev->next = uepkt->next; - } - - struct fi_bgq_mu_packet *matched_uepkt_next = uepkt->next; - - /* ... and prepend the uehdr to the ue free list. */ - uepkt->next = bgq_ep->rx.poll.rfifo[poll_msg].ue.free; - bgq_ep->rx.poll.rfifo[poll_msg].ue.free = uepkt; - - if (context->len < bgq_ep->rx.poll.min_multi_recv) { - /* after processing this message there is not - * enough space available in the multi-receive - * buffer to receive the next message; break - * from the loop and post a 'FI_MULTI_RECV' - * event to the completion queue. */ - uepkt = NULL; - full_multirecv_buffer = 1; - - /* post a completion event for the multi-receive */ - context->byte_counter = 0; - fi_bgq_cq_enqueue_completed(bgq_ep->recv_cq, context, 0); /* TODO - IS lock required? */ - } - else { - uepkt = matched_uepkt_next; - } - - } - - } else { - - /* a match was not found; advance to the next ue header */ - prev = uepkt; - uepkt = uepkt->next; - } - } - - if (!full_multirecv_buffer) { - - /* The multirecv context has room in its buffer. - * Post to match queue for further filling. 
- */ - - union fi_bgq_context * tail = bgq_ep->rx.poll.rfifo[poll_msg].mq.tail; - - context->next = NULL; - if (tail == NULL) { - bgq_ep->rx.poll.rfifo[poll_msg].mq.head = context; - } else { - tail->next = context; - } - bgq_ep->rx.poll.rfifo[poll_msg].mq.tail = context; - } - } - - return 0; -} - - -static inline -int poll_mfifo (struct fi_bgq_ep * bgq_ep, const unsigned poll_msg, const uint64_t cancel_context, const unsigned is_manual_progress) { - -#ifdef DEBUG - if (bgq_ep->rx.poll.rfifo[poll_msg].ue.head == NULL) assert(bgq_ep->rx.poll.rfifo[poll_msg].ue.tail == NULL); - if (bgq_ep->rx.poll.rfifo[poll_msg].ue.tail == NULL) assert(bgq_ep->rx.poll.rfifo[poll_msg].ue.head == NULL); - if (bgq_ep->rx.poll.rfifo[poll_msg].mq.head == NULL) assert(bgq_ep->rx.poll.rfifo[poll_msg].mq.tail == NULL); - if (bgq_ep->rx.poll.rfifo[poll_msg].mq.tail == NULL) assert(bgq_ep->rx.poll.rfifo[poll_msg].mq.head == NULL); -#endif - - /* - * attempt to match each new match element from the match fifo with any - * unexpected headers and compete the receives; if no match is found, - * append the match element to the match queue which will be searched - * for a match as each rfifo packet is processed - */ - uint64_t mfifo_value; - struct l2atomic_fifo_consumer * consumer = &bgq_ep->rx.poll.rfifo[poll_msg].match; - unsigned loop_count = 0; - while (++loop_count < 16 && l2atomic_fifo_consume(consumer, &mfifo_value) == 0) { - - union fi_bgq_context * context = (union fi_bgq_context *)(mfifo_value << 3); - const uint64_t rx_op_flags = context->flags; - const uint64_t is_context_ext = rx_op_flags & FI_BGQ_CQ_CONTEXT_EXT; - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"poll_mfifo calling process_mfifo_context\n"); -#endif - process_mfifo_context(bgq_ep, poll_msg, cancel_context, - context, rx_op_flags, is_context_ext, is_manual_progress); - - } - - return 0; -} - - -static inline -int cancel_match_queue (struct fi_bgq_ep * bgq_ep, const unsigned poll_msg, const uint64_t cancel_context) { - - /* 
search the match queue */ - union fi_bgq_context * head = bgq_ep->rx.poll.rfifo[poll_msg].mq.head; - union fi_bgq_context * tail = bgq_ep->rx.poll.rfifo[poll_msg].mq.tail; - union fi_bgq_context * context = head; - union fi_bgq_context * prev = NULL; - while (context) { - - const uint64_t is_context_ext = context->flags & FI_BGQ_CQ_CONTEXT_EXT; - const uint64_t compare_context = is_context_ext ? - (uint64_t)(((struct fi_bgq_context_ext *)context)->msg.op_context) : - (uint64_t)context; - - if (compare_context == cancel_context) { - - /* remove the context from the match queue */ - if (context == head) - bgq_ep->rx.poll.rfifo[poll_msg].mq.head = context->next; - else - prev->next = context->next; - - if (context == tail) - bgq_ep->rx.poll.rfifo[poll_msg].mq.tail = prev; - - struct fi_bgq_context_ext * ext; - if (is_context_ext) { - ext = (struct fi_bgq_context_ext *)context; - } else { - posix_memalign((void**)&ext, 32, sizeof(struct fi_bgq_context_ext)); - ext->bgq_context.flags = FI_BGQ_CQ_CONTEXT_EXT; - } - - ext->bgq_context.byte_counter = 0; - ext->err_entry.op_context = (void *)cancel_context; - ext->err_entry.flags = context->flags; - ext->err_entry.len = 0; - ext->err_entry.buf = 0; - ext->err_entry.data = 0; - ext->err_entry.tag = context->tag; - ext->err_entry.olen = 0; - ext->err_entry.err = FI_ECANCELED; - ext->err_entry.prov_errno = 0; - ext->err_entry.err_data = NULL; - - fi_bgq_cq_enqueue_err (bgq_ep->recv_cq, ext,0); - - return FI_ECANCELED; - } - else - prev = context; - context = context->next; - } - - return 0; -} - -static inline -void poll_cfifo (struct fi_bgq_ep * bgq_ep, const unsigned is_manual_progress) { /* TODO - make no inline */ - - struct l2atomic_fifo_consumer * consumer = &bgq_ep->rx.poll.control; - uint64_t value = 0; - if (l2atomic_fifo_consume(consumer, &value) == 0) { - - const unsigned poll_fi_msg = bgq_ep->rx.caps & FI_MSG; - const unsigned poll_fi_tag = bgq_ep->rx.caps & FI_TAGGED; - - /* const uint64_t flags = value & 
0xE000000000000000ull; -- currently not used */ - const uint64_t cancel_context = value << 3; - - if (poll_fi_msg && poll_fi_tag) { - if (FI_ECANCELED != cancel_match_queue(bgq_ep, 0, cancel_context)) { - if (FI_ECANCELED != poll_mfifo(bgq_ep, 0, cancel_context, is_manual_progress)) { - - if (FI_ECANCELED != cancel_match_queue(bgq_ep, 1, cancel_context)) { - if (FI_ECANCELED != poll_mfifo(bgq_ep, 1, cancel_context, is_manual_progress)) { - /* did not find a match */ - } - } - } - } - } else if (poll_fi_msg) { - if (FI_ECANCELED != cancel_match_queue(bgq_ep, 1, cancel_context)) { - if (FI_ECANCELED != poll_mfifo(bgq_ep, 1, cancel_context, is_manual_progress)) { - /* did not find a match */ - } - } - } else if (poll_fi_tag) { - if (FI_ECANCELED != cancel_match_queue(bgq_ep, 0, cancel_context)) { - if (FI_ECANCELED != poll_mfifo(bgq_ep, 0, cancel_context, is_manual_progress)) { - /* did not find a match */ - } - } - } - } -} - -static inline -void poll_rx (struct fi_bgq_ep * bgq_ep, - const unsigned poll_fi_msg, - const unsigned poll_fi_tag) { - - volatile uint64_t * async_is_enabled = &bgq_ep->async.enabled; - while (L2_AtomicLoad(async_is_enabled)) { - unsigned loop_count = 64; - do { - if (poll_fi_msg) { - poll_mfifo(bgq_ep, 1, 0, 0); - poll_rfifo(bgq_ep, 0); - } - if (poll_fi_tag) { - poll_mfifo(bgq_ep, 0, 0, 0); - poll_rfifo(bgq_ep, 0); - } - } while (--loop_count); - - poll_cfifo(bgq_ep, 0); - } -} - -static inline -void * poll_fn (void *arg) { -//fprintf(stderr, "%s:%s():%d .... 
arg = %p\n", __FILE__, __func__, __LINE__, arg); - struct fi_bgq_ep * bgq_ep = (struct fi_bgq_ep *) arg; - - volatile uint64_t * async_is_active = &bgq_ep->async.active; - L2_AtomicStore(async_is_active, 1); - - uint64_t rx_caps = bgq_ep->rx.caps & (FI_MSG | FI_TAGGED); - - if (rx_caps == (FI_MSG | FI_TAGGED)) { - poll_rx(bgq_ep, 1, 1); - } else if (rx_caps == FI_MSG) { - poll_rx(bgq_ep, 1, 0); - } else if (rx_caps == FI_TAGGED) { - poll_rx(bgq_ep, 0, 1); - } - - L2_AtomicStore(async_is_active, 0); - - return NULL; -} - - - -#endif /* _FI_PROV_BGQ_RX_H_ */ diff --git a/prov/bgq/include/rdma/bgq/fi_bgq_spi.h b/prov/bgq/include/rdma/bgq/fi_bgq_spi.h deleted file mode 100644 index 262a6021b2c..00000000000 --- a/prov/bgq/include/rdma/bgq/fi_bgq_spi.h +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef _FI_PROV_BGQ_SPI_H_ -#define _FI_PROV_BGQ_SPI_H_ - -/* - * Certain BGQ SPI files expect the '__LINUX__' macro to be defined to '0' - * or '1' instead of simply checking if the macro is defined or not. - * Specifically, the following file needs hacking, although there are probably - * others. - * - * spi/include/mu/Addressing.h - */ -#ifndef __LINUX__ -#define __LINUX__ 0 -#endif - -/* - * The bgq system software, specifically 'hwi/include/common/compiler_support.h', - * will define the __INLINE__ macro if it is not already defined to the following: - * - * #define __INLINE__ extern inline __attribute__((always_inline)) - * - * This is the non-portable "gnu 89" style which easily results in undefined - * symbols or multiple defined symbols when used by software coded to a more - * recent C standard. - * - * As a workaround the __INLINE__ macro will be defined to the more appropriate - * 'static inline' style only for the bgq system software includes and then - * undefined at the end of this file. This seems to fix the problem without - * requiring any changes to the installed bgq system software files. - */ -#ifdef __INLINE__ -#error __INLINE__ already defined! -#else -#define __INLINE__ static inline -#endif - -#include -#include -#include -#include - -/* - * Avoid the pervasive "function declared static but never defined" warning for - * unused kernel spi functions, defined in 'spi/include/kernel/MU.h' by - * providing an implementation. 
- */ -int32_t Kernel_GetNDExpectedTokens(uint32_t a, uint32_t b, uint32_t *c) { assert(0); return -1; } -int32_t Kernel_GetNDExpectedTokensDCR(uint32_t a, uint64_t *b) { assert(0); return -1; } - -/* Work around buggy SPI code when -DNDEBUG is specified */ -#ifdef NDEBUG -#undef NDEBUG -#include -#define NDEBUG -#else -#include -#endif - -#include -#include -#include -#include -#include -#include -#include - -#undef __INLINE__ - -#ifndef MIN -#define MIN(a,b) (b^((a^b)&-(asw_freeSpace)) { /* unlikely */ - do { - /* mmio read from hardware to update shadow state */ - *(f->sw_freeSpace) = f->hw_injfifo->freeSpace; - } while (0 == *(f->sw_freeSpace)); - } - - return (MUHWI_Descriptor_t *) *f->sw_tailva; /* updated via MUSPI_InjFifoAdvanceDesc */ -} - - -static inline -void * fi_bgq_spi_injfifo_immediate_payload (struct fi_bgq_spi_injfifo *f, - MUHWI_Descriptor_t *desc, uint64_t *paddr) { - - assert(f); - assert(f->immediate_payload_base_vaddr != 0); - assert(f->immediate_payload_sizeof != 0); - assert(f->va_start != 0); - - const uint64_t offset = - (((uintptr_t)desc - f->va_start) >> BGQ_MU_DESCRIPTOR_SIZE_IN_POWER_OF_2) * - f->immediate_payload_sizeof; - - *paddr = f->immediate_payload_base_paddr + offset; - - return (void*)(f->immediate_payload_base_vaddr + offset); -} - - -static inline -MUHWI_Destination_t fi_bgq_spi_coordinates_to_destination (BG_CoordinateMapping_t coords) { - - union foo { - BG_CoordinateMapping_t coords; - uint32_t raw; - }; - - const union foo tmp = {.coords=coords}; - - const uint32_t tmp2 = (tmp.raw & 0x3FFFFFC0ul) | (tmp.raw >> 31); - const MUHWI_Destination_t * const out = (const MUHWI_Destination_t * const)&tmp2; - - return *out; -} - - - -#endif /* _FI_PROV_BGQ_SPI_H_ */ diff --git a/prov/bgq/include/rdma/fi_direct.h.in b/prov/bgq/include/rdma/fi_direct.h.in deleted file mode 100644 index 5755cae3124..00000000000 --- a/prov/bgq/include/rdma/fi_direct.h.in +++ /dev/null @@ -1,378 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National 
Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef _FI_BGQ_DIRECT_H_ -#define _FI_BGQ_DIRECT_H_ - -#ifdef FABRIC_DIRECT -#define FABRIC_DIRECT_ 1 - -#include "rdma/fabric.h" - -struct fi_context { - void *internal[8]; -}; - -#endif - -static const uint64_t FI_BGQ_MAX_MSG_SIZE = (0x7FFFFFFFULL); /* 2^31-1 */ -static const uint64_t FI_BGQ_MAX_PREFIX_SIZE = (0ULL); -static const uint64_t FI_BGQ_INJECT_SIZE = (512ULL); -static const uint64_t FI_BGQ_MAX_ORDER_RAW_SIZE = (256ULL); /* FI_MR_BASIC: 2^37 */ -static const uint64_t FI_BGQ_MAX_ORDER_WAR_SIZE = (256ULL); /* FI_MR_BASIC: 2^37 */ -static const uint64_t FI_BGQ_MAX_ORDER_WAW_SIZE = (256ULL); /* FI_MR_BASIC: 2^37 */ -static const size_t FI_BGQ_TOTAL_BUFFERED_RECV = (512ULL); -static const uint64_t FI_BGQ_TX_SIZE = (16*1024); -static const uint64_t FI_BGQ_RX_SIZE = (16*1024); -static const uint64_t FI_BGQ_MR_KEY_SIZE = (2); -static const size_t FI_BGQ_REMOTE_CQ_DATA_SIZE= 4; -/* TODO: revisit these values, these are just placeholders now */ -/* -static const uint64_t FI_BGQ_CMD_SLOT_AVAIL_POLL= (1ULL<<10); -static const uint64_t FI_BGQ_LE_ME_COUNT = (1ULL<<8); -static const uint64_t FI_BGQ_UNEXPECTED_COUNT = (1ULL<<8); -static const uint64_t FI_BGQ_TRIG_OP_COUNT = (0); -static const uint64_t FI_BGQ_MAX_NUM_EP = (1ULL<<10); // TODO: is it needed? 
-static const size_t FI_BGQ_CACHE_LINE_SIZE = 128; -static const size_t FI_BGQ_DEFAULT_CQ_DEPTH = 32768; -*/ -static const uint64_t FI_BGQ_MEM_TAG_FORMAT = (0xFFFFFFFFFFFFFFFFULL); - -static const uint64_t FI_BGQ_DEFAULT_MSG_ORDER = (FI_ORDER_RAR | - FI_ORDER_RAW | - FI_ORDER_WAW | - FI_ORDER_WAS | - FI_ORDER_SAW | - FI_ORDER_SAS); - -static const uint64_t FI_BGQ_DEFAULT_CAPS = (FI_MSG | FI_RMA | FI_TAGGED | FI_ATOMIC | - FI_SEND | FI_RECV | FI_READ | FI_WRITE | FI_REMOTE_READ | FI_REMOTE_WRITE | - FI_NAMED_RX_CTX | FI_DIRECTED_RECV | - FI_MULTI_RECV | FI_SOURCE); - -#define FI_BGQ_FABRIC_DIRECT_PROGRESS @bgq_fabric_direct_progress@ -#define FI_BGQ_FABRIC_DIRECT_AV @bgq_fabric_direct_av@ -#define FI_BGQ_FABRIC_DIRECT_MR @bgq_fabric_direct_mr@ -#define FI_BGQ_FABRIC_DIRECT_THREAD @bgq_fabric_direct_thread@ - - -/* Macro indirection in order to support other macros as arguments - * C requires another indirection for expanding macros since - * operands of the token pasting operator are not expanded */ - -#define FI_BGQ_RMA_SPECIALIZED_FUNC(LOCK) \ - FI_BGQ_RMA_SPECIALIZED_FUNC_(LOCK) - -#define FI_BGQ_RMA_SPECIALIZED_FUNC_(LOCK) \ - static inline ssize_t \ - fi_bgq_writemsg_ ## LOCK \ - (struct fid_ep *ep, const struct fi_msg_rma *msg, \ - uint64_t flags) \ - { \ - return fi_bgq_writemsg_generic(ep, msg, flags, \ - LOCK); \ - } \ - static inline ssize_t \ - fi_bgq_writev_ ## LOCK \ - (struct fid_ep *ep, const struct iovec *iov, \ - void **desc, size_t count, fi_addr_t dest_addr, \ - uint64_t addr, uint64_t key, void *context) \ - { \ - return fi_bgq_writev_generic(ep, iov, desc, count, \ - dest_addr, addr, key, context, LOCK); \ - } \ - static inline ssize_t \ - fi_bgq_write_ ## LOCK \ - (struct fid_ep *ep, const void *buf, size_t len, \ - void *desc, fi_addr_t dst_addr, uint64_t addr, \ - uint64_t key, void *context) \ - { \ - return fi_bgq_write_generic(ep, buf, len, desc, \ - dst_addr, addr, key, context, LOCK); \ - } \ - static inline ssize_t \ - 
fi_bgq_inject_write_ ## LOCK \ - (struct fid_ep *ep, const void *buf, size_t len, \ - fi_addr_t dst_addr, uint64_t addr, \ - uint64_t key) \ - { \ - return fi_bgq_inject_write_generic(ep, buf, len, \ - dst_addr, addr, key, LOCK); \ - } \ - static inline ssize_t \ - fi_bgq_readmsg_ ## LOCK \ - (struct fid_ep *ep, const struct fi_msg_rma *msg, \ - uint64_t flags) \ - { \ - return fi_bgq_readmsg_generic(ep, msg, flags, \ - LOCK); \ - } \ - static inline ssize_t \ - fi_bgq_readv_ ## LOCK \ - (struct fid_ep *ep, const struct iovec *iov, \ - void **desc, size_t count, fi_addr_t src_addr, \ - uint64_t addr, uint64_t key, void *context) \ - { \ - return fi_bgq_writev_generic(ep, iov, desc, count, \ - src_addr, addr, key, context, LOCK); \ - } \ - static inline ssize_t \ - fi_bgq_read_ ## LOCK \ - (struct fid_ep *ep, void *buf, size_t len, \ - void *desc, fi_addr_t src_addr, uint64_t addr, \ - uint64_t key, void *context) \ - { \ - return fi_bgq_read_generic(ep, buf, len, desc, \ - src_addr, addr, key, context, LOCK); \ - } - -#define FI_BGQ_RMA_SPECIALIZED_FUNC_NAME(TYPE, LOCK) \ - FI_BGQ_RMA_SPECIALIZED_FUNC_NAME_(TYPE, LOCK) - -#define FI_BGQ_RMA_SPECIALIZED_FUNC_NAME_(TYPE, LOCK) \ - fi_bgq_ ## TYPE ## _ ## LOCK - - - -#define FI_BGQ_CQ_SPECIALIZED_FUNC(FORMAT, LOCK) \ - FI_BGQ_CQ_SPECIALIZED_FUNC_(FORMAT, LOCK) - -#define FI_BGQ_CQ_SPECIALIZED_FUNC_(FORMAT, LOCK) \ - static inline ssize_t \ - fi_bgq_cq_read_ ## FORMAT ## _ ## LOCK \ - (struct fid_cq *cq, void *buf, size_t count) \ - { \ - return fi_bgq_cq_read_generic(cq, buf, count, \ - FORMAT, LOCK); \ - } \ - static inline ssize_t \ - fi_bgq_cq_readfrom_ ## FORMAT ## _ ## LOCK \ - (struct fid_cq *cq, void *buf, size_t count, \ - fi_addr_t *src_addr) \ - { \ - return fi_bgq_cq_readfrom_generic(cq, buf, count, src_addr, \ - FORMAT, LOCK); \ - } \ - -#define FI_BGQ_CQ_SPECIALIZED_FUNC_NAME(TYPE, FORMAT, LOCK) \ - FI_BGQ_CQ_SPECIALIZED_FUNC_NAME_(TYPE, FORMAT, LOCK) - -#define FI_BGQ_CQ_SPECIALIZED_FUNC_NAME_(TYPE, 
FORMAT, LOCK) \ - fi_bgq_ ## TYPE ## _ ## FORMAT ## _ ## LOCK - - - - -#define FI_BGQ_ATOMIC_SPECIALIZED_FUNC(LOCK) \ - FI_BGQ_ATOMIC_SPECIALIZED_FUNC_(LOCK) - -#define FI_BGQ_ATOMIC_SPECIALIZED_FUNC_(LOCK) \ - static inline ssize_t \ - fi_bgq_atomic_ ## LOCK \ - (struct fid_ep *ep, const void *buf, size_t count, \ - void *desc, fi_addr_t dst_addr, uint64_t addr, \ - uint64_t key, enum fi_datatype datatype, \ - enum fi_op op, void *context) \ - { \ - return fi_bgq_atomic_generic(ep, buf, count, \ - dst_addr, addr, key, datatype, op, \ - context, LOCK); \ - } \ - static inline ssize_t \ - fi_bgq_inject_atomic_ ## LOCK \ - (struct fid_ep *ep, const void *buf, size_t count, \ - fi_addr_t dst_addr, uint64_t addr, \ - uint64_t key, enum fi_datatype datatype, \ - enum fi_op op) \ - { \ - return fi_bgq_inject_atomic_generic(ep, buf, count, \ - dst_addr, addr, key, datatype, op, \ - LOCK); \ - } \ - static inline ssize_t \ - fi_bgq_fetch_atomic_ ## LOCK \ - (struct fid_ep *ep, const void *buf, size_t count, \ - void *desc, void *result, void *result_desc, \ - fi_addr_t dest_addr, uint64_t addr, \ - uint64_t key, enum fi_datatype datatype, \ - enum fi_op op, void *context) \ - { \ - return fi_bgq_fetch_atomic_generic(ep, buf, count, desc,\ - result, result_desc, dest_addr, addr, \ - key, datatype, op, context, \ - LOCK); \ - } \ - static inline ssize_t \ - fi_bgq_compare_atomic_ ## LOCK \ - (struct fid_ep *ep, const void *buf, size_t count, \ - void *desc, const void *compare, \ - void *compare_desc, void *result, \ - void *result_desc, fi_addr_t dest_addr, \ - uint64_t addr, uint64_t key, \ - enum fi_datatype datatype, enum fi_op op, \ - void *context) \ - { \ - return fi_bgq_compare_atomic_generic(ep, buf, count, \ - desc, compare, compare_desc, result, \ - result_desc, dest_addr, addr, key, \ - datatype, op, context, LOCK); \ - } - -#define FI_BGQ_ATOMIC_SPECIALIZED_FUNC_NAME(TYPE, LOCK) \ - FI_BGQ_ATOMIC_SPECIALIZED_FUNC_NAME_(TYPE, LOCK) - -#define 
FI_BGQ_ATOMIC_SPECIALIZED_FUNC_NAME_(TYPE, LOCK) \ - fi_bgq_ ## TYPE ## _ ## LOCK - - -#define FI_BGQ_MSG_SPECIALIZED_FUNC(LOCK) \ - FI_BGQ_MSG_SPECIALIZED_FUNC_(LOCK) - -#define FI_BGQ_MSG_SPECIALIZED_FUNC_(LOCK) \ - static inline ssize_t \ - fi_bgq_send_ ## LOCK \ - (struct fid_ep *ep, const void *buf, size_t len, \ - void *desc, fi_addr_t dst_addr, void *context) \ - { \ - return fi_bgq_send_generic(ep, buf, len, desc, \ - dst_addr, 0, context, LOCK, 1); \ - } \ - static inline ssize_t \ - fi_bgq_recv_ ## LOCK \ - (struct fid_ep *ep, void *buf, size_t len, \ - void *desc, fi_addr_t src_addr, void *context) \ - { \ - return fi_bgq_recv_generic(ep, buf, len, desc, \ - src_addr, 0, (uint64_t)-1, context, LOCK, 1); \ - } \ - static inline ssize_t \ - fi_bgq_inject_ ## LOCK \ - (struct fid_ep *ep, const void *buf, size_t len, \ - fi_addr_t dst_addr) \ - { \ - return fi_bgq_inject(ep, buf, len, \ - dst_addr, LOCK); \ - } \ - static inline ssize_t \ - fi_bgq_recvmsg_ ## LOCK \ - (struct fid_ep *ep, const struct fi_msg *msg, \ - uint64_t flags) \ - { \ - return fi_bgq_recvmsg_generic(ep, msg, flags, \ - LOCK); \ - } \ - static inline ssize_t \ - fi_bgq_senddata_ ## LOCK \ - (struct fid_ep *ep, const void *buf, size_t len, \ - void *desc, uint64_t data, fi_addr_t dest_addr, \ - void *context) \ - { \ - return fi_bgq_senddata_generic(ep, buf, len, desc, data,\ - dest_addr, 0, context, LOCK, 1); \ - } \ - static inline ssize_t \ - fi_bgq_injectdata_ ## LOCK \ - (struct fid_ep *ep, const void *buf, size_t len, \ - uint64_t data, fi_addr_t dest_addr) \ - { \ - return fi_bgq_injectdata_generic(ep, buf, len, data, \ - dest_addr, 0, LOCK, 1); \ - } - -#define FI_BGQ_MSG_SPECIALIZED_FUNC_NAME(TYPE, LOCK) \ - FI_BGQ_MSG_SPECIALIZED_FUNC_NAME_(TYPE, LOCK) - -#define FI_BGQ_MSG_SPECIALIZED_FUNC_NAME_(TYPE, LOCK) \ - fi_bgq_ ## TYPE ## _ ## LOCK - - -#define FI_BGQ_TAGGED_SPECIALIZED_FUNC(LOCK) \ - FI_BGQ_TAGGED_SPECIALIZED_FUNC_(LOCK) - -#define 
FI_BGQ_TAGGED_SPECIALIZED_FUNC_(LOCK) \ - static inline ssize_t \ - fi_bgq_tsend_ ## LOCK \ - (struct fid_ep *ep, const void *buf, size_t len, \ - void *desc, fi_addr_t dst_addr, uint64_t tag, void *context) \ - { \ - return fi_bgq_send_generic(ep, buf, len, desc, \ - dst_addr, tag, context, LOCK, 0); \ - } \ - static inline ssize_t \ - fi_bgq_trecv_ ## LOCK \ - (struct fid_ep *ep, void *buf, size_t len, \ - void *desc, fi_addr_t src_addr, uint64_t tag, uint64_t ignore, void *context) \ - { \ - return fi_bgq_recv_generic(ep, buf, len, desc, \ - src_addr, tag, ignore, context, LOCK, 0); \ - } \ - static inline ssize_t \ - fi_bgq_tinject_ ## LOCK \ - (struct fid_ep *ep, const void *buf, size_t len, \ - fi_addr_t dst_addr, uint64_t tag) \ - { \ - return fi_bgq_tinject(ep, buf, len, \ - dst_addr, tag, LOCK); \ - } \ - static inline ssize_t \ - fi_bgq_tsenddata_ ## LOCK \ - (struct fid_ep *ep, const void *buf, size_t len, \ - void *desc, uint64_t data, fi_addr_t dest_addr, \ - uint64_t tag, void *context) \ - { \ - return fi_bgq_senddata_generic(ep, buf, len, desc, data,\ - dest_addr, tag, context, LOCK, 0); \ - } \ - static inline ssize_t \ - fi_bgq_tinjectdata_ ## LOCK \ - (struct fid_ep *ep, const void *buf, size_t len, \ - uint64_t data, fi_addr_t dest_addr, \ - uint64_t tag) \ - { \ - return fi_bgq_injectdata_generic(ep, buf, len, data, \ - dest_addr, tag, LOCK, 0); \ - } \ - static inline ssize_t \ - fi_bgq_trecvmsg_ ## LOCK \ - (struct fid_ep *ep, const struct fi_msg_tagged *msg, \ - uint64_t flags) \ - { \ - return fi_bgq_trecvmsg_generic(ep, msg, flags, \ - LOCK); \ - } - -#define FI_BGQ_TAGGED_SPECIALIZED_FUNC_NAME(TYPE, LOCK) \ - FI_BGQ_TAGGED_SPECIALIZED_FUNC_NAME_(TYPE, LOCK) - -#define FI_BGQ_TAGGED_SPECIALIZED_FUNC_NAME_(TYPE, LOCK) \ - fi_bgq_ ## TYPE ## _ ## LOCK - -#endif /* _FI_BGQ_DIRECT_H_ */ diff --git a/prov/bgq/include/rdma/fi_direct_atomic.h b/prov/bgq/include/rdma/fi_direct_atomic.h deleted file mode 100644 index f0a1aebcbdf..00000000000 --- 
a/prov/bgq/include/rdma/fi_direct_atomic.h +++ /dev/null @@ -1,997 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef _FI_BGQ_DIRECT_ATOMIC_H_ -#define _FI_BGQ_DIRECT_ATOMIC_H_ - -#define FABRIC_DIRECT_ATOMIC 1 - -#include "rdma/bgq/fi_bgq_compiler.h" -#include "rdma/bgq/fi_bgq_spi.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#if 0 - -#define FI_BGQ_DATATYPES \ - sizeof(int8_t), \ - sizeof(uint8_t), \ - sizeof(int16_t), \ - sizeof(uint16_t), \ - sizeof(int32_t), \ - sizeof(uint32_t), \ - sizeof(int64_t), \ - sizeof(uint64_t), \ - sizeof(float), \ - sizeof(double), \ - sizeof(float complex), \ - sizeof(double complex), \ - sizeof(long double), \ - sizeof(long double complex), - -#ifdef __cplusplus -struct __fi_bgq_datatype{ - static const size_t size(int index){ - static size_t __fi_bgq_datatype_size[] = - { - FI_BGQ_DATATYPES - }; - return __fi_bgq_datatype_size[index]; - } -}; -#else -static size_t __fi_bgq_datatype_size[] = -{ - FI_BGQ_DATATYPES -}; -#endif - -/* - * Warning: bogus datatype will result in out of bounds array access. - * Use with caution. - */ -static inline size_t fi_bgq_datatype_size_unsafe(enum fi_datatype dt) -{ -#ifdef __cplusplus - return __fi_bgq_datatype::size(dt); -#else - return __fi_bgq_datatype_size[dt]; -#endif -} - -static inline size_t fi_bgq_datatype_size(enum fi_datatype dt) -{ - return - (((int)dt) < 0 || dt >= FI_DATATYPE_LAST) - ? 
0 - : fi_bgq_datatype_size_unsafe(dt); -} -#endif - - - -static inline int fi_bgq_check_atomic(struct fi_bgq_ep *bgq_ep, - enum fi_av_type av_type, enum fi_datatype dt, enum fi_op op, - size_t count) -{ -#ifdef DEBUG - switch((int)op) { - case FI_MIN: - case FI_MAX: - case FI_SUM: - case FI_PROD: - case FI_LOR: - case FI_LAND: - case FI_BOR: - case FI_BAND: - case FI_LXOR: - case FI_ATOMIC_READ: - case FI_ATOMIC_WRITE: - case FI_CSWAP: - case FI_CSWAP_NE: - case FI_CSWAP_LE: - case FI_CSWAP_LT: - case FI_CSWAP_GE: - case FI_CSWAP_GT: - case FI_MSWAP: - break; - default: - return -FI_EINVAL; - } - if (((int) dt >= FI_DATATYPE_LAST) || ((int) dt < 0)) - return -FI_EINVAL; - - if (!bgq_ep) - return -FI_EINVAL; - if (bgq_ep->state != FI_BGQ_EP_ENABLED) - return -FI_EINVAL; - - if (count == 0) - return -FI_EINVAL; - - if (av_type == FI_AV_UNSPEC) - return -FI_EINVAL; - if (av_type == FI_AV_MAP && bgq_ep->av_type != FI_AV_MAP) - return -FI_EINVAL; - if (av_type == FI_AV_TABLE && bgq_ep->av_type != FI_AV_TABLE) - return -FI_EINVAL; -#endif - return 0; -} - -static inline size_t sizeofdt(const enum fi_datatype datatype) { - - static const size_t sizeofdt[FI_DATATYPE_LAST] = { - sizeof(int8_t), /* FI_INT8 */ - sizeof(uint8_t), /* FI_UINT8 */ - sizeof(int16_t), /* FI_INT16 */ - sizeof(uint16_t), /* FI_UINT16 */ - sizeof(int32_t), /* FI_INT32 */ - sizeof(uint32_t), /* FI_UINT32 */ - sizeof(int64_t), /* FI_INT64 */ - sizeof(uint64_t), /* FI_UINT64 */ - sizeof(float), /* FI_FLOAT */ - sizeof(double), /* FI_DOUBLE */ - sizeof(complex float), /* FI_FLOAT_COMPLEX */ - sizeof(complex double), /* FI_DOUBLE_COMPLEX */ - sizeof(long double), /* FI_LONG_DOUBLE */ - sizeof(complex long double) /* FI_LONG_DOUBLE_COMPLEX */ - }; - - return sizeofdt[datatype]; -} - -static inline size_t maxcount (const enum fi_datatype datatype, - const unsigned is_compare, - const unsigned is_fetch) { - -#define INIT_MAXCOUNT_ARRAY(maxbytes) \ - maxbytes / sizeof(int8_t), /* FI_INT8 */ \ - maxbytes / 
sizeof(uint8_t), /* FI_UINT8 */ \ - maxbytes / sizeof(int16_t), /* FI_INT16 */ \ - maxbytes / sizeof(uint16_t), /* FI_UINT16 */ \ - maxbytes / sizeof(int32_t), /* FI_INT32 */ \ - maxbytes / sizeof(uint32_t), /* FI_UINT32 */ \ - maxbytes / sizeof(int64_t), /* FI_INT64 */ \ - maxbytes / sizeof(uint64_t), /* FI_UINT64 */ \ - maxbytes / sizeof(float), /* FI_FLOAT */ \ - maxbytes / sizeof(double), /* FI_DOUBLE */ \ - maxbytes / sizeof(complex float), /* FI_FLOAT_COMPLEX */ \ - maxbytes / sizeof(complex double), /* FI_DOUBLE_COMPLEX */ \ - maxbytes / sizeof(long double), /* FI_LONG_DOUBLE */ \ - maxbytes / sizeof(complex long double) /* FI_LONG_DOUBLE_COMPLEX */ - - static const size_t maxcount[2][2][FI_DATATYPE_LAST] = { - { - { /* !compare, !fetch */ - INIT_MAXCOUNT_ARRAY(512) - }, - { /* !compare, fetch */ - INIT_MAXCOUNT_ARRAY((512-sizeof(struct fi_bgq_mu_fetch_metadata))) - } - }, - { - { /* compare, !fetch */ - INIT_MAXCOUNT_ARRAY(256) - }, - { /* compare, fetch */ - INIT_MAXCOUNT_ARRAY((256-sizeof(struct fi_bgq_mu_fetch_metadata))) - } - } - }; - -#undef INIT_MAXCOUNT_ARRAY - - return maxcount[is_compare][is_fetch][datatype]; -} - -static inline void fi_bgq_atomic_fence (struct fi_bgq_ep * bgq_ep, - const uint64_t tx_op_flags, - const union fi_bgq_addr * bgq_dst_addr, - union fi_bgq_context * bgq_context, - const int lock_required) -{ - const uint64_t do_cq = ((tx_op_flags & FI_COMPLETION) == FI_COMPLETION); - - struct fi_bgq_cntr * write_cntr = bgq_ep->write_cntr; - const uint64_t do_cntr = (write_cntr != 0); - - assert(do_cq || do_cntr); - - MUHWI_Descriptor_t * model = &bgq_ep->tx.atomic.emulation.fence.mfifo_model; - - MUHWI_Descriptor_t * desc = - fi_bgq_spi_injfifo_tail_wait(&bgq_ep->tx.injfifo); - - qpx_memcpy64((void*)desc, (const void*)model); - - /* set the destination torus address and fifo map */ - desc->PacketHeader.NetworkHeader.pt2pt.Destination = fi_bgq_uid_get_destination(bgq_dst_addr->uid.fi); - - const uint64_t fifo_map = (uint64_t) 
fi_bgq_addr_get_fifo_map(bgq_dst_addr->fi); - desc->Torus_FIFO_Map = fifo_map; - - desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id = - fi_bgq_addr_rec_fifo_id(bgq_dst_addr->fi); - - /* locate the payload lookaside slot */ - void * payload = - fi_bgq_spi_injfifo_immediate_payload(&bgq_ep->tx.injfifo, - desc, &desc->Pa_Payload); - - if (do_cntr && !do_cq) { /* likely */ - - /* increment the origin fi_cntr value */ - - /* copy the 'fi_atomic' counter completion descriptor - * model into the payload lookaside slot */ - model = &bgq_ep->tx.atomic.emulation.fence.cntr_model; - MUHWI_Descriptor_t * cntr_desc = (MUHWI_Descriptor_t *) payload; - qpx_memcpy64((void*)cntr_desc, (const void*)model); - - cntr_desc->Torus_FIFO_Map = fifo_map; - - MUSPI_SetRecPayloadBaseAddressInfo(cntr_desc, write_cntr->std.batid, - MUSPI_GetAtomicAddress(0, MUHWI_ATOMIC_OPCODE_STORE_ADD)); /* TODO - init */ - - } else if (do_cq) { - - /* add the cq byte counter decrement direct-put - * descriptor to the tail of the rget/mfifo payload */ - - /* initialize the completion entry */ - assert(bgq_context); - assert(((uintptr_t)bgq_context & 0x07ull) == 0); /* must be 8 byte aligned */ - bgq_context->flags = FI_RMA | FI_READ; - bgq_context->len = 0; - bgq_context->buf = NULL; - bgq_context->byte_counter = 1; - bgq_context->tag = 0; - - uint64_t byte_counter_paddr = 0; - uint32_t cnk_rc __attribute__ ((unused)); - cnk_rc = fi_bgq_cnk_vaddr2paddr((void*)&bgq_context->byte_counter, - sizeof(uint64_t), &byte_counter_paddr); - assert(cnk_rc == 0); - - /* copy the 'fi_atomic' cq completion descriptor - * model into the payload lookaside slot */ - model = &bgq_ep->tx.atomic.emulation.fence.cq_model; - MUHWI_Descriptor_t * cq_desc = (MUHWI_Descriptor_t *) payload; - qpx_memcpy64((void*)cq_desc, (const void*)model); - - cq_desc->Torus_FIFO_Map = fifo_map; - - MUSPI_SetRecPayloadBaseAddressInfo(cq_desc, - FI_BGQ_MU_BAT_ID_GLOBAL, byte_counter_paddr); - - 
fi_bgq_cq_enqueue_pending(bgq_ep->send_cq, bgq_context, lock_required); - - if (do_cntr) { - - /* increment the origin fi_cntr value */ - - /* copy the 'fi_atomic' counter completion descriptor - * model into the payload lookaside slot */ - model = &bgq_ep->tx.atomic.emulation.fence.cntr_model; - MUHWI_Descriptor_t * cntr_desc = &(((MUHWI_Descriptor_t *) payload)[1]); - qpx_memcpy64((void*)cntr_desc, (const void*)model); - - cntr_desc->Torus_FIFO_Map = fifo_map; - - MUSPI_SetRecPayloadBaseAddressInfo(cntr_desc, write_cntr->std.batid, - MUSPI_GetAtomicAddress(0, MUHWI_ATOMIC_OPCODE_STORE_ADD)); /* TODO - init */ - - desc->Message_Length += sizeof(MUHWI_Descriptor_t); - union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader; - hdr->rma.ndesc += 1; - } - - } else { /* !do_cntr && !do_cq */ - - assert(0); - - } - - MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo); -} - -static inline size_t fi_bgq_atomic_internal(struct fi_bgq_ep *bgq_ep, - const void *buf, size_t count, union fi_bgq_addr *bgq_dst_addr, - uint64_t addr, uint64_t key, enum fi_datatype datatype, - enum fi_op op, void *context, - const unsigned is_fetch, const void * fetch_vaddr, - const unsigned is_compare, const void * compare_vaddr, - const uint64_t tx_op_flags, const int lock_required, - const uint64_t enable_cntr, const uint64_t enable_cq, - const unsigned is_inject) -{ - assert((is_fetch==0)||(is_fetch==1)); - assert((is_compare==0)||(is_compare==1)); - - const uint64_t do_cq = enable_cq && ((tx_op_flags & FI_COMPLETION) == FI_COMPLETION); - struct fi_bgq_cntr * write_cntr = bgq_ep->tx.write_cntr; - const uint64_t do_cntr = enable_cntr && (write_cntr != 0); - - MUHWI_Descriptor_t * desc = - fi_bgq_spi_injfifo_tail_wait(&bgq_ep->tx.injfifo); - - qpx_memcpy64((void*)desc, (const void*)&bgq_ep->tx.atomic.emulation.mfifo_model); - - /* set the destination torus address and fifo map */ - desc->PacketHeader.NetworkHeader.pt2pt.Destination = 
fi_bgq_uid_get_destination(bgq_dst_addr->uid.fi); - const uint64_t fifo_map = (uint64_t) fi_bgq_addr_get_fifo_map(bgq_dst_addr->fi); - desc->Torus_FIFO_Map = fifo_map; - - desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id = - fi_bgq_addr_rec_fifo_id(bgq_dst_addr->fi); - - const size_t max_count = maxcount(datatype, is_compare, is_fetch); - const size_t xfer_count = MIN(max_count,count); - const uint32_t nbytes = (uint32_t)(sizeofdt(datatype) * xfer_count); - - union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader; - hdr->atomic.dt = datatype; - hdr->atomic.op = op; - hdr->atomic.do_cntr = do_cntr; - hdr->atomic.cntr_bat_id = do_cntr ? write_cntr->std.batid : -1; - hdr->atomic.nbytes_minus_1 = nbytes - 1; - hdr->atomic.key = (uint16_t)key; - hdr->atomic.offset = addr; - hdr->atomic.is_local = fi_bgq_addr_is_local(bgq_dst_addr->fi); - - hdr->atomic.is_fetch = is_fetch; - - - if (is_inject) { /* const expression with cause branch to compile out */ - - /* locate the payload lookaside slot */ - void * payload = - fi_bgq_spi_injfifo_immediate_payload(&bgq_ep->tx.injfifo, - desc, &desc->Pa_Payload); - - desc->Message_Length = nbytes; - - if (buf) memcpy((void *)payload, (const void *)buf, nbytes); - - } else if (!is_fetch && !is_compare) { /* const expression with cause branch to compile out */ - - desc->Message_Length = nbytes; - fi_bgq_cnk_vaddr2paddr(buf, nbytes, &desc->Pa_Payload); - - assert(!do_cq); - - } else { - - /* locate the payload lookaside slot */ - union fi_bgq_mu_packet_payload * payload = - (union fi_bgq_mu_packet_payload *)fi_bgq_spi_injfifo_immediate_payload(&bgq_ep->tx.injfifo, - desc, &desc->Pa_Payload); - - /* initialize the atomic operation metadata in the packet payload */ - payload->atomic_fetch.metadata.fifo_map = fifo_map; - payload->atomic_fetch.metadata.cq_paddr = 0; - - if (is_fetch) { - fi_bgq_cnk_vaddr2paddr(fetch_vaddr, nbytes, - &payload->atomic_fetch.metadata.dst_paddr); - - /* copy 
the origin (source) data into the injection lookaside buffer */ - if (buf) memcpy((void*)&payload->atomic_fetch.data[0], (const void*) buf, nbytes); - desc->Message_Length = sizeof(struct fi_bgq_mu_fetch_metadata) + - nbytes + nbytes * is_compare; - - if (is_compare) { - /* copy the origin (compare) data into the injection lookaside buffer */ - memcpy((void*)&payload->atomic_fetch.data[nbytes], compare_vaddr, nbytes); - } - - if (do_cq) { - - /* initialize the completion entry */ - assert(context); - assert(((uintptr_t)context & 0x07ull) == 0); /* must be 8 byte aligned */ - union fi_bgq_context * bgq_context = (union fi_bgq_context *)context; - bgq_context->flags = 0; /* TODO */ - bgq_context->len = nbytes; - bgq_context->buf = NULL; - bgq_context->byte_counter = nbytes; - bgq_context->tag = 0; - - fi_bgq_cnk_vaddr2paddr((const void*)&bgq_context->byte_counter, - sizeof(uint64_t), &payload->atomic_fetch.metadata.cq_paddr); - - fi_bgq_cq_enqueue_pending(bgq_ep->tx.send_cq, bgq_context, lock_required); - } - - } else { - assert(0); /* !fetch, compare */ - } - } - - MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo); - - return xfer_count; -} - - -static inline ssize_t fi_bgq_atomic_generic(struct fid_ep *ep, - const void *buf, size_t count, - fi_addr_t dst_addr, uint64_t addr, - uint64_t key, enum fi_datatype datatype, - enum fi_op op, void* context, - const int lock_required) -{ - int ret; - struct fi_bgq_ep *bgq_ep; - - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - /* TODO - if this is a FI_CLASS_STX_CTX, then the lock is required */ - ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - size_t xfer __attribute__ ((unused)); - xfer = fi_bgq_atomic_internal(bgq_ep, buf, count, - (union fi_bgq_addr *)&dst_addr, addr, key, datatype, op, - context, 0, NULL, 0, NULL, - bgq_ep->tx.op_flags, lock_required, 0, 0, 0); - assert(xfer == count); - - /* TODO - if this is a FI_CLASS_STX_CTX, then the lock is required */ - 
ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - return 0; -} - -static inline ssize_t fi_bgq_atomic_writemsg_generic(struct fid_ep *ep, - const struct fi_msg_atomic *msg, const uint64_t flags, - const int lock_required) -{ - int ret; - struct fi_bgq_ep *bgq_ep; - - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - const enum fi_datatype datatype = msg->datatype; - const enum fi_op op = msg->op; - - ret = fi_bgq_check_atomic(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV, datatype, op, 1); - if (ret) return ret; - - ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - union fi_bgq_addr * bgq_dst_addr = (union fi_bgq_addr *)&msg->addr; - - const size_t dtsize = sizeofdt(datatype); - - size_t rma_iov_index = 0; - const size_t rma_iov_count = msg->rma_iov_count; - uint64_t rma_iov_dtcount = msg->rma_iov[rma_iov_index].count; - uint64_t rma_iov_addr = msg->rma_iov[rma_iov_index].addr; - uint64_t rma_iov_key = msg->rma_iov[rma_iov_index].key; - - size_t msg_iov_index = 0; - const size_t msg_iov_count = msg->iov_count; - uint64_t msg_iov_dtcount = msg->msg_iov[msg_iov_index].count; - uintptr_t msg_iov_vaddr = (uintptr_t)msg->msg_iov[msg_iov_index].addr; - - while (msg_iov_dtcount != 0 && rma_iov_dtcount != 0) { - - const size_t count_requested = MIN(msg_iov_dtcount,rma_iov_dtcount); - - const size_t count_transfered = - fi_bgq_atomic_internal(bgq_ep, (void*)msg_iov_vaddr, - count_requested, bgq_dst_addr, rma_iov_addr, - rma_iov_key, datatype, op, NULL, - 0, NULL, 0, NULL, flags, lock_required, 0, 0, 0); - - const size_t bytes_transfered = dtsize * count_transfered; - - msg_iov_dtcount -= count_transfered; - msg_iov_vaddr += bytes_transfered; - - if ((msg_iov_dtcount == 0) && ((msg_iov_index+1) < msg_iov_count)) { - ++msg_iov_index; - msg_iov_dtcount = msg->msg_iov[msg_iov_index].count; - msg_iov_vaddr = (uintptr_t)msg->msg_iov[msg_iov_index].addr; - } - - rma_iov_dtcount -= count_transfered; - 
rma_iov_addr += bytes_transfered; - - if ((rma_iov_dtcount == 0) && ((rma_iov_index+1) < rma_iov_count)) { - ++rma_iov_index; - rma_iov_dtcount = msg->rma_iov[rma_iov_index].count; - rma_iov_addr = msg->rma_iov[rma_iov_index].addr; - rma_iov_key = msg->rma_iov[rma_iov_index].key; - } - } - - fi_bgq_atomic_fence(bgq_ep, flags, bgq_dst_addr, - (union fi_bgq_context *)msg->context, - lock_required); - - ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - return 0; -} - - - -static inline ssize_t fi_bgq_atomic_readwritemsg_generic (struct fid_ep *ep, - const struct fi_msg_atomic *msg, - struct fi_ioc *resultv, - const size_t result_count, - const uint64_t flags, - const int lock_required) -{ - int ret; - struct fi_bgq_ep *bgq_ep; - - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - const enum fi_datatype datatype = msg->datatype; - const enum fi_op op = msg->op; - - ret = fi_bgq_check_atomic(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV, datatype, op, 1); - if (ret) return ret; - - ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - union fi_bgq_addr * bgq_dst_addr = (union fi_bgq_addr *)&msg->addr; - - const size_t dtsize = sizeofdt(datatype); - - size_t rma_iov_index = 0; - const size_t rma_iov_count = msg->rma_iov_count; - uint64_t rma_iov_dtcount = msg->rma_iov[rma_iov_index].count; - uint64_t rma_iov_addr = msg->rma_iov[rma_iov_index].addr; - uint64_t rma_iov_key = msg->rma_iov[rma_iov_index].key; - - size_t rst_iov_index = 0; - const size_t rst_iov_count = result_count; - uint64_t rst_iov_dtcount = resultv[rst_iov_index].count; - uintptr_t rst_iov_vaddr = (uintptr_t)resultv[rst_iov_index].addr; - - if (op != FI_ATOMIC_READ) { /* likely */ - - size_t msg_iov_index = 0; - const size_t msg_iov_count = msg->iov_count; - uint64_t msg_iov_dtcount = msg->msg_iov[msg_iov_index].count; - uintptr_t msg_iov_vaddr = (uintptr_t)msg->msg_iov[msg_iov_index].addr; - - size_t count_requested = 
MIN3(msg_iov_dtcount, rma_iov_dtcount, rst_iov_dtcount); - - while (count_requested > 0) { - - const size_t count_transfered = - fi_bgq_atomic_internal(bgq_ep, (void*)msg_iov_vaddr, - count_requested, bgq_dst_addr, rma_iov_addr, - rma_iov_key, datatype, op, NULL, - 1, (const void *)rst_iov_vaddr, 0, NULL, - flags, lock_required, 0, 0, 0); - - const size_t bytes_transfered = dtsize * count_transfered; - - msg_iov_dtcount -= count_transfered; - msg_iov_vaddr += bytes_transfered; - - if ((msg_iov_dtcount == 0) && ((msg_iov_index+1) < msg_iov_count)) { - ++msg_iov_index; - msg_iov_dtcount = msg->msg_iov[msg_iov_index].count; - msg_iov_vaddr = (uintptr_t)msg->msg_iov[msg_iov_index].addr; - } - - rma_iov_dtcount -= count_transfered; - rma_iov_addr += bytes_transfered; - - if ((rma_iov_dtcount == 0) && ((rma_iov_index+1) < rma_iov_count)) { - ++rma_iov_index; - rma_iov_dtcount = msg->rma_iov[rma_iov_index].count; - rma_iov_addr = msg->rma_iov[rma_iov_index].addr; - rma_iov_key = msg->rma_iov[rma_iov_index].key; - } - - rst_iov_dtcount -= count_transfered; - rst_iov_vaddr += bytes_transfered; - - if ((rst_iov_dtcount == 0) && ((rst_iov_index+1) < rst_iov_count)) { - ++rst_iov_index; - rst_iov_dtcount = resultv[rst_iov_index].count; - rst_iov_vaddr = (uintptr_t)resultv[rst_iov_index].addr; - } - - count_requested = MIN3(msg_iov_dtcount, rma_iov_dtcount, rst_iov_dtcount); - } - - } else { - - size_t count_requested = MIN(rma_iov_dtcount, rst_iov_dtcount); - - while (rma_iov_dtcount != 0 && rst_iov_dtcount != 0) { - - const size_t count_transfered = - fi_bgq_atomic_internal(bgq_ep, NULL, - count_requested, bgq_dst_addr, rma_iov_addr, - rma_iov_key, datatype, op, NULL, - 1, (const void *)rst_iov_vaddr, 0, NULL, - flags, lock_required, 0, 0, 0); - - const size_t bytes_transfered = dtsize * count_transfered; - - rma_iov_dtcount -= count_transfered; - rma_iov_addr += bytes_transfered; - - if ((rma_iov_dtcount == 0) && ((rma_iov_index+1) < rma_iov_count)) { - ++rma_iov_index; - 
rma_iov_dtcount = msg->rma_iov[rma_iov_index].count; - rma_iov_addr = msg->rma_iov[rma_iov_index].addr; - rma_iov_key = msg->rma_iov[rma_iov_index].key; - } - - rst_iov_dtcount -= count_transfered; - rst_iov_vaddr += bytes_transfered; - - if ((rst_iov_dtcount == 0) && ((rst_iov_index+1) < rst_iov_count)) { - ++rst_iov_index; - rst_iov_dtcount = resultv[rst_iov_index].count; - rst_iov_vaddr = (uintptr_t)resultv[rst_iov_index].addr; - } - - count_requested = MIN(rma_iov_dtcount, rst_iov_dtcount); - } - } - - fi_bgq_atomic_fence(bgq_ep, flags, bgq_dst_addr, - (union fi_bgq_context *)msg->context, - lock_required); - - ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - return 0; -} - -static inline ssize_t fi_bgq_atomic_compwritemsg_generic (struct fid_ep *ep, - const struct fi_msg_atomic *msg, - const struct fi_ioc *comparev, - size_t compare_count, - struct fi_ioc *resultv, - size_t result_count, - uint64_t flags, - const int lock_required) -{ - int ret; - struct fi_bgq_ep *bgq_ep; - - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - const enum fi_datatype datatype = msg->datatype; - const enum fi_op op = msg->op; - - ret = fi_bgq_check_atomic(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV, datatype, op, 1); - if (ret) return ret; - - ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - union fi_bgq_addr * bgq_dst_addr = (union fi_bgq_addr *)&msg->addr; - - const size_t dtsize = sizeofdt(datatype); - - size_t rma_iov_index = 0; - const size_t rma_iov_count = msg->rma_iov_count; - uint64_t rma_iov_dtcount = msg->rma_iov[rma_iov_index].count; - uint64_t rma_iov_addr = msg->rma_iov[rma_iov_index].addr; - uint64_t rma_iov_key = msg->rma_iov[rma_iov_index].key; - - size_t msg_iov_index = 0; - const size_t msg_iov_count = msg->iov_count; - uint64_t msg_iov_dtcount = msg->msg_iov[msg_iov_index].count; - uintptr_t msg_iov_vaddr = (uintptr_t)msg->msg_iov[msg_iov_index].addr; - - size_t rst_iov_index = 0; - 
const size_t rst_iov_count = result_count; - uint64_t rst_iov_dtcount = resultv[rst_iov_index].count; - uintptr_t rst_iov_vaddr = (uintptr_t)resultv[rst_iov_index].addr; - - size_t cmp_iov_index = 0; - const size_t cmp_iov_count = compare_count; - uint64_t cmp_iov_dtcount = comparev[cmp_iov_index].count; - uintptr_t cmp_iov_vaddr = (uintptr_t)comparev[cmp_iov_index].addr; - - while (msg_iov_dtcount != 0 && rma_iov_dtcount != 0 && rst_iov_dtcount != 0 && cmp_iov_dtcount != 0) { - - const size_t count_requested = - MIN4(msg_iov_dtcount,rma_iov_dtcount,rst_iov_dtcount,cmp_iov_dtcount); - - const size_t count_transfered = - fi_bgq_atomic_internal(bgq_ep, (void*)msg_iov_vaddr, - count_requested, bgq_dst_addr, rma_iov_addr, - rma_iov_key, datatype, op, NULL, - 1, (const void *)rst_iov_vaddr, 1, (const void *)cmp_iov_vaddr, - flags, lock_required, 0, 0, 0); - - const size_t bytes_transfered = dtsize * count_transfered; - - msg_iov_dtcount -= count_transfered; - msg_iov_vaddr += bytes_transfered; - - if ((msg_iov_dtcount == 0) && ((msg_iov_index+1) < msg_iov_count)) { - ++msg_iov_index; - msg_iov_dtcount = msg->msg_iov[msg_iov_index].count; - msg_iov_vaddr = (uintptr_t)msg->msg_iov[msg_iov_index].addr; - } - - rma_iov_dtcount -= count_transfered; - rma_iov_addr += bytes_transfered; - - if ((rma_iov_dtcount == 0) && ((rma_iov_index+1) < rma_iov_count)) { - ++rma_iov_index; - rma_iov_dtcount = msg->rma_iov[rma_iov_index].count; - rma_iov_addr = msg->rma_iov[rma_iov_index].addr; - rma_iov_key = msg->rma_iov[rma_iov_index].key; - } - - rst_iov_dtcount -= count_transfered; - rst_iov_vaddr += bytes_transfered; - - if ((rst_iov_dtcount == 0) && ((rst_iov_index+1) < rst_iov_count)) { - ++rst_iov_index; - rst_iov_dtcount = resultv[rst_iov_index].count; - rst_iov_vaddr = (uintptr_t)resultv[rst_iov_index].addr; - } - - cmp_iov_dtcount -= count_transfered; - cmp_iov_vaddr += bytes_transfered; - - if ((cmp_iov_dtcount == 0) && ((cmp_iov_index+1) < cmp_iov_count)) { - ++cmp_iov_index; - 
cmp_iov_dtcount = comparev[cmp_iov_index].count; - cmp_iov_vaddr = (uintptr_t)comparev[cmp_iov_index].addr; - } - } - - fi_bgq_atomic_fence(bgq_ep, flags, bgq_dst_addr, - (union fi_bgq_context *)msg->context, - lock_required); - - ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - return 0; -} - -/* - * Generic function to handle both fetching (1 operand) and compare - * (2 operand) atomics. - */ -static inline ssize_t fi_bgq_fetch_compare_atomic_generic(struct fid_ep *ep, - const void *buf, size_t count, - void *desc, - const void *compare, void *compare_desc, - void *result, void *result_desc, - fi_addr_t dest_addr, uint64_t addr, - uint64_t key, enum fi_datatype datatype, - enum fi_op op, void *context, - int lock_required) -{ - int ret; - struct fi_bgq_ep *bgq_ep; -/* MPICH does NOT call fi_fetch_atomic or fi_compare_atomic so these functions - * have not been properly tested - for now just assert 0 and come back later - * and implement if an application on BGQ needs this. 
- */ - assert(0); - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - ret = fi_bgq_check_atomic(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV, datatype, op, count); - if (ret) - return ret; - - ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required); - if (ret) - return ret; - - ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required); - if (ret) - return ret; - - return 0; - -} - -static inline ssize_t fi_bgq_fetch_atomic_generic(struct fid_ep *ep, - const void *buf, size_t count, - void *desc, - void *result, void *result_desc, - fi_addr_t dest_addr, uint64_t addr, - uint64_t key, enum fi_datatype datatype, - enum fi_op op, void *context, - int lock_required) -{ - - - - return fi_bgq_fetch_compare_atomic_generic(ep, - buf, count, desc, NULL, NULL, - result, result_desc, dest_addr, addr, - key, datatype, op, context, - lock_required); -} - -static inline ssize_t fi_bgq_compare_atomic_generic(struct fid_ep *ep, - const void *buf, size_t count, void *desc, - const void *compare, void *compare_desc, - void *result, void *result_desc, - fi_addr_t dest_addr, uint64_t addr, - uint64_t key, enum fi_datatype datatype, - enum fi_op op, void *context, - int lock_required) -{ - return fi_bgq_fetch_compare_atomic_generic(ep, - buf, count, desc, compare, compare_desc, - result, result_desc, dest_addr, addr, - key, datatype, op, context, - lock_required); -} - -static inline ssize_t fi_bgq_inject_atomic_generic(struct fid_ep *ep, - const void *buf, size_t count, - fi_addr_t dest_addr, uint64_t addr, uint64_t key, - enum fi_datatype datatype, enum fi_op op, - int lock_required) -{ - int ret = 0; - struct fi_bgq_ep *bgq_ep; - - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - ret = fi_bgq_check_atomic(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV, datatype, op, count); - if (ret) - return ret; - - ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required); - if (ret) - return ret; - - fi_bgq_atomic_internal(bgq_ep, buf, count, - (union fi_bgq_addr *)&dest_addr, addr, key, datatype, 
op, - NULL, 0, NULL, 0, NULL, - bgq_ep->tx.op_flags, lock_required, 1, 0, 1); - - ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required); - if (ret) - return ret; - - return 0; -} - -/* Declare specialized functions that qualify for FABRIC_DIRECT. - * - No locks - */ - -#define FI_BGQ_ATOMIC_FABRIC_DIRECT_LOCK 0 - -FI_BGQ_ATOMIC_SPECIALIZED_FUNC(FI_BGQ_ATOMIC_FABRIC_DIRECT_LOCK) - -#ifdef FABRIC_DIRECT -#define fi_atomic(ep, buf, count, desc, dest_addr, \ - addr, key, datatype, op, context) \ - (FI_BGQ_ATOMIC_SPECIALIZED_FUNC_NAME(atomic, \ - FI_BGQ_ATOMIC_FABRIC_DIRECT_LOCK) \ - (ep, buf, count, desc, dest_addr, addr, key, \ - datatype, op, context)) - -#define fi_inject_atomic(ep, buf, count, dest_addr, addr, key, \ - datatype, op) \ - (FI_BGQ_ATOMIC_SPECIALIZED_FUNC_NAME(inject_atomic, \ - FI_BGQ_ATOMIC_FABRIC_DIRECT_LOCK) \ - (ep, buf, count, dest_addr, addr, key, datatype, op)) - -#define fi_fetch_atomic(ep, buf, count, desc, result, result_desc, \ - dest_addr, addr, key, datatype, op, context) \ - (FI_BGQ_ATOMIC_SPECIALIZED_FUNC_NAME(fetch_atomic, \ - FI_BGQ_ATOMIC_FABRIC_DIRECT_LOCK) \ - (ep, buf, count, desc, result, result_desc, \ - dest_addr, addr, key, datatype, op, context)) - -#define fi_compare_atomic(ep, buf, count, desc, compare, compare_desc, \ - result, result_desc, dest_addr, addr, key, datatype, \ - op, context) \ - (FI_BGQ_ATOMIC_SPECIALIZED_FUNC_NAME(compare_atomic, \ - FI_BGQ_ATOMIC_FABRIC_DIRECT_LOCK) \ - (ep, buf, count, desc, compare, compare_desc, \ - result, result_desc, dest_addr, addr, key, \ - datatype, op, context)) - -static inline int -fi_atomicvalid(struct fid_ep *ep, - enum fi_datatype datatype, enum fi_op op, size_t *count) -{ - return ep->atomic->writevalid(ep, datatype, op, count); -} - -static inline int -fi_fetch_atomicvalid(struct fid_ep *ep, - enum fi_datatype datatype, enum fi_op op, size_t *count) -{ - return ep->atomic->readwritevalid(ep, datatype, op, count); -} - -static inline int 
-fi_compare_atomicvalid(struct fid_ep *ep, - enum fi_datatype datatype, enum fi_op op, size_t *count) -{ - return ep->atomic->compwritevalid(ep, datatype, op, count); -} - -static inline ssize_t -fi_atomicmsg(struct fid_ep *ep, - const struct fi_msg_atomic *msg, uint64_t flags) -{ - return ep->atomic->writemsg(ep, msg, flags); -} - -static inline ssize_t -fi_fetch_atomicmsg(struct fid_ep *ep, - const struct fi_msg_atomic *msg, - struct fi_ioc *resultv, void **result_desc, size_t result_count, - uint64_t flags) -{ - return ep->atomic->readwritemsg(ep, msg, resultv, result_desc, - result_count, flags); -} - -static inline ssize_t -fi_compare_atomicmsg(struct fid_ep *ep, const struct fi_msg_atomic *msg, - const struct fi_ioc *comparev, void **compare_desc, - size_t compare_count, struct fi_ioc *resultv, - void **result_desc, size_t result_count, uint64_t flags) -{ - return ep->atomic->compwritemsg(ep, msg, comparev, compare_desc, - compare_count, resultv, result_desc, result_count, flags); -} - -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* _FI_BGQ_DIRECT_ATOMIC_H_ */ diff --git a/prov/bgq/include/rdma/fi_direct_atomic_def.h b/prov/bgq/include/rdma/fi_direct_atomic_def.h deleted file mode 100644 index c6dd11f5b6c..00000000000 --- a/prov/bgq/include/rdma/fi_direct_atomic_def.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef _FI_BGQ_DIRECT_ATOMIC_DEF_H_ -#define _FI_BGQ_DIRECT_ATOMIC_DEF_H_ - -#ifdef FABRIC_DIRECT -#define FABRIC_DIRECT_ATOMIC_DEF 1 - -enum fi_datatype { - FI_INT8, /* 0 */ - FI_UINT8, /* 1 */ - FI_INT16, /* 2 */ - FI_UINT16, /* 3 */ - FI_INT32, /* 4 */ - FI_UINT32, /* 5 */ - FI_INT64, /* 7 */ - FI_UINT64, /* 8 */ - FI_FLOAT, /* 6 */ - FI_DOUBLE, /* 9 */ - FI_FLOAT_COMPLEX, /* 10 */ - FI_DOUBLE_COMPLEX, /* 11 */ - FI_LONG_DOUBLE, /* 12 */ - FI_LONG_DOUBLE_COMPLEX, /* 13 */ - FI_DATATYPE_LAST /* 14 */ -}; -enum fi_op { - FI_MIN, - FI_MAX, - FI_SUM, - FI_PROD, - FI_LOR, - FI_LAND, - FI_BOR, - FI_BAND, - FI_LXOR, - FI_BXOR, - FI_ATOMIC_READ, - FI_ATOMIC_WRITE, - FI_CSWAP, - FI_CSWAP_NE, - FI_CSWAP_LE, - FI_CSWAP_LT, - FI_CSWAP_GE, - FI_CSWAP_GT, - FI_MSWAP, - FI_ATOMIC_OP_LAST -}; -#endif - - -#endif /* _FI_BGQ_DIRECT_ATOMIC_DEF_H_ */ diff --git a/prov/bgq/include/rdma/fi_direct_cm.h b/prov/bgq/include/rdma/fi_direct_cm.h deleted file mode 100644 index 1dc5803a869..00000000000 --- a/prov/bgq/include/rdma/fi_direct_cm.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef _FI_BGQ_DIRECT_CM_H_ -#define _FI_BGQ_DIRECT_CM_H_ - -#ifdef FABRIC_DIRECT -#define FABRIC_DIRECT_CM 1 - -static inline int fi_getname(fid_t fid, void *addr, size_t *addrlen) -{ - struct fid_ep *ep = container_of(fid, struct fid_ep, fid); - return ep->cm->getname(fid, addr, addrlen); -} - -static inline int fi_listen(struct fid_pep *pep) -{ - return pep->cm->listen(pep); -} - -static inline int -fi_connect(struct fid_ep *ep, const void *addr, - const void *param, size_t paramlen) -{ - return ep->cm->connect(ep, addr, param, paramlen); -} - -static inline int -fi_accept(struct fid_ep *ep, const void *param, size_t paramlen) -{ - return ep->cm->accept(ep, param, paramlen); -} - -static inline int -fi_reject(struct fid_pep *pep, fid_t handle, - const void *param, size_t paramlen) -{ - return pep->cm->reject(pep, handle, param, paramlen); -} - -static inline int fi_shutdown(struct fid_ep *ep, uint64_t flags) -{ - return ep->cm->shutdown(ep, flags); -} - -#endif - -#endif /* _FI_BGQ_DIRECT_CM_H_ */ diff --git a/prov/bgq/include/rdma/fi_direct_domain.h b/prov/bgq/include/rdma/fi_direct_domain.h deleted file mode 100644 index 782db8ddd59..00000000000 --- a/prov/bgq/include/rdma/fi_direct_domain.h +++ /dev/null @@ -1,368 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef _FI_PROV_BGQ_DOMAIN_H_ -#define _FI_PROV_BGQ_DOMAIN_H_ - -#define FABRIC_DIRECT_DOMAIN 1 - -#include -#include -#include - -#include "rdma/bgq/fi_bgq_spi.h" - -#include "rdma/bgq/fi_bgq_l2atomic.h" -#include "rdma/bgq/fi_bgq_progress.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct fi_bgq_ep; /* forward declaration */ - -struct fi_bgq_bat_entry { - uintptr_t vaddr; - uint64_t paddr; -}; - -#define FI_BGQ_DOMAIN_MAX_TX_CTX ((BGQ_MU_NUM_INJ_FIFO_GROUPS-1)*BGQ_MU_NUM_INJ_FIFOS_PER_GROUP) /* defensively set to the number of mu injection fifos per node */ -#define FI_BGQ_DOMAIN_MAX_RX_CTX ((BGQ_MU_NUM_REC_FIFO_GROUPS-1)*BGQ_MU_NUM_REC_FIFOS_PER_GROUP) /* defensively set to the number of mu reception fifos per node */ -struct fi_bgq_domain { - struct fid_domain domain_fid; - struct fi_bgq_fabric *fabric; - - enum fi_threading threading; - enum fi_resource_mgmt resource_mgmt; - enum fi_mr_mode mr_mode; - enum fi_progress data_progress; - - struct { - struct l2atomic_lock lock; - } mu; - - struct { - uint32_t max_fifos; - uint32_t max; - uint32_t count; - MUSPI_RecFifo_t *rfifo[BGQ_MU_NUM_REC_FIFOS_PER_GROUP*(BGQ_MU_NUM_REC_FIFO_GROUPS-1)]; /* do not mess with 17th core rec fifos */ - struct fi_bgq_ep *ctx[FI_BGQ_DOMAIN_MAX_RX_CTX]; - } rx; - 
- struct { - uint32_t count; - uint8_t rget_subgroup_base; - } tx; - - uint64_t num_mr_keys; - struct fi_bgq_bat_entry *bat; /* only for FI_MR_SCALABLE */ - - BG_CoordinateMapping_t my_coords; - struct l2atomic_lock lock; - void *rfifo_mem; - MUSPI_RecFifoSubGroup_t rfifo_subgroup[BGQ_MU_NUM_FIFO_SUBGROUPS_PER_NODE]; - MUSPI_InjFifoSubGroup_t ififo_subgroup[BGQ_MU_NUM_FIFO_SUBGROUPS_PER_NODE]; - - struct { - MUSPI_GIBarrier_t barrier; - uint32_t is_leader; - uint32_t leader_tcoord; - } gi; - - struct { - uint64_t value; - uint64_t paddr; - } zero; - struct { - uint64_t value; - uint64_t paddr; - } one; - - uint64_t max_ep; - - - struct { - struct fi_bgq_progress thread[64]; - uint64_t max_threads; - uint64_t num_threads_active; - void *memptr; - - } progress; - - - uint64_t subgroups_per_process; - struct l2atomic_counter ref_cnt; -}; - -struct fi_bgq_av { - struct fid_av av_fid; - struct fi_bgq_domain *domain; - enum fi_av_type type; - void *map_addr; - struct l2atomic_counter ref_cnt; -}; - -struct fi_bgq_mr { - struct fid_mr mr_fid; - struct fi_bgq_domain *domain; - const void *buf; - size_t len; - size_t offset; - uint64_t access; - uint64_t flags; - uint64_t cntr_bflags; - struct fi_bgq_cntr *cntr; - struct fi_bgq_ep *ep; -}; - -static inline void -fi_bgq_domain_bat_read(struct fi_bgq_bat_entry * bat, uint64_t key, uintptr_t *vaddr, uint64_t *paddr) -{ - assert(bat); - *vaddr = bat[key].vaddr; - *paddr = bat[key].paddr; -} - -static inline void * -fi_bgq_domain_bat_read_vaddr(struct fi_bgq_bat_entry * bat, uint64_t key) -{ - assert(bat); - return (void*)bat[key].vaddr; -} - -static inline uint64_t -fi_bgq_domain_bat_read_paddr(struct fi_bgq_bat_entry * bat, uint64_t key) -{ - assert(bat); - return bat[key].paddr; -} - -static inline void -fi_bgq_domain_bat_write(struct fi_bgq_domain *bgq_domain, uint64_t requested_key, const void *buf, size_t len) -{ - assert(requested_key < bgq_domain->num_mr_keys); - - bgq_domain->bat[requested_key].vaddr = (uintptr_t)buf; - 
if (buf == NULL) { - bgq_domain->bat[requested_key].paddr = 0; - } else { - Kernel_MemoryRegion_t cnk_mr; - uint32_t cnk_rc __attribute__ ((unused)); - cnk_rc = Kernel_CreateMemoryRegion(&cnk_mr, (void *)buf, len); - assert(cnk_rc == 0); - - bgq_domain->bat[requested_key].paddr = - (uint64_t)cnk_mr.BasePa + ((uint64_t)buf - (uint64_t)cnk_mr.BaseVa); - } - - { /* this "l1p flush" hack is only needed to flush *writes* from a processor cache to the memory system */ - volatile uint64_t *mu_register = - (volatile uint64_t *)(BGQ_MU_STATUS_CONTROL_REGS_START_OFFSET(0, 0) + - 0x030 - PHYMAP_PRIVILEGEDOFFSET); - *mu_register = 0; - } - ppc_msync(); -} - -static inline void -fi_bgq_domain_bat_clear(struct fi_bgq_domain *bgq_domain, uint64_t key) -{ - assert(key < bgq_domain->num_mr_keys); - - bgq_domain->bat[key].vaddr = (uintptr_t)0; - bgq_domain->bat[key].paddr = (uint64_t)0; - - { /* this "l1p flush" hack is only needed to flush *writes* from a processor cache to the memory system */ - volatile uint64_t *mu_register = - (volatile uint64_t *)(BGQ_MU_STATUS_CONTROL_REGS_START_OFFSET(0, 0) + - 0x030 - PHYMAP_PRIVILEGEDOFFSET); - *mu_register = 0; - } - ppc_msync(); -} - -static inline uint32_t -fi_bgq_domain_get_tx_max(struct fi_bgq_domain *bgq_domain) { - - /* - * The maximum number of tx contexts depends on how many mu injection - * fifos are available and how many rx contexts have been allocated - - * each tx context requires 2 mu injection fifos, and each allocated - * rx context consumes an additional mu injection fifo. 
- */ - - const uint32_t ppn = Kernel_ProcessCount(); - return ((FI_BGQ_DOMAIN_MAX_TX_CTX / ppn) - bgq_domain->rx.count) / 2; -} - -static inline uint32_t -fi_bgq_domain_get_rx_max(struct fi_bgq_domain *bgq_domain) { - - /* - * The maximum number of rx contexts depends on how many mu reception - * fifos are available and how many tx contexts have been allocated - - * each rx context requires 1 mu reception fifo and 1 mu injection fifo - */ - - const uint32_t ppn = Kernel_ProcessCount(); - - return MIN((FI_BGQ_DOMAIN_MAX_RX_CTX / ppn),((FI_BGQ_DOMAIN_MAX_TX_CTX / ppn) - (bgq_domain->tx.count * 2))); -} - - -#ifdef FABRIC_DIRECT -static inline int -fi_domain(struct fid_fabric *fabric, struct fi_info *info, - struct fid_domain **domain, void *context) -{ - return fabric->ops->domain(fabric, info, domain, context); -} - -static inline int -fi_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, - struct fid_cq **cq, void *context) -{ - return domain->ops->cq_open(domain, attr, cq, context); -} - -static inline int -fi_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, - struct fid_cntr **cntr, void *context) -{ - return domain->ops->cntr_open(domain, attr, cntr, context); -} - -static inline int -fi_mr_reg(struct fid_domain *domain, const void *buf, size_t len, - uint64_t access, uint64_t offset, uint64_t requested_key, - uint64_t flags, struct fid_mr **mr, void *context) -{ - return domain->mr->reg(&domain->fid, buf, len, access, offset, - requested_key, flags, mr, context); -} - -static inline -int fi_mr_bind(struct fid_mr *mr, struct fid *bfid, uint64_t flags) -{ - return mr->fid.ops->bind(&mr->fid, bfid, flags); -} - -static inline -void *fi_mr_desc(struct fid_mr *mr) -{ - return mr->mem_desc; -} - -static inline -uint64_t fi_mr_key(struct fid_mr *mr) -{ - return mr->key; -} - -static inline int -fi_av_open(struct fid_domain *domain, struct fi_av_attr *attr, - struct fid_av **av, void *context) -{ - return domain->ops->av_open(domain, attr, av, 
context); -} - -static inline int -fi_av_insert(struct fid_av *av, const void *addr, size_t count, - fi_addr_t *fi_addr, uint64_t flags, void *context) -{ - return av->ops->insert(av, addr, count, fi_addr, flags, context); -} - -static inline int -fi_av_insertsvc(struct fid_av *av, const char *node, const char *service, - fi_addr_t *fi_addr, uint64_t flags, void *context) -{ - return av->ops->insertsvc(av, node, service, fi_addr, flags, context); -} - -static inline int -fi_av_insertsym(struct fid_av *av, const char *node, size_t nodecnt, - const char *service, size_t svccnt, - fi_addr_t *fi_addr, uint64_t flags, void *context) -{ - return av->ops->insertsym(av, node, nodecnt, service, svccnt, fi_addr, flags, context); -} - -static inline int -fi_av_lookup(struct fid_av *av, fi_addr_t fi_addr, void *addr, size_t *addrlen) -{ - return av->ops->lookup(av, fi_addr, addr, addrlen); -} - -static inline int -fi_av_remove(struct fid_av *av, fi_addr_t *fi_addr, size_t count, uint64_t flags) -{ - return av->ops->remove(av, fi_addr, count, flags); -} - -static inline fi_addr_t -fi_rx_addr(fi_addr_t fi_addr, int rx_index, int rx_ctx_bits) -{ - /* - * The rx information for bgq is the rec fifo id, this is stored - * across the bits in the 'rx_lsb' and 'rx_msb' - * fields in the uid, these fields should be concatenated to determine - * the rx base fifo id, then the rx_index should be added to this to identify - * the correct rec fifo id for this rx and then restored in the 'rx_lsb' - * and 'rx_msb' bits to support scalable endpoints. 
- * - */ - - union fi_bgq_addr bgq_addr = {.fi=fi_addr}; - - uint32_t rec_fifo_id = 0; - rec_fifo_id = (rec_fifo_id | (((bgq_addr.uid.fi & 0xF0000000u) >> 23) | ((bgq_addr.uid.fi & 0x0000003Eu) >> 1))) + rx_index; - bgq_addr.uid.fi = (bgq_addr.uid.fi & 0x0FFFFFC1u) | /* clear rx_msb and rx_lsb */ - ((rec_fifo_id << 23) & 0xF0000000u) | /* set rx_msb */ - ((rec_fifo_id << 1) & 0x0000003Eu); /* set rx_lsb */ - - return bgq_addr.fi; -} - -static inline int fi_wait_open(struct fid_fabric *fabric, - struct fi_wait_attr *attr, - struct fid_wait **waitset) -{ - return -FI_ENOSYS; /* TODO - implement this */ -} - -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* _FI_PROV_BGQ_DOMAIN_H_ */ diff --git a/prov/bgq/include/rdma/fi_direct_endpoint.h b/prov/bgq/include/rdma/fi_direct_endpoint.h deleted file mode 100644 index d3287098ff9..00000000000 --- a/prov/bgq/include/rdma/fi_direct_endpoint.h +++ /dev/null @@ -1,1205 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef _FI_BGQ_DIRECT_EP_H_ -#define _FI_BGQ_DIRECT_EP_H_ - -#define FABRIC_DIRECT_ENDPOINT 1 - -#include -#include -#include - -#include "rdma/bgq/fi_bgq_compiler.h" -#include "rdma/bgq/fi_bgq_hwi.h" -#include "rdma/bgq/fi_bgq_spi.h" - -#include "rdma/fi_direct_eq.h" - -#include "rdma/bgq/fi_bgq_mu.h" -#include "rdma/bgq/fi_bgq_progress.h" -#include "rdma/bgq/fi_bgq_flight_recorder.h" - -#define FI_BGQ_L2FIFO_CTL_SIZE (1024) - -#define IS_TAG (0) -#define IS_MSG (1) - -// #define FI_BGQ_TRACE 1 -// #define FI_BGQ_REMOTE_COMPLETION - -enum fi_bgq_ep_state { - FI_BGQ_EP_UNINITIALIZED = 0, - FI_BGQ_EP_INITITALIZED_DISABLED, - FI_BGQ_EP_INITITALIZED_ENABLED -}; - -struct fi_bgq_stx { - struct fid_stx stx_fid; /* 80 bytes */ - struct fi_bgq_domain *domain; - struct fi_tx_attr attr; - - - struct fi_bgq_spi_injfifo injfifo; - struct fi_bgq_spi_injfifo rgetfifo; - - MUSPI_InjFifoSubGroup_t injfifo_subgroup; - MUSPI_InjFifoSubGroup_t rgetfifo_subgroup; - - struct l2atomic_counter ref_cnt; - struct l2atomic_lock lock; -}; - -struct rx_operation { - MUSPI_RecFifo_t *rfifo; - struct l2atomic_fifo fifo; - struct { - union fi_bgq_context *head; - union fi_bgq_context *tail; - } mq; - struct { - struct fi_bgq_mu_packet *head; - struct fi_bgq_mu_packet *tail; - struct fi_bgq_mu_packet *free; - } ue; -}; - -struct fi_bgq_ep_tx { - - struct fi_bgq_spi_injfifo injfifo; /* cloned from stx; 88 bytes */ - uint64_t unused0[5]; - - /* == L2 CACHE LINE == */ - - uint64_t op_flags; - struct 
fi_bgq_cq *send_cq; - struct fi_bgq_cntr *send_cntr; - struct fi_bgq_cntr *write_cntr; - uint64_t unused[12]; - - /* == L2 CACHE LINE == */ - - struct { /* three l2 cache lines */ - MUHWI_Descriptor_t send_model; - MUHWI_Descriptor_t local_completion_model; /* only "local completion eager" */ - MUHWI_Descriptor_t rzv_model[2]; /* [0]=="internode"; [1]=="intranode" */ - MUHWI_Descriptor_t remote_completion_model; - uint8_t unused[64]; - } send __attribute((aligned(L2_CACHE_LINE_SIZE))); - /* == L2 CACHE LINE == */ - - struct { - struct { - MUHWI_Descriptor_t rget_model; - MUHWI_Descriptor_t dput_model; - } direct; - /* == L2 CACHE LINE == */ - - struct { - MUHWI_Descriptor_t mfifo_model; - MUHWI_Descriptor_t dput_model; - } emulation; - /* == L2 CACHE LINE == */ - - MUHWI_Descriptor_t cntr_model; - MUHWI_Descriptor_t cq_model; - /* == L2 CACHE LINE == */ - - uint64_t global_one_paddr; - uint64_t global_zero_paddr; - - uint8_t unused[48]; - } read __attribute((aligned(L2_CACHE_LINE_SIZE))); - - /* == L2 CACHE LINE == */ - - struct { - struct { - MUHWI_Descriptor_t dput_model; - MUHWI_Descriptor_t unused; - } direct; - /* == L2 CACHE LINE == */ - - struct { - MUHWI_Descriptor_t mfifo_model; - MUHWI_Descriptor_t rget_model; - /* == L2 CACHE LINE == */ - - MUHWI_Descriptor_t dput_model; - MUHWI_Descriptor_t cntr_model; - } emulation; - } write __attribute((aligned(L2_CACHE_LINE_SIZE))); - - /* == L2 CACHE LINE == */ - - struct { - struct { - MUHWI_Descriptor_t mfifo_model; - MUHWI_Descriptor_t unused; - - /* == L2 CACHE LINE == */ - - struct { - MUHWI_Descriptor_t mfifo_model; - MUHWI_Descriptor_t cntr_model; - - /* == L2 CACHE LINE == */ - - MUHWI_Descriptor_t cq_model; - MUHWI_Descriptor_t unused; - } fence; - } emulation; - } atomic __attribute((aligned(L2_CACHE_LINE_SIZE))); - - /* == L2 CACHE LINE == */ - - uint64_t caps; - uint64_t mode; - enum fi_bgq_ep_state state; - - struct fi_bgq_stx *stx; - struct fi_bgq_stx exclusive_stx; - ssize_t index; - -} 
__attribute((aligned(L2_CACHE_LINE_SIZE))); - -struct fi_bgq_ep_rx { - - /* == L2 CACHE LINE == */ - - /* 'post' data is used when a thread is INITIATING recv operations */ - struct { - uint64_t op_flags; - struct l2atomic_fifo_producer match[2]; /* 0 == 'tag', 1 == 'msg' */ /* TODO - use an enum */ - struct l2atomic_fifo_producer control; - - } post __attribute((aligned(L2_CACHE_LINE_SIZE))); - - /* == L2 CACHE LINE == */ - - /* 'poll' data is used when a thread is making PROGRESS on recv operations */ - struct { - - MUSPI_RecFifo_t *muspi_recfifo; - uintptr_t pad; - - /* 'rfifo' data is used to poll a mu reception fifo */ - struct { - - struct l2atomic_fifo_consumer match; - - struct { - union fi_bgq_context *head; - union fi_bgq_context *tail; - } mq; - - struct { - struct fi_bgq_mu_packet *head; - struct fi_bgq_mu_packet *tail; - struct fi_bgq_mu_packet *free; - } ue; - - - } rfifo[2]; /* 0 == 'tag', 1 == 'msg' */ - - struct fi_bgq_spi_injfifo injfifo; - - /* == L2 CACHE LINE == */ - - struct { - MUHWI_Descriptor_t rget_model[2]; /* [0]=="internode"; [1]=="intranode" */ - /* == L2 CACHE LINE == */ - MUHWI_Descriptor_t dput_model[2]; /* [0]=="internode"; [1]=="intranode" */ - /* == L2 CACHE LINE == */ - MUHWI_Descriptor_t dput_completion_model; - MUHWI_Descriptor_t multi_recv_ack_model; - - } rzv __attribute((aligned(L2_CACHE_LINE_SIZE))); /* TODO reorganize rzv for better cache layout */ - - /* == L2 CACHE LINE == */ - - /* TODO reorganize ack (with rzv) for better cache layout */ - MUHWI_Descriptor_t ack_model[2]; /* [0]=="internode"; [1]=="intranode" */ - - /* == L2 CACHE LINE == */ - - MUHWI_Descriptor_t atomic_dput_model; - struct fi_bgq_bat_entry *bat; - uint64_t pad_1[7]; - - /* == L2 CACHE LINE == */ - - MUHWI_Descriptor_t atomic_cntr_update_model[2]; /* [0]=="internode"; [1]=="intranode" */ - - /* == L2 CACHE LINE == */ - - struct fi_bgq_cq *recv_cq; - struct fi_bgq_cntr *write_cntr; - uint64_t min_multi_recv; - - /* -- non-critical -- */ - struct 
fi_bgq_domain *domain; - struct l2atomic_fifo_consumer control; - - MUSPI_InjFifoSubGroup_t injfifo_subgroup; - - } poll __attribute((aligned(L2_CACHE_LINE_SIZE))); - - uint64_t caps; - uint64_t mode; - uint64_t op_flags; - size_t total_buffered_recv; /* TODO - is this only used by receive operations? */ - struct fi_bgq_sep *sep; - struct fi_bgq_ep *srx; - union fi_bgq_addr self; - - /* -- non-critical -- */ - ssize_t index; - uint64_t min_multi_recv; - void *l2atomic_memptr; - enum fi_bgq_ep_state state; - -} __attribute((aligned(L2_CACHE_LINE_SIZE))); - -/* - * The 'fi_bgq_ep' struct defines an endpoint with a single tx context and a - * single rx context. The tx context is only valid if the FI_READ, FI_WRITE, - * or FI_SEND capability is specified. The rx context is only valid if the - * FI_RECV, FI_REMOTE_READ, or FI_REMOTE_WRITE flags are specified. - * - * A 'scalable tx context' is simply an endpoint structure with only the - * tx flags specified, and a 'scalable rx context' is simply an endpoint - * structure with only the rx flags specified. 
- * - * As such, multiple OFI 'classes' share this endpoint structure: - * FI_CLASS_EP - * FI_CLASS_TX_CTX - * --- no FI_CLASS_STX_CTX - * FI_CLASS_RX_CTX - * -- no FI_CLASS_SRX_CTX - */ -struct fi_bgq_ep { - - struct fid_ep ep_fid; /* 80 bytes */ - struct l2atomic_lock lock; /* 16 bytes */ - uint32_t threading; - uint32_t av_type; - uint32_t mr_mode; - uint8_t unused[20]; - - /* == L2 CACHE LINE == */ - - struct fi_bgq_ep_tx tx; - struct fi_bgq_ep_rx rx; - - struct fi_bgq_cntr *read_cntr; - struct fi_bgq_cntr *write_cntr; - struct fi_bgq_cntr *send_cntr; - struct fi_bgq_cntr *recv_cntr; - - struct fi_bgq_cq *send_cq; - struct fi_bgq_cq *recv_cq; - - struct fi_bgq_domain *domain; - void *mem; - - struct fi_bgq_av *av; - struct fi_bgq_sep *sep; - - struct { - volatile uint64_t enabled; - volatile uint64_t active; - pthread_t thread; - } async; - enum fi_bgq_ep_state state; - -} __attribute((aligned(L2_CACHE_LINE_SIZE))); - -/* - * A 'scalable endpoint' may not be directly specified in a data movement - * functions, such as fi_tsend(), as it is only a container for multiple - * tx and rx contexts. - * - * The scalable contexts share certain resources, such as the address vector. 
- */ -struct fi_bgq_sep { - struct fid_ep ep_fid; - - struct fi_bgq_domain *domain; - struct fi_bgq_av *av; - struct fi_info *info; - void *memptr; - - struct l2atomic_counter ref_cnt; - -} __attribute((aligned(L2_CACHE_LINE_SIZE))); - - -void fi_bgq_ep_progress_manual_cancel (struct fi_bgq_ep * bgq_ep, - const uint64_t cancel_context); - -int fi_bgq_ep_progress_manual_recv (struct fi_bgq_ep *bgq_ep, - const uint64_t is_msg, - union fi_bgq_context * context, - const uint64_t rx_op_flags, - const uint64_t is_context_ext); - -/* See: process_mfifo_context() */ -int fi_bgq_ep_progress_manual_recv_fast (struct fi_bgq_ep *bgq_ep, - const uint64_t is_msg, - union fi_bgq_context * context); - -int fi_bgq_ep_progress_manual (struct fi_bgq_ep *bgq_ep); - -static inline -int fi_bgq_check_endpoint(struct fi_bgq_ep *bgq_ep, enum fi_av_type av_type) -{ -#ifdef DEBUG - if (!bgq_ep) - return -FI_EINVAL; - if (bgq_ep->state != FI_BGQ_EP_ENABLED) - return -FI_EINVAL; - - if (av_type == FI_AV_UNSPEC) - return -FI_EINVAL; - if (av_type == FI_AV_MAP && bgq_ep->av_type != FI_AV_MAP) - return -FI_EINVAL; - if (av_type == FI_AV_TABLE && bgq_ep->av_type != FI_AV_TABLE) - return -FI_EINVAL; - - /* currently, only FI_AV_MAP is supported */ - if (av_type == FI_AV_TABLE) { - return -FI_ENOSYS; - } else if (av_type != FI_AV_MAP) { - return -FI_EINVAL; - } -#endif - return 0; -} - -static inline -int fi_bgq_ep_tx_check(struct fi_bgq_ep_tx * tx, enum fi_av_type av_type) -{ -#ifdef DEBUG - if (!tx) - return -FI_EINVAL; - if (tx->state != FI_BGQ_TX_ENABLED) - return -FI_EINVAL; - - if (av_type == FI_AV_UNSPEC) - return -FI_EINVAL; - if (av_type == FI_AV_MAP && tx->av_type != FI_MAP) - return -FI_EINVAL; - if (av_type == FI_AV_TABLE && tx->av_type != FI_TABLE) - return -FI_EINVAL; - - /* currently, only FI_AV_MAP is supported */ - if (av_type == FI_AV_TABLE) - return -FI_ENOSYS; - if (av_type != FI_AV_MAP) - return -FI_EINVAL; -#endif - return 0; -} - - -/* - * NOTE-CACHE: This code touches 4 
cachelines in the code path - not including - * cachelines used for the actual injection fifo descriptor and the hardware - * SRAM - when the message length <= 8 bytes. - * - * NOTE-CACHE: This code touches 5 cachelines in the code path - not including - * cachelines used for the actual injection fifo descriptor, the hardware SRAM, - * and the immediate payload - when the message length > 8 bytes. - * - * TODO - reorganize fi_bgq structures and implement a slimmed down version of - * MUSPI_InjFifoAdvanceDesc to reduce cache pressure. The "<=8" code path - * should only need to touch 2 cachelines and the ">8" code path should only - * need to touch 3 cachelines. - */ -static inline -ssize_t fi_bgq_inject_generic(struct fid_ep *ep, - const void *buf, - size_t len, - fi_addr_t dest_addr, - uint64_t tag, - const uint32_t data, - int lock_required, - const unsigned is_msg) -{ - assert(is_msg == 0 || is_msg == 1); - - struct fi_bgq_ep *bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - ssize_t ret; - ret = fi_bgq_ep_tx_check(&bgq_ep->tx, FI_BGQ_FABRIC_DIRECT_AV); - if (ret) return ret; - - /* TODO - if this is a FI_CLASS_STX_CTX, then the lock is required */ - ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - /* get the destination bgq torus address */ - const union fi_bgq_addr bgq_dst_addr = {.fi=dest_addr}; - - /* eager with lookaside payload buffer and no completion */ - - /* busy-wait until a fifo slot is available ... 
*/ - MUHWI_Descriptor_t * send_desc = fi_bgq_spi_injfifo_tail_wait(&bgq_ep->tx.injfifo); - - /* copy the descriptor model into the injection fifo */ - qpx_memcpy64((void*)send_desc, (const void*)&bgq_ep->tx.send.send_model); - - /* set the destination torus address and fifo map */ - send_desc->PacketHeader.NetworkHeader.pt2pt.Destination = fi_bgq_uid_get_destination(bgq_dst_addr.uid.fi); - send_desc->Torus_FIFO_Map = fi_bgq_addr_get_fifo_map(dest_addr); - - send_desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id = - fi_bgq_addr_rec_fifo_id(dest_addr); - - union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &send_desc->PacketHeader; - - if (is_msg) { - fi_bgq_mu_packet_type_set(hdr, FI_BGQ_MU_PACKET_TYPE_EAGER); - } - - /* locate the payload lookaside slot */ - uint64_t payload_paddr = 0; - void *payload_vaddr = - fi_bgq_spi_injfifo_immediate_payload(&bgq_ep->tx.injfifo, - send_desc, &payload_paddr); - send_desc->Pa_Payload = payload_paddr; - - send_desc->Message_Length = len; - if (len) memcpy(payload_vaddr, buf, len); /* TODO use a qpx-optimized memcpy instead */ - - hdr->pt2pt.send.message_length = len; - hdr->pt2pt.ofi_tag = tag; - hdr->pt2pt.immediate_data = data; - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"fi_bgq_inject_generic dest addr is:\n"); - FI_BGQ_ADDR_DUMP((fi_addr_t *)&dest_addr); -#endif - MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo); - - /* TODO - if this is a FI_CLASS_STX_CTX, then the lock is required */ - ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - return 0; -} - -static inline -ssize_t fi_bgq_send_generic_flags(struct fid_ep *ep, - const void *buf, size_t len, void *desc, - fi_addr_t dest_addr, uint64_t tag, void *context, - const uint32_t data, int lock_required, - const unsigned is_msg, const unsigned is_contiguous, - const unsigned override_flags, uint64_t tx_op_flags) -{ -#ifdef FI_BGQ_TRACE - fprintf(stderr,"fi_bgq_send_generic_flags starting\n"); 
-#endif - assert(is_msg == 0 || is_msg == 1); - assert(is_contiguous == 0 || is_contiguous == 1); - - struct fi_bgq_ep *bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - ssize_t ret; - ret = fi_bgq_ep_tx_check(&bgq_ep->tx, FI_BGQ_FABRIC_DIRECT_AV); - if (ret) return ret; - - /* TODO - if this is a FI_CLASS_STX_CTX, then the lock is required */ - ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - /* get the destination bgq torus address */ - const union fi_bgq_addr bgq_dst_addr = {.fi=dest_addr}; - - size_t xfer_len = 0; - if (is_contiguous) xfer_len = len; - else { - size_t i; - const struct iovec * iov = (const struct iovec *)buf; - for (i=0; itx.op_flags; - - /* busy-wait until a fifo slot is available .. */ - MUHWI_Descriptor_t * send_desc = fi_bgq_spi_injfifo_tail_wait(&bgq_ep->tx.injfifo); - - if (xfer_len <= FI_BGQ_TOTAL_BUFFERED_RECV) { - /* eager */ - - /* copy the descriptor model into the injection fifo */ - qpx_memcpy64((void*)send_desc, (const void *)&bgq_ep->tx.send.send_model); - - /* set the destination torus address and fifo map */ - send_desc->PacketHeader.NetworkHeader.pt2pt.Destination = fi_bgq_uid_get_destination(bgq_dst_addr.uid.fi); - send_desc->Torus_FIFO_Map = fi_bgq_addr_get_fifo_map(dest_addr); - - send_desc->Message_Length = xfer_len; - - send_desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id = - fi_bgq_addr_rec_fifo_id(dest_addr); - - if (is_contiguous && ((tx_op_flags & FI_INJECT) == 0)) { - fi_bgq_cnk_vaddr2paddr(buf, len, &send_desc->Pa_Payload); - } else { - /* locate the payload lookaside slot */ - uint64_t payload_paddr = 0; - uintptr_t payload_vaddr = - (uintptr_t) fi_bgq_spi_injfifo_immediate_payload(&bgq_ep->tx.injfifo, - send_desc, &payload_paddr); - send_desc->Pa_Payload = payload_paddr; - - if (is_contiguous) { - if (len) memcpy((void*)payload_vaddr, buf, len); - } else { - unsigned i; - const struct iovec * iov = (const struct iovec *)buf; - for (i=0; 
iPacketHeader; - hdr->pt2pt.send.message_length = xfer_len; - hdr->pt2pt.ofi_tag = tag; - hdr->pt2pt.immediate_data = data; - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"eager sending to dest:\n"); - FI_BGQ_ADDR_DUMP(&dest_addr); -#endif - if (is_msg) { - fi_bgq_mu_packet_type_set(hdr, FI_BGQ_MU_PACKET_TYPE_EAGER); /* clear the 'TAG' bit in the packet type */ - } - - MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo); - -#ifdef FI_BGQ_REMOTE_COMPLETION - if (tx_op_flags & (FI_TRANSMIT_COMPLETE | FI_DELIVERY_COMPLETE)) { - - /* - * TODO - this code is buggy and results in a hang at job completion for 'cpi' - * - * Suspect that remote processes are exiting before the 'request for ack' - * remote completion packet is received, then the process that issued the - * 'request for ack' messagee will hang because the ack is never received. - * - * Alternative implementations: - * 1. Do not support remote completions on bgq (current) - * 2. Support remote completions via rendezvous protocol - */ - - /* inject the 'remote completion' descriptor */ - send_desc = fi_bgq_spi_injfifo_tail_wait(&bgq_ep->tx.injfifo); - - /* copy the descriptor model into the injection fifo */ - qpx_memcpy64((void*)send_desc, (const void *)&bgq_ep->tx.send.remote_completion_model); - - /* initialize the completion entry */ - assert(context); - assert(((uintptr_t)context & 0x07ull) == 0); /* must be 8 byte aligned */ - union fi_bgq_context * bgq_context = (union fi_bgq_context *)context; - bgq_context->flags = 0; /* TODO */ - bgq_context->len = xfer_len; - bgq_context->buf = NULL; /* TODO */ - bgq_context->byte_counter = xfer_len; - bgq_context->tag = tag; - - uint64_t byte_counter_paddr = 0; - fi_bgq_cnk_vaddr2paddr((const void*)&bgq_context->byte_counter, sizeof(uint64_t), &byte_counter_paddr); - - /* set the destination torus address and fifo map */ - send_desc->PacketHeader.NetworkHeader.pt2pt.Destination = fi_bgq_uid_get_destination(bgq_dst_addr.uid.fi); - send_desc->Torus_FIFO_Map = 
(uint64_t) bgq_dst_addr.fifo_map; - send_desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id = - fi_bgq_addr_rec_fifo_id(dest_addr); - - hdr = (union fi_bgq_mu_packet_hdr *) &send_desc->PacketHeader; - hdr->completion.is_local = fi_bgq_addr_is_local(dest_addr); - hdr->completion.cntr_paddr_rsh3b = byte_counter_paddr >> 3; - - fi_bgq_cq_enqueue_pending(bgq_ep->send_cq, bgq_context, lock_required); - - MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo); - - } else -#endif - { - - if (tx_op_flags & (FI_INJECT_COMPLETE | FI_TRANSMIT_COMPLETE | FI_DELIVERY_COMPLETE)) { - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"eager injecting local completion dput\n"); -#endif - - /* inject the 'local completion' direct put descriptor */ - send_desc = fi_bgq_spi_injfifo_tail_wait(&bgq_ep->tx.injfifo); - - /* copy the descriptor model into the injection fifo */ - qpx_memcpy64((void*)send_desc, (const void *)&bgq_ep->tx.send.local_completion_model); - - /* initialize the completion entry */ - assert(context); - assert(((uintptr_t)context & 0x07ull) == 0); /* must be 8 byte aligned */ - union fi_bgq_context * bgq_context = (union fi_bgq_context *)context; - bgq_context->flags = 0; /* TODO */ - bgq_context->len = xfer_len; - bgq_context->buf = NULL; /* TODO */ - bgq_context->byte_counter = xfer_len; - bgq_context->tag = tag; - - uint64_t byte_counter_paddr = 0; - fi_bgq_cnk_vaddr2paddr((const void*)&bgq_context->byte_counter, sizeof(uint64_t), &byte_counter_paddr); - - send_desc->Pa_Payload = - MUSPI_GetAtomicAddress(byte_counter_paddr, - MUHWI_ATOMIC_OPCODE_LOAD_CLEAR); - - fi_bgq_cq_enqueue_pending(bgq_ep->send_cq, bgq_context, lock_required); - - MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo); - } - } - - } else { - /* rendezvous */ - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"rendezvous sending to dest:\n"); - FI_BGQ_ADDR_DUMP(&dest_addr); -#endif - - assert((tx_op_flags & FI_INJECT) == 0); - - const uint64_t is_local = 
fi_bgq_addr_is_local(dest_addr); - - /* copy the descriptor model into the injection fifo */ - qpx_memcpy64((void*)send_desc, (const void *)&bgq_ep->tx.send.rzv_model[is_local]); - - /* set the destination torus address and fifo map */ - send_desc->PacketHeader.NetworkHeader.pt2pt.Destination = fi_bgq_uid_get_destination(bgq_dst_addr.uid.fi); - send_desc->Torus_FIFO_Map = fi_bgq_addr_get_fifo_map(dest_addr); - - send_desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id = - fi_bgq_addr_rec_fifo_id(dest_addr); - - union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &send_desc->PacketHeader; - - if (is_msg) { - fi_bgq_mu_packet_type_set(hdr, FI_BGQ_MU_PACKET_TYPE_RENDEZVOUS); - } - - /* locate the payload lookaside slot */ - uint64_t payload_paddr = 0; - union fi_bgq_mu_packet_payload *payload = - (union fi_bgq_mu_packet_payload *) fi_bgq_spi_injfifo_immediate_payload(&bgq_ep->tx.injfifo, - send_desc, &payload_paddr); - send_desc->Pa_Payload = payload_paddr; - - payload->rendezvous.fifo_map = fi_bgq_addr_get_fifo_map(bgq_dst_addr.fi); - - if (is_contiguous) { - /* only send one mu iov */ - fi_bgq_cnk_vaddr2paddr(buf, len, &payload->rendezvous.mu_iov[0].src_paddr); - payload->rendezvous.mu_iov[0].message_length = len; - hdr->pt2pt.rendezvous.niov_minus_1 = 0; - } else { - assert(len <= 31); - size_t i; - const struct iovec * iov = (const struct iovec *)buf; - send_desc->Message_Length += (len-1) * sizeof(struct fi_bgq_mu_iov); - for (i=0; irendezvous.mu_iov[i].src_paddr); - payload->rendezvous.mu_iov[i].message_length = iov[i].iov_len; - } - hdr->pt2pt.rendezvous.niov_minus_1 = len - 1; - } - - /* initialize the completion entry */ - assert(context); - assert(((uintptr_t)context & 0x07ull) == 0); /* must be 8 byte aligned */ - union fi_bgq_context * bgq_context = (union fi_bgq_context *)context; - bgq_context->flags = 0; /* TODO */ - bgq_context->len = xfer_len; - bgq_context->buf = NULL; /* TODO */ - bgq_context->byte_counter = 
xfer_len; - bgq_context->tag = tag; - - uint64_t byte_counter_paddr = 0; - fi_bgq_cnk_vaddr2paddr((const void*)&bgq_context->byte_counter, sizeof(uint64_t), &byte_counter_paddr); - payload->rendezvous.cntr_paddr_rsh3b = byte_counter_paddr >> 3; - - hdr->pt2pt.ofi_tag = tag; - hdr->pt2pt.immediate_data = data; - - MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo); - - fi_bgq_cq_enqueue_pending(bgq_ep->send_cq, bgq_context, lock_required); - } - - /* TODO - if this is a FI_CLASS_STX_CTX, then the lock is required */ - ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - return 0; -} - -static inline -ssize_t fi_bgq_send_generic(struct fid_ep *ep, - const void *buf, size_t len, void *desc, - fi_addr_t dst_addr, uint64_t tag, void *context, - int lock_required, - const unsigned is_msg) -{ - assert(is_msg == 0 || is_msg == 1); - return fi_bgq_send_generic_flags(ep, buf, len, desc, dst_addr, - tag, context, 0, lock_required, is_msg, - 1 /* is_contiguous */, - 0 /* do not override flags */, - 0 /* no flags */); -} - -/* - * In FI_PROGRESS_MANUAL mode: - * The bgq 'recv' implementation is THREAD SAFE and LOCKLESS due to its use of - * the L2 atomic operations to post the match information to the progress thread. - * The 'fi_bgq_lock_if_required()' utility function is not used. 
- */ -static inline -ssize_t fi_bgq_recv_generic(struct fid_ep *ep, - void *buf, size_t len, void *desc, - fi_addr_t src_addr, uint64_t tag, uint64_t ignore, void *context, - const int lock_required, - const uint64_t is_msg) -{ - assert(is_msg == 0 || is_msg == 1); - struct fi_bgq_ep *bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - const uint64_t rx_op_flags = bgq_ep->rx.post.op_flags; - - assert(context); - assert(((uintptr_t)context & 0x07ull) == 0); /* must be 8 byte aligned */ - union fi_bgq_context * bgq_context = (union fi_bgq_context *)context; - bgq_context->flags = rx_op_flags; - bgq_context->len = len; - bgq_context->buf = buf; - bgq_context->src_addr = src_addr; - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"fi_bgq_recv_generic from source addr:\n"); - FI_BGQ_ADDR_DUMP(&bgq_context->src_addr); -#endif - - bgq_context->tag = tag; - bgq_context->ignore = ignore; - bgq_context->byte_counter = (uint64_t)-1; - - if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_MANUAL) { /* constant expression will compile out */ - - int ret; - ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; -#ifdef FI_BGQ_TRACE - fprintf(stderr,"fi_bgq_recv_generic calling fi_bgq_ep_progress_manual_recv_fast:\n"); -#endif - - fi_bgq_ep_progress_manual_recv_fast(bgq_ep, is_msg, context); - - ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - } else { - - /* the *only* difference between a 'tagged' and 'non-tagged' recv is - * the L2 atomic fifo used to post the receive information */ - struct l2atomic_fifo_producer * fifo = &bgq_ep->rx.post.match[is_msg]; - - uint64_t context_rsh3b = (uint64_t)context >> 3; - while (l2atomic_fifo_produce(fifo, context_rsh3b) != 0); /* spin loop! */ - } - - return 0; -} - -/* - * In FI_PROGRESS_AUTO mode: - * The bgq 'recv' implementation is THREAD SAFE and LOCKLESS due to its use of - * the L2 atomic operations to post the match information to the progress thread. 
- * The 'fi_bgq_lock_if_required()' utility function is not used. - * - * \note The bgq provider asserts the following mode bits which affect - * the behavior of this routine: - * - * - 'FI_ASYNC_IOV' mode bit which requires the application to maintain - * the 'msg->msg_iov' iovec array until the operation completes - * - * - 'FI_LOCAL_MR' mode bit which allows the provider to ignore the 'desc' - * parameter .. no memory regions are required to access the local - * memory - */ -static inline -ssize_t fi_bgq_recvmsg_generic(struct fid_ep *ep, - const struct fi_msg *msg, uint64_t flags, - int lock_required) -{ - struct fi_bgq_ep *bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - uint64_t context_rsh3b = 0; - uint64_t rx_op_flags = 0; - - if (flags | FI_MULTI_RECV) { - - assert(msg->context); - assert(((uintptr_t)msg->context & 0x07ull) == 0); /* must be 8 byte aligned */ - union fi_bgq_context * bgq_context = - (union fi_bgq_context *) msg->context; - - uint64_t len = msg->msg_iov[0].iov_len; - void * base = msg->msg_iov[0].iov_base; - - assert(msg->iov_count == 1); - assert(base != NULL); - if ((uintptr_t)base & 0x07ull) { - uintptr_t new_base = (((uintptr_t)base + 8) & (~0x07ull)); - len -= (new_base - (uintptr_t)base); - base = (void *)new_base; - } - assert(((uintptr_t)base & 0x07ull) == 0); - assert(len >= (sizeof(union fi_bgq_context) + bgq_ep->rx.min_multi_recv)); - bgq_context->flags = FI_MULTI_RECV; - bgq_context->len = len - sizeof(union fi_bgq_context); - bgq_context->buf = (void *)((uintptr_t)base + sizeof(union fi_bgq_context)); - bgq_context->src_addr = msg->addr; - bgq_context->byte_counter = 0; - bgq_context->multi_recv_next = (union fi_bgq_context *)base; - bgq_context->ignore = (uint64_t)-1; - - context_rsh3b = (uint64_t)bgq_context >> 3; - rx_op_flags = flags; - - } else if (msg->iov_count == 0) { - - assert(msg->context); - assert(((uintptr_t)msg->context & 0x07ull) == 0); /* must be 8 byte aligned */ - - union fi_bgq_context * bgq_context 
= - (union fi_bgq_context *) msg->context; - bgq_context->flags = flags; - bgq_context->len = 0; - bgq_context->buf = NULL; - bgq_context->src_addr = msg->addr; - bgq_context->tag = 0; - bgq_context->ignore = (uint64_t)-1; - bgq_context->byte_counter = (uint64_t)-1; - - context_rsh3b = (uint64_t)bgq_context >> 3; - rx_op_flags = flags; - - } else if (msg->iov_count == 1) { - assert(msg->context); - assert(((uintptr_t)msg->context & 0x07ull) == 0); /* must be 8 byte aligned */ - - union fi_bgq_context * bgq_context = - (union fi_bgq_context *) msg->context; - bgq_context->flags = flags; - bgq_context->len = msg->msg_iov[0].iov_len; - bgq_context->buf = msg->msg_iov[0].iov_base; - bgq_context->src_addr = msg->addr; - bgq_context->tag = 0; - bgq_context->ignore = (uint64_t)-1; - bgq_context->byte_counter = (uint64_t)-1; - - context_rsh3b = (uint64_t)bgq_context >> 3; - rx_op_flags = flags; - - } else { - struct fi_bgq_context_ext * ext; - posix_memalign((void**)&ext, 32, sizeof(struct fi_bgq_context_ext)); - - ext->bgq_context.flags = flags | FI_BGQ_CQ_CONTEXT_EXT; - ext->bgq_context.byte_counter = (uint64_t)-1; - ext->bgq_context.src_addr = msg->addr; - ext->bgq_context.tag = 0; - ext->bgq_context.ignore = (uint64_t)-1; - ext->msg.op_context = (struct fi_context *)msg->context; - ext->msg.iov_count = msg->iov_count; - ext->msg.iov = (struct iovec *)msg->msg_iov; - - context_rsh3b = (uint64_t)ext >> 3; - rx_op_flags = flags | FI_BGQ_CQ_CONTEXT_EXT; - if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_MANUAL) { /* constant expression will compile out */ - - int ret; - ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - fi_bgq_ep_progress_manual_recv(bgq_ep, - 1, /* is_msg */ - (union fi_bgq_context *)(context_rsh3b << 3), - rx_op_flags, - 1 /* is_context_ext */); - - ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - return 0; - } - } - - if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_MANUAL) { 
/* constant expression will compile out */ - - int ret; - ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - fi_bgq_ep_progress_manual_recv(bgq_ep, - 1, /* is_msg */ - (union fi_bgq_context *)(context_rsh3b << 3), - rx_op_flags, - 0 /* is_context_ext */); - - ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - } else { - - /* the *only* difference between a 'tagged' and 'non-tagged' recv is - * the L2 atomic fifo used to post the receive information */ - struct l2atomic_fifo_producer * fifo = &bgq_ep->rx.post.match[1]; /* TODO - use enum */ - - while (l2atomic_fifo_produce(fifo, context_rsh3b) != 0); /* spin loop! */ - - } - - return 0; -} - - -static inline -ssize_t fi_bgq_injectdata_generic(struct fid_ep *ep, - const void *buf, size_t len, uint64_t data, - fi_addr_t dst_addr, uint64_t tag, - int lock_required, - const unsigned is_msg) -{ - return fi_bgq_inject_generic(ep, buf, len, dst_addr, tag, data, - lock_required, is_msg); -} - - -static inline -ssize_t fi_bgq_senddata_generic(struct fid_ep *ep, - const void *buf, size_t len, void *desc, uint64_t data, - fi_addr_t dst_addr, uint64_t tag, void *context, - int lock_required, - const unsigned is_msg) -{ - - assert(is_msg == 0 || is_msg == 1); - return fi_bgq_send_generic_flags(ep, buf, len, desc, dst_addr, - tag, context, data, lock_required, is_msg, - 1 /* is_contiguous */, - 0 /* do not override flags */, - 0 /* no flags */); -} - - -static inline -ssize_t fi_bgq_inject(struct fid_ep *ep, - const void *buf, - size_t len, - fi_addr_t dest_addr, - int lock_required) -{ - return fi_bgq_inject_generic(ep, buf, len, dest_addr, 0, 0, - lock_required, 1); -} - - -/* - * Declare specialized functions that qualify for FABRIC_DIRECT. 
- * - No locks - */ - -#define FI_BGQ_MSG_FABRIC_DIRECT_LOCK 0 - -FI_BGQ_MSG_SPECIALIZED_FUNC(FI_BGQ_MSG_FABRIC_DIRECT_LOCK) - - -#ifdef FABRIC_DIRECT -#define fi_send(ep, buf, len, desc, dest_addr, context) \ - (FI_BGQ_MSG_SPECIALIZED_FUNC_NAME(send, \ - FI_BGQ_MSG_FABRIC_DIRECT_LOCK) \ - (ep, buf, len, desc, dest_addr, context)) - -#define fi_recv(ep, buf, len, desc, src_addr, context) \ - (FI_BGQ_MSG_SPECIALIZED_FUNC_NAME(recv, \ - FI_BGQ_MSG_FABRIC_DIRECT_LOCK) \ - (ep, buf, len, desc, src_addr, context)) - -#define fi_inject(ep, buf, len, dest_addr) \ - (FI_BGQ_MSG_SPECIALIZED_FUNC_NAME(inject, \ - FI_BGQ_MSG_FABRIC_DIRECT_LOCK) \ - (ep, buf, len, dest_addr)) - -#define fi_recvmsg(ep, msg, flags) \ - (FI_BGQ_MSG_SPECIALIZED_FUNC_NAME(recvmsg, \ - FI_BGQ_MSG_FABRIC_DIRECT_LOCK) \ - (ep, msg, flags)) - -#define fi_senddata(ep, buf, len, desc, data, dest_addr, context) \ - (FI_BGQ_MSG_SPECIALIZED_FUNC_NAME(senddata, \ - FI_BGQ_MSG_FABRIC_DIRECT_LOCK) \ - (ep, buf, len, desc, data, dest_addr, context)) - -#define fi_injectdata(ep, buf, len, data, dest_addr) \ - (FI_BGQ_MSG_SPECIALIZED_FUNC_NAME(injectdata, \ - FI_BGQ_MSG_FABRIC_DIRECT_LOCK) \ - (ep, buf, len, data, dest_addr)) - -static inline ssize_t -fi_sendmsg(struct fid_ep *ep, const struct fi_msg *msg, uint64_t flags) -{ - return ep->msg->sendmsg(ep, msg, flags); -} - -static inline ssize_t -fi_sendv(struct fid_ep *ep, const struct iovec *iov, void **desc, - size_t count, fi_addr_t dest_addr, void *context) -{ - return ep->msg->sendv(ep, iov, desc, count, dest_addr, context); -} - -static inline int -fi_enable(struct fid_ep *ep) -{ - return ep->fid.ops->control(&ep->fid, FI_ENABLE, NULL); -} - -static inline int -fi_cancel(fid_t fid, void *context) -{ - struct fid_ep *ep = container_of(fid, struct fid_ep, fid); - return ep->ops->cancel(fid, context); -} - -static inline int -fi_endpoint(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **ep, void *context) -{ - return 
domain->ops->endpoint(domain, info, ep, context); -} - -static inline int -fi_scalable_ep(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **sep, void *context) -{ - return domain->ops->scalable_ep(domain, info, sep, context); -} - -static inline int -fi_setopt(fid_t fid, int level, int optname, - const void *optval, size_t optlen) -{ - struct fid_ep *ep = container_of(fid, struct fid_ep, fid); - return ep->ops->setopt(fid, level, optname, optval, optlen); -} - -static inline int -fi_getopt(fid_t fid, int level, int optname, - void *optval, size_t *optlen) -{ - struct fid_ep *ep = container_of(fid, struct fid_ep, fid); - return ep->ops->getopt(fid, level, optname, optval, optlen); -} - -static inline int -fi_tx_context(struct fid_ep *ep, int index, struct fi_tx_attr *attr, - struct fid_ep **tx_ep, void *context) -{ - return ep->ops->tx_ctx(ep, index, attr, tx_ep, context); -} - -static inline int -fi_rx_context(struct fid_ep *ep, int index, struct fi_rx_attr *attr, - struct fid_ep **rx_ep, void *context) -{ - return ep->ops->rx_ctx(ep, index, attr, rx_ep, context); -} - -static inline int -fi_ep_bind(struct fid_ep *ep, struct fid *bfid, uint64_t flags) -{ - return ep->fid.ops->bind(&ep->fid, bfid, flags); -} - -static inline int -fi_scalable_ep_bind(struct fid_ep *sep, struct fid *bfid, uint64_t flags) -{ - return sep->fid.ops->bind(&sep->fid, bfid, flags); -} - -static inline int -fi_stx_context(struct fid_domain *domain, struct fi_tx_attr *attr, - struct fid_stx **stx, void *context) -{ - return domain->ops->stx_ctx(domain, attr, stx, context); -} - -static inline int -fi_ep_alias(struct fid_ep *ep, struct fid_ep **alias_ep, uint64_t flags) -{ - return -FI_ENOSYS; -} - -static inline int -fi_passive_ep(struct fid_fabric *fabric, struct fi_info *info, - struct fid_pep **pep, void *context) -{ - return fabric->ops->passive_ep(fabric, info, pep, context); -} - -static inline int fi_pep_bind(struct fid_pep *pep, struct fid *bfid, uint64_t flags) -{ - 
return pep->fid.ops->bind(&pep->fid, bfid, flags); -} - -#endif - -#endif /* _FI_BGQ_DIRECT_EP_H_ */ diff --git a/prov/bgq/include/rdma/fi_direct_eq.h b/prov/bgq/include/rdma/fi_direct_eq.h deleted file mode 100644 index 7dcb5e85a0a..00000000000 --- a/prov/bgq/include/rdma/fi_direct_eq.h +++ /dev/null @@ -1,698 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef _FI_BGQ_DIRECT_EQ_H_ -#define _FI_BGQ_DIRECT_EQ_H_ - -#define FABRIC_DIRECT_EQ 1 - -#include -#include - -#include "rdma/bgq/fi_bgq_hwi.h" - -#include "rdma/bgq/fi_bgq_l2atomic.h" -#include "rdma/bgq/fi_bgq_mu.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct fi_bgq_cntr { - struct fid_cntr cntr_fid; - - struct { - volatile uint64_t *l2_vaddr; - uint64_t paddr; - uint64_t batid; - } std; - struct { - volatile uint64_t *l2_vaddr; - uint64_t paddr; - uint64_t batid; - } err; - - volatile uint64_t data[2]; - - struct { - uint64_t ep_count; - struct fi_bgq_ep *ep[64]; /* TODO - check this array size */ - } progress; - - uint64_t ep_bind_count; - struct fi_bgq_ep *ep[64]; /* TODO - check this array size */ - - struct fi_cntr_attr *attr; - struct fi_bgq_domain *domain; -}; - -#define FI_BGQ_CQ_CONTEXT_EXT (0x8000000000000000ull) -#define FI_BGQ_CQ_CONTEXT_MULTIRECV (0x4000000000000000ull) - - -union fi_bgq_context { - struct fi_context context; - struct { - union fi_bgq_context *next; // fi_cq_entry::op_context - uint64_t flags; // fi_cq_msg_entry::flags - size_t len; // fi_cq_msg_entry::len (only need 37 bits) - void *buf; // fi_cq_data_entry::buf (unused for tagged cq's and non-multi-receive message cq's) - - union { - uint64_t data; // fi_cq_data_entry::data; only used after a message is matched - fi_addr_t src_addr; /* only used before a message is matched ('FI_DIRECTED_RECEIVE') */ - }; - - union { - uint64_t tag; // fi_cq_tagged_entry::tag - union fi_bgq_context *multi_recv_next; // only for multi-receives - }; - union { - uint64_t ignore; // only for tagged receive - struct fi_bgq_mu_packet *claim; // only for peek/claim - void *multi_recv_context; // only for individual FI_MULTI_RECV's - }; - - volatile uint64_t byte_counter; - }; -}; - -struct fi_bgq_context_ext { - union fi_bgq_context bgq_context; - struct fi_cq_err_entry err_entry; - struct { - struct fi_context *op_context; - size_t iov_count; - struct iovec *iov; - } msg; -}; - -/* This 
structure is organized in a way that minimizes cacheline use for the - * "FI_PROGRESS_MANUAL + inject" poll scenario. - */ -struct fi_bgq_cq { - struct fid_cq cq_fid; /* must be the first field in the structure */ - uint64_t pad_0[5]; - - /* == L2 CACHE LINE == */ - - struct fi_bgq_context_ext *err_head; - - union fi_bgq_context *pending_head; - union fi_bgq_context *pending_tail; - union fi_bgq_context *completed_head; - union fi_bgq_context *completed_tail; - - struct { - uint64_t ep_count; - struct fi_bgq_ep *ep[64]; /* TODO - check this array size */ - } progress; - - struct fi_bgq_context_ext *err_tail; - uint64_t pad_1[9]; - - /* == L2 CACHE LINE == */ - - struct l2atomic_lock lock; - - struct l2atomic_fifo_consumer err_consumer; - struct l2atomic_fifo_consumer std_consumer; - struct l2atomic_fifo_producer err_producer; - struct l2atomic_fifo_producer std_producer; - - - struct fi_bgq_domain *domain; - uint64_t bflags; /* fi_bgq_bind_ep_cq() */ - size_t size; - enum fi_cq_format format; - - MUHWI_Descriptor_t local_completion_model; - - uint64_t ep_bind_count; - struct fi_bgq_ep *ep[64]; /* TODO - check this array size */ - - struct fi_cq_bgq_l2atomic_data *fifo_memptr; - struct l2atomic_counter ref_cnt; -}; - -#define DUMP_ENTRY_INPUT(entry) \ -({ \ - fprintf(stderr,"%s:%s():%d entry = %p\n", __FILE__, __func__, __LINE__, (entry)); \ - fprintf(stderr,"%s:%s():%d op_context = %p\n", __FILE__, __func__, __LINE__, (entry)->tagged.op_context); \ - fprintf(stderr,"%s:%s():%d flags = 0x%016lx\n", __FILE__, __func__, __LINE__, (entry)->tagged.flags); \ - fprintf(stderr,"%s:%s():%d len = %zu\n", __FILE__, __func__, __LINE__, (entry)->tagged.len); \ - fprintf(stderr,"%s:%s():%d buf = %p\n", __FILE__, __func__, __LINE__, (entry)->tagged.buf); \ - fprintf(stderr,"%s:%s():%d ignore = 0x%016lx\n", __FILE__, __func__, __LINE__, (entry)->recv.ignore); \ - fprintf(stderr,"%s:%s():%d tag = 0x%016lx\n", __FILE__, __func__, __LINE__, (entry)->tagged.tag); \ - 
fprintf(stderr,"%s:%s():%d entry_kind = %u\n", __FILE__, __func__, __LINE__, (entry)->recv.entry_kind); \ - fprintf(stderr,"%s:%s():%d entry_id = %u\n", __FILE__, __func__, __LINE__, (entry)->recv.entry_id); \ -}) - -int fi_bgq_cq_enqueue_err (struct fi_bgq_cq * bgq_cq, - struct fi_bgq_context_ext * ext, - const int lock_required); - -static inline -int fi_bgq_cq_enqueue_pending (struct fi_bgq_cq * bgq_cq, - union fi_bgq_context * context, - const int lock_required) -{ - if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_MANUAL) { - - int ret; - ret = fi_bgq_lock_if_required(&bgq_cq->lock, lock_required); - if (ret) return ret; - - union fi_bgq_context * tail = bgq_cq->pending_tail; - context->next = NULL; - if (tail) { - tail->next = context; - } else { - bgq_cq->pending_head = context; - } - bgq_cq->pending_tail = context; - - ret = fi_bgq_unlock_if_required(&bgq_cq->lock, lock_required); - if (ret) return ret; - - } else { - - struct l2atomic_fifo_producer * std_producer = &bgq_cq->std_producer; - uint64_t context_rsh3b = (uint64_t)context >> 3; - while(0 != l2atomic_fifo_produce(std_producer, context_rsh3b)); /* spin loop! 
*/ - } - - return 0; -} - - -static inline -int fi_bgq_cq_enqueue_completed (struct fi_bgq_cq * bgq_cq, - union fi_bgq_context * context, - const int lock_required) -{ - assert(0 == context->byte_counter); - - if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_MANUAL) { - - int ret; - ret = fi_bgq_lock_if_required(&bgq_cq->lock, lock_required); - if (ret) return ret; - - union fi_bgq_context * tail = bgq_cq->completed_tail; - context->next = NULL; - if (tail) { - - assert(NULL != bgq_cq->completed_head); - tail->next = context; - bgq_cq->completed_tail = context; - - } else { - - assert(NULL == bgq_cq->completed_head); - bgq_cq->completed_head = context; - bgq_cq->completed_tail = context; - } - - ret = fi_bgq_unlock_if_required(&bgq_cq->lock, lock_required); - if (ret) return ret; - - } else { - - struct l2atomic_fifo_producer * std_producer = &bgq_cq->std_producer; - uint64_t context_rsh3b = (uint64_t)context >> 3; - while(0 != l2atomic_fifo_produce(std_producer, context_rsh3b)); /* spin loop! 
*/ - } - - return 0; -} - - - -static size_t fi_bgq_cq_fill(uintptr_t output, - union fi_bgq_context * context, - const enum fi_cq_format format) -{ - assert((context->flags & FI_BGQ_CQ_CONTEXT_EXT)==0); -#ifndef FABRIC_DIRECT - fprintf(stderr,"BGQ provider must be run in fabric-direct mode only\n"); - assert(0); -#endif - assert(sizeof(struct fi_context) == sizeof(union fi_bgq_context)); - - struct fi_cq_tagged_entry * entry = (struct fi_cq_tagged_entry *) output; - switch (format) { - case FI_CQ_FORMAT_CONTEXT: - if ((context->flags & FI_BGQ_CQ_CONTEXT_MULTIRECV) == 0) { /* likely */ - entry->op_context = (void *)context; - } else { - entry->op_context = (void *)context->multi_recv_context; - } - return sizeof(struct fi_cq_entry); - break; - case FI_CQ_FORMAT_MSG: - *((struct fi_cq_msg_entry *)output) = *((struct fi_cq_msg_entry *)context); - if ((context->flags & FI_BGQ_CQ_CONTEXT_MULTIRECV) == 0) { /* likely */ - entry->op_context = (void *)context; - } else { - entry->op_context = (void *)context->multi_recv_context; - } - return sizeof(struct fi_cq_msg_entry); - break; - case FI_CQ_FORMAT_DATA: - *((struct fi_cq_data_entry *)output) = *((struct fi_cq_data_entry *)context); - if ((context->flags & FI_BGQ_CQ_CONTEXT_MULTIRECV) == 0) { /* likely */ - entry->op_context = (void *)context; - } else { - entry->op_context = (void *)context->multi_recv_context; - } - return sizeof(struct fi_cq_data_entry); - break; - case FI_CQ_FORMAT_TAGGED: - *((struct fi_cq_tagged_entry *)output) = *((struct fi_cq_tagged_entry *)context); - if ((context->flags & FI_BGQ_CQ_CONTEXT_MULTIRECV) == 0) { /* likely */ - entry->op_context = (void *)context; - } else { - entry->op_context = (void *)context->multi_recv_context; - } - return sizeof(struct fi_cq_tagged_entry); - break; - default: - assert(0); - } - - return 0; -} - -int fi_bgq_ep_progress_manual (struct fi_bgq_ep *bgq_ep); - -static ssize_t fi_bgq_cq_poll (struct fi_bgq_cq *bgq_cq, - void *buf, - size_t count, - const enum 
fi_cq_format format) -{ - if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_MANUAL) { /* branch will compile out */ /* TODO - FI_PROGRESS_AUTO + 64 ppn */ - - /* check if the err list has anything in it and return */ - if (NULL != bgq_cq->err_head) { /* unlikely */ - assert(NULL != bgq_cq->err_tail); - - errno = FI_EAVAIL; - return -errno; - } - - } else if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_AUTO) { /* branch will compile out */ - - /* check if the err fifo has anything in it and return */ - if (!l2atomic_fifo_isempty(&bgq_cq->err_consumer)) { - - errno = FI_EAVAIL; - return -errno; - } - - } else assert(0); /* huh? */ - - ssize_t num_entries = 0; - uintptr_t output = (uintptr_t)buf; - - /* examine each context in the pending completion queue and, if the - * operation is complete, initialize the cq entry in the application - * buffer and remove the context from the queue. */ - union fi_bgq_context * pending_head = bgq_cq->pending_head; - union fi_bgq_context * pending_tail = bgq_cq->pending_tail; - if (NULL != pending_head) { - union fi_bgq_context * context = pending_head; - union fi_bgq_context * prev = NULL; - while ((count - num_entries) > 0 && context != NULL) { - - const uint64_t byte_counter = context->byte_counter; - - if (byte_counter == 0) { - output += fi_bgq_cq_fill(output, context, format); - ++ num_entries; - - if (prev) - prev->next = context->next; - else - /* remove the head */ - pending_head = context->next; - - if (!(context->next)) - /* remove the tail */ - pending_tail = prev; - } - else - prev = context; - context = context->next; - } - - /* save the updated pending head and pending tail pointers */ - bgq_cq->pending_head = pending_head; - bgq_cq->pending_tail = pending_tail; - } - - if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_MANUAL) { /* branch will compile out */ - - union fi_bgq_context * head = bgq_cq->completed_head; - if (head) { - union fi_bgq_context * context = head; - while ((count - num_entries) > 0 && context != 
NULL) { - output += fi_bgq_cq_fill(output, context, format); - ++ num_entries; - context = context->next; - } - bgq_cq->completed_head = context; - if (!context) bgq_cq->completed_tail = NULL; - - } - - } else if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_AUTO) { /* branch will compile out */ - - /* drain the std fifo and initialize the cq entries in the application - * buffer if the operation is complete; otherwise append to the - * pending completion queue */ - uint64_t value = 0; - struct l2atomic_fifo_consumer * consumer = &bgq_cq->std_consumer; - while ((count - num_entries) > 0 && - l2atomic_fifo_consume(consumer, &value) == 0) { - - /* const uint64_t flags = value & 0xE000000000000000ull; -- currently not used */ - - /* convert the fifo value into a context pointer */ - union fi_bgq_context *context = (union fi_bgq_context *) (value << 3); - - if (context->byte_counter == 0) { - output += fi_bgq_cq_fill(output, context, format); - ++ num_entries; - } else { - context->next = NULL; - if (pending_tail) - pending_tail->next = context; - else - pending_head = context; - pending_tail = context; - } - } - - /* save the updated pending head and pending tail pointers */ - bgq_cq->pending_head = pending_head; - bgq_cq->pending_tail = pending_tail; - } - - return num_entries; -} - -static ssize_t fi_bgq_cq_poll_inline(struct fid_cq *cq, void *buf, size_t count, - fi_addr_t *src_addr, const enum fi_cq_format format, - const int lock_required) -{ - ssize_t num_entries = 0; - - struct fi_bgq_cq *bgq_cq = (struct fi_bgq_cq *)cq; - - int ret; - ret = fi_bgq_lock_if_required(&bgq_cq->lock, lock_required); - if (ret) return ret; - - - if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_MANUAL) { /* branch will compile out */ /* TODO - FI_PROGRESS_AUTO + 64 ppn */ - - const uint64_t count = bgq_cq->progress.ep_count; - uint64_t i; - for (i=0; iprogress.ep[i]); - } - - const uintptr_t tmp_eh = (const uintptr_t)bgq_cq->err_head; - const uintptr_t tmp_ph = (const 
uintptr_t)bgq_cq->pending_head; - const uintptr_t tmp_ch = (const uintptr_t)bgq_cq->completed_head; - - /* check for "all empty" and return */ - if (0 == (tmp_eh | tmp_ph | tmp_ch)) { - - ret = fi_bgq_unlock_if_required(&bgq_cq->lock, lock_required); - if (ret) return ret; - - errno = FI_EAGAIN; - return -errno; - } - - /* check for "fast path" and return */ - if (tmp_ch == (tmp_eh | tmp_ph | tmp_ch)) { - - uintptr_t output = (uintptr_t)buf; - - union fi_bgq_context * context = (union fi_bgq_context *)tmp_ch; - while ((count - num_entries) > 0 && context != NULL) { - output += fi_bgq_cq_fill(output, context, format); - ++ num_entries; - context = context->next; - } - bgq_cq->completed_head = context; - if (!context) bgq_cq->completed_tail = NULL; - - ret = fi_bgq_unlock_if_required(&bgq_cq->lock, lock_required); - if (ret) return ret; - - return num_entries; - } - } - - num_entries = fi_bgq_cq_poll(bgq_cq, buf, count, format); - - ret = fi_bgq_unlock_if_required(&bgq_cq->lock, lock_required); - if (ret) return ret; - - if (num_entries == 0) { - errno = FI_EAGAIN; - return -errno; - } - - return num_entries; -} - - -static inline -ssize_t fi_bgq_cq_read_generic (struct fid_cq *cq, void *buf, size_t count, - const enum fi_cq_format format, const int lock_required) -{ - int ret; - ret = fi_bgq_cq_poll_inline(cq, buf, count, NULL, format, lock_required); - return ret; -} - -static inline -ssize_t fi_bgq_cq_readfrom_generic (struct fid_cq *cq, void *buf, size_t count, fi_addr_t *src_addr, - const enum fi_cq_format format, const int lock_required) -{ - int ret; - ret = fi_bgq_cq_poll_inline(cq, buf, count, src_addr, format, lock_required); - if (ret > 0) { - unsigned n; - for (n=0; nops->readerr(cq, buf, flags); -} - -static inline -uint64_t fi_cntr_read(struct fid_cntr *cntr) -{ - return cntr->ops->read(cntr); -} - -static inline -int fi_cntr_wait(struct fid_cntr *cntr, uint64_t threshold, int timeout) -{ - return cntr->ops->wait(cntr, threshold, timeout); -} - -static 
inline -int fi_trywait(struct fid_fabric *fabric, struct fid **fids, int count) -{ - return -FI_ENOSYS; /* TODO - implement this */ -} - -static inline -int fi_wait(struct fid_wait *waitset, int timeout) -{ - return -FI_ENOSYS; /* TODO - implement this */ -} - -static inline -int fi_poll(struct fid_poll *pollset, void **context, int count) -{ - return -FI_ENOSYS; /* TODO - implement this */ -} - -static inline -int fi_poll_add(struct fid_poll *pollset, struct fid *event_fid, - uint64_t flags) -{ - return -FI_ENOSYS; /* TODO - implement this */ -} - -static inline -int fi_poll_del(struct fid_poll *pollset, struct fid *event_fid, - uint64_t flags) -{ - return -FI_ENOSYS; /* TODO - implement this */ -} - -static inline -int fi_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr, - struct fid_eq **eq, void *context) -{ - return -FI_ENOSYS; /* TODO - implement this */ -} - -static inline -ssize_t fi_eq_read(struct fid_eq *eq, uint32_t *event, void *buf, - size_t len, uint64_t flags) -{ - return -FI_ENOSYS; /* TODO - implement this */ -} - -static inline -ssize_t fi_eq_readerr(struct fid_eq *eq, - struct fi_eq_err_entry *buf, uint64_t flags) -{ - return -FI_ENOSYS; /* TODO - implement this */ -} - -static inline -ssize_t fi_eq_write(struct fid_eq *eq, uint32_t event, - const void *buf, size_t len, uint64_t flags) -{ - return -FI_ENOSYS; /* TODO - implement this */ -} - -static inline -ssize_t fi_eq_sread(struct fid_eq *eq, uint32_t *event, void *buf, - size_t len, int timeout, uint64_t flags) -{ - return -FI_ENOSYS; /* TODO - implement this */ -} - -static inline -const char *fi_eq_strerror(struct fid_eq *eq, int prov_errno, - const void *err_data, char *buf, - size_t len) -{ - return NULL; /* TODO - implement this */ -} - -static inline -ssize_t fi_cq_sread(struct fid_cq *cq, void *buf, size_t count, - const void *cond, int timeout) -{ - return -FI_ENOSYS; /* TODO - implement this */ -} - -static inline -ssize_t fi_cq_sreadfrom(struct fid_cq *cq, void *buf, - 
size_t count, fi_addr_t *src_addr, - const void *cond, int timeout) -{ - return -FI_ENOSYS; /* TODO - implement this */ -} - -static inline -int fi_cq_signal(struct fid_cq *cq) -{ - return -FI_ENOSYS; /* TODO - implement this */ -} - -static inline -const char *fi_cq_strerror(struct fid_cq *cq, int prov_errno, - const void *err_data, char *buf, - size_t len) -{ - return NULL; /* TODO - implement this */ -} - -static inline -uint64_t fi_cntr_readerr(struct fid_cntr *cntr) -{ - return 0; /* TODO - implement this */ -} - -static inline -int fi_cntr_add(struct fid_cntr *cntr, uint64_t value) -{ - return -FI_ENOSYS; /* TODO - implement this */ -} - -static inline -int fi_cntr_set(struct fid_cntr *cntr, uint64_t value) -{ - return -FI_ENOSYS; /* TODO - implement this */ -} - -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* _FI_BGQ_DIRECT_EQ_H_ */ diff --git a/prov/bgq/include/rdma/fi_direct_rma.h b/prov/bgq/include/rdma/fi_direct_rma.h deleted file mode 100644 index 956500a380d..00000000000 --- a/prov/bgq/include/rdma/fi_direct_rma.h +++ /dev/null @@ -1,1017 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef _FI_BGQ_DIRECT_RMA_H_ -#define _FI_BGQ_DIRECT_RMA_H_ - -#define FABRIC_DIRECT_RMA 1 - -#include - -#include "rdma/bgq/fi_bgq_compiler.h" -#include "rdma/bgq/fi_bgq_spi.h" - -#ifdef __cplusplus -extern "C" { -#endif - -static inline int fi_bgq_check_rma(struct fi_bgq_ep *bgq_ep, - enum fi_av_type av_type) -{ -#ifdef DEBUG - if (!bgq_ep) - return -FI_EINVAL; - if (bgq_ep->state != FI_BGQ_EP_ENABLED) - return -FI_EINVAL; - - if (av_type == FI_AV_UNSPEC) - return -FI_EINVAL; - if (av_type == FI_AV_MAP && bgq_ep->av_type != FI_AV_MAP) - return -FI_EINVAL; - if (av_type == FI_AV_TABLE && bgq_ep->av_type != FI_AV_TABLE) - return -FI_EINVAL; -#endif - return 0; -} - - -static inline void fi_bgq_readv_internal (struct fi_bgq_ep * bgq_ep, - const struct iovec * iov, - const size_t niov, - const union fi_bgq_addr * bgq_target_addr, - const uint64_t * addr, - const uint64_t * key, - union fi_bgq_context * bgq_context, - const uint64_t tx_op_flags, - const uint64_t enable_cq, - const uint64_t enable_cntr, - const int lock_required) -{ -#ifdef FI_BGQ_TRACE -fprintf(stderr,"fi_bgq_readv_internal starting - niov is %ld do_cntr is %d\n",niov,(enable_cntr && ( bgq_ep->write_cntr != 0))); -fflush(stderr); -#endif - assert(niov <= 8); - - const uint64_t do_cq = enable_cq && (tx_op_flags & FI_COMPLETION); - - struct fi_bgq_cntr * write_cntr = bgq_ep->write_cntr; - const uint64_t do_cntr = enable_cntr && (write_cntr != 0); - - MUHWI_Descriptor_t * model = 
&bgq_ep->tx.read.emulation.mfifo_model; - - const uint64_t fifo_map = fi_bgq_addr_get_fifo_map(bgq_target_addr->fi); - - /* busy-wait until a fifo slot is available .. */ - MUHWI_Descriptor_t * desc = - fi_bgq_spi_injfifo_tail_wait(&bgq_ep->tx.injfifo); - - /* copy the descriptor model into the injection fifo */ - qpx_memcpy64((void*)desc, (const void *)model); - - /* set the target torus address and fifo map */ - desc->PacketHeader.NetworkHeader.pt2pt.Destination = fi_bgq_uid_get_destination(bgq_target_addr->uid.fi); - desc->Torus_FIFO_Map = fifo_map; - - /* locate the payload lookaside slot */ - MUHWI_Descriptor_t * dput_desc = - (MUHWI_Descriptor_t *)fi_bgq_spi_injfifo_immediate_payload(&bgq_ep->tx.injfifo, - desc, &desc->Pa_Payload); - desc->Message_Length = (niov << BGQ_MU_DESCRIPTOR_SIZE_IN_POWER_OF_2); - - - desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id = - fi_bgq_addr_rec_fifo_id(bgq_target_addr->fi); - - union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader; - hdr->rma.ndesc = niov; - - /* TODO - how to specify multiple remote injection fifos? 
*/ - - union fi_bgq_mu_descriptor * fi_dput_desc = (union fi_bgq_mu_descriptor *) dput_desc; - - unsigned i; - for (i = 0; i < niov; ++i) { /* on fence this loop will compile out (niov is 0) */ - - qpx_memcpy64((void*)&dput_desc[i], - (const void*)&bgq_ep->tx.read.emulation.dput_model); - - dput_desc[i].Torus_FIFO_Map = fifo_map; - dput_desc[i].Message_Length = iov[i].iov_len; - dput_desc[i].Pa_Payload = addr[i]; - - /* determine the physical address of the destination data location */ - uint64_t iov_base_paddr = 0; - uint32_t cnk_rc __attribute__ ((unused)); - cnk_rc = fi_bgq_cnk_vaddr2paddr(iov[i].iov_base, iov[i].iov_len, &iov_base_paddr); - assert(cnk_rc==0); - MUSPI_SetRecPayloadBaseAddressInfo(&dput_desc[i], FI_BGQ_MU_BAT_ID_GLOBAL, iov_base_paddr); - - assert((key[i] & 0xFFFF000000000000ul) == 0); /* TODO - change this when key size > 48b */ - fi_dput_desc[i].rma.key_lsb = key[i]; - } - - if (do_cntr && niov < 8) { /* likely */ -#ifdef FI_BGQ_TRACE -fprintf(stderr,"fi_bgq_readv_internal do_cntr && niov %ld < 8\n",niov); -fflush(stderr); -#endif - /* add the counter update direct-put descriptor to the - * tail of the rget/mfifo payload */ - - qpx_memcpy64((void*)&dput_desc[niov], - (const void*)&bgq_ep->tx.read.cntr_model); - - dput_desc[niov].Torus_FIFO_Map = fifo_map; - MUSPI_SetRecPayloadBaseAddressInfo(&dput_desc[niov], - FI_BGQ_MU_BAT_ID_GLOBAL, - MUSPI_GetAtomicAddress(write_cntr->std.paddr, MUHWI_ATOMIC_OPCODE_STORE_ADD)); - - desc->Message_Length += sizeof(MUHWI_Descriptor_t); - union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader; - hdr->rma.ndesc += 1; - - if (!do_cq) { /* likely */ - -#ifdef FI_BGQ_TRACE -fprintf(stderr,"fi_bgq_readv_internal do_cntr && niov < 8 AND (!do_cq)\n"); -fflush(stderr); -#endif - MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo); - - } else if (niov < 7) { - - /* add the cq update direct-put descriptor to the - * tail of the rget/mfifo payload (after the cntr update) */ - - /* 
initialize the completion entry */ - assert(bgq_context); - assert(((uintptr_t)bgq_context & 0x07ull) == 0); /* must be 8 byte aligned */ - bgq_context->flags = FI_RMA | FI_READ; - bgq_context->len = 0; - bgq_context->buf = NULL; - bgq_context->byte_counter = 1; - bgq_context->tag = 0; - - uint64_t byte_counter_paddr = 0; - uint32_t cnk_rc __attribute__ ((unused)); - cnk_rc = fi_bgq_cnk_vaddr2paddr((void*)&bgq_context->byte_counter, - sizeof(uint64_t), &byte_counter_paddr); - assert(cnk_rc == 0); - - MUHWI_Descriptor_t * cq_desc = &dput_desc[niov+1]; - - qpx_memcpy64((void*)cq_desc, - (const void*)&bgq_ep->tx.read.cq_model); - - cq_desc->Torus_FIFO_Map = fifo_map; - MUSPI_SetRecPayloadBaseAddressInfo(cq_desc, - FI_BGQ_MU_BAT_ID_GLOBAL, byte_counter_paddr); - - desc->Message_Length += sizeof(MUHWI_Descriptor_t); - union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader; - hdr->rma.ndesc += 1; - - MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo); - - fi_bgq_cq_enqueue_pending(bgq_ep->send_cq, bgq_context, lock_required); - - } else { - - /* the rget/mfifo payload is full - inject the data - * movement descriptors, then inject the counter - * completion descriptor */ - MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo); - - /* be lazy and do a single recursive call */ - fi_bgq_readv_internal(bgq_ep, - NULL, 0, /* no iovec array */ - bgq_target_addr, - NULL, NULL, /* no addr array, no key array */ - bgq_context, tx_op_flags, - 1, /* enable cq */ - 0, /* disable cntr */ - lock_required); - } - - } else if (do_cntr) { /* unlikely */ - - /* the rget/mfifo payload is full - inject the data - * movement descriptors, then inject any counter or cq - * completion descriptor(s) via a recursive call */ - MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo); - - fi_bgq_readv_internal(bgq_ep, - NULL, 0, /* no iovec array */ - bgq_target_addr, - NULL, NULL, /* no addr array, no key array */ - bgq_context, tx_op_flags, - do_cq, - 
1, /* enable cntr */ - lock_required); - - } else if (do_cq && niov < 8) { - - /* no cntr completion - * - * add the cq byte counter decrement direct-put - * descriptor to the tail of the rget/mfifo payload */ - - /* initialize the completion entry */ - assert(bgq_context); - assert(((uintptr_t)bgq_context & 0x07ull) == 0); /* must be 8 byte aligned */ - bgq_context->flags = FI_RMA | FI_READ; - bgq_context->len = 0; - bgq_context->buf = NULL; - bgq_context->byte_counter = 1; - bgq_context->tag = 0; - - uint64_t byte_counter_paddr = 0; - uint32_t cnk_rc __attribute__ ((unused)); - cnk_rc = fi_bgq_cnk_vaddr2paddr((void*)&bgq_context->byte_counter, - sizeof(uint64_t), &byte_counter_paddr); - assert(cnk_rc == 0); - - MUHWI_Descriptor_t * cq_desc = &dput_desc[niov]; - - qpx_memcpy64((void*)cq_desc, - (const void*)&bgq_ep->tx.read.cq_model); - - cq_desc->Torus_FIFO_Map = fifo_map; - MUSPI_SetRecPayloadBaseAddressInfo(cq_desc, - FI_BGQ_MU_BAT_ID_GLOBAL, byte_counter_paddr); - - desc->Message_Length += sizeof(MUHWI_Descriptor_t); - union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader; - hdr->rma.ndesc += 1; - - MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo); - - fi_bgq_cq_enqueue_pending(bgq_ep->send_cq, bgq_context, lock_required); - - } else if (do_cq) { - - /* the rget/mfifo payload is full - inject the data - * movement descriptors, then inject the cq completion - * descriptor via a recursive call */ - MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo); - - fi_bgq_readv_internal(bgq_ep, - NULL, 0, /* no iovec array */ - bgq_target_addr, - NULL, NULL, /* no addr array, no key array */ - bgq_context, tx_op_flags, - 1, /* enable cq */ - 0, /* disable cntr */ - lock_required); - - } else { - /* no cntr and no cq? 
very unlikely, if not invalid */ - - /* if there are no completion operations then there *must* be - * at least one data movement operations */ - assert(niov > 0); - - MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo); - } -} - -static inline ssize_t fi_bgq_inject_write_generic(struct fid_ep *ep, - const void *buf, size_t len, fi_addr_t dst_addr, - uint64_t addr, uint64_t key, - int lock_required) -{ -#ifdef FI_BGQ_TRACE - fprintf(stderr,"fi_bgq_inject_write_generic starting\n"); -#endif - int ret; - struct fi_bgq_ep *bgq_ep; - - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - ret = fi_bgq_check_rma(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV); - if (ret) return ret; - -// if (av_type == FI_AV_TABLE) -// dst_addr = bgq_ep->av->table[(size_t)dst_addr]; - - ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - MUHWI_Descriptor_t * model = - (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_BASIC) ? - &bgq_ep->tx.write.direct.dput_model : - &bgq_ep->tx.write.emulation.mfifo_model; - - /* - * busy-wait until a fifo slot is available .. 
- */ - MUHWI_Descriptor_t * desc = - fi_bgq_spi_injfifo_tail_wait(&bgq_ep->tx.injfifo); - - /* copy the descriptor model into the injection fifo */ - qpx_memcpy64((void*)desc, (const void *)model); - - /* set the destination torus address and fifo map */ - union fi_bgq_addr * bgq_dst_addr = (union fi_bgq_addr *)&dst_addr; - desc->PacketHeader.NetworkHeader.pt2pt.Destination = fi_bgq_uid_get_destination(bgq_dst_addr->uid.fi); - desc->Torus_FIFO_Map = fi_bgq_addr_get_fifo_map(bgq_dst_addr->fi); - desc->Message_Length = len; - - /* locate the payload lookaside slot */ - void * payload = - fi_bgq_spi_injfifo_immediate_payload(&bgq_ep->tx.injfifo, - desc, &desc->Pa_Payload); - assert(len <= sizeof(union fi_bgq_mu_packet_payload)); - memcpy(payload, buf, len); - - if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_BASIC) { /* branch will compile out */ -#ifdef FI_BGQ_TRACE - fprintf(stderr,"fi_bgq_inject_write_generic - virtual addr is 0x%016lx physical addr is 0x%016lx key is %lu \n",addr,(addr-key),key); -#endif - - /* the 'key' is the paddr of the remote memory region */ - MUSPI_SetRecPayloadBaseAddressInfo(desc, FI_BGQ_MU_BAT_ID_GLOBAL, addr-key); - - } else if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_SCALABLE) { /* branch will compile out */ - - desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id = - fi_bgq_addr_rec_fifo_id(bgq_dst_addr->fi); - - /* the 'key' is used to index into the remote base address table */ - union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader; - hdr->rma.key = key; - hdr->rma.offset = addr; - hdr->rma.nbytes = len; - hdr->rma.ndesc = 0; - - } else { - assert(0); - } - - /* the src buffer is available for reuse - increment the endpoint counter */ - struct fi_bgq_cntr * write_cntr = bgq_ep->write_cntr; - if (write_cntr) L2_AtomicStoreAdd(write_cntr->std.l2_vaddr, 1); - - MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo); - - ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required); - if (ret) 
return ret; - - return 0; -} - -static inline void fi_bgq_write_fence (struct fi_bgq_ep * bgq_ep, - const uint64_t tx_op_flags, - const union fi_bgq_addr * bgq_dst_addr, - union fi_bgq_context * bgq_context, - const int lock_required) -{ - fi_bgq_readv_internal(bgq_ep, - NULL, 0, /* no iovec array */ - bgq_dst_addr, - NULL, NULL, /* no addr array, key array */ - bgq_context, tx_op_flags, - 1, - 1, - lock_required); -} - -static inline void fi_bgq_write_internal (struct fi_bgq_ep * bgq_ep, - const void * buf, - size_t len, - const union fi_bgq_addr * bgq_dst_addr, - uint64_t addr, - const uint64_t key, - union fi_bgq_context * bgq_context, - const uint64_t tx_op_flags, - const uint64_t enable_cq, - const uint64_t enable_cntr, - const int lock_required) -{ - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"fi_bgq_write_internal starting\n"); -#endif - const uint64_t do_cq = enable_cq && ((tx_op_flags & FI_COMPLETION) == FI_COMPLETION); - - struct fi_bgq_cntr * write_cntr = bgq_ep->write_cntr; - const uint64_t do_cntr = enable_cntr && (write_cntr != 0); - - MUHWI_Descriptor_t * model = - (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_BASIC) ? - &bgq_ep->tx.write.direct.dput_model : - &bgq_ep->tx.write.emulation.mfifo_model; - - /* busy-wait until a fifo slot is available .. 
*/ - MUHWI_Descriptor_t * desc = - fi_bgq_spi_injfifo_tail_wait(&bgq_ep->tx.injfifo); - - /* copy the descriptor model into the injection fifo */ - qpx_memcpy64((void*)desc, (const void *)model); - - /* set the destination torus address and fifo map */ - desc->PacketHeader.NetworkHeader.pt2pt.Destination = fi_bgq_uid_get_destination(bgq_dst_addr->uid.fi); - desc->Torus_FIFO_Map = fi_bgq_addr_get_fifo_map(bgq_dst_addr->fi); - - if (tx_op_flags & FI_INJECT) { /* unlikely */ - - assert(len <= sizeof(union fi_bgq_mu_packet_payload)); - - /* locate the payload lookaside slot */ - void * payload = - fi_bgq_spi_injfifo_immediate_payload(&bgq_ep->tx.injfifo, - desc, &desc->Pa_Payload); - - memcpy(payload, buf, len); - desc->Message_Length = len; - - if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_BASIC) { /* branch will compile out */ - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"fi_bgq_write_internal tx_op_flags & FI_INJECT - virtual addr is 0x%016lx physical addr is 0x%016lx key is %lu \n",addr,(addr-key),key); -#endif - /* the 'key' is the paddr of the remote memory region */ - MUSPI_SetRecPayloadBaseAddressInfo(desc, FI_BGQ_MU_BAT_ID_GLOBAL, addr-key); - - } else if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_SCALABLE) { /* branch will compile out */ - - desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id = - fi_bgq_addr_rec_fifo_id(bgq_dst_addr->fi); - - /* the 'key' is used to index into the remote base address table */ - union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader; - hdr->rma.key = key; - hdr->rma.offset = addr; - hdr->rma.nbytes = len; - hdr->rma.ndesc = 0; - - } else { - assert(0); - } - - MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo); - - /* FI_TRANSMIT_COMPLETE and FI_DELIVERY_COMPLETE are not supported */ - assert((tx_op_flags & (FI_COMPLETION | FI_TRANSMIT_COMPLETE)) != (FI_COMPLETION | FI_TRANSMIT_COMPLETE)); - assert((tx_op_flags & (FI_COMPLETION | FI_DELIVERY_COMPLETE)) != (FI_COMPLETION | 
FI_DELIVERY_COMPLETE)); - - if (do_cq) { - - assert(bgq_context); - assert(((uintptr_t)bgq_context & 0x07ull) == 0); /* must be 8 byte aligned */ - bgq_context->flags = FI_RMA | FI_WRITE; - bgq_context->len = 0; - bgq_context->buf = NULL; - bgq_context->byte_counter = 0; - bgq_context->tag = 0; - - fi_bgq_cq_enqueue_completed(bgq_ep->send_cq, bgq_context, lock_required); - } - - /* the src buffer is available for reuse - increment the endpoint counter */ - if (do_cntr) L2_AtomicStoreAdd(write_cntr->std.l2_vaddr, 1); - - } else { - size_t xfer_bytes = MIN(len, sizeof(union fi_bgq_mu_packet_payload)); - - if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_BASIC) { /* branch will compile out */ - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"fi_bgq_write_internal - NOT tx_op_flags & FI_INJECT - virtual addr is 0x%016lx physical addr is 0x%016lx key is %lu \n",addr,(addr-key),key); -#endif - /* the 'key' is the paddr of the remote memory region */ - MUSPI_SetRecPayloadBaseAddressInfo(desc, FI_BGQ_MU_BAT_ID_GLOBAL, addr-key); - - } else if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_SCALABLE) { /* branch will compile out */ - - desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id = - fi_bgq_addr_rec_fifo_id(bgq_dst_addr->fi); - - /* the 'key' is used to index into the remote base address table */ - union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader; - hdr->rma.key = key; - hdr->rma.offset = addr; - hdr->rma.nbytes = xfer_bytes; - hdr->rma.ndesc = 0; - - } else { - assert(0); - } - - /* determine the physical address of the source data */ - uint64_t src_paddr = 0; - uint32_t cnk_rc __attribute__ ((unused)); - cnk_rc = fi_bgq_cnk_vaddr2paddr(buf, len, &src_paddr); - assert(cnk_rc==0); - - desc->Message_Length = xfer_bytes; - desc->Pa_Payload = src_paddr; - - if (len <= sizeof(union fi_bgq_mu_packet_payload)) { /* likely */ - - MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo); - - } else { - - MUHWI_Descriptor_t model = *desc; - 
MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo); - - src_paddr += xfer_bytes; - len -= xfer_bytes; - addr += xfer_bytes; - - while (len > 0) { - desc = fi_bgq_spi_injfifo_tail_wait(&bgq_ep->tx.injfifo); - - qpx_memcpy64((void*)desc, (const void*)&model); - - xfer_bytes = MIN(len, sizeof(union fi_bgq_mu_packet_payload)); - desc->Message_Length = xfer_bytes; - desc->Pa_Payload = src_paddr; - - union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader; - if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_BASIC) { -#ifdef FI_BGQ_TRACE - fprintf(stderr,"fi_bgq_write_internal for multiple packets - NOT tx_op_flags & FI_INJECT - virtual addr is 0x%016lx physical addr is 0x%016lx key is %lu \n",addr,(addr-key),key); -#endif - /* the 'key' is the paddr of the remote memory region */ - MUSPI_SetRecPayloadBaseAddressInfo(desc, FI_BGQ_MU_BAT_ID_GLOBAL, addr-key); - - } - else if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_SCALABLE) { - hdr->rma.offset = addr; - hdr->rma.nbytes = xfer_bytes; - } - else { - assert(0); - } - - - MUSPI_InjFifoAdvanceDesc(bgq_ep->tx.injfifo.muspi_injfifo); - - src_paddr += xfer_bytes; - len -= xfer_bytes; - addr += xfer_bytes; - } - } - - if (do_cq || do_cntr) - fi_bgq_readv_internal(bgq_ep, NULL, 0, bgq_dst_addr, - NULL, NULL, bgq_context, - tx_op_flags, do_cq, do_cntr, lock_required); - } -} - - - - - -static inline ssize_t fi_bgq_write_generic(struct fid_ep *ep, - const void *buf, size_t len, void *desc, fi_addr_t dst_addr, - uint64_t addr, uint64_t key, void *context, - int lock_required) -{ - int ret; - struct fi_bgq_ep *bgq_ep; - - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - ret = fi_bgq_check_rma(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV); - if (ret) return ret; - - ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - fi_bgq_write_internal(bgq_ep, buf, len, (union fi_bgq_addr *)&dst_addr, - addr, key, (union fi_bgq_context *)context, - bgq_ep->tx.op_flags, 1, 1, lock_required); - - ret = 
fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required); - if (ret) { - return ret; - } - - return 0; -} - -static inline ssize_t fi_bgq_writev_generic(struct fid_ep *ep, - const struct iovec *iov, void **desc, size_t count, - fi_addr_t dst_addr, uint64_t addr, uint64_t key, void *context, - int lock_required) -{ - int ret; - struct fi_bgq_ep *bgq_ep; - - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - ret = fi_bgq_check_rma(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV); - if (ret) return ret; - - ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - const union fi_bgq_addr bgq_dst_addr = *((union fi_bgq_addr *)&dst_addr); - - size_t index = 0; - for (index = 0; index < count; ++index) { - - size_t len = iov[index].iov_len; - void * buf = iov[index].iov_base; - - fi_bgq_write_internal(bgq_ep, buf, len, &bgq_dst_addr, - addr, key, (union fi_bgq_context *)context, - 0, 0, 0, lock_required); - - addr += len; - } - - fi_bgq_write_fence(bgq_ep, bgq_ep->tx.op_flags, &bgq_dst_addr, (union fi_bgq_context *)context, - lock_required); - - ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - return 0; -} - - -static inline ssize_t fi_bgq_writemsg_generic(struct fid_ep *ep, - const struct fi_msg_rma *msg, uint64_t flags, - int lock_required) -{ - int ret; - struct fi_bgq_ep *bgq_ep; - - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - ret = fi_bgq_check_rma(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV); - if (ret) return ret; - - ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - union fi_bgq_addr * bgq_dst_addr = (union fi_bgq_addr *)&msg->addr; - - - size_t rma_iov_index = 0; - const size_t rma_iov_count = msg->rma_iov_count; - uint64_t rma_iov_bytes = msg->rma_iov[rma_iov_index].len; - uint64_t rma_iov_addr = msg->rma_iov[rma_iov_index].addr; - uint64_t rma_iov_key = msg->rma_iov[rma_iov_index].key; - - size_t msg_iov_index = 0; - const size_t msg_iov_count = msg->iov_count; - 
uint64_t msg_iov_bytes = msg->msg_iov[msg_iov_index].iov_len; - uintptr_t msg_iov_vaddr = (uintptr_t)msg->msg_iov[msg_iov_index].iov_base; - -#ifdef FI_BGQ_TRACE -fprintf(stderr,"fi_bgq_writemsg_generic msg_iov_bytes is %lu rma_iov_bytes is %lu base vadder is 0x%016lx lock_required is %d\n",msg_iov_bytes,rma_iov_bytes,msg_iov_vaddr,lock_required); -fflush(stderr); -#endif - while (msg_iov_bytes != 0 && rma_iov_bytes != 0) { - - size_t len = (msg_iov_bytes <= rma_iov_bytes) ? msg_iov_bytes : rma_iov_bytes; - -#ifdef FI_BGQ_TRACE -fprintf(stderr,"fi_bgq_writemsg_generic calling fi_bgq_write_internal with msg_iov_vaddr 0x%016lx and len %lu\n",msg_iov_vaddr,len); -fflush(stderr); -#endif - fi_bgq_write_internal(bgq_ep, (void*)msg_iov_vaddr, len, bgq_dst_addr, - rma_iov_addr, rma_iov_key, NULL, 0, 0, 0, lock_required); - - msg_iov_bytes -= len; - msg_iov_vaddr += len; - - if ((msg_iov_bytes == 0) && ((msg_iov_index+1) < msg_iov_count)) { - ++msg_iov_index; - msg_iov_bytes = msg->msg_iov[msg_iov_index].iov_len; - msg_iov_vaddr = (uintptr_t)msg->msg_iov[msg_iov_index].iov_base; - } - - rma_iov_bytes -= len; - rma_iov_addr += len; - - if ((rma_iov_bytes == 0) && ((rma_iov_index+1) < rma_iov_count)) { - ++rma_iov_index; - rma_iov_bytes = msg->rma_iov[rma_iov_index].len; - rma_iov_addr = msg->rma_iov[rma_iov_index].addr; - rma_iov_key = msg->rma_iov[rma_iov_index].key; - } - } - -#ifdef FI_BGQ_TRACE -fprintf(stderr,"fi_bgq_writemsg_generic calling fi_bgq_write_fence\n"); -fflush(stderr); -#endif - fi_bgq_write_fence(bgq_ep, flags, bgq_dst_addr, - (union fi_bgq_context *)msg->context, - lock_required); - - ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - return 0; -} - - -static inline ssize_t fi_bgq_read_generic(struct fid_ep *ep, - void *buf, size_t len, void *desc, fi_addr_t src_addr, - uint64_t addr, uint64_t key, void *context, - int lock_required) -{ - int ret; - struct fi_bgq_ep *bgq_ep; - - bgq_ep = container_of(ep, struct 
fi_bgq_ep, ep_fid); - - ret = fi_bgq_check_rma(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV); - if (ret) return ret; - - ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - struct iovec iov; - iov.iov_base = buf; - iov.iov_len = len; - - fi_bgq_readv_internal(bgq_ep, &iov, 1, (union fi_bgq_addr *)&src_addr, - &addr, &key, (union fi_bgq_context *)context, - bgq_ep->tx.op_flags, 1, 1, lock_required); - - ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required); - if (ret) - return ret; - - return 0; -} - -static inline ssize_t fi_bgq_readv_generic (struct fid_ep *ep, - const struct iovec *iov, void **desc, size_t count, - fi_addr_t src_addr, uint64_t addr, uint64_t key, void *context, - int lock_required) -{ - -#ifdef FI_BGQ_TRACE -fprintf(stderr,"fi_bgq_readv_generic count is %lu addr is 0x%016lx key is 0x%016lx\n",count,addr,key); -fflush(stderr); -#endif - - int ret; - struct fi_bgq_ep *bgq_ep; - - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - ret = fi_bgq_check_rma(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV); - if (ret) return ret; - - ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - union fi_bgq_addr * bgq_addr = (union fi_bgq_addr *)&src_addr; - union fi_bgq_context * bgq_context = (union fi_bgq_context *)context; - const uint64_t tx_op_flags = bgq_ep->tx.op_flags; - - uint64_t addr_v[8] = { addr, addr, addr, addr, addr, addr, addr, addr }; - uint64_t key_v[8] = { key, key, key, key, key, key, key, key }; - - /* max 8 descriptors (iovecs) per readv_internal */ - size_t index = 0; - const size_t full_count = count >> 3; - for (index = 0; index < full_count; index += 8) { - - fi_bgq_readv_internal(bgq_ep, &iov[index], 8, bgq_addr, - addr_v, key_v, NULL, 0, 0, 0, - lock_required); - } - - /* if 'partial_ndesc' is zero, the fi_bgq_readv_internal() will fence */ - const size_t partial_ndesc = count & 0x07ull; - fi_bgq_readv_internal(bgq_ep, &iov[index], partial_ndesc, bgq_addr, - addr_v, key_v, 
bgq_context, tx_op_flags, 1, 1, - lock_required); - - ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required); - if (ret) - return ret; - - return 0; -} - - -static inline ssize_t fi_bgq_readmsg_generic(struct fid_ep *ep, - const struct fi_msg_rma *msg, uint64_t flags, - int lock_required) -{ -#ifdef FI_BGQ_TRACE -fprintf(stderr,"fi_bgq_readmsg_generic starting\n"); -fflush(stderr); -#endif - int ret; - struct fi_bgq_ep *bgq_ep; - - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - ret = fi_bgq_check_rma(bgq_ep, FI_BGQ_FABRIC_DIRECT_AV); - if (ret) return ret; - - ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - struct fi_bgq_cq * cq = bgq_ep->send_cq; - const uint64_t enable_cq = - (cq == NULL) || ((cq != NULL) && ((cq->bflags & FI_SELECTIVE_COMPLETION) && (flags & FI_COMPLETION) == 0)) ? 0 : 1; - - union fi_bgq_context * bgq_context = (union fi_bgq_context *) msg->context; - union fi_bgq_addr * bgq_src_addr = (union fi_bgq_addr *)&msg->addr; - - /* for fi_read*(), the 'src' is the remote data */ - size_t src_iov_index = 0; - const size_t src_iov_count = msg->rma_iov_count; - uint64_t src_iov_bytes = msg->rma_iov[0].len; - uint64_t src_iov_addr = msg->rma_iov[0].addr; - uint64_t src_iov_key = msg->rma_iov[0].key; - - /* for fi_read*(), the 'dst' is the local data */ - size_t dst_iov_index = 0; - const size_t dst_iov_count = msg->iov_count; - uint64_t dst_iov_bytes = msg->msg_iov[0].iov_len; - void * dst_iov_vaddr = msg->msg_iov[0].iov_base; - - size_t niov; - struct iovec iov[8]; - uint64_t addr[8]; - uint64_t key[8]; - - while (src_iov_index < src_iov_count) { - - for (niov = 0; niov < 8; ++niov) { - const size_t len = (dst_iov_bytes <= src_iov_bytes) ? 
dst_iov_bytes : src_iov_bytes; - iov[niov].iov_len = len; - iov[niov].iov_base = dst_iov_vaddr; - addr[niov] = src_iov_addr; - key[niov] = src_iov_key; - - dst_iov_bytes -= len; - src_iov_bytes -= len; - - if (src_iov_bytes == 0) { - - /* all done with this src rma iovec */ - - if (src_iov_index == (src_iov_count-1)) { - - /* this is the last src rma iovec .. perform - * read with completion processing and return - * - * the 'dst_iov_bytes' must be zero and it must - * be the last dst iovec as well */ - assert(dst_iov_bytes==0); - assert(dst_iov_index == (dst_iov_count-1)); - - fi_bgq_readv_internal(bgq_ep, iov, niov+1, - bgq_src_addr, addr, key, - bgq_context, - flags, - enable_cq, 1, /* enable_cq, enable_cntr */ - lock_required); - - ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - return 0; - - } else { - - /* advance to next src rma iovec */ - ++src_iov_index; - src_iov_bytes = msg->rma_iov[src_iov_index].len; - src_iov_addr = msg->rma_iov[src_iov_index].addr; - src_iov_key = msg->rma_iov[src_iov_index].key; - } - } else { - src_iov_addr += len; - } - - - if (dst_iov_bytes == 0) { - - /* all done with this dst iovec */ - - if (dst_iov_index == (dst_iov_count-1)) { - /* this is the last dst iovec .. 
do nothing since - * the 'src_iov_bytes' must be zero and it must - * be the last src rma iovec as well */ - assert(src_iov_bytes==0); - assert(src_iov_index == (src_iov_count-1)); - - /* in fact, it should be impossible to get here */ - assert(0); - } else { - - /* advance to next dst iovec */ - ++dst_iov_index; - dst_iov_bytes = msg->msg_iov[dst_iov_index].iov_len; - dst_iov_vaddr = msg->msg_iov[dst_iov_index].iov_base; - } - } else { - dst_iov_vaddr = (void*)((uintptr_t)dst_iov_vaddr + len); - } - - - } /* end for */ - - fi_bgq_readv_internal(bgq_ep, iov, 8, bgq_src_addr, addr, key, - NULL, 0, - 0, 0, /* disable_cq, disable_cntr */ - lock_required); - - } /* end while */ - - /* should never get here */ - assert(0); - - ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - return 0; -} - - -/* Declare specialized functions that qualify for FABRIC_DIRECT. - * - No locks - */ - -#define FI_BGQ_RMA_FABRIC_DIRECT_LOCK 0 - -FI_BGQ_RMA_SPECIALIZED_FUNC(FI_BGQ_RMA_FABRIC_DIRECT_LOCK) - -#ifdef FABRIC_DIRECT - -#define fi_write(ep, buf, len, desc, dst_addr, addr, key, context) \ - (FI_BGQ_RMA_SPECIALIZED_FUNC_NAME(write, \ - FI_BGQ_RMA_FABRIC_DIRECT_LOCK) \ - (ep, buf, len, desc, dst_addr, addr, key, context)) - -#define fi_inject_write(ep, buf, len, dst_addr, addr, key) \ - (FI_BGQ_RMA_SPECIALIZED_FUNC_NAME(inject_write, \ - FI_BGQ_RMA_FABRIC_DIRECT_LOCK) \ - (ep, buf, len, dst_addr, addr, key)) - -#define fi_read(ep, buf, len, desc, src_addr, addr, key, context) \ - (FI_BGQ_RMA_SPECIALIZED_FUNC_NAME(read, \ - FI_BGQ_RMA_FABRIC_DIRECT_LOCK) \ - (ep, buf, len, desc, src_addr, addr, key, context)) - -#define fi_readmsg(ep, msg, flags) \ - (FI_BGQ_RMA_SPECIALIZED_FUNC_NAME(readmsg, \ - FI_BGQ_RMA_FABRIC_DIRECT_LOCK) \ - (ep, msg, flags)) - -static inline ssize_t -fi_writemsg(struct fid_ep *ep, const struct fi_msg_rma *msg, uint64_t flags) -{ - return ep->rma->writemsg(ep, msg, flags); -} -static inline ssize_t -fi_writev(struct fid_ep 
*ep, const struct iovec *iov, void **desc, size_t count, - fi_addr_t dest_addr, uint64_t addr, uint64_t key, void *context) -{ - return ep->rma->writev(ep, iov, desc, count, dest_addr, addr, key, context); -} - -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* _FI_BGQ_DIRECT_RMA_H_ */ diff --git a/prov/bgq/include/rdma/fi_direct_tagged.h b/prov/bgq/include/rdma/fi_direct_tagged.h deleted file mode 100644 index 6adad205af4..00000000000 --- a/prov/bgq/include/rdma/fi_direct_tagged.h +++ /dev/null @@ -1,244 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef _FI_BGQ_DIRECT_TAGGED_H_ -#define _FI_BGQ_DIRECT_TAGGED_H_ - -#define FABRIC_DIRECT_TAGGED 1 - -#include -#include -#include -#include -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -static inline -ssize_t fi_bgq_tinject(struct fid_ep *ep, - const void *buf, - size_t len, - fi_addr_t dest_addr, - uint64_t tag, - int lock_required) -{ - return fi_bgq_inject_generic(ep, buf, len, dest_addr, tag, 0, - lock_required, 0); -} - -/* - * In FI_PROGRESS_AUTO mode: - * The bgq 'recv' implementation is THREAD SAFE and LOCKLESS due to its use of - * the L2 atomic operations to post the match information to the progress thread. - * The 'fi_bgq_lock_if_required()' utility function is not used. - * - * \note The bgq provider asserts the following mode bits which affect - * the behavior of this routine: - * - * - 'FI_ASYNC_IOV' mode bit which requires the application to maintain - * the 'msg->msg_iov' iovec array until the operation completes - * - * - 'FI_LOCAL_MR' mode bit which allows the provider to ignore the 'desc' - * parameter .. 
no memory regions are required to access the local - * memory - */ -static inline -ssize_t fi_bgq_trecvmsg_generic (struct fid_ep *ep, - const struct fi_msg_tagged *msg, - uint64_t flags, - const int lock_required) -{ - struct fi_bgq_ep * bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - uint64_t context_rsh3b = 0; - - if (msg->iov_count == 0) { - assert(msg->context); - assert(((uintptr_t)msg->context & 0x07ull) == 0); /* must be 8 byte aligned */ - - union fi_bgq_context * bgq_context = - (union fi_bgq_context *) msg->context; - bgq_context->flags = flags; - bgq_context->len = 0; - bgq_context->buf = NULL; - bgq_context->byte_counter = (uint64_t)-1; - if ((flags & (FI_PEEK | FI_CLAIM)) != FI_CLAIM) { - /* do not overwrite state from a previous "peek|claim" operation */ - bgq_context->tag = msg->tag; - bgq_context->ignore = msg->ignore; - bgq_context->src_addr = (fi_addr_t ) (msg->addr); - } - - context_rsh3b = (uint64_t)bgq_context >> 3; - - } else if (msg->iov_count == 1) { - assert(msg->context); - assert(((uintptr_t)msg->context & 0x07ull) == 0); /* must be 8 byte aligned */ - - union fi_bgq_context * bgq_context = - (union fi_bgq_context *) msg->context; - bgq_context->flags = flags; - bgq_context->len = msg->msg_iov[0].iov_len; - bgq_context->buf = msg->msg_iov[0].iov_base; - bgq_context->byte_counter = (uint64_t)-1; - if ((flags & (FI_PEEK | FI_CLAIM)) != FI_CLAIM) { - /* do not overwrite state from a previous "peek|claim" operation */ - bgq_context->tag = msg->tag; - bgq_context->ignore = msg->ignore; - bgq_context->src_addr = (fi_addr_t ) (msg->addr); - } - - context_rsh3b = (uint64_t)bgq_context >> 3; - - } else { - assert((flags & (FI_PEEK | FI_CLAIM)) != FI_CLAIM); /* TODO - why not? 
*/ - - struct fi_bgq_context_ext * ext; - posix_memalign((void**)&ext, 32, sizeof(struct fi_bgq_context_ext)); - flags |= FI_BGQ_CQ_CONTEXT_EXT; - - ext->bgq_context.flags = flags; - ext->bgq_context.byte_counter = (uint64_t)-1; - ext->bgq_context.tag = msg->tag; - ext->bgq_context.src_addr = (fi_addr_t ) (msg->addr); - ext->bgq_context.ignore = msg->ignore; - ext->msg.op_context = msg->context; - ext->msg.iov_count = msg->iov_count; - ext->msg.iov = (struct iovec *)msg->msg_iov; - - context_rsh3b = (uint64_t)ext >> 3; - if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_MANUAL) { /* constant expression will compile out */ /* TODO FI_PROGRESS_AUTO + 64 ppn */ - - int ret; - ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"fi_bgq_trecvmsg_generic calling fi_bgq_ep_progress_manual_recv with 1 is_context_ext:\n"); -#endif - fi_bgq_ep_progress_manual_recv(bgq_ep, - 0, /* is_msg */ - (union fi_bgq_context *)(context_rsh3b << 3), - flags, - 1 /* is_context_ext */); - - ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - return 0; - } - } - - if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_MANUAL) { /* constant expression will compile out */ /* TODO FI_PROGRESS_AUTO + 64 ppn */ - - int ret; - ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"fi_bgq_trecvmsg_generic calling fi_bgq_ep_progress_manual_recv with 0 is_context_ext:\n"); -#endif - fi_bgq_ep_progress_manual_recv(bgq_ep, - 0, /* is_msg */ - (union fi_bgq_context *)(context_rsh3b << 3), - flags, - 0 /* is_context_ext */); - - ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - } else { - - /* the *only* difference between a 'tagged' and 'non-tagged' recv is - * the L2 atomic fifo used to post the receive information */ - struct l2atomic_fifo_producer * fifo = &bgq_ep->rx.post.match[0]; /* TODO 
- use enum */ - - while (l2atomic_fifo_produce(fifo, context_rsh3b) != 0); /* spin loop! */ - } - - return 0; -} - -/* - * Declare specialized functions that qualify for FABRIC_DIRECT. - * - No locks - */ -#define FI_BGQ_TAGGED_FABRIC_DIRECT_LOCK 0 - -FI_BGQ_TAGGED_SPECIALIZED_FUNC(FI_BGQ_TAGGED_FABRIC_DIRECT_LOCK) - -#ifdef FABRIC_DIRECT -#define fi_tsend(ep, buf, len, desc, dest_addr, tag, context) \ - (FI_BGQ_TAGGED_SPECIALIZED_FUNC_NAME(tsend, \ - FI_BGQ_TAGGED_FABRIC_DIRECT_LOCK) \ - (ep, buf, len, desc, dest_addr, tag, context)) - -#define fi_trecv(ep, buf, len, desc, src_addr, tag, ignore, context) \ - (FI_BGQ_TAGGED_SPECIALIZED_FUNC_NAME(trecv, \ - FI_BGQ_TAGGED_FABRIC_DIRECT_LOCK) \ - (ep, buf, len, desc, src_addr, tag, ignore, context)) - -#define fi_tinject(ep, buf, len, dest_addr, tag) \ - (FI_BGQ_TAGGED_SPECIALIZED_FUNC_NAME(tinject, \ - FI_BGQ_TAGGED_FABRIC_DIRECT_LOCK) \ - (ep, buf, len, dest_addr, tag)) - -#define fi_tsenddata(ep, buf, len, desc, data, dest_addr, tag, context) \ - (FI_BGQ_TAGGED_SPECIALIZED_FUNC_NAME(tsenddata, \ - FI_BGQ_TAGGED_FABRIC_DIRECT_LOCK) \ - (ep, buf, len, desc, data, dest_addr, tag, context)) - -#define fi_tinjectdata(ep, buf, len, data, dest_addr, tag) \ - (FI_BGQ_TAGGED_SPECIALIZED_FUNC_NAME(tinjectdata, \ - FI_BGQ_TAGGED_FABRIC_DIRECT_LOCK) \ - (ep, buf, len, data, dest_addr, tag)) - -#define fi_trecvmsg(ep, msg, flags) \ - (FI_BGQ_TAGGED_SPECIALIZED_FUNC_NAME(trecvmsg, \ - FI_BGQ_TAGGED_FABRIC_DIRECT_LOCK) \ - (ep, msg, flags)) - -static inline ssize_t -fi_tsendmsg(struct fid_ep *ep, const struct fi_msg_tagged *msg, uint64_t flags) -{ - return ep->tagged->sendmsg(ep, msg, flags); -} - -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* _FI_BGQ_DIRECT_TAGGED_H_ */ diff --git a/prov/bgq/include/rdma/fi_direct_trigger.h b/prov/bgq/include/rdma/fi_direct_trigger.h deleted file mode 100644 index 6bec3249c27..00000000000 --- a/prov/bgq/include/rdma/fi_direct_trigger.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (C) 
2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef _FI_BGQ_DIRECT_TRIGGER_H_ -#define _FI_BGQ_DIRECT_TRIGGER_H_ - -#define FABRIC_DIRECT_TRIGGER 1 - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef FABRIC_DIRECT -/* Size must match struct fi_context */ -struct fi_triggered_context { - enum fi_trigger_event event_type; - union { - struct fi_trigger_threshold threshold; - void *internal[3]; - }; -}; -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/prov/bgq/provider_FABRIC_1.0.map b/prov/bgq/provider_FABRIC_1.0.map deleted file mode 100644 index ef3732ed088..00000000000 --- a/prov/bgq/provider_FABRIC_1.0.map +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - /* - * used for exporting BGQ provider - * symbols when building to support FI_DIRECT - */ diff --git a/prov/bgq/src/fi_bgq_agent.c b/prov/bgq/src/fi_bgq_agent.c deleted file mode 100644 index 68d980ae081..00000000000 --- a/prov/bgq/src/fi_bgq_agent.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include -#include - -int main (int argc, char ** argv) { - - setbuf(stdout, NULL); - setbuf(stderr, NULL); - - while (1) usleep(1000); - - return 0; -} diff --git a/prov/bgq/src/fi_bgq_atomic.c b/prov/bgq/src/fi_bgq_atomic.c deleted file mode 100644 index dddd8dc3397..00000000000 --- a/prov/bgq/src/fi_bgq_atomic.c +++ /dev/null @@ -1,856 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "rdma/bgq/fi_bgq.h" -#include - -#include - -/* - * --------------------------- begin: rx atomics ------------------------------ - */ -#define FI_BGQ_RX_ATOMIC_SPECIALIZED_MACRO_NAME(OP) \ - FI_BGQ_RX_ATOMIC_SPECIALIZED_MACRO_NAME_(OP) - -#define FI_BGQ_RX_ATOMIC_SPECIALIZED_MACRO_NAME_(OP) \ - FI_BGQ_RX_ATOMIC_DO_ ## OP - -#define FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC(OP, DT, CTYPE) \ - FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_(OP, DT, CTYPE) - -#define FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_(OP, DT, CTYPE) \ - void fi_bgq_rx_atomic_ ## OP ## _ ## DT \ - (void * buf, void * addr, size_t nbytes) \ - { \ - FI_BGQ_RX_ATOMIC_SPECIALIZED_MACRO_NAME(OP)(buf, addr, CTYPE) \ - } - -#define FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME(OP, DT) \ - FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME_(OP, DT) - -#define FI_BGQ_RX_ATOMIC_SPECIALIZED_FUNC_NAME_(OP, DT) \ - fi_bgq_rx_atomic_ ## OP ## _ ## DT - - -#define FI_BGQ_RX_ATOMIC_DO_MIN(buf_, addr_, ctype) \ - ctype * buf__ = (ctype *)buf_; \ - ctype * addr__ = (ctype *)addr_; \ - const size_t count = nbytes / sizeof(ctype); \ - unsigned i; \ - for (i=0; i addr__[i]) \ - addr__[i] = buf__[i]; \ -} - -#define FI_BGQ_RX_ATOMIC_DO_SUM(buf_, addr_, ctype) \ -{ \ - ctype * buf__ = (ctype *)buf_; \ - ctype * addr__ = (ctype *)addr_; \ - const size_t count = nbytes / sizeof(ctype); \ - unsigned i; \ - for (i=0; i= addr__[i]) \ - addr__[i] = buf__[i]; \ -} - -#define FI_BGQ_RX_ATOMIC_DO_CSWAP_GT(buf_, addr_, ctype) \ -{ \ - ctype * buf__ = (ctype *)buf_; \ - ctype * addr__ = (ctype *)addr_; \ - const size_t count = nbytes / sizeof(ctype); \ - const ctype * compare__ = &buf__[count]; \ - unsigned i; \ - for (i=0; i addr__[i]) \ - addr__[i] = buf__[i]; \ -} - -#define FI_BGQ_RX_ATOMIC_DO_MSWAP_(buf_, addr_, ctype) \ -{ \ - ctype * buf__ = (ctype *)buf_; \ - ctype * addr__ = (ctype *)addr_; \ - const size_t count = nbytes / sizeof(ctype); \ - const ctype * compare__ = &buf__[count]; \ - unsigned i; \ - for (i=0; ithreading) { - case 
FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - case FI_THREAD_COMPLETION: - lock_required = 0; - break; - case FI_THREAD_FID: - case FI_THREAD_UNSPEC: - case FI_THREAD_SAFE: - lock_required = 1; - break; - default: - return -FI_EINVAL; - } - - return fi_bgq_atomic_generic(ep, buf, count, dst_addr, - addr, key, datatype, op, context, - lock_required); - -} - -ssize_t fi_bgq_fetch_atomic(struct fid_ep *ep, - const void *buf, size_t count, - void *desc, - void *result, void *result_desc, - fi_addr_t dest_addr, uint64_t addr, - uint64_t key, enum fi_datatype datatype, - enum fi_op op, void *context) -{ - int lock_required = 0; - - struct fi_bgq_ep * bgq_ep; - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - switch (bgq_ep->threading) { - case FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - case FI_THREAD_COMPLETION: - lock_required = 0; - break; - case FI_THREAD_FID: - case FI_THREAD_UNSPEC: - case FI_THREAD_SAFE: - lock_required = 1; - break; - default: - return -FI_EINVAL; - } - - return fi_bgq_fetch_atomic_generic(ep, - buf, count, desc, - result, result_desc, dest_addr, addr, - key, datatype, op, context, - lock_required); -} - -ssize_t fi_bgq_compare_atomic(struct fid_ep *ep, - const void *buf, size_t count, void *desc, - const void *compare, void *compare_desc, - void *result, void *result_desc, - fi_addr_t dest_addr, uint64_t addr, - uint64_t key, enum fi_datatype datatype, - enum fi_op op, void *context) -{ - int lock_required = 0; - - struct fi_bgq_ep * bgq_ep; - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - switch (bgq_ep->threading) { - case FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - case FI_THREAD_COMPLETION: - lock_required = 0; - break; - case FI_THREAD_FID: - case FI_THREAD_UNSPEC: - case FI_THREAD_SAFE: - lock_required = 1; - break; - default: - return -FI_EINVAL; - } - - return fi_bgq_compare_atomic_generic(ep, - buf, count, desc, - compare, compare_desc, - result, result_desc, dest_addr, addr, - key, datatype, op, context, - 
lock_required); -} - -ssize_t fi_bgq_inject_atomic(struct fid_ep *ep, - const void *buf, size_t count, - fi_addr_t dest_addr, uint64_t addr, uint64_t key, - enum fi_datatype datatype, enum fi_op op) -{ - int lock_required = 0; - - struct fi_bgq_ep * bgq_ep; - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - switch (bgq_ep->threading) { - case FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - case FI_THREAD_COMPLETION: - lock_required = 0; - break; - case FI_THREAD_FID: - case FI_THREAD_UNSPEC: - case FI_THREAD_SAFE: - lock_required = 1; - break; - default: - return -FI_EINVAL; - } - - return fi_bgq_inject_atomic_generic(ep, - buf, count, - dest_addr, addr, - key, datatype, op, - lock_required); -} - -ssize_t fi_bgq_atomicv(struct fid_ep *ep, - const struct fi_ioc *iov, void **desc, size_t count, - uint64_t addr, uint64_t key, - enum fi_datatype datatype, enum fi_op op, void *context) -{ - errno = FI_ENOSYS; - return -errno; -} - -ssize_t fi_bgq_atomic_writemsg(struct fid_ep *ep, - const struct fi_msg_atomic *msg, uint64_t flags) -{ - struct fi_bgq_ep * bgq_ep; - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - switch (bgq_ep->threading) { - case FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - case FI_THREAD_COMPLETION: - return fi_bgq_atomic_writemsg_generic(ep, msg, flags, - 0); - case FI_THREAD_FID: - case FI_THREAD_UNSPEC: - case FI_THREAD_SAFE: - return fi_bgq_atomic_writemsg_generic(ep, msg, flags, - 1); - } - - errno = FI_EINVAL; - return -errno; -} - -ssize_t fi_bgq_atomic_readwritemsg(struct fid_ep *ep, - const struct fi_msg_atomic *msg, - struct fi_ioc *resultv, - void **result_desc, size_t result_count, - uint64_t flags) -{ - struct fi_bgq_ep * bgq_ep; - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - switch (bgq_ep->threading) { - case FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - case FI_THREAD_COMPLETION: - return fi_bgq_atomic_readwritemsg_generic(ep, msg, - resultv, result_count, flags, - 0); - case FI_THREAD_FID: - case 
FI_THREAD_UNSPEC: - case FI_THREAD_SAFE: - return fi_bgq_atomic_readwritemsg_generic(ep, msg, - resultv, result_count, flags, - 1); - } - - errno = FI_EINVAL; - return -errno; -} - -ssize_t fi_bgq_atomic_compwritemsg(struct fid_ep *ep, - const struct fi_msg_atomic *msg, - const struct fi_ioc *comparev, - void **compare_desc, size_t compare_count, - struct fi_ioc *resultv, void **result_desc, - size_t result_count, - uint64_t flags) -{ - struct fi_bgq_ep * bgq_ep; - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - switch (bgq_ep->threading) { - case FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - case FI_THREAD_COMPLETION: - return fi_bgq_atomic_compwritemsg_generic(ep, msg, - comparev, compare_count, - resultv, result_count, - flags, 0); - case FI_THREAD_FID: - case FI_THREAD_UNSPEC: - case FI_THREAD_SAFE: - return fi_bgq_atomic_compwritemsg_generic(ep, msg, - comparev, compare_count, - resultv, result_count, - flags, 1); - } - - errno = FI_EINVAL; - return -errno; -} - -int fi_bgq_atomic_writevalid(struct fid_ep *ep, enum fi_datatype datatype, - enum fi_op op, size_t *count) -{ - static size_t sizeofdt[FI_DATATYPE_LAST] = { - sizeof(int8_t), /* FI_INT8 */ - sizeof(uint8_t), /* FI_UINT8 */ - sizeof(int16_t), /* FI_INT16 */ - sizeof(uint16_t), /* FI_UINT16 */ - sizeof(int32_t), /* FI_INT32 */ - sizeof(uint32_t), /* FI_UINT32 */ - sizeof(int64_t), /* FI_INT64 */ - sizeof(uint64_t), /* FI_UINT64 */ - sizeof(float), /* FI_FLOAT */ - sizeof(double), /* FI_DOUBLE */ - sizeof(complex float), /* FI_FLOAT_COMPLEX */ - sizeof(complex double), /* FI_DOUBLE_COMPLEX */ - sizeof(long double), /* FI_LONG_DOUBLE */ - sizeof(complex long double) /* FI_LONG_DOUBLE_COMPLEX */ - }; - - if ((op > FI_ATOMIC_WRITE) || (datatype >= FI_DATATYPE_LAST)) { - *count = 0; - errno = FI_EOPNOTSUPP; - return -errno; - } - - *count = sizeof(union fi_bgq_mu_packet_payload) / sizeofdt[datatype]; - return 0; -} - -int fi_bgq_atomic_readwritevalid(struct fid_ep *ep, - enum fi_datatype datatype, 
- enum fi_op op, size_t *count) -{ - static size_t sizeofdt[FI_DATATYPE_LAST] = { - sizeof(int8_t), /* FI_INT8 */ - sizeof(uint8_t), /* FI_UINT8 */ - sizeof(int16_t), /* FI_INT16 */ - sizeof(uint16_t), /* FI_UINT16 */ - sizeof(int32_t), /* FI_INT32 */ - sizeof(uint32_t), /* FI_UINT32 */ - sizeof(int64_t), /* FI_INT64 */ - sizeof(uint64_t), /* FI_UINT64 */ - sizeof(float), /* FI_FLOAT */ - sizeof(double), /* FI_DOUBLE */ - sizeof(complex float), /* FI_FLOAT_COMPLEX */ - sizeof(complex double), /* FI_DOUBLE_COMPLEX */ - sizeof(long double), /* FI_LONG_DOUBLE */ - sizeof(complex long double) /* FI_LONG_DOUBLE_COMPLEX */ - }; - - if ((op > FI_ATOMIC_WRITE) || (datatype >= FI_DATATYPE_LAST)) { - *count = 0; - errno = FI_EOPNOTSUPP; - return -errno; - } - - *count = (sizeof(union fi_bgq_mu_packet_payload) - sizeof(struct fi_bgq_mu_fetch_metadata)) / sizeofdt[datatype]; - return 0; -} - -int fi_bgq_atomic_compwritevalid(struct fid_ep *ep, - enum fi_datatype datatype, - enum fi_op op, size_t *count) -{ - static size_t sizeofdt[FI_DATATYPE_LAST] = { - sizeof(int8_t), /* FI_INT8 */ - sizeof(uint8_t), /* FI_UINT8 */ - sizeof(int16_t), /* FI_INT16 */ - sizeof(uint16_t), /* FI_UINT16 */ - sizeof(int32_t), /* FI_INT32 */ - sizeof(uint32_t), /* FI_UINT32 */ - sizeof(int64_t), /* FI_INT64 */ - sizeof(uint64_t), /* FI_UINT64 */ - sizeof(float), /* FI_FLOAT */ - sizeof(double), /* FI_DOUBLE */ - sizeof(complex float), /* FI_FLOAT_COMPLEX */ - sizeof(complex double), /* FI_DOUBLE_COMPLEX */ - sizeof(long double), /* FI_LONG_DOUBLE */ - sizeof(complex long double) /* FI_LONG_DOUBLE_COMPLEX */ - }; - - if ((op < FI_CSWAP) || (op >= FI_ATOMIC_OP_LAST) || (datatype >= FI_DATATYPE_LAST)) { - *count = 0; - errno = FI_EOPNOTSUPP; - return -errno; - } - - *count = (sizeof(union fi_bgq_mu_packet_payload) / 2) / sizeofdt[datatype]; - return 0; -} - -static struct fi_ops_atomic fi_bgq_ops_atomic = { - .size = sizeof(struct fi_ops_atomic), - .write = fi_no_atomic_write, - .writev = 
fi_no_atomic_writev, - .writemsg = fi_bgq_atomic_writemsg, - .inject = fi_no_atomic_inject, - .readwrite = fi_no_atomic_readwrite, - .readwritev = fi_no_atomic_readwritev, - .readwritemsg = fi_bgq_atomic_readwritemsg, - .compwrite = fi_no_atomic_compwrite, - .compwritev = fi_no_atomic_compwritev, - .compwritemsg = fi_bgq_atomic_compwritemsg, - .writevalid = fi_bgq_atomic_writevalid, - .readwritevalid = fi_bgq_atomic_readwritevalid, - .compwritevalid = fi_bgq_atomic_compwritevalid -}; - - -int fi_bgq_init_atomic_ops(struct fi_bgq_ep *bgq_ep, struct fi_info *info) -{ - if (!info || !bgq_ep) - goto err; - - if (info->caps & FI_ATOMICS || - (info->tx_attr && - (info->tx_attr->caps & FI_ATOMICS))) { - bgq_ep->ep_fid.atomic = &fi_bgq_ops_atomic; - } - return 0; -err: - errno = FI_EINVAL; - return -errno; -} - -int fi_bgq_enable_atomic_ops(struct fi_bgq_ep *bgq_ep) -{ - if (!bgq_ep || !bgq_ep->domain) - goto err; - - if (!bgq_ep->ep_fid.atomic) { - /* atomic ops not enabled on this endpoint */ - return 0; - } - /* fill in atomic formats */ - - return 0; -err: - errno = FI_EINVAL; - return -errno; -} - - -int fi_bgq_finalize_atomic_ops(struct fi_bgq_ep *bgq_ep) -{ - return 0; -} diff --git a/prov/bgq/src/fi_bgq_av.c b/prov/bgq/src/fi_bgq_av.c deleted file mode 100644 index df1c0a1a619..00000000000 --- a/prov/bgq/src/fi_bgq_av.c +++ /dev/null @@ -1,494 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "rdma/bgq/fi_bgq.h" -#include - -#include "rdma/bgq/fi_bgq_spi.h" - -static int fi_bgq_close_av(fid_t fid) -{ - int ret; - struct fi_bgq_av *bgq_av = - container_of(fid, struct fi_bgq_av, av_fid); - - ret = fi_bgq_fid_check(fid, FI_CLASS_AV, "address vector"); - if (ret) - return ret; - - if (bgq_av->map_addr) free(bgq_av->map_addr); - - ret = fi_bgq_ref_dec(&bgq_av->domain->ref_cnt, "domain"); - if (ret) - return ret; - - ret = fi_bgq_ref_finalize(&bgq_av->ref_cnt, "address vector"); - if (ret) - return ret; - - free(bgq_av); - return 0; -} - -/* - * The 'addr' is a representation of the address - not a string - * - * 'flags' is allowed to be ignored - * 'context' is not used ... what is the purpose? 
- */ -static int -fi_bgq_av_insert(struct fid_av *av, const void *addr, size_t count, - fi_addr_t *fi_addr, uint64_t flags, void *context) -{ - struct fi_bgq_av *bgq_av = - container_of(av, struct fi_bgq_av, av_fid); - - if (!bgq_av) { - errno = FI_EINVAL; - return -errno; - } - - switch (bgq_av->type) { - case FI_AV_TABLE: - /* The address table is internal and the application uses a - * 'monotonically increasing integer' to index the table and - * retrieve the actual internal address - */ - errno = FI_ENOSYS; - return -errno; - break; - case FI_AV_MAP: - /* The address map is maintained by the application ('fi_addr') and - * the provider must fill in the map with the actual network - * address of each . - */ - if (!addr) { - errno = FI_EINVAL; - return -errno; - } - break; - default: - errno = FI_EINVAL; - return -errno; - } - - BG_CoordinateMapping_t my_coords = bgq_av->domain->my_coords; - BG_CoordinateMapping_t * your_coords = (BG_CoordinateMapping_t *) addr; - union fi_bgq_addr * output = (union fi_bgq_addr *) fi_addr; - uint32_t ppn = Kernel_ProcessCount(); - - Personality_t personality; - int rc; - rc = Kernel_GetPersonality(&personality, sizeof(Personality_t)); - if (rc) { - errno = FI_EINVAL; - return -errno; - } - uint64_t dcr_value = DCRReadUser(ND_500_DCR(CTRL_CUTOFFS)); - - - uint32_t n; - for (n=0; ntype) { - case FI_AV_TABLE: - /* The address table is internal and the application uses a - * 'monotonically increasing integer' to index the table and - * retrieve the actual internal address - */ - errno = FI_ENOSYS; - return -errno; - break; - case FI_AV_MAP: - /* The address map is maintained by the application ('fi_addr') and - * the provider must fill in the map with the actual network - * address of each . - */ - - break; - default: - errno = FI_EINVAL; - return -errno; - } - - /* - * convert the string representation of the node ("#.#.#.#.#.#") into - * torus coordinates and the 't' coordinate. 
- */ - uint32_t a, b, c, d, e, t; - const char * node_str = (const char *) node; - sscanf(node_str, "%u.%u.%u.%u.%u.%u", &a, &b, &c, &d, &e, &t); - BG_CoordinateMapping_t your_coords; - your_coords.a = a; - your_coords.b = b; - your_coords.c = c; - your_coords.d = d; - your_coords.e = e; - your_coords.t = t; - - BG_CoordinateMapping_t my_coords = bgq_av->domain->my_coords; - - const uint32_t fifo_map = - fi_bgq_mu_calculate_fifo_map_single(my_coords, your_coords); - - const MUHWI_Destination_t destination = - fi_bgq_spi_coordinates_to_destination(your_coords); - - const uint32_t base_rx = - fi_bgq_addr_calculate_base_rx(your_coords.t, Kernel_ProcessCount()); - - *fi_addr = fi_bgq_addr_create(destination, fifo_map, base_rx); - - return 0; -} - -/* - * This is similar to "ranks to coords" syscall. The "node" is the string - * representation of the torus coordinates of a node and the 't' coordinate, - * such as "0.0.0.0.0.0", and the "service" is the string representation of - * what could be considered a pami-style "client id". Currently, only a single - * "service" per "node" is supported - the service parameter is ignored and - * a svccnt != 1 is considered an error. - * - * If the "node" parameter is NULL, then the insert begins at coordinate - * 0.0.0.0.0.0 and increments according to the default ABCDET map order until - * "nodecnt" addresses have been inserted. In this respect, "nodecnt" is the - * same as the job size. - * - * The bgq provider does not support rank reorder via mapfiles. - */ -static int -fi_bgq_av_insertsym(struct fid_av *av, const char *node, size_t nodecnt, - const char *service, size_t svccnt, - fi_addr_t *fi_addr, uint64_t flags, void *context) -{ - struct fi_bgq_av *bgq_av = - container_of(av, struct fi_bgq_av, av_fid); - - if (!bgq_av) { - errno = FI_EINVAL; - return -errno; - } - - if (svccnt != 1) { - fprintf(stderr, "Error. 
Only one 'service' per 'node' is supported by the bgq provider\n"); - errno = FI_EINVAL; - return -errno; - } - - switch (bgq_av->type) { - case FI_AV_TABLE: - /* The address table is internal and the application uses a - * 'monotonically increasing integer' to index the table and - * retrieve the actual internal address - */ - errno = FI_ENOSYS; - return -errno; - break; - case FI_AV_MAP: - /* The address map is maintained by the application ('fi_addr') and - * the provider must fill in the map with the actual network - * address of each . - */ - - break; - default: - errno = FI_EINVAL; - return -errno; - } - - /* - * convert the string representation of the node ("#.#.#.#.#") into - * torus coordinates and convert the string representation of the - * service, a.k.a. "process", into a t coordinate. - */ - uint32_t a, b, c, d, e, t; - if (node) - sscanf(node, "%u.%u.%u.%u.%u.%u", &a, &b, &c, &d, &e, &t); - else - a = b = c = d = e = t = 0; - - Personality_t personality; - int rc; - rc = Kernel_GetPersonality(&personality, sizeof(Personality_t)); - if (rc) { - errno = FI_EINVAL; /* is this the correct errno? */ - return -errno; - } - uint32_t ppn = Kernel_ProcessCount(); - size_t node_count = personality.Network_Config.Anodes * - personality.Network_Config.Bnodes * - personality.Network_Config.Cnodes * - personality.Network_Config.Dnodes * - personality.Network_Config.Enodes * - ppn; - - uint32_t maximum_to_insert = (node_count < nodecnt) ? 
node_count : nodecnt; - - BG_CoordinateMapping_t my_coords = bgq_av->domain->my_coords; - BG_CoordinateMapping_t your_coords; - uint64_t dcr_value = DCRReadUser(ND_500_DCR(CTRL_CUTOFFS)); - - int n = 0; - uint32_t _a, _b, _c, _d, _e, _t; - union fi_bgq_addr * output = (union fi_bgq_addr *) fi_addr; - for (_a = a; _a < personality.Network_Config.Anodes; ++_a) { - your_coords.a = _a; - for (_b = b; _b < personality.Network_Config.Bnodes; ++_b) { - your_coords.b = _b; - for (_c = c; _c < personality.Network_Config.Cnodes; ++_c) { - your_coords.c = _c; - for (_d = d; _d < personality.Network_Config.Dnodes; ++_d) { - your_coords.d = _d; - for (_e = e; _e < personality.Network_Config.Enodes; ++_e) { - your_coords.e = _e; - for (_t = t; _t < ppn; ++_t) { - your_coords.t = _t; - - if (n == maximum_to_insert) break; - - const uint32_t fifo_map = - fi_bgq_mu_calculate_fifo_map(my_coords, your_coords, - &personality, dcr_value); - - const MUHWI_Destination_t destination = - fi_bgq_spi_coordinates_to_destination(your_coords); - - const uint32_t base_rx = - fi_bgq_addr_calculate_base_rx(your_coords.t, ppn); - - fi_addr[n++] = fi_bgq_addr_create(destination, fifo_map, base_rx); - - }}}}}} - - return n; -} - -static int -fi_bgq_av_remove(struct fid_av *av, fi_addr_t *fi_addr, size_t count, uint64_t flags) -{ - return 0; /* noop on bgq */ -} - -static int -fi_bgq_av_lookup(struct fid_av *av, fi_addr_t fi_addr, void *addr, size_t *addrlen) -{ - const union fi_bgq_addr bgq_addr = {.fi=fi_addr}; - - BG_CoordinateMapping_t tmp; - tmp.a = bgq_addr.uid.a; - tmp.b = bgq_addr.uid.b; - tmp.c = bgq_addr.uid.c; - tmp.d = bgq_addr.uid.d; - tmp.e = bgq_addr.uid.e; - - const uint32_t ppn = Kernel_ProcessCount(); - const uint32_t rx_per_node = ((BGQ_MU_NUM_REC_FIFO_GROUPS-1) * BGQ_MU_NUM_REC_FIFOS_PER_GROUP) / 2; /* each rx uses two mu reception fifos */ - const uint32_t rx_per_process = rx_per_node / ppn; - tmp.t = fi_bgq_addr_rec_fifo_id(bgq_addr.fi) / rx_per_process; - - memcpy(addr, (const 
void *)&tmp, *addrlen); - - *addrlen = sizeof(BG_CoordinateMapping_t); - - return 0; -} - -static const char * -fi_bgq_av_straddr(struct fid_av *av, const void *addr, - char *buf, size_t *len) -{ - BG_CoordinateMapping_t * input = (BG_CoordinateMapping_t *) addr; - snprintf(buf, *len, "%u.%u.%u.%u.%u.%u", input->a, input->b, input->c, - input->d, input->e, input->t); - - *len = 16; /* "aa.bb.cc.dd.e.tt" */ - return buf; -} - -static struct fi_ops fi_bgq_fi_ops = { - .size = sizeof(struct fi_ops), - .close = fi_bgq_close_av, - .bind = fi_no_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open -}; - -int fi_bgq_bind_ep_av(struct fi_bgq_ep *bgq_ep, - struct fi_bgq_av *bgq_av, uint64_t flags) -{ - if (bgq_ep->av) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN, - "Address vector already bound to TX endpoint\n"); - errno = FI_EINVAL; - return -errno; - } - - bgq_ep->av = bgq_av; - - fi_bgq_ref_inc(&bgq_av->ref_cnt, "address vector"); - - return 0; -} - -static struct fi_ops_av fi_bgq_av_ops = { - .size = sizeof(struct fi_ops_av), - .insert = fi_bgq_av_insert, - .insertsvc = fi_bgq_av_insertsvc, - .insertsym = fi_bgq_av_insertsym, - .remove = fi_bgq_av_remove, - .lookup = fi_bgq_av_lookup, - .straddr = fi_bgq_av_straddr -}; - -int fi_bgq_av_open(struct fid_domain *dom, - struct fi_av_attr *attr, struct fid_av **av, - void *context) -{ - int ret; - struct fi_bgq_av *bgq_av = NULL; - - if (!attr) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_AV, - "no attr provided\n"); - errno = FI_EINVAL; - return -errno; - } - - ret = fi_bgq_fid_check(&dom->fid, FI_CLASS_DOMAIN, "domain"); - if (ret) - return ret; - - bgq_av = calloc(1, sizeof(*bgq_av)); - if (!bgq_av) { - errno = FI_ENOMEM; - goto err; - } - - bgq_av->av_fid.fid.fclass = FI_CLASS_AV; - bgq_av->av_fid.fid.context= context; - bgq_av->av_fid.fid.ops = &fi_bgq_fi_ops; - bgq_av->av_fid.ops = &fi_bgq_av_ops; - - bgq_av->domain = (struct fi_bgq_domain *) dom; - bgq_av->type = attr->type; - - 
bgq_av->map_addr = NULL; - if (attr->name != NULL && (attr->flags & FI_READ)) { - - assert(0 == attr->map_addr); - - - Personality_t personality; - int rc; - rc = Kernel_GetPersonality(&personality, sizeof(Personality_t)); - if (rc) { - errno = FI_EINVAL; - return -errno; - } - - const uint32_t ppn = Kernel_ProcessCount(); - const size_t node_count = personality.Network_Config.Anodes * - personality.Network_Config.Bnodes * - personality.Network_Config.Cnodes * - personality.Network_Config.Dnodes * - personality.Network_Config.Enodes; - - size_t mapsize = node_count * ppn; - BG_CoordinateMapping_t map[mapsize]; - uint64_t ep_count; /* one endpoint per process */ - rc = Kernel_RanksToCoords(sizeof(map), map, &ep_count); - - fi_addr_t *addr = (fi_addr_t *)malloc(sizeof(fi_addr_t)*ep_count); /* TODO - mmap this into shared memory */ - - size_t n = 0; - int i; - - BG_CoordinateMapping_t my_coords = bgq_av->domain->my_coords; - uint64_t dcr_value = DCRReadUser(ND_500_DCR(CTRL_CUTOFFS)); - for (i=0;imap_addr = (void *)addr; - attr->map_addr = (void *)addr; - } - - *av = &bgq_av->av_fid; - - fi_bgq_ref_init(&bgq_av->domain->fabric->node, &bgq_av->ref_cnt, "address vector"); - fi_bgq_ref_inc(&bgq_av->domain->ref_cnt, "domain"); - - return 0; -err: - if (bgq_av) - free(bgq_av); - return -errno; -} diff --git a/prov/bgq/src/fi_bgq_cm.c b/prov/bgq/src/fi_bgq_cm.c deleted file mode 100644 index 5e91273298f..00000000000 --- a/prov/bgq/src/fi_bgq_cm.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "rdma/bgq/fi_bgq.h" -#include - -int fi_bgq_getname(fid_t fid, void *addr, size_t *addrlen) -{ - - if (*addrlen == 0) { - *addrlen = 24; - return 0; - } - - if (!fid || !addr || !addrlen) { - errno = FI_EINVAL; - return -errno; - } - - if (*addrlen < 24) { - errno = FI_ETOOSMALL; - return -errno; - } - - char * addr_str; - struct fi_bgq_ep *bgq_ep; - struct fi_bgq_sep *bgq_sep; - switch(fid->fclass) { - case FI_CLASS_EP: - bgq_ep = container_of(fid, struct fi_bgq_ep, ep_fid); - addr_str = (char *) addr; - sprintf(addr_str, "%u.%u.%u.%u.%u.%u", - bgq_ep->domain->my_coords.a, - bgq_ep->domain->my_coords.b, - bgq_ep->domain->my_coords.c, - bgq_ep->domain->my_coords.d, - bgq_ep->domain->my_coords.e, - bgq_ep->domain->my_coords.t); - break; - case FI_CLASS_SEP: - bgq_sep = container_of(fid, struct fi_bgq_sep, ep_fid); - addr_str = (char *) addr; - sprintf(addr_str, "%u.%u.%u.%u.%u.%u", - bgq_sep->domain->my_coords.a, - bgq_sep->domain->my_coords.b, - bgq_sep->domain->my_coords.c, - bgq_sep->domain->my_coords.d, - bgq_sep->domain->my_coords.e, - bgq_sep->domain->my_coords.t); - break; - - default: - errno = FI_EINVAL; - return -errno; - } - - *addrlen = 24; - - return 0; -} - -static struct fi_ops_cm fi_bgq_cm_ops = { - .size = sizeof(struct fi_ops_cm), - .getname = fi_bgq_getname, - .getpeer = fi_no_getpeer, - .connect = fi_no_connect, - .listen = fi_no_listen, - .accept = fi_no_accept, - .reject = fi_no_reject, - .shutdown = fi_no_shutdown, -}; - -int fi_bgq_init_cm_ops(struct fid_ep *ep_fid, struct fi_info *info) -{ - ep_fid->cm = &fi_bgq_cm_ops; - - return 0; -} - -int fi_bgq_finalize_cm_ops(struct fi_bgq_ep *bgq_ep) -{ - return 0; -} diff --git a/prov/bgq/src/fi_bgq_cntr.c b/prov/bgq/src/fi_bgq_cntr.c deleted file mode 100644 index 69642dc2f9f..00000000000 --- a/prov/bgq/src/fi_bgq_cntr.c +++ /dev/null @@ -1,283 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "rdma/bgq/fi_bgq.h" -#include -#include -#include - -static int fi_bgq_close_cntr(struct fid *fid) -{ - int ret; - struct fi_bgq_cntr *bgq_cntr = - container_of(fid, struct fi_bgq_cntr, cntr_fid); - - ret = fi_bgq_fid_check(fid, FI_CLASS_CNTR, "counter"); - if (ret) - return ret; - - struct l2atomic_lock * lock = &bgq_cntr->domain->mu.lock; - struct fi_bgq_node * node = &bgq_cntr->domain->fabric->node; - fi_bgq_node_bat_free(node, lock, bgq_cntr->std.batid); - fi_bgq_node_bat_free(node, lock, bgq_cntr->err.batid); - - ret = fi_bgq_ref_dec(&bgq_cntr->domain->ref_cnt, "domain"); - if (ret) - return ret; - - free(bgq_cntr->attr); - free(bgq_cntr); - return 0; -} - -static int fi_bgq_bind_cntr(struct fid *fid, struct fid *bfid, uint64_t flags) -{ - errno = FI_ENOSYS; - return -errno; -} - -static uint64_t fi_bgq_cntr_read(struct fid_cntr *cntr) -{ - struct fi_bgq_cntr *bgq_cntr = - container_of(cntr, struct fi_bgq_cntr, cntr_fid); - - const uint64_t value = L2_AtomicLoad(bgq_cntr->std.l2_vaddr); - - if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_MANUAL) { - const uint64_t count = bgq_cntr->progress.ep_count; - uint64_t i; - for (i=0; iprogress.ep[i]); - } - } - - return value; -} - -static uint64_t fi_bgq_cntr_readerr(struct fid_cntr *cntr) -{ - struct fi_bgq_cntr *bgq_cntr = - container_of(cntr, struct fi_bgq_cntr, cntr_fid); - - return L2_AtomicLoad(bgq_cntr->err.l2_vaddr); -} - -static int fi_bgq_cntr_add(struct fid_cntr *cntr, uint64_t value) -{ - struct fi_bgq_cntr *bgq_cntr = - container_of(cntr, struct fi_bgq_cntr, cntr_fid); - - L2_AtomicStoreAdd(bgq_cntr->std.l2_vaddr, value); - - return 0; -} - -static int fi_bgq_cntr_set(struct fid_cntr *cntr, uint64_t value) -{ - struct fi_bgq_cntr *bgq_cntr = - container_of(cntr, struct fi_bgq_cntr, cntr_fid); - - L2_AtomicStore(bgq_cntr->std.l2_vaddr, value); - - return 0; -} - -static int -fi_bgq_cntr_wait(struct fid_cntr *cntr, uint64_t threshold, int timeout) -{ - struct fi_bgq_cntr *bgq_cntr = - 
container_of(cntr, struct fi_bgq_cntr, cntr_fid); - - uint64_t timeout_cycles = (timeout < 0) ? - ULLONG_MAX : - GetTimeBase() + (1600UL * 1000 * timeout); - - uint64_t current_value = 0; - do { - current_value = L2_AtomicLoad(bgq_cntr->std.l2_vaddr); - - if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_MANUAL) { - const uint64_t count = bgq_cntr->progress.ep_count; - uint64_t i; - for (i=0; iprogress.ep[i]); - } - } - - if (threshold <= current_value) return 0; - } while (GetTimeBase() < timeout_cycles); - - errno = FI_ETIMEDOUT; - return -errno; -} - -static struct fi_ops fi_bgq_fi_ops = { - .size = sizeof(struct fi_ops), - .close = fi_bgq_close_cntr, - .bind = fi_bgq_bind_cntr, - .control = fi_no_control, - .ops_open = fi_no_ops_open -}; - -int fi_bgq_bind_ep_cntr(struct fi_bgq_ep *bgq_ep, - struct fi_bgq_cntr *bgq_cntr, uint64_t flags) -{ - if (!(flags & (FI_WRITE | - FI_READ | - FI_SEND | - FI_RECV | - FI_REMOTE_READ | - FI_REMOTE_WRITE))) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_CQ, - "unclear flags while binding counter\n"); - goto err; - } - - if (flags & FI_WRITE) - bgq_ep->write_cntr = bgq_cntr; - if (flags & FI_READ) - bgq_ep->read_cntr = bgq_cntr; - if (flags & FI_SEND) - bgq_ep->send_cntr = bgq_cntr; - if (flags & FI_RECV) - bgq_ep->recv_cntr = bgq_cntr; - - bgq_cntr->ep[(bgq_cntr->ep_bind_count)++] = bgq_ep; - - if (ofi_recv_allowed(bgq_ep->rx.caps) || ofi_rma_target_allowed(bgq_ep->rx.caps)) { - bgq_cntr->progress.ep[(bgq_cntr->progress.ep_count)++] = bgq_ep; - } - - return 0; -err: - errno = FI_EINVAL; - return -errno; -} - -static struct fi_ops_cntr fi_bgq_ops_cntr = { - .size = sizeof(struct fi_ops_cntr), - .read = fi_bgq_cntr_read, - .readerr = fi_bgq_cntr_readerr, - .add = fi_bgq_cntr_add, - .set = fi_bgq_cntr_set, - .wait = fi_bgq_cntr_wait -}; - -int fi_bgq_cntr_open(struct fid_domain *domain, - struct fi_cntr_attr *attr, - struct fid_cntr **cntr, void *context) -{ - int ret; - struct fi_bgq_cntr *bgq_cntr; - - if (!attr) { - 
FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_CQ, - "no attr supplied\n"); - errno = FI_EINVAL; - return -errno; - } - ret = fi_bgq_fid_check(&domain->fid, FI_CLASS_DOMAIN, "domain"); - if (ret) - return ret; - - bgq_cntr = calloc(1, sizeof(*bgq_cntr)); - if (!bgq_cntr) { - errno = FI_ENOMEM; - goto err; - } - - bgq_cntr->cntr_fid.fid.fclass = FI_CLASS_CNTR; - bgq_cntr->cntr_fid.fid.context = context; - bgq_cntr->cntr_fid.fid.ops = &fi_bgq_fi_ops; - bgq_cntr->cntr_fid.ops = &fi_bgq_ops_cntr; - - bgq_cntr->domain = (struct fi_bgq_domain *) domain; - - /* ---- allocate and initialize the "std" and "err" mu/l2 counters ---- */ - { - uint32_t cnk_rc __attribute__ ((unused)); - struct l2atomic_lock * lock = &bgq_cntr->domain->mu.lock; - struct fi_bgq_node * node = &bgq_cntr->domain->fabric->node; - - /* ---- initialize the "std" counter ---- */ - bgq_cntr->std.batid = - fi_bgq_node_bat_allocate(node, lock); - - bgq_cntr->std.paddr = - node->bat.l2_cntr_paddr[bgq_cntr->std.batid]; - - cnk_rc = Kernel_Physical2Virtual( - (void *)bgq_cntr->std.paddr, - (void**)&bgq_cntr->std.l2_vaddr); - assert(cnk_rc == 0); - - L2_AtomicStore(bgq_cntr->std.l2_vaddr, 0); - - fi_bgq_node_bat_write(node, lock, - bgq_cntr->std.batid, - bgq_cntr->std.paddr); - - /* ---- initialize the "err" counter ---- */ - bgq_cntr->err.batid = - fi_bgq_node_bat_allocate(node, lock); - - bgq_cntr->err.paddr = - node->bat.l2_cntr_paddr[bgq_cntr->err.batid]; - - cnk_rc = Kernel_Physical2Virtual( - (void *)bgq_cntr->err.paddr, - (void**)&bgq_cntr->err.l2_vaddr); - assert(cnk_rc == 0); - - L2_AtomicStore(bgq_cntr->err.l2_vaddr, 0); - - fi_bgq_node_bat_write(node, lock, - bgq_cntr->err.batid, - bgq_cntr->err.paddr); - } - - bgq_cntr->ep_bind_count = 0; - bgq_cntr->progress.ep_count = 0; - unsigned i; - for (i=0; i<64; ++i) { /* TODO - check this array size */ - bgq_cntr->ep[i] = NULL; - bgq_cntr->progress.ep[i] = NULL; - } - - fi_bgq_ref_inc(&bgq_cntr->domain->ref_cnt, "domain"); - - *cntr = 
&bgq_cntr->cntr_fid; - return 0; -err: - if (bgq_cntr) - free(bgq_cntr); - return -errno; -} diff --git a/prov/bgq/src/fi_bgq_cq.c b/prov/bgq/src/fi_bgq_cq.c deleted file mode 100644 index b96f7e2b06c..00000000000 --- a/prov/bgq/src/fi_bgq_cq.c +++ /dev/null @@ -1,596 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "rdma/bgq/fi_bgq.h" -#include -#include - -#include - -#include "rdma/bgq/fi_bgq_spi.h" - -#define FI_BGQ_DEFAULT_CQ_DEPTH (8192) -#define FI_BGQ_MAXIMUM_CQ_DEPTH (8192) - -#define FI_BGQ_L2ATOMIC_ERR_FIFO_DATA_SIZE (512) - -struct fi_cq_bgq_l2atomic_data { - struct l2atomic_boundedcounter_data entry_counter; - struct l2atomic_boundedcounter_data bounded_counter; - struct l2atomic_fifo_data err_fifo_data; - uint64_t err_packet[FI_BGQ_L2ATOMIC_ERR_FIFO_DATA_SIZE]; - struct l2atomic_fifo_data std_fifo_data; - uint64_t std_packet[0]; -} __attribute((aligned(32))); - -static int fi_bgq_close_cq(fid_t fid) -{ - int ret; - struct fi_bgq_cq *bgq_cq = - container_of(fid, struct fi_bgq_cq, cq_fid); - - ret = fi_bgq_fid_check(fid, FI_CLASS_CQ, "completion queue"); - if (ret) - return ret; - - ret = fi_bgq_ref_dec(&bgq_cq->domain->ref_cnt, "domain"); - if (ret) - return ret; - - ret = fi_bgq_ref_finalize(&bgq_cq->ref_cnt, "completion queue"); - if (ret) - return ret; - - free(bgq_cq); - - return 0; -} - -static int fi_bgq_bind_cq(struct fid *fid, struct fid *bfid, - uint64_t flags) -{ - errno = FI_ENOSYS; - return -errno; -} - -static int fi_bgq_control_cq(fid_t fid, int command, void *arg) -{ - errno = FI_ENOSYS; - return -errno; -} - -static int fi_bgq_ops_open_cq(struct fid *fid, const char *name, - uint64_t flags, void **ops, void *context) -{ - errno = FI_ENOSYS; - return -errno; -} - -static struct fi_ops fi_bgq_fi_ops = { - .size = sizeof(struct fi_ops), - .close = fi_bgq_close_cq, - .bind = fi_bgq_bind_cq, - .control = fi_bgq_control_cq, - .ops_open = fi_bgq_ops_open_cq -}; - -static ssize_t fi_bgq_cq_read(struct fid_cq *cq, void *buf, size_t count) -{ - int lock_required; - int ret; - struct fi_bgq_cq *bgq_cq = container_of(cq, struct fi_bgq_cq, cq_fid); - - switch (bgq_cq->domain->threading) { - case FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - lock_required = 0; - default: - lock_required = 1; - } - - ret = fi_bgq_cq_read_generic(cq, buf, count, 
bgq_cq->format, lock_required); - return ret; -} - -static ssize_t -fi_bgq_cq_readfrom(struct fid_cq *cq, void *buf, size_t count, fi_addr_t *src_addr) -{ - int lock_required; - int ret; - struct fi_bgq_cq *bgq_cq = container_of(cq, struct fi_bgq_cq, cq_fid); - - switch (bgq_cq->domain->threading) { - case FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - lock_required = 0; - break; - default: - lock_required = 1; - break; - } - - ret = fi_bgq_cq_readfrom_generic(cq, buf, count, src_addr, bgq_cq->format, lock_required); - if (ret > 0) { - unsigned n; - for (n=0; nerr_head; - if (NULL == ext) { - errno = FI_EAGAIN; - return -errno; - } - - if (ext->bgq_context.byte_counter != 0) { - /* perhaps an in-progress truncated rendezvous receive? */ - errno = FI_EAGAIN; - return -errno; - } - - assert(ext->bgq_context.flags & FI_BGQ_CQ_CONTEXT_EXT); /* DEBUG */ - - int lock_required = 0; - switch (bgq_cq->domain->threading) { - case FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - lock_required = 0; - break; - default: - lock_required = 1; - break; - } - - int ret; - ret = fi_bgq_lock_if_required(&bgq_cq->lock, lock_required); - if (ret) return ret; - - bgq_cq->err_head = (struct fi_bgq_context_ext *)ext->bgq_context.next; - if (NULL == bgq_cq->err_head) - bgq_cq->err_tail = NULL; - - *buf = ext->err_entry; - free(ext); - - ret = fi_bgq_unlock_if_required(&bgq_cq->lock, lock_required); - if (ret) return ret; - - } else { - - uint64_t value = 0; - if (l2atomic_fifo_peek(&bgq_cq->err_consumer, &value) != 0) { - errno = FI_EAGAIN; - return -errno; - } - - /* const uint64_t flags = value & 0xE000000000000000ull; -- currently not used */ - - /* convert the fifo value into a context pointer */ - struct fi_bgq_context_ext * ext = (struct fi_bgq_context_ext *) (value << 3); - - if (ext->bgq_context.byte_counter != 0) { - /* perhaps an in-progress truncated rendezvous receive? 
*/ - errno = FI_EAGAIN; - return -errno; - } - - assert(ext->bgq_context.flags & FI_BGQ_CQ_CONTEXT_EXT); /* DEBUG */ - - *buf = ext->err_entry; - free(ext); - - l2atomic_fifo_advance(&bgq_cq->err_consumer); - } - - return 1; -} - -static ssize_t -fi_bgq_cq_sread(struct fid_cq *cq, void *buf, size_t len, const void *cond, int timeout) -{ - int lock_required; - struct fi_bgq_cq *bgq_cq = container_of(cq, struct fi_bgq_cq, cq_fid); - - switch (bgq_cq->domain->threading) { - case FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - lock_required = 0; - default: - lock_required = 1; - } - - uint64_t timeout_cycles = (timeout < 0) ? - ULLONG_MAX : - GetTimeBase() + (1600UL * 1000 * timeout); - do { - ssize_t count = fi_bgq_cq_read_generic(cq, buf, len, bgq_cq->format, lock_required); - if (count) return count; - - } while (GetTimeBase() < timeout_cycles); - errno = FI_EAGAIN; - return -errno; -} - -static ssize_t -fi_bgq_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t len, - fi_addr_t *src_addr, const void *cond, int timeout) -{ - int lock_required; - struct fi_bgq_cq *bgq_cq = container_of(cq, struct fi_bgq_cq, cq_fid); - - switch (bgq_cq->domain->threading) { - case FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - lock_required = 0; - default: - lock_required = 1; - } - - uint64_t timeout_cycles = (timeout < 0) ? 
- ULLONG_MAX : - GetTimeBase() + (1600UL * 1000 * timeout); - do { - ssize_t count = fi_bgq_cq_readfrom_generic(cq, buf, len, src_addr, bgq_cq->format, lock_required); - if (count) return count; - - } while (GetTimeBase() < timeout_cycles); - errno = FI_EAGAIN; - return -errno; -} - -static const char * -fi_bgq_cq_strerror(struct fid_cq *cq, int prov_errno, const void *err_data, - char *buf, size_t len) -{ - errno = FI_ENOSYS; - return NULL; -} - -int fi_bgq_bind_ep_cq(struct fi_bgq_ep *bgq_ep, - struct fi_bgq_cq *bgq_cq, uint64_t flags) -{ - if (!(flags & (FI_SEND | FI_RECV))) - goto err; - - if (flags & FI_SEND) { - fi_bgq_ref_inc(&bgq_cq->ref_cnt, "completion queue"); - bgq_ep->send_cq = bgq_cq; - bgq_ep->tx.send.local_completion_model = bgq_cq->local_completion_model; - } - if (flags & FI_RECV) { - fi_bgq_ref_inc(&bgq_cq->ref_cnt, "completion queue"); - bgq_ep->recv_cq = bgq_cq; - } - bgq_cq->bflags = flags; - - if (FI_CLASS_RX_CTX == bgq_ep->ep_fid.fid.fclass || - FI_CLASS_EP == bgq_ep->ep_fid.fid.fclass) { - bgq_cq->ep[(bgq_cq->ep_bind_count)++] = bgq_ep; - } - - if (ofi_recv_allowed(bgq_ep->rx.caps) || ofi_rma_target_allowed(bgq_ep->rx.caps)) { - bgq_cq->progress.ep[(bgq_cq->progress.ep_count)++] = bgq_ep; - } - - return 0; -err: - errno = FI_EINVAL; - return -errno; -} - -int fi_bgq_cq_enqueue_err (struct fi_bgq_cq * bgq_cq, - struct fi_bgq_context_ext * ext, - const int lock_required) -{ - if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_MANUAL) { - - int lock_required = 0; - switch (bgq_cq->domain->threading) { - case FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - lock_required = 0; - default: - lock_required = 0; - } - - int ret; - ret = fi_bgq_lock_if_required(&bgq_cq->lock, lock_required); - if (ret) return ret; - - struct fi_bgq_context_ext * tail = bgq_cq->err_tail; - if (tail) { - assert(NULL != bgq_cq->err_head); - - tail->bgq_context.next = (union fi_bgq_context *)ext; - bgq_cq->err_tail = ext; - - } else { - assert(NULL == bgq_cq->err_head); - - 
bgq_cq->err_tail = ext; - bgq_cq->err_head = ext; - } - ext->bgq_context.next = NULL; - - ret = fi_bgq_unlock_if_required(&bgq_cq->lock, lock_required); - if (ret) return ret; - - } else { - - struct l2atomic_fifo_producer * err_producer = &bgq_cq->err_producer; - uint64_t ext_rsh3b = (uint64_t)ext >> 3; - while(0 != l2atomic_fifo_produce(err_producer, ext_rsh3b)); - } - - return 0; -} - -FI_BGQ_CQ_SPECIALIZED_FUNC(FI_CQ_FORMAT_UNSPEC, 0) -FI_BGQ_CQ_SPECIALIZED_FUNC(FI_CQ_FORMAT_UNSPEC, 1) -FI_BGQ_CQ_SPECIALIZED_FUNC(FI_CQ_FORMAT_CONTEXT, 0) -FI_BGQ_CQ_SPECIALIZED_FUNC(FI_CQ_FORMAT_CONTEXT, 1) -FI_BGQ_CQ_SPECIALIZED_FUNC(FI_CQ_FORMAT_MSG, 0) -FI_BGQ_CQ_SPECIALIZED_FUNC(FI_CQ_FORMAT_MSG, 1) -FI_BGQ_CQ_SPECIALIZED_FUNC(FI_CQ_FORMAT_DATA, 0) -FI_BGQ_CQ_SPECIALIZED_FUNC(FI_CQ_FORMAT_DATA, 1) -/* "FI_BGQ_CQ_SPECIALIZED_FUNC(FI_CQ_FORMAT_TAGGED, 0)" is already declared via FABRIC_DIRECT */ -FI_BGQ_CQ_SPECIALIZED_FUNC(FI_CQ_FORMAT_TAGGED, 1) - -#define FI_BGQ_CQ_OPS_STRUCT_NAME(FORMAT, LOCK) \ - fi_bgq_ops_cq_ ## FORMAT ## _ ## LOCK \ - -#define FI_BGQ_CQ_OPS_STRUCT(FORMAT, LOCK) \ -static struct fi_ops_cq \ - FI_BGQ_CQ_OPS_STRUCT_NAME(FORMAT, LOCK) = { \ - .size = sizeof(struct fi_ops_cq), \ - .read = FI_BGQ_CQ_SPECIALIZED_FUNC_NAME(cq_read, FORMAT, LOCK), \ - .readfrom = FI_BGQ_CQ_SPECIALIZED_FUNC_NAME(cq_readfrom, FORMAT, LOCK), \ - .readerr = fi_bgq_cq_readerr, \ - .sread = fi_bgq_cq_sread, \ - .sreadfrom = fi_bgq_cq_sreadfrom, \ - .signal = fi_no_cq_signal, \ - .strerror = fi_bgq_cq_strerror, \ -} - -FI_BGQ_CQ_OPS_STRUCT(FI_CQ_FORMAT_UNSPEC, 0); -FI_BGQ_CQ_OPS_STRUCT(FI_CQ_FORMAT_UNSPEC, 1); -FI_BGQ_CQ_OPS_STRUCT(FI_CQ_FORMAT_CONTEXT, 0); -FI_BGQ_CQ_OPS_STRUCT(FI_CQ_FORMAT_CONTEXT, 1); -FI_BGQ_CQ_OPS_STRUCT(FI_CQ_FORMAT_MSG, 0); -FI_BGQ_CQ_OPS_STRUCT(FI_CQ_FORMAT_MSG, 1); -FI_BGQ_CQ_OPS_STRUCT(FI_CQ_FORMAT_DATA, 0); -FI_BGQ_CQ_OPS_STRUCT(FI_CQ_FORMAT_DATA, 1); -FI_BGQ_CQ_OPS_STRUCT(FI_CQ_FORMAT_TAGGED, 0); -FI_BGQ_CQ_OPS_STRUCT(FI_CQ_FORMAT_TAGGED, 1); - - -static 
struct fi_ops_cq fi_bgq_ops_cq_default = { - .size = sizeof(struct fi_ops_cq), - .read = fi_bgq_cq_read, - .readfrom = fi_bgq_cq_readfrom, - .readerr = fi_bgq_cq_readerr, - .signal = fi_no_cq_signal, - .sread = fi_bgq_cq_sread, - .sreadfrom = fi_bgq_cq_sreadfrom, - .strerror = fi_bgq_cq_strerror -}; - - -int fi_bgq_cq_open(struct fid_domain *dom, - struct fi_cq_attr *attr, - struct fid_cq **cq, void *context) -{ - int ret; - struct fi_bgq_cq *bgq_cq; - int lock_required; - - if (!attr) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_CQ, - "no attr supplied\n"); - errno = FI_EINVAL; - return -errno; - } - ret = fi_bgq_fid_check(&dom->fid, FI_CLASS_DOMAIN, "domain"); - if (ret) - return ret; - - bgq_cq = calloc(1, sizeof(*bgq_cq)); - if (!bgq_cq) { - errno = FI_ENOMEM; - goto err; - } - - bgq_cq->cq_fid.fid.fclass = FI_CLASS_CQ; - bgq_cq->cq_fid.fid.context= context; - bgq_cq->cq_fid.fid.ops = &fi_bgq_fi_ops; - - bgq_cq->size = attr->size ? attr->size : FI_BGQ_DEFAULT_CQ_DEPTH; - - bgq_cq->domain = (struct fi_bgq_domain *) dom; - - bgq_cq->format = attr->format ? 
attr->format : FI_CQ_FORMAT_CONTEXT; - - bgq_cq->pending_head = NULL; - bgq_cq->pending_tail = NULL; - bgq_cq->completed_head = NULL; - bgq_cq->completed_tail = NULL; - bgq_cq->err_head = NULL; - bgq_cq->err_tail = NULL; - - switch (bgq_cq->domain->threading) { - case FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - case FI_THREAD_COMPLETION: - lock_required = 0; - break; - case FI_THREAD_FID: - case FI_THREAD_UNSPEC: - case FI_THREAD_SAFE: - lock_required = 1; - break; - default: - errno = FI_EINVAL; - goto err; - } - - if (lock_required == 0 && - bgq_cq->format == FI_CQ_FORMAT_UNSPEC) { - bgq_cq->cq_fid.ops = - &FI_BGQ_CQ_OPS_STRUCT_NAME(FI_CQ_FORMAT_UNSPEC, 0); - } else if (lock_required == 0 && - bgq_cq->format == FI_CQ_FORMAT_CONTEXT) { - bgq_cq->cq_fid.ops = - &FI_BGQ_CQ_OPS_STRUCT_NAME(FI_CQ_FORMAT_CONTEXT, 0); - } else if (lock_required == 0 && - bgq_cq->format == FI_CQ_FORMAT_MSG) { - bgq_cq->cq_fid.ops = - &FI_BGQ_CQ_OPS_STRUCT_NAME(FI_CQ_FORMAT_MSG, 0); - } else if (lock_required == 0 && - bgq_cq->format == FI_CQ_FORMAT_DATA) { - bgq_cq->cq_fid.ops = - &FI_BGQ_CQ_OPS_STRUCT_NAME(FI_CQ_FORMAT_DATA, 0); - } else if (lock_required == 0 && - bgq_cq->format == FI_CQ_FORMAT_TAGGED) { - bgq_cq->cq_fid.ops = - &FI_BGQ_CQ_OPS_STRUCT_NAME(FI_CQ_FORMAT_TAGGED, 0); - } else if (lock_required == 1 && - bgq_cq->format == FI_CQ_FORMAT_UNSPEC) { - bgq_cq->cq_fid.ops = - &FI_BGQ_CQ_OPS_STRUCT_NAME(FI_CQ_FORMAT_UNSPEC, 1); - } else if (lock_required == 1 && - bgq_cq->format == FI_CQ_FORMAT_CONTEXT) { - bgq_cq->cq_fid.ops = - &FI_BGQ_CQ_OPS_STRUCT_NAME(FI_CQ_FORMAT_CONTEXT, 1); - } else if (lock_required == 1 && - bgq_cq->format == FI_CQ_FORMAT_MSG) { - bgq_cq->cq_fid.ops = - &FI_BGQ_CQ_OPS_STRUCT_NAME(FI_CQ_FORMAT_MSG, 1); - } else if (lock_required == 1 && - bgq_cq->format == FI_CQ_FORMAT_DATA) { - bgq_cq->cq_fid.ops = - &FI_BGQ_CQ_OPS_STRUCT_NAME(FI_CQ_FORMAT_DATA, 1); - } else if (lock_required == 1 && - bgq_cq->format == FI_CQ_FORMAT_TAGGED) { - bgq_cq->cq_fid.ops = - 
&FI_BGQ_CQ_OPS_STRUCT_NAME(FI_CQ_FORMAT_TAGGED, 1); - - } else { - bgq_cq->cq_fid.ops = - &fi_bgq_ops_cq_default; - } - - /* initialize the 'local completion' direct-put descriptor model */ - { - MUHWI_Descriptor_t * desc = &bgq_cq->local_completion_model; - MUSPI_DescriptorZeroOut(desc); - - desc->Half_Word0.Prefetch_Only = MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; - desc->Half_Word1.Interrupt = MUHWI_DESCRIPTOR_DO_NOT_INTERRUPT_ON_PACKET_ARRIVAL; - desc->Pa_Payload = 0; /* specified at injection time */ - desc->Message_Length = sizeof(uint64_t); - desc->Torus_FIFO_Map = - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL0 | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL1; - desc->PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type = MUHWI_PT2PT_DATA_PACKET_TYPE; - desc->PacketHeader.NetworkHeader.pt2pt.Hints = - MUHWI_PACKET_HINT_A_NONE | - MUHWI_PACKET_HINT_B_NONE | - MUHWI_PACKET_HINT_C_NONE | - MUHWI_PACKET_HINT_D_NONE; - desc->PacketHeader.NetworkHeader.pt2pt.Byte2.Byte2 = 0; - - desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Byte3 = - MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; - - desc->PacketHeader.NetworkHeader.pt2pt.Destination.Destination.Destination = 0; /* not used for local transfers */ - - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = MUHWI_PACKET_TYPE_PUT; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Payload_Base_Address_Id = FI_BGQ_MU_BAT_ID_COUNTER; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Pacing = MUHWI_PACKET_DIRECT_PUT_IS_NOT_PACED; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Put_Offset_MSB = 0; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Put_Offset_LSB = 0; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Unused1 = 0; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Counter_Base_Address_Id = FI_BGQ_MU_BAT_ID_COUNTER; - 
desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Valid_Bytes_In_Payload = 0; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Unused2 = 0; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Counter_Offset = 0; - } - - /* allocate the 'std' and 'err' l2atomic fifos */ - { - struct fi_cq_bgq_l2atomic_data * memptr = NULL; - size_t bytes = sizeof(struct fi_cq_bgq_l2atomic_data) + - sizeof(uint64_t) * bgq_cq->size; - if (posix_memalign((void **)&memptr, 32, bytes)) { - errno = FI_ENOMEM; - goto err; - } - memset((void*)memptr, 0, bytes); - bgq_cq->fifo_memptr = (void*)memptr; - - l2atomic_fifo_initialize(&bgq_cq->err_consumer, - &bgq_cq->err_producer, - &memptr->err_fifo_data, FI_BGQ_L2ATOMIC_ERR_FIFO_DATA_SIZE); - l2atomic_fifo_initialize(&bgq_cq->std_consumer, - &bgq_cq->std_producer, - &memptr->std_fifo_data, bgq_cq->size); - }; - - bgq_cq->ep_bind_count = 0; - bgq_cq->progress.ep_count = 0; - unsigned i; - for (i=0; i<64; ++i) { /* TODO - check this array size */ - bgq_cq->ep[i] = NULL; - bgq_cq->progress.ep[i] = NULL; - } - - - fi_bgq_ref_init(&bgq_cq->domain->fabric->node, &bgq_cq->ref_cnt, "completion queue"); - fi_bgq_ref_inc(&bgq_cq->domain->ref_cnt, "domain"); - - *cq = &bgq_cq->cq_fid; - - return 0; -err: - if(bgq_cq) - free(bgq_cq); - return -errno; -} diff --git a/prov/bgq/src/fi_bgq_domain.c b/prov/bgq/src/fi_bgq_domain.c deleted file mode 100644 index 90c8e02f152..00000000000 --- a/prov/bgq/src/fi_bgq_domain.c +++ /dev/null @@ -1,586 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "rdma/bgq/fi_bgq.h" -#include - -#include -#include -#include -#include -#include -#include - -#include "rdma/bgq/fi_bgq_spi.h" - -uint64_t fi_bgq_global_reception_counter __attribute__((__aligned__(L2_CACHE_LINE_SIZE))); - -static int fi_bgq_close_domain(fid_t fid) -{ - int ret; - struct fi_bgq_domain *bgq_domain = - container_of(fid, struct fi_bgq_domain, domain_fid); - - ret = fi_bgq_fid_check(fid, FI_CLASS_DOMAIN, "domain"); - if (ret) - return ret; - - - /* close/finalize/deallocate the MU hardware? 
*/ - - ret = fi_bgq_finalize_mr_ops(bgq_domain); - if (ret) - return ret; - - unsigned i; - for (i=0; iprogress.max_threads; ++i) { - ret = fi_bgq_progress_disable(bgq_domain, i); - if (ret) - return ret; - } - - ret = fi_bgq_progress_fini(bgq_domain); - if (ret) - return ret; - - ret = fi_bgq_ref_finalize(&bgq_domain->ref_cnt, "domain"); - if (ret) - return ret; - - ret = fi_bgq_ref_dec(&bgq_domain->fabric->ref_cnt, "fabric"); - if (ret) - return ret; - - - free(bgq_domain); - - return 0; -} - -static struct fi_ops fi_bgq_fi_ops = { - .size = sizeof(struct fi_ops), - .close = fi_bgq_close_domain, - .bind = fi_no_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open -}; - -static struct fi_ops_domain fi_bgq_domain_ops = { - .size = sizeof(struct fi_ops_domain), - .av_open = fi_bgq_av_open, - .cq_open = fi_bgq_cq_open, - .endpoint = fi_bgq_endpoint, - .scalable_ep = fi_bgq_scalable_ep, - .cntr_open = fi_bgq_cntr_open, - .poll_open = fi_no_poll_open, - .stx_ctx = fi_bgq_stx_context, - .srx_ctx = fi_no_srx_context -}; - -static int fi_bgq_mu_init(struct fi_bgq_domain *bgq_domain, - struct fi_info *info) -{ - int rc; - rc = fi_bgq_node_mu_lock_init(&bgq_domain->fabric->node, &bgq_domain->mu.lock); - if (rc) { - goto err; - } - l2atomic_lock_acquire(&bgq_domain->mu.lock); - - const uint32_t ppn = Kernel_ProcessCount(); - const uint32_t tcoord = Kernel_MyTcoord(); - const uint32_t subgroup_total = BGQ_MU_NUM_REC_FIFO_SUBGROUPS*(BGQ_MU_NUM_REC_FIFO_GROUPS-1); /* do not consider 17th core subgroups */ - const uint32_t subgroups_per_process = subgroup_total / ppn; - const uint32_t subgroup_offset = subgroups_per_process * tcoord; - const uint32_t recfifo_total = BGQ_MU_NUM_REC_FIFOS_PER_GROUP*(BGQ_MU_NUM_REC_FIFO_GROUPS-1); /* do not mess with 17th core group recfifos */ - - /* - * Create four mu reception fifos in each of the subgroups "owned" by - * this process. 
- */ - uint8_t * memptr; - size_t nbytes = FI_BGQ_MU_RECFIFO_BYTES * BGQ_MU_NUM_REC_FIFOS_PER_SUBGROUP * subgroups_per_process; - rc = posix_memalign((void**)&memptr, 32, nbytes); - if (rc) goto err; - - Kernel_MemoryRegion_t mregion; - rc = Kernel_CreateMemoryRegion(&mregion, (void*)memptr, nbytes); - if (rc) goto err; - - bgq_domain->rfifo_mem = (void*)memptr; - - bgq_domain->rx.max = 0; - bgq_domain->rx.count = 0; - - uint32_t n; - for (n = 0; n < recfifo_total; ++n) { - bgq_domain->rx.rfifo[n] = NULL; - } - - const uint32_t subgroups_to_allocate_per_process = ppn == 64 ? 1 : ppn == 32 ? 2 : ppn == 16 ? 4 : ppn == 8 ? 8 : ppn == 4 ? 16 : ppn == 2 ? 32 : 64; - for (n = 0; n < subgroups_to_allocate_per_process; ++n) { - - const uint32_t requested_subgroup = subgroup_offset + n; - - uint32_t free_fifo_num; - uint32_t free_fifo_ids[BGQ_MU_NUM_REC_FIFOS_PER_SUBGROUP]; - rc = Kernel_QueryRecFifos(requested_subgroup, &free_fifo_num, free_fifo_ids); - if (rc) goto err; - if (free_fifo_num < 4) goto err; - if (free_fifo_ids[0] != 0) goto err; - if (free_fifo_ids[1] != 1) goto err; - if (free_fifo_ids[2] != 2) goto err; - if (free_fifo_ids[3] != 3) goto err; - - Kernel_RecFifoAttributes_t rfifo_attrs[4]; - memset((void*)&rfifo_attrs[0], 0, sizeof(Kernel_RecFifoAttributes_t)*4); - rc = Kernel_AllocateRecFifos(requested_subgroup, - &bgq_domain->rfifo_subgroup[requested_subgroup], - 4, free_fifo_ids, rfifo_attrs); - if (rc) goto err; - - uint32_t i; - for (i = 0; i < BGQ_MU_NUM_REC_FIFOS_PER_SUBGROUP; ++i) { - rc = Kernel_RecFifoInit(&bgq_domain->rfifo_subgroup[requested_subgroup], - i, - &mregion, - ((uint64_t)memptr) - (uint64_t)mregion.BaseVa, - FI_BGQ_MU_RECFIFO_BYTES - 1); - if (rc) goto err; - - memptr += FI_BGQ_MU_RECFIFO_BYTES; - } - - uint64_t shift = (BGQ_MU_NUM_REC_FIFOS_PER_GROUP-1) - - ((requested_subgroup&3)*BGQ_MU_NUM_FIFO_SUBGROUPS); - rc = Kernel_RecFifoEnable(requested_subgroup>>2, 0x01ULL << shift); - if (rc) goto err; - rc = 
Kernel_RecFifoEnable(requested_subgroup>>2, 0x01ULL << (shift-1)); - if (rc) goto err; - rc = Kernel_RecFifoEnable(requested_subgroup>>2, 0x01ULL << (shift-2)); - if (rc) goto err; - rc = Kernel_RecFifoEnable(requested_subgroup>>2, 0x01ULL << (shift-3)); - if (rc) goto err; - - for (i = 0; i< BGQ_MU_NUM_REC_FIFOS_PER_SUBGROUP; ++i) { - bgq_domain->rx.rfifo[requested_subgroup*BGQ_MU_NUM_REC_FIFOS_PER_SUBGROUP+i] = - &bgq_domain->rfifo_subgroup[requested_subgroup]._recfifos[i]; - } - - bgq_domain->rx.max += 4; /* initialized 4 mu reception fifos, 1 mu reception fifo is used in each fi rx ctx */ - } - - bgq_domain->tx.count = 0; - - /* initialize the mu gi barrier */ - bgq_domain->gi.leader_tcoord = bgq_domain->fabric->node.leader_tcoord; - bgq_domain->gi.is_leader = bgq_domain->fabric->node.is_leader; - if (bgq_domain->gi.is_leader) { - rc = MUSPI_GIBarrierInit(&bgq_domain->gi.barrier, 0); - assert(rc==0); - } - - bgq_domain->subgroups_per_process = 64 / Kernel_ProcessCount(); - - l2atomic_lock_release(&bgq_domain->mu.lock); - - - /* global barrier after mu initialization is complete */ - l2atomic_barrier_enter(&bgq_domain->fabric->node.barrier); - if (bgq_domain->gi.is_leader) { - rc = MUSPI_GIBarrierEnterAndWait(&bgq_domain->gi.barrier); - assert(rc==0); - } - l2atomic_barrier_enter(&bgq_domain->fabric->node.barrier); - - return 0; -err: - if (l2atomic_lock_isbusy(&bgq_domain->mu.lock)) { - l2atomic_lock_release(&bgq_domain->mu.lock); - } - return -1; -} - -int fi_bgq_alloc_default_domain_attr(struct fi_domain_attr **domain_attr) -{ - struct fi_domain_attr *attr; - - attr = calloc(1, sizeof(*attr)); - if (!attr) - goto err; - - uint32_t ppn = Kernel_ProcessCount(); - - /* - * See: fi_bgq_stx_init() for the number of mu injection fifos - * allocated for each tx context. Each rx context uses one - * mu injection fifo and one mu reception fifo. 
- */ - const unsigned tx_ctx_cnt = (((BGQ_MU_NUM_INJ_FIFO_GROUPS-1) * BGQ_MU_NUM_INJ_FIFOS_PER_GROUP) / 3) / ppn; - - /* - * The number of rx contexts on a node is the minimum of: - * 1. number of mu injection fifos on the node not used by tx contexts - * 2. total number mu reception fifos on the node - */ - const unsigned rx_ctx_cnt = MIN((((BGQ_MU_NUM_INJ_FIFO_GROUPS-1) * BGQ_MU_NUM_INJ_FIFOS_PER_GROUP) - (tx_ctx_cnt * ppn)), ((BGQ_MU_NUM_REC_FIFO_GROUPS-1) * BGQ_MU_NUM_REC_FIFOS_PER_GROUP)) / ppn; - - attr->domain = NULL; - attr->name = NULL; - - attr->threading = FI_THREAD_ENDPOINT; - attr->control_progress = FI_PROGRESS_MANUAL; - attr->data_progress = FI_BGQ_FABRIC_DIRECT_PROGRESS; - attr->resource_mgmt = FI_RM_DISABLED; - attr->av_type = FI_AV_MAP; - attr->mr_mode = FI_BGQ_FABRIC_DIRECT_MR; - attr->mr_key_size = 2; /* 2^16 keys */ - attr->cq_data_size = FI_BGQ_REMOTE_CQ_DATA_SIZE; - attr->cq_cnt = 128 / ppn; - attr->ep_cnt = 1; /* TODO - what about endpoints that only use a shared receive context and a shared transmit context? */ - attr->tx_ctx_cnt = tx_ctx_cnt; - attr->rx_ctx_cnt = rx_ctx_cnt; - - attr->max_ep_tx_ctx = ((BGQ_MU_NUM_INJ_FIFO_GROUPS-1) * BGQ_MU_NUM_INJ_FIFOS_PER_GROUP) / ppn / 2; - attr->max_ep_rx_ctx = ((BGQ_MU_NUM_REC_FIFO_GROUPS-1) * BGQ_MU_NUM_REC_FIFOS_PER_GROUP) / ppn; - - attr->max_ep_stx_ctx = attr->max_ep_tx_ctx; - attr->max_ep_srx_ctx = 0; /* TODO - reserve some mu reception fifos for use as shared receive context ... how to address? */ - attr->mr_iov_limit = 1; - attr->mr_cnt = 1 << (attr->mr_key_size << 3); - - *domain_attr = attr; - - return 0; -err: - *domain_attr = NULL; - errno = FI_ENOMEM; - return -1; -} - -int fi_bgq_choose_domain(uint64_t caps, struct fi_domain_attr *domain_attr, - const struct fi_domain_attr *hints) -{ - if (!domain_attr) { - goto err; - } - - *domain_attr = *fi_bgq_global.default_domain_attr; - /* Set the data progress mode to the option used in the configure. - * Ignore any setting by the application. 
- */ - domain_attr->data_progress = FI_BGQ_FABRIC_DIRECT_PROGRESS; - - /* Set the mr_mode to the option used in the configure. - * Ignore any setting by the application - the checkinfo should have verified - * it was set to the same setting. - */ - domain_attr->mr_mode = FI_BGQ_FABRIC_DIRECT_MR; - - if (hints) { - if (hints->domain) { - struct fi_bgq_domain *bgq_domain = bgq_domain = container_of(hints->domain, struct fi_bgq_domain, domain_fid); - - domain_attr->threading = bgq_domain->threading; - domain_attr->resource_mgmt = bgq_domain->resource_mgmt; - domain_attr->tx_ctx_cnt = fi_bgq_domain_get_tx_max(bgq_domain); - domain_attr->rx_ctx_cnt = fi_bgq_domain_get_rx_max(bgq_domain); - domain_attr->max_ep_tx_ctx = fi_bgq_domain_get_tx_max(bgq_domain); - domain_attr->max_ep_rx_ctx = fi_bgq_domain_get_rx_max(bgq_domain); - domain_attr->max_ep_stx_ctx = fi_bgq_domain_get_tx_max(bgq_domain); - - } else { - - if (hints->threading) domain_attr->threading = hints->threading; - if (hints->control_progress) domain_attr->control_progress = hints->control_progress; - if (hints->resource_mgmt) domain_attr->resource_mgmt = hints->resource_mgmt; - if (hints->av_type) domain_attr->av_type = hints->av_type; - if (hints->mr_key_size) domain_attr->mr_key_size = hints->mr_key_size; - if (hints->cq_data_size) domain_attr->cq_data_size = hints->cq_data_size; - if (hints->cq_cnt) domain_attr->cq_cnt = hints->cq_cnt; - if (hints->ep_cnt) domain_attr->ep_cnt = hints->ep_cnt; - if (hints->tx_ctx_cnt) domain_attr->tx_ctx_cnt = hints->tx_ctx_cnt; - if (hints->rx_ctx_cnt) domain_attr->rx_ctx_cnt = hints->rx_ctx_cnt; - if (hints->max_ep_tx_ctx) domain_attr->max_ep_tx_ctx = hints->max_ep_tx_ctx; - if (hints->max_ep_rx_ctx) domain_attr->max_ep_rx_ctx = hints->max_ep_rx_ctx; - if (hints->max_ep_stx_ctx) domain_attr->max_ep_stx_ctx = hints->max_ep_stx_ctx; - if (hints->max_ep_srx_ctx) domain_attr->max_ep_srx_ctx = hints->max_ep_srx_ctx; - if (hints->mr_iov_limit) domain_attr->mr_iov_limit = 
hints->mr_iov_limit; - } - } - - if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_AUTO) - if (Kernel_ProcessCount() > 16) { - fprintf(stderr,"BGQ Provider configure in FI_PROGRESS_AUTO mode and cannot be run higher than 16 ppn due to need for progress thread\n"); - assert(0); - exit(1); - } - - - domain_attr->name = strdup(FI_BGQ_PROVIDER_NAME); - if (!domain_attr->name) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN, - "no memory\n"); - errno = FI_ENOMEM; - return -errno; - } - - domain_attr->cq_data_size = FI_BGQ_REMOTE_CQ_DATA_SIZE; - - return 0; -err: - errno = FI_EINVAL; - return -errno; -} - -int fi_bgq_check_domain_attr(const struct fi_domain_attr *attr) -{ - switch(attr->threading) { - case FI_THREAD_UNSPEC: - case FI_THREAD_SAFE: - case FI_THREAD_FID: - case FI_THREAD_ENDPOINT: - case FI_THREAD_COMPLETION: - case FI_THREAD_DOMAIN: - break; - default: - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN, - "incorrect threading level\n"); - goto err; - } - if (attr->control_progress && - attr->control_progress != FI_PROGRESS_MANUAL) { - fprintf(stderr,"BGQ Provider only supports control_progress of FI_PROGRESS_MANUAL\n"); - assert(0); - exit(1); - } - if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_AUTO) { - if (attr->data_progress && - attr->data_progress == FI_PROGRESS_MANUAL) { - fprintf(stderr,"BGQ Provider configured with data progress mode of FI_PROGRESS_AUTO but application specified FI_PROGRESS_MANUAL\n"); - fflush(stderr); - assert(0); - exit(1); - } - } - else if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_MANUAL) { - if (attr->data_progress && - attr->data_progress == FI_PROGRESS_AUTO) { - fprintf(stderr,"BGQ Provider configured with data progress mode of FI_PROGRESS_MANUAL but application specified FI_PROGRESS_AUTO\n"); - fflush(stderr); - assert(0); - exit(1); - } - } - else { - fprintf(stderr,"BGQ Provider progress mode not properly configured.\n"); - fflush(stderr); - assert(0); - exit(1); - } - - if (FI_BGQ_FABRIC_DIRECT_MR 
== FI_MR_SCALABLE) { - if (attr->mr_mode != FI_MR_SCALABLE) { - fprintf(stderr,"BGQ Provider configured with mr mode of FI_MR_SCALABLE but application specified something else.\n"); - fflush(stderr); - assert(0); - exit(1); - } - } - else if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_BASIC) { - if (attr->mr_mode != FI_MR_BASIC) { - fprintf(stderr,"BGQ Provider configured with mr mode of FI_MR_BASIC but application specified something else.\n"); - fflush(stderr); - assert(0); - exit(1); - } - } - else { - fprintf(stderr,"BGQ Provider mr mode not properly configured.\n"); - fflush(stderr); - assert(0); - exit(1); - } - if (attr->mr_key_size) { - if (attr->mr_key_size > FI_BGQ_MR_KEY_SIZE) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN, - "memory key size too large\n"); - goto err; - } - } - if (attr->cq_data_size) { - if (attr->cq_data_size > FI_BGQ_REMOTE_CQ_DATA_SIZE) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN, - "max cq data supported is %d\n", - FI_BGQ_REMOTE_CQ_DATA_SIZE); - goto err; - } - } - - return 0; - -err: - errno = FI_EINVAL; - return -errno; -} - -int fi_bgq_domain(struct fid_fabric *fabric, - struct fi_info *info, - struct fid_domain **dom, void *context) -{ - int ret; - struct fi_bgq_domain *bgq_domain = NULL; - struct fi_bgq_fabric *bgq_fabric = - container_of(fabric, struct fi_bgq_fabric, fabric_fid); - - if (!info) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN, - "no info supplied\n"); - errno = FI_EINVAL; - return -errno; - } - - ret = fi_bgq_fid_check(&fabric->fid, FI_CLASS_FABRIC, "fabric"); - if (ret) - return ret; - - bgq_domain = calloc(1, sizeof(struct fi_bgq_domain)); - if (!bgq_domain) { - errno = FI_ENOMEM; - goto err; - } - - /* fill in default domain attributes */ - bgq_domain->threading = fi_bgq_global.default_domain_attr->threading; - bgq_domain->resource_mgmt = fi_bgq_global.default_domain_attr->resource_mgmt; - - if (info->domain_attr) { - if (info->domain_attr->domain) { - FI_LOG(fi_bgq_global.prov, 
FI_LOG_DEBUG, FI_LOG_DOMAIN, - "domain cannot be supplied\n"); - goto err; - } - ret = fi_bgq_check_domain_attr(info->domain_attr); - if (ret) - goto err; - bgq_domain->threading = info->domain_attr->threading; - bgq_domain->resource_mgmt = info->domain_attr->resource_mgmt; - } - - /* Set the data progress mode to the option used in the configure. - * Ignore any setting by the application. - */ - bgq_domain->data_progress = FI_BGQ_FABRIC_DIRECT_PROGRESS; - - - uint32_t ppn = Kernel_ProcessCount(); - if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_AUTO) { - uint32_t ppn = Kernel_ProcessCount(); - if (ppn > 16) { - fprintf(stderr,"BGQ Provider configure in FI_PROGRESS_AUTO mode and cannot be run higher than 16 ppn due to need for progress thread\n"); - assert(0); - exit(1); - } - } - - bgq_domain->fabric = bgq_fabric; - - - Personality_t personality; - if (Kernel_GetPersonality(&personality, sizeof(Personality_t))) goto err; - bgq_domain->my_coords.a = personality.Network_Config.Acoord; - bgq_domain->my_coords.b = personality.Network_Config.Bcoord; - bgq_domain->my_coords.c = personality.Network_Config.Ccoord; - bgq_domain->my_coords.d = personality.Network_Config.Dcoord; - bgq_domain->my_coords.e = personality.Network_Config.Ecoord; - bgq_domain->my_coords.t = Kernel_MyTcoord(); - - bgq_domain->zero.value = 0; - fi_bgq_cnk_vaddr2paddr((const void *)&bgq_domain->zero.value, - sizeof(uint64_t), &bgq_domain->zero.paddr); - - bgq_domain->one.value = 1; - fi_bgq_cnk_vaddr2paddr((const void *)&bgq_domain->one.value, - sizeof(uint64_t), &bgq_domain->one.paddr); - - ret = fi_bgq_mu_init(bgq_domain, info); - if (ret) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN, - "error initializing the MU\n"); - errno = FI_EOTHER; - goto err; - } - - fi_bgq_ref_init(&bgq_fabric->node, &bgq_domain->ref_cnt, "domain"); - - if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_AUTO) { - uint32_t ppn = Kernel_ProcessCount(); - fi_bgq_progress_init(bgq_domain, 64/ppn - 1); /* TODO - 
what should the "max threads" be? */ - if (0 != fi_bgq_progress_enable(bgq_domain, 0)) { - - /* Unable to start progress threads! */ - fprintf(stderr,"BGQ Provider unable to start progress thread for FI_PROGRESS_AUTO mode\n"); - assert(0); - exit(1); - } - } else { - fi_bgq_progress_init(bgq_domain, 0); - } - - - bgq_domain->domain_fid.fid.fclass = FI_CLASS_DOMAIN; - bgq_domain->domain_fid.fid.context = context; - bgq_domain->domain_fid.fid.ops = &fi_bgq_fi_ops; - bgq_domain->domain_fid.ops = &fi_bgq_domain_ops; - - ret = fi_bgq_init_mr_ops(bgq_domain, info); - if (ret) - goto err; - - unsigned i = 0; - for (i = 0; i < FI_BGQ_DOMAIN_MAX_RX_CTX; ++i) { - bgq_domain->rx.ctx[i] = NULL; - } - - if (fi_bgq_node_lock_allocate(&bgq_fabric->node, &bgq_domain->lock)) goto err; - - fi_bgq_ref_inc(&bgq_domain->fabric->ref_cnt, "fabric"); - - *dom = &bgq_domain->domain_fid; - - return 0; - -err: - fi_bgq_finalize_mr_ops(bgq_domain); - if (bgq_domain) - free(bgq_domain); - return -errno; -} diff --git a/prov/bgq/src/fi_bgq_ep.c b/prov/bgq/src/fi_bgq_ep.c deleted file mode 100644 index fa2e5e1b983..00000000000 --- a/prov/bgq/src/fi_bgq_ep.c +++ /dev/null @@ -1,2053 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "rdma/bgq/fi_bgq.h" - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "rdma/bgq/fi_bgq_spi.h" -#include "rdma/bgq/fi_bgq_rx.h" - -static int fi_bgq_close_stx_nofree(struct fi_bgq_stx *bgq_stx) -{ - int ret; - - ret = fi_bgq_ref_finalize(&bgq_stx->ref_cnt, "shared context"); - if (ret) - return ret; - - ret = fi_bgq_ref_dec(&bgq_stx->domain->ref_cnt, "domain"); - if (ret) - return ret; - - return 0; -} - -static int fi_bgq_close_stx(fid_t fid) -{ - int ret; - ret = fi_bgq_fid_check(fid, FI_CLASS_STX_CTX, "shared context"); - if (ret) - return ret; - - struct fi_bgq_stx *bgq_stx = - container_of(fid, struct fi_bgq_stx, stx_fid); - - ret = fi_bgq_close_stx_nofree(bgq_stx); - if (ret) - return ret; - - free(bgq_stx); - return 0; -} - -int fi_bgq_bind_ep_stx(struct fi_bgq_ep *bgq_ep, - struct fi_bgq_stx *bgq_stx, uint64_t flags) -{ - if (!bgq_ep || !bgq_stx) - goto err; - - bgq_ep->tx.stx = bgq_stx; - - fi_bgq_ref_inc(&bgq_stx->ref_cnt, "shared context"); - - return 0; - -err: - return -errno; -} - -static struct fi_ops fi_bgq_stx_ops = { - .size = sizeof(struct fi_ops), - .close = fi_bgq_close_stx, - .bind = fi_no_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open -}; - -static 
struct fi_ops_ep fi_bgq_stx_ep_ops = { - .size = sizeof(struct fi_ops_ep), - .cancel = fi_no_cancel, - .getopt = fi_no_getopt, - .setopt = fi_no_setopt, - .tx_ctx = fi_no_tx_ctx, - .rx_ctx = fi_no_rx_ctx -}; - -int fi_bgq_stx_init (struct fi_bgq_domain *bgq_domain, struct fi_tx_attr *attr, - struct fi_bgq_stx *bgq_stx, void *context) { - - bgq_stx->stx_fid.fid.fclass = FI_CLASS_STX_CTX; - bgq_stx->stx_fid.fid.context= context; - bgq_stx->stx_fid.fid.ops = &fi_bgq_stx_ops; - bgq_stx->stx_fid.ops = &fi_bgq_stx_ep_ops; - - bgq_stx->domain = bgq_domain; - - bgq_stx->attr = attr ? *attr : *fi_bgq_global.default_tx_attr; - - l2atomic_lock_acquire(&bgq_domain->mu.lock); - - /* - * Three options for number of rget fifos to allocate for each tx context: - * - * 1. 1 rget fifo - * - same for all ppn (1..64) - * - maximizes the number of tx contexts that can be created - * - simplest implementation - * - lowest off-node bandwidth - * - can support multiple domains at all ppn - * - * 2. 6 rget fifos (a,b,c,d,e,local) - * - same for all ppn (1..64) - * - one subgroup for each tx context (including a single tx injfifo) - * - best for 64 ppn, half off-node bandwidth for all other ppn - * - can only support one domain at 64 ppn - * - requires rget fifo hash algorithm - * - * 3. 11 rget fifos (+a,-a,+b,-b,+c,-c,+d,-d,+e,-e,local) - * - must use 6 rget fifos (option 1) for 64 ppn - * - complex implementation - * - optimial off-node bandwidth - * - can only support one domain at 64 ppn and 32 ppn - * - requires rget fifo hash algorithm - * - * Allocating per-tx rget injection fifos could eliminate potential - * "rget injection fifo full" MU errors if the number of outstanding - * rget injection payload descriptors can be determined. This will - * limit the maximum number of tx contexts, and each individual rget - * injection fifo will not be saturated - only 11 rget injection fifos - * FOR EACH NODE are needed to maximize off-node aggregate bandwidth. - * - * For now, use option 1. 
- */ - uint32_t rget_fifos_to_allocate = 1; - - /* - * initialize the rget injection fifo(s) used for rendezvous; begin at - * fifo 0 of subgroup 0 and iterate *up* until an unallocated rget fifo - * is found - */ - if (rget_fifos_to_allocate != - fi_bgq_spi_injfifo_init(&bgq_stx->rgetfifo, - &bgq_stx->rgetfifo_subgroup, - rget_fifos_to_allocate, - FI_BGQ_TX_SIZE, - 0, /* immediate_payload_sizeof */ - 1, /* is_remote_get */ - 0)) /* is_top_down */ - { - goto err; - } - - /* - * Three options for number of injection fifos to allocate for each tx context: - * - * 1. 1 injection fifo - * - same for all ppn (1..64) - * - maximizes the number of tx contexts that can be created - * - simplest implementation - * - lowest off-node bandwidth - * - can support multiple domains at all ppn - * - may be sufficient because only single-packet messages are injected - * - * 2. 11 injection fifos (+a,-a,+b,-b,+c,-c,+d,-d,+e,-e,local) - * - must use 6 rget fifos (option 1) for 64 ppn - * - complex implementation - * - optimial off-node bandwidth - * - can only support one domain at 64 ppn and 32 ppn - * - requires injection fifo pinning algorithm - * - * For now, use option 1. 
- */ - uint32_t inj_fifos_to_allocate = 1; - - /* - * initialize the transmit injection fifo; begin at fifo 7 of subgroup 15 - * and iterate *down* until an unallocated fifo is found - */ - if (inj_fifos_to_allocate != - fi_bgq_spi_injfifo_init(&bgq_stx->injfifo, - &bgq_stx->injfifo_subgroup, - inj_fifos_to_allocate, - FI_BGQ_TX_SIZE, - sizeof(union fi_bgq_mu_packet_payload), - 0, /* is_remote_get */ - 1)) /* is_top_down */ - { - goto err; - } - - l2atomic_lock_release(&bgq_domain->mu.lock); - - fi_bgq_ref_init(&bgq_domain->fabric->node, &bgq_stx->ref_cnt, "shared context"); - fi_bgq_ref_inc(&bgq_domain->ref_cnt, "domain"); - - return 0; -err: - if (l2atomic_lock_isbusy(&bgq_domain->mu.lock)) { - l2atomic_lock_release(&bgq_domain->mu.lock); - } - /* TODO - other cleanup */ - return -1; -} - -int fi_bgq_stx_context(struct fid_domain *domain, struct fi_tx_attr *attr, - struct fid_stx **stx, void *context) -{ - int ret; - errno = 0; - struct fi_bgq_stx *bgq_stx = NULL; - struct fi_bgq_domain *bgq_domain = - container_of(domain, struct fi_bgq_domain, domain_fid); - - if (!domain || !stx) { - errno = FI_EINVAL; - return -errno; - } - - ret = fi_bgq_fid_check(&domain->fid, FI_CLASS_DOMAIN, "domain"); - if (ret) - return ret; - - bgq_stx = calloc(1, sizeof *bgq_stx); - if (!bgq_stx) { - errno = FI_ENOMEM; - goto err; - } - - if (fi_bgq_stx_init(bgq_domain, attr, bgq_stx, context)) { - errno = FI_EOTHER; - goto err; - } - - *stx = &bgq_stx->stx_fid; - return FI_SUCCESS; - -err: - if (bgq_stx) { - free(bgq_stx); - bgq_stx = NULL; - } - return -errno; -} - -static int fi_bgq_close_ep(fid_t fid) -{ - if (!fid) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_FABRIC, - "NULL ep object"); - errno = FI_EINVAL; - return -errno; - } - - if (fid->fclass != FI_CLASS_EP && - fid->fclass != FI_CLASS_TX_CTX && - fid->fclass != FI_CLASS_RX_CTX) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_FABRIC, - "wrong type of object. 
expected (FI_CLASS_EP), got (%d)\n", - fid->fclass); - errno = FI_EINVAL; - return -errno; - } - - int ret; - struct fi_bgq_ep *bgq_ep = container_of(fid, struct fi_bgq_ep, ep_fid); - - /* disable async progress threads */ - fi_bgq_progress_ep_disable(bgq_ep); - - ret = fi_bgq_ref_dec(&bgq_ep->domain->ref_cnt, "domain"); - if (ret) - return ret; - - /* av is only valid/required if tx capability is enabled */ - if (bgq_ep->av) { - ret = fi_bgq_ref_dec(&bgq_ep->av->ref_cnt, "address vector"); - if (ret) return ret; - } - - if (bgq_ep->sep) { - ret = fi_bgq_ref_dec(&bgq_ep->sep->ref_cnt, "scalable endpoint"); - if (ret) return ret; - } - - if (bgq_ep->tx.stx) { - ret = fi_bgq_ref_dec(&bgq_ep->tx.stx->ref_cnt, "shared tx context"); - if (ret) return ret; - - if (bgq_ep->tx.stx == &bgq_ep->tx.exclusive_stx) { - ret = fi_bgq_close_stx_nofree(bgq_ep->tx.stx); - if (ret) return ret; - } - bgq_ep->tx.stx = NULL; - } - - if (bgq_ep->send_cq) { - ret = fi_bgq_ref_dec(&bgq_ep->send_cq->ref_cnt, "completion queue"); - if (ret) return ret; - } - if (bgq_ep->recv_cq) { - ret = fi_bgq_ref_dec(&bgq_ep->recv_cq->ref_cnt, "completion queue"); - if (ret) return ret; - } - - if (ofi_recv_allowed(bgq_ep->rx.caps) || - ofi_rma_target_allowed(bgq_ep->rx.caps)) { - } - - fi_bgq_finalize_cm_ops(bgq_ep); - fi_bgq_finalize_msg_ops(bgq_ep); - fi_bgq_finalize_rma_ops(bgq_ep); - fi_bgq_finalize_tagged_ops(bgq_ep); - fi_bgq_finalize_atomic_ops(bgq_ep); - - void *mem = bgq_ep->mem; - free(mem); - - return 0; -} - -static int fi_bgq_bind_ep(struct fid *fid, struct fid *bfid, - uint64_t flags) -{ -if (!bfid) return 0; - int ret = 0; - struct fi_bgq_ep *bgq_ep = container_of(fid, struct fi_bgq_ep, ep_fid); - - switch (bfid->fclass) { - case FI_CLASS_CNTR: - ret = fi_bgq_bind_ep_cntr(bgq_ep, - container_of(bfid, struct fi_bgq_cntr, cntr_fid), flags); - if (ret) - goto err; - break; - case FI_CLASS_CQ: - ret = fi_bgq_bind_ep_cq(bgq_ep, - container_of(bfid, struct fi_bgq_cq, cq_fid), flags); - if (ret) 
- goto err; - break; - case FI_CLASS_AV: - ret = fi_bgq_bind_ep_av(bgq_ep, - container_of(bfid, struct fi_bgq_av, av_fid), flags); - if (ret) - goto err; - break; - case FI_CLASS_MR: - ret = fi_bgq_bind_ep_mr(bgq_ep, - container_of(bfid, struct fi_bgq_mr, mr_fid), flags); - if (ret) - goto err; - break; - case FI_CLASS_STX_CTX: - ret = fi_bgq_bind_ep_stx(bgq_ep, - container_of(bfid, struct fi_bgq_stx, stx_fid), - flags); - if (ret) - goto err; - break; - default: - errno = FI_ENOSYS; - goto err; - } - return ret; -err: - return -errno; -} - -static int fi_bgq_check_ep(struct fi_bgq_ep *bgq_ep) -{ - - - switch (bgq_ep->ep_fid.fid.fclass) { - case FI_CLASS_EP: - if (!bgq_ep->av) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "no AV supplied"); - goto err; - } - break; - case FI_CLASS_RX_CTX: - case FI_CLASS_TX_CTX: - if (!bgq_ep->sep) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "no Scalable Endpoint supplied"); - goto err; - } - bgq_ep->av = bgq_ep->sep->av; - if (!bgq_ep->sep->av) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "no AV supplied on Scalable Endpoint"); - goto err; - } - break; - default: - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "Invalid EP class %lu", - bgq_ep->ep_fid.fid.fclass); - goto err; - } - - if (!bgq_ep->domain) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "no domain supplied\n"); - goto err; - } - - return 0; -err: - errno = FI_EINVAL; - return -errno; -} - -static int fi_bgq_ep_tx_init (struct fi_bgq_ep *bgq_ep, - struct fi_bgq_domain *bgq_domain) -{ - assert(bgq_ep); - assert(bgq_domain); - assert(bgq_ep->tx.state == FI_BGQ_EP_UNINITIALIZED); - - if (bgq_ep->tx.stx) { -#ifdef FI_BGQ_TRACE - fprintf(stderr,"fi_bgq_ep_tx_init - using tx shared on node not picking new fifos\n"); - fflush(stderr); -#endif - assert(bgq_domain == bgq_ep->tx.stx->domain); - - } else { - /* - * A shared transmit context was not provided; create an - * "exclusive" shared 
transmit context for use by only this - * endpoint transmit context - */ -#ifdef FI_BGQ_TRACE - fprintf(stderr,"fi_bgq_ep_tx_init - picking new fifos for new tx\n"); - fflush(stderr); -#endif - if (fi_bgq_stx_init(bgq_domain, 0, &bgq_ep->tx.exclusive_stx, NULL)) { - return -1; - } - bgq_ep->tx.stx = &bgq_ep->tx.exclusive_stx; - fi_bgq_ref_inc(&bgq_ep->tx.stx->ref_cnt, "exclusive shared context"); - } - - bgq_ep->threading = (uint32_t) bgq_domain->threading; - bgq_ep->av_type = (uint32_t) bgq_ep->av->type; - bgq_ep->mr_mode = (uint32_t) bgq_domain->mr_mode; - - /* copy the 'shared tx' resources and information */ - fi_bgq_spi_injfifo_clone(&bgq_ep->tx.injfifo, &bgq_ep->tx.stx->injfifo); - - BG_CoordinateMapping_t my_coords = bgq_domain->my_coords; - - const uint32_t fifo_map = - fi_bgq_mu_calculate_fifo_map_single(my_coords, my_coords); - - const MUHWI_Destination_t destination = - fi_bgq_spi_coordinates_to_destination(my_coords); - - const uint32_t base_rx = - fi_bgq_addr_calculate_base_rx(my_coords.t, Kernel_ProcessCount()); - - const union fi_bgq_addr self = {.fi=fi_bgq_addr_create(destination, fifo_map, base_rx)}; - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"fi_bgq_ep_tx_init created addr:\n"); - FI_BGQ_ADDR_DUMP((fi_addr_t *)&self.fi); -#endif - /* - * fi_[t]send*() descriptor models - */ - { /* send model */ - MUHWI_Descriptor_t * desc = &bgq_ep->tx.send.send_model; - MUSPI_DescriptorZeroOut(desc); - - desc->Half_Word0.Prefetch_Only = - MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; - desc->Half_Word1.Interrupt = - MUHWI_DESCRIPTOR_DO_NOT_INTERRUPT_ON_PACKET_ARRIVAL; - desc->PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type = - MUHWI_PT2PT_DATA_PACKET_TYPE; - desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Byte3 = - MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = - MUHWI_PACKET_TYPE_FIFO; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - - union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) 
&desc->PacketHeader; - fi_bgq_mu_packet_type_set(hdr, FI_BGQ_MU_PACKET_TYPE_TAG|FI_BGQ_MU_PACKET_TYPE_EAGER); - - hdr->pt2pt.uid.fi = self.uid.fi; - hdr->pt2pt.immediate_data = 0; - hdr->pt2pt.ofi_tag = (uint64_t)-1; - - /* specified at injection time */ - desc->Pa_Payload = 0; - desc->Message_Length = 0; - desc->Torus_FIFO_Map = 0; - desc->PacketHeader.NetworkHeader.pt2pt.Destination.Destination.Destination = -1; - desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id = -1; - - - /* send rendezvous models */ - desc = &bgq_ep->tx.send.rzv_model[0]; /* "internode" */ /* TODO - use an enum */ - *desc = bgq_ep->tx.send.send_model; - - desc->Message_Length = sizeof(struct fi_bgq_mu_iov) + offsetof(union fi_bgq_mu_packet_payload, rendezvous.mu_iov); - - hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader; - fi_bgq_mu_packet_type_set(hdr, FI_BGQ_MU_PACKET_TYPE_TAG|FI_BGQ_MU_PACKET_TYPE_RENDEZVOUS); - hdr->pt2pt.rendezvous.is_local = 0; - hdr->pt2pt.rendezvous.niov_minus_1 = 0; - hdr->pt2pt.rendezvous.rget_inj_fifo_id = bgq_ep->tx.stx->rgetfifo.node_scoped_fifo_id; - - desc = &bgq_ep->tx.send.rzv_model[1]; /* "intranode" */ /* TODO - use an enum */ - *desc = bgq_ep->tx.send.rzv_model[0]; - hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader; - hdr->pt2pt.rendezvous.is_local = 1; - -#ifdef FI_BGQ_REMOTE_COMPLETION - /* remote completion model - used for FI_DELIVERY_COMPLETE */ - desc = &bgq_ep->tx.send.remote_completion_model; - *desc = bgq_ep->tx.send.send_model; - - hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader; - fi_bgq_mu_packet_type_set(hdr, FI_BGQ_MU_PACKET_TYPE_EAGER|FI_BGQ_MU_PACKET_TYPE_ACK); - hdr->completion.origin = fi_bgq_uid_get_destination(self.uid.fi); - - /* specified at injection time */ - hdr->completion.is_local = 0; - hdr->completion.cntr_paddr_rsh3b = 0; -#endif - } - - /* - * fi_write*() descriptor models - */ - { /* - * "direct" model(s) for FI_MR_BASIC - */ - - /* direct-put model */ - MUHWI_Descriptor_t * 
desc = &bgq_ep->tx.write.direct.dput_model; - MUSPI_DescriptorZeroOut(desc); - - desc->Half_Word0.Prefetch_Only = - MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; - desc->Half_Word1.Interrupt = - MUHWI_DESCRIPTOR_DO_NOT_INTERRUPT_ON_PACKET_ARRIVAL; - desc->PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type = - MUHWI_PT2PT_DATA_PACKET_TYPE; - desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Byte3 = - MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = - MUHWI_PACKET_TYPE_PUT; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Payload_Base_Address_Id = - FI_BGQ_MU_BAT_ID_GLOBAL; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Pacing = - MUHWI_PACKET_DIRECT_PUT_IS_NOT_PACED; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Counter_Base_Address_Id = - FI_BGQ_MU_BAT_ID_COUNTER; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Counter_Offset = 0; - - /* specified at injection time */ - desc->Pa_Payload = 0; - desc->Message_Length = 0; - desc->Torus_FIFO_Map = 0; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Put_Offset_MSB = 0; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Put_Offset_LSB = 0; - - /* - * "emulation" model(s) for FI_MR_SCALABLE - */ - - /* memory-fifo model */ - desc = &bgq_ep->tx.write.emulation.mfifo_model; - MUSPI_DescriptorZeroOut(desc); - - desc->Half_Word0.Prefetch_Only = - MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; - desc->Half_Word1.Interrupt = - MUHWI_DESCRIPTOR_DO_NOT_INTERRUPT_ON_PACKET_ARRIVAL; - desc->Pa_Payload = 0; - desc->Message_Length = 0; - desc->PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type = - MUHWI_PT2PT_DATA_PACKET_TYPE; - desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Byte3 = - MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = - MUHWI_PACKET_TYPE_FIFO; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 
16; - - /* specified at injection time */ - desc->Torus_FIFO_Map = -1; - desc->PacketHeader.NetworkHeader.pt2pt.Destination.Destination.Destination = -1; - desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id = -1; - - union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader; - fi_bgq_mu_packet_type_set(hdr, FI_BGQ_MU_PACKET_TYPE_RMA); - - /* remote-get model */ - desc = &bgq_ep->tx.write.emulation.rget_model; - MUSPI_DescriptorZeroOut(desc); - - desc->Half_Word0.Prefetch_Only = - MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; - desc->Half_Word1.Interrupt = - MUHWI_DESCRIPTOR_DO_NOT_INTERRUPT_ON_PACKET_ARRIVAL; - desc->PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type = - MUHWI_PT2PT_DATA_PACKET_TYPE; - desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Byte3 = - MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = - MUHWI_PACKET_TYPE_GET; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - desc->PacketHeader.NetworkHeader.pt2pt.Destination = - fi_bgq_uid_get_destination(self.uid.fi); - desc->PacketHeader.messageUnitHeader.Packet_Types.Remote_Get.Rget_Inj_FIFO_Id = - bgq_ep->tx.stx->rgetfifo.node_scoped_fifo_id; - - /* specified at injection time */ - desc->Pa_Payload = 0; - desc->Message_Length = 0; - desc->Torus_FIFO_Map = -1; - - /* direct-put model */ - desc = &bgq_ep->tx.write.emulation.dput_model; - MUSPI_DescriptorZeroOut(desc); - - desc->Half_Word0.Prefetch_Only = - MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; - desc->Half_Word1.Interrupt = - MUHWI_DESCRIPTOR_DO_NOT_INTERRUPT_ON_PACKET_ARRIVAL; - desc->PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type = - MUHWI_PT2PT_DATA_PACKET_TYPE; - desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Byte3 = - MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = - MUHWI_PACKET_TYPE_PUT; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - 
desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Payload_Base_Address_Id = - FI_BGQ_MU_BAT_ID_GLOBAL; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Pacing = - MUHWI_PACKET_DIRECT_PUT_IS_NOT_PACED; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Counter_Base_Address_Id = - FI_BGQ_MU_BAT_ID_COUNTER; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Counter_Offset = 0; - - union fi_bgq_mu_descriptor * fi_desc = (union fi_bgq_mu_descriptor *)desc; - fi_desc->rma.update_type = FI_BGQ_MU_DESCRIPTOR_UPDATE_BAT_TYPE_DST; - - /* specified at injection time */ - desc->Pa_Payload = 0; - desc->Message_Length = 0; - desc->Torus_FIFO_Map = -1; - desc->PacketHeader.NetworkHeader.pt2pt.Destination.Destination.Destination = -1; - - /* TODO counter update model */ - - } - - /* - * fi_read*() descriptor models - */ - { - bgq_ep->tx.read.global_one_paddr = - fi_bgq_node_bat_read(&bgq_domain->fabric->node, - FI_BGQ_MU_BAT_ID_ONE); - - bgq_ep->tx.read.global_zero_paddr = - fi_bgq_node_bat_read(&bgq_domain->fabric->node, - FI_BGQ_MU_BAT_ID_ZERO); - - MUHWI_Descriptor_t * desc = NULL; - union fi_bgq_mu_packet_hdr * hdr = NULL; - - /* memory-fifo model */ - desc = &bgq_ep->tx.read.emulation.mfifo_model; - MUSPI_DescriptorZeroOut(desc); - - desc->Half_Word0.Prefetch_Only = - MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; - desc->Half_Word1.Interrupt = - MUHWI_DESCRIPTOR_DO_NOT_INTERRUPT_ON_PACKET_ARRIVAL; - desc->PacketHeader.NetworkHeader.pt2pt.Destination = - fi_bgq_uid_get_destination(self.uid.fi); - desc->PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type = - MUHWI_PT2PT_DATA_PACKET_TYPE; - desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Byte3 = - MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = - MUHWI_PACKET_TYPE_FIFO; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - - hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader; - fi_bgq_mu_packet_type_set(hdr, 
FI_BGQ_MU_PACKET_TYPE_RMA); - hdr->rma.nbytes = 0; /* no immediate bytes to 'put' for a read operation */ - hdr->rma.key = (uint64_t)-1; /* not used when nbytes == 0 */ - hdr->rma.offset = 0; /* not used when nbytes == 0 */ - - /* ==== specified at injection time ==== */ - desc->Torus_FIFO_Map = -1; - desc->PacketHeader.NetworkHeader.pt2pt.Destination.Destination.Destination = -1; - desc->Pa_Payload = 0; - desc->Message_Length = 0; - desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id = -1; - /* ==== specified at injection time ==== */ - - /* direct-put model */ - desc = &bgq_ep->tx.read.emulation.dput_model; - MUSPI_DescriptorZeroOut(desc); - - desc->Half_Word0.Prefetch_Only = - MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; - desc->Half_Word1.Interrupt = - MUHWI_DESCRIPTOR_DO_NOT_INTERRUPT_ON_PACKET_ARRIVAL; - desc->PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type = - MUHWI_PT2PT_DATA_PACKET_TYPE; - desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Byte3 = - MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = - MUHWI_PACKET_TYPE_PUT; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - desc->PacketHeader.NetworkHeader.pt2pt.Destination = - fi_bgq_uid_get_destination(self.uid.fi); - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Payload_Base_Address_Id = - FI_BGQ_MU_BAT_ID_GLOBAL; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Pacing = - MUHWI_PACKET_DIRECT_PUT_IS_NOT_PACED; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Counter_Base_Address_Id = - FI_BGQ_MU_BAT_ID_COUNTER; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Counter_Offset = 0; - - union fi_bgq_mu_descriptor * fi_desc = (union fi_bgq_mu_descriptor *)desc; - fi_desc->rma.update_type = FI_BGQ_MU_DESCRIPTOR_UPDATE_BAT_TYPE_SRC; - - /* ==== specified at injection time ==== */ - desc->Torus_FIFO_Map = -1; - desc->Message_Length = 0; - desc->Pa_Payload = 0; - fi_desc->rma.key_msb 
= 0; /* TODO - change this when key size > 48b */ - fi_desc->rma.key_lsb = -1; - /* ==== specified at injection time ==== */ - - /* "counter" completion direct-put model */ - desc = &bgq_ep->tx.read.cntr_model; - MUSPI_DescriptorZeroOut(desc); - - desc->Half_Word0.Prefetch_Only = - MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; - desc->Half_Word1.Interrupt = - MUHWI_DESCRIPTOR_DO_NOT_INTERRUPT_ON_PACKET_ARRIVAL; - desc->PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type = - MUHWI_PT2PT_DATA_PACKET_TYPE; - desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Byte3 = - MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = - MUHWI_PACKET_TYPE_PUT; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - desc->PacketHeader.NetworkHeader.pt2pt.Destination = - fi_bgq_uid_get_destination(self.uid.fi); - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Pacing = - MUHWI_PACKET_DIRECT_PUT_IS_NOT_PACED; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Counter_Offset = 0; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Counter_Base_Address_Id = - FI_BGQ_MU_BAT_ID_COUNTER; - desc->Pa_Payload = bgq_ep->tx.read.global_one_paddr; - desc->Message_Length = 8; - - /* ==== specified at injection time ==== */ - desc->Torus_FIFO_Map = -1; - MUSPI_SetRecPayloadBaseAddressInfo(desc, FI_BGQ_MU_BAT_ID_GLOBAL, 0); /* offset will add atomic-ness at runtime */ - /* ==== specified at injection time ==== */ - - /* "cq" completion direct-put model */ - desc = &bgq_ep->tx.read.cq_model; - MUSPI_DescriptorZeroOut(desc); - - desc->Half_Word0.Prefetch_Only = - MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; - desc->Half_Word1.Interrupt = - MUHWI_DESCRIPTOR_DO_NOT_INTERRUPT_ON_PACKET_ARRIVAL; - desc->PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type = - MUHWI_PT2PT_DATA_PACKET_TYPE; - desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Byte3 = - MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = - 
MUHWI_PACKET_TYPE_PUT; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - desc->PacketHeader.NetworkHeader.pt2pt.Destination = - fi_bgq_uid_get_destination(self.uid.fi); - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Pacing = - MUHWI_PACKET_DIRECT_PUT_IS_NOT_PACED; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Counter_Offset = 0; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Counter_Base_Address_Id = - FI_BGQ_MU_BAT_ID_COUNTER; - desc->Pa_Payload = bgq_ep->tx.read.global_zero_paddr; - desc->Message_Length = 8; - - /* ==== specified at injection time ==== */ - desc->Torus_FIFO_Map = -1; - MUSPI_SetRecPayloadBaseAddressInfo(desc, FI_BGQ_MU_BAT_ID_GLOBAL, 0); - /* ==== specified at injection time ==== */ - } - - /* - * fi_atomic*() descriptor models - */ - { - MUHWI_Descriptor_t * desc = NULL; - union fi_bgq_mu_packet_hdr * hdr = NULL; - - /* - * emulation memory-fifo model - */ - desc = &bgq_ep->tx.atomic.emulation.mfifo_model; - MUSPI_DescriptorZeroOut(desc); - - desc->Half_Word0.Prefetch_Only = - MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; - desc->Half_Word1.Interrupt = - MUHWI_DESCRIPTOR_DO_NOT_INTERRUPT_ON_PACKET_ARRIVAL; - desc->PacketHeader.NetworkHeader.pt2pt.Destination = - fi_bgq_uid_get_destination(self.uid.fi); - desc->PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type = - MUHWI_PT2PT_DATA_PACKET_TYPE; - desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Byte3 = - MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = - MUHWI_PACKET_TYPE_FIFO; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - - desc->Message_Length = sizeof(MUHWI_Descriptor_t); - - hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader; - fi_bgq_mu_packet_type_set(hdr, FI_BGQ_MU_PACKET_TYPE_ATOMIC); - hdr->atomic.origin = fi_bgq_uid_get_destination(self.uid.fi); - - /* ==== specified at injection time ==== */ - desc->Torus_FIFO_Map = -1; - 
desc->PacketHeader.NetworkHeader.pt2pt.Destination.Destination.Destination = -1; - desc->Pa_Payload = 0; - - /* emulation memory-fifo fence model */ - desc = &bgq_ep->tx.atomic.emulation.fence.mfifo_model; - MUSPI_DescriptorZeroOut(desc); - - desc->Half_Word0.Prefetch_Only = - MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; - desc->Half_Word1.Interrupt = - MUHWI_DESCRIPTOR_DO_NOT_INTERRUPT_ON_PACKET_ARRIVAL; - desc->PacketHeader.NetworkHeader.pt2pt.Destination = - fi_bgq_uid_get_destination(self.uid.fi); - desc->PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type = - MUHWI_PT2PT_DATA_PACKET_TYPE; - desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Byte3 = - MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = - MUHWI_PACKET_TYPE_FIFO; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - - desc->Message_Length = sizeof(MUHWI_Descriptor_t); - - hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader; - fi_bgq_mu_packet_type_set(hdr, FI_BGQ_MU_PACKET_TYPE_RMA); - hdr->rma.nbytes = 0; /* no immediate bytes to 'put' for a fence operation */ - hdr->rma.key = (uint64_t)-1; /* not used when nbytes == 0 */ - hdr->rma.offset = 0; /* not used when nbytes == 0 */ - hdr->rma.ndesc = 1; - - /* ==== specified at injection time ==== */ - desc->Torus_FIFO_Map = -1; - desc->PacketHeader.NetworkHeader.pt2pt.Destination.Destination.Destination = -1; - desc->Pa_Payload = 0; - - /* emulation direct-put fi_cntr increment model */ - desc = &bgq_ep->tx.atomic.emulation.fence.cntr_model; - MUSPI_DescriptorZeroOut(desc); - - desc->Half_Word0.Prefetch_Only = - MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; - desc->Half_Word1.Interrupt = - MUHWI_DESCRIPTOR_DO_NOT_INTERRUPT_ON_PACKET_ARRIVAL; - desc->PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type = - MUHWI_PT2PT_DATA_PACKET_TYPE; - desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Byte3 = - MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = - MUHWI_PACKET_TYPE_PUT; - 
desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - desc->PacketHeader.NetworkHeader.pt2pt.Destination = - fi_bgq_uid_get_destination(self.uid.fi); - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Pacing = - MUHWI_PACKET_DIRECT_PUT_IS_NOT_PACED; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Counter_Offset = 0; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Counter_Base_Address_Id = - FI_BGQ_MU_BAT_ID_COUNTER; - desc->Pa_Payload = bgq_ep->tx.read.global_one_paddr; - desc->Message_Length = 8; - - /* ==== specified at fi_cntr bind time ==== */ -// MUSPI_SetRecPayloadBaseAddressInfo(desc, write_cntr->std.batid, -// MUSPI_GetAtomicAddress(0, MUHWI_ATOMIC_OPCODE_STORE_ADD)); - - /* ==== specified at injection time ==== */ - desc->Torus_FIFO_Map = -1; - MUSPI_SetRecPayloadBaseAddressInfo(desc, FI_BGQ_MU_BAT_ID_GLOBAL, 0); - - /* emulation direct-put cq byte counter clear model */ - desc = &bgq_ep->tx.atomic.emulation.fence.cq_model; - MUSPI_DescriptorZeroOut(desc); - - desc->Half_Word0.Prefetch_Only = - MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; - desc->Half_Word1.Interrupt = - MUHWI_DESCRIPTOR_DO_NOT_INTERRUPT_ON_PACKET_ARRIVAL; - desc->PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type = - MUHWI_PT2PT_DATA_PACKET_TYPE; - desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Byte3 = - MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = - MUHWI_PACKET_TYPE_PUT; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - desc->PacketHeader.NetworkHeader.pt2pt.Destination = - fi_bgq_uid_get_destination(self.uid.fi); - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Pacing = - MUHWI_PACKET_DIRECT_PUT_IS_NOT_PACED; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Counter_Offset = 0; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Counter_Base_Address_Id = - FI_BGQ_MU_BAT_ID_COUNTER; - desc->Pa_Payload = bgq_ep->tx.read.global_zero_paddr; - 
desc->Message_Length = 8; - - /* ==== specified at injection time ==== */ - desc->Torus_FIFO_Map = -1; - MUSPI_SetRecPayloadBaseAddressInfo(desc, FI_BGQ_MU_BAT_ID_GLOBAL, 0); - } - - bgq_ep->tx.state = FI_BGQ_EP_INITITALIZED_ENABLED; - return 0; -} - - - -static int fi_bgq_ep_rx_init(struct fi_bgq_ep *bgq_ep) -{ - assert(FI_SHARED_CONTEXT != bgq_ep->rx.index); - - struct fi_bgq_domain * bgq_domain = bgq_ep->domain; - - BG_CoordinateMapping_t my_coords = bgq_domain->my_coords; - - const uint32_t fifo_map = - fi_bgq_mu_calculate_fifo_map_single(my_coords, my_coords); - - const MUHWI_Destination_t destination = - fi_bgq_spi_coordinates_to_destination(my_coords); - - const uint32_t rx = - fi_bgq_addr_calculate_base_rx(my_coords.t, Kernel_ProcessCount()) + bgq_ep->rx.index; - - bgq_ep->rx.self.fi = fi_bgq_addr_create(destination, fifo_map, rx); - - /* assign the mu reception fifos - all potential - * reception fifos were allocated at domain initialization */ - if (NULL == bgq_domain->rx.rfifo[fi_bgq_uid_get_rx(bgq_ep->rx.self.uid.fi)]) { - assert(0); - goto err; - } - - if (NULL != bgq_ep->rx.poll.muspi_recfifo) { - assert(0); - goto err; - } - - bgq_ep->rx.poll.muspi_recfifo = bgq_domain->rx.rfifo[fi_bgq_uid_get_rx(bgq_ep->rx.self.uid.fi)]; -#ifdef FI_BGQ_TRACE - fprintf(stderr,"fi_bgq_ep_rx_init recfifo set to %u created addr:\n",fi_bgq_uid_get_rx(bgq_ep->rx.self.uid.fi)); - FI_BGQ_ADDR_DUMP(&bgq_ep->rx.self.fi); -#endif - - bgq_ep->rx.poll.bat = bgq_domain->bat; - - /* **** acquire the mu lock (node scoped) **** */ - l2atomic_lock_acquire(&bgq_domain->mu.lock); - - /* create an injection fifo for rendezvous and ack messages */ - - const int num_fifos_to_allocate = 1; - if (num_fifos_to_allocate != - fi_bgq_spi_injfifo_init(&bgq_ep->rx.poll.injfifo, - &bgq_ep->rx.poll.injfifo_subgroup, - num_fifos_to_allocate, - FI_BGQ_RX_SIZE, - sizeof(union fi_bgq_mu_packet_payload), - 0 /* is_remote_get */, - 1 /* is_top_down */)) { - assert(0); - goto err; - } - - /* - * 
fi_atomic*() descriptor models - */ - { - MUHWI_Descriptor_t * desc = NULL; - - /* - * fi_atomic*() direct-put fetch response model - */ - desc = &bgq_ep->rx.poll.atomic_dput_model; - MUSPI_DescriptorZeroOut(desc); - - desc->Half_Word0.Prefetch_Only = - MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; - desc->Half_Word1.Interrupt = - MUHWI_DESCRIPTOR_DO_NOT_INTERRUPT_ON_PACKET_ARRIVAL; - desc->PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type = - MUHWI_PT2PT_DATA_PACKET_TYPE; - desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Byte3 = - MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = - MUHWI_PACKET_TYPE_PUT; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Pacing = - MUHWI_PACKET_DIRECT_PUT_IS_NOT_PACED; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Counter_Base_Address_Id = - FI_BGQ_MU_BAT_ID_GLOBAL; - - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Counter_Offset = - fi_bgq_node_bat_read(&bgq_domain->fabric->node, - FI_BGQ_MU_BAT_ID_COUNTER); - - /* specified at injection time */ - desc->Torus_FIFO_Map = -1; - desc->Pa_Payload = 0; - desc->Message_Length = 0; - desc->PacketHeader.NetworkHeader.pt2pt.Destination.Destination.Destination = -1; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Payload_Base_Address_Id = -1; - MUSPI_SetRecPayloadBaseAddressInfo(desc, FI_BGQ_MU_BAT_ID_GLOBAL, 0); - - /* - * fi_atomic*() direct-put fi_cntr completion model - */ - desc = &bgq_ep->rx.poll.atomic_cntr_update_model[0]; /* intranode .. 
TODO - use an enum */ - MUSPI_DescriptorZeroOut(desc); - - desc->Half_Word0.Prefetch_Only = - MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; - desc->Half_Word1.Interrupt = - MUHWI_DESCRIPTOR_DO_NOT_INTERRUPT_ON_PACKET_ARRIVAL; - desc->PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type = - MUHWI_PT2PT_DATA_PACKET_TYPE; - desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Byte3 = - MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = - MUHWI_PACKET_TYPE_PUT; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Pacing = - MUHWI_PACKET_DIRECT_PUT_IS_NOT_PACED; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Counter_Offset = 0; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Counter_Base_Address_Id = - FI_BGQ_MU_BAT_ID_COUNTER; - - desc->Pa_Payload = bgq_ep->tx.read.global_one_paddr; - desc->Message_Length = 8; - desc->Torus_FIFO_Map = - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_AM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_AP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_BM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_BP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_CM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_CP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_DM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_DP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_EM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_EP; - - MUSPI_SetRecPayloadBaseAddressInfo(desc, - FI_BGQ_MU_BAT_ID_GLOBAL, /* the bat id will be updated at injection time */ - MUSPI_GetAtomicAddress(0, - MUHWI_ATOMIC_OPCODE_STORE_ADD)); - - /* specified at injection time */ - desc->PacketHeader.NetworkHeader.pt2pt.Destination.Destination.Destination = -1; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Payload_Base_Address_Id = -1; - - /* initialize the "intranode" version of the descriptor model */ - bgq_ep->rx.poll.atomic_cntr_update_model[1] = *desc; /* internode .. 
TODO - use an enum */ - bgq_ep->rx.poll.atomic_cntr_update_model[1].Torus_FIFO_Map = - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL0 | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL1; - } - - /* - * initialize the remote-get descriptor models - used for - * the "rendezvous" protocol - */ - { - /* initialize the "internode" version of the descriptor model */ - MUHWI_Descriptor_t * desc = &bgq_ep->rx.poll.rzv.rget_model[0]; /* TODO - use an enum */ - MUSPI_DescriptorZeroOut(desc); - - desc->Half_Word0.Prefetch_Only = - MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; - desc->Half_Word1.Interrupt = - MUHWI_DESCRIPTOR_DO_NOT_INTERRUPT_ON_PACKET_ARRIVAL; - desc->Torus_FIFO_Map = - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_AM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_AP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_BM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_BP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_CM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_CP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_DM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_DP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_EM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_EP; - desc->PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type = - MUHWI_PT2PT_DATA_PACKET_TYPE; - desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Byte3 = - MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = - MUHWI_PACKET_TYPE_GET; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - - /* specified at injection time */ - desc->Pa_Payload = 0; - desc->Message_Length = sizeof(MUHWI_Descriptor_t); - desc->PacketHeader.NetworkHeader.pt2pt.Destination.Destination.Destination = -1; - desc->PacketHeader.messageUnitHeader.Packet_Types.Remote_Get.Rget_Inj_FIFO_Id = -1; - - /* initialize the "intranode" version of the descriptor model */ - bgq_ep->rx.poll.rzv.rget_model[1] = *desc; /* TODO - use an enum */ - bgq_ep->rx.poll.rzv.rget_model[1].Torus_FIFO_Map = - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL0 | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL1; - } - - /* - * initialize the direct-put descriptor models - 
used - * to transfer the application data in the "rendezvous" - * protocol - */ - { - /* initialize the "internode" version of the descriptor model */ - MUHWI_Descriptor_t * desc = &bgq_ep->rx.poll.rzv.dput_model[0]; /* TODO - use an enum */ - MUSPI_DescriptorZeroOut(desc); - - desc->Half_Word0.Prefetch_Only = - MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; - desc->Half_Word1.Interrupt = - MUHWI_DESCRIPTOR_DO_NOT_INTERRUPT_ON_PACKET_ARRIVAL; - desc->Torus_FIFO_Map = - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_AM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_AP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_BM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_BP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_CM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_CP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_DM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_DP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_EM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_EP; - desc->PacketHeader.NetworkHeader.pt2pt.Destination = - fi_bgq_uid_get_destination(bgq_ep->rx.self.uid.fi); - desc->PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type = - MUHWI_PT2PT_DATA_PACKET_TYPE; - desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Byte3 = - MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = - MUHWI_PACKET_TYPE_PUT; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Payload_Base_Address_Id = - FI_BGQ_MU_BAT_ID_GLOBAL; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Pacing = - MUHWI_PACKET_DIRECT_PUT_IS_NOT_PACED; - - /* specified at injection time */ - desc->Pa_Payload = 0; - desc->Message_Length = 0; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Put_Offset_MSB = 0; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Put_Offset_LSB = 0; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Counter_Base_Address_Id = -1; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Counter_Offset = 0; - - /* initialize the "intranode" version 
of the descriptor model */ - bgq_ep->rx.poll.rzv.dput_model[1] = *desc; /* TODO - use an enum */ - bgq_ep->rx.poll.rzv.dput_model[1].Torus_FIFO_Map = - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL0 | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL1; - } - - /* - * initialize the 'local completion' direct-put - * descriptor model - used to zero the byte counter - * of the send operation on the origin for the - * "rendezvous" protocol - * - * see also -> fi_bgq_cq::local_completion_model - */ - { - MUHWI_Descriptor_t * desc = &bgq_ep->rx.poll.rzv.dput_completion_model; - MUSPI_DescriptorZeroOut(desc); - - desc->Half_Word0.Prefetch_Only = MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; - desc->Half_Word1.Interrupt = MUHWI_DESCRIPTOR_DO_NOT_INTERRUPT_ON_PACKET_ARRIVAL; - desc->Message_Length = sizeof(uint64_t); - desc->Torus_FIFO_Map = - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL0 | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL1; - desc->PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type = - MUHWI_PT2PT_DATA_PACKET_TYPE; - desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Byte3 = - MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = - MUHWI_PACKET_TYPE_PUT; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Payload_Base_Address_Id = - FI_BGQ_MU_BAT_ID_COUNTER; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Pacing = - MUHWI_PACKET_DIRECT_PUT_IS_NOT_PACED; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Counter_Base_Address_Id = - FI_BGQ_MU_BAT_ID_COUNTER; - - desc->PacketHeader.NetworkHeader.pt2pt.Destination.Destination.Destination = 0; - - /* specified at injection time */ - desc->Pa_Payload = 0; - } - - { - MUHWI_Descriptor_t * desc = &bgq_ep->rx.poll.rzv.multi_recv_ack_model; - MUSPI_DescriptorZeroOut(desc); - - desc->Half_Word0.Prefetch_Only = - MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; - desc->Half_Word1.Interrupt = - 
MUHWI_DESCRIPTOR_DO_NOT_INTERRUPT_ON_PACKET_ARRIVAL; - desc->Message_Length = 0; - desc->Pa_Payload = 0; - desc->PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type = - MUHWI_PT2PT_DATA_PACKET_TYPE; - desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Byte3 = - MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = - MUHWI_PACKET_TYPE_FIFO; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - desc->PacketHeader.NetworkHeader.pt2pt.Destination = - fi_bgq_uid_get_destination(bgq_ep->rx.self.uid.fi); - - desc->PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id = - fi_bgq_uid_get_rx(bgq_ep->rx.self.uid.fi); - - union fi_bgq_mu_packet_hdr * hdr = (union fi_bgq_mu_packet_hdr *) &desc->PacketHeader; - fi_bgq_mu_packet_type_set(hdr, FI_BGQ_MU_PACKET_TYPE_ACK); - - /* specified at injection time */ - desc->Torus_FIFO_Map = 0; - hdr->ack.context = 0; - } - - /* - * initialize the direct-put descriptor models used to zero an arbitrary - * 8 byte variable - used to implement FI_DELIVERY_COMPLETE - */ - { - /* initialize the "internode" version of the descriptor model */ - MUHWI_Descriptor_t * desc = &bgq_ep->rx.poll.ack_model[0]; /* TODO - use an enum */ - MUSPI_DescriptorZeroOut(desc); - - desc->Half_Word0.Prefetch_Only = - MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; - desc->Half_Word1.Interrupt = - MUHWI_DESCRIPTOR_DO_NOT_INTERRUPT_ON_PACKET_ARRIVAL; - desc->Torus_FIFO_Map = - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_AM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_AP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_BM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_BP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_CM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_CP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_DM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_DP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_EM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_EP; - desc->PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type = - MUHWI_PT2PT_DATA_PACKET_TYPE; - desc->PacketHeader.NetworkHeader.pt2pt.Byte3.Byte3 = - 
MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = - MUHWI_PACKET_TYPE_PUT; - desc->PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Pacing = - MUHWI_PACKET_DIRECT_PUT_IS_NOT_PACED; - desc->Pa_Payload = bgq_domain->zero.paddr; - desc->Message_Length = sizeof(uint64_t); - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Counter_Base_Address_Id = - FI_BGQ_MU_BAT_ID_COUNTER; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Counter_Offset = 0; - - /* specified at injection time - not used for local transfers */ - desc->PacketHeader.NetworkHeader.pt2pt.Destination.Destination.Destination = -1; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Put_Offset_MSB = 0; - desc->PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Put_Offset_LSB = 0; - desc->PacketHeader.NetworkHeader.pt2pt.Destination.Destination.Destination = -1; - - /* initialize the "intranode" version of the descriptor model */ - bgq_ep->rx.poll.ack_model[1] = *desc; /* TODO - use an enum */ - bgq_ep->rx.poll.ack_model[1].Torus_FIFO_Map = - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL0 | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL1; - } - - - /* allocate the l2atomic fifos for match information and control information */ - { - struct l2atomic_fifo_data * memptr = NULL; - size_t bytes = (sizeof(struct l2atomic_fifo_data) + sizeof(uint64_t) * bgq_ep->recv_cq->size) * 2; - bytes += sizeof(struct l2atomic_fifo_data) + sizeof(uint64_t) * FI_BGQ_L2FIFO_CTL_SIZE; - - if (posix_memalign((void **)&memptr, 32, bytes)) { - errno = FI_ENOMEM; - goto err; - } - memset((void*)memptr, 0, bytes); - bgq_ep->rx.l2atomic_memptr = (void*)memptr; - - l2atomic_fifo_initialize(&bgq_ep->rx.poll.rfifo[IS_TAG].match, - &bgq_ep->rx.post.match[IS_TAG], - memptr, bgq_ep->recv_cq->size); - - memptr = (struct l2atomic_fifo_data *)((uintptr_t)memptr + sizeof(struct l2atomic_fifo_data) + 
sizeof(uint64_t) * bgq_ep->recv_cq->size); - l2atomic_fifo_initialize(&bgq_ep->rx.poll.rfifo[IS_MSG].match, - &bgq_ep->rx.post.match[IS_MSG], - memptr, bgq_ep->recv_cq->size); - - memptr = (struct l2atomic_fifo_data *)((uintptr_t)memptr + sizeof(struct l2atomic_fifo_data) + sizeof(uint64_t) * bgq_ep->recv_cq->size); - l2atomic_fifo_initialize(&bgq_ep->rx.poll.control, - &bgq_ep->rx.post.control, - memptr, FI_BGQ_L2FIFO_CTL_SIZE); - } - - /* **** release the mu lock (node scoped) **** */ - l2atomic_lock_release(&bgq_domain->mu.lock); - - bgq_ep->rx.state = FI_BGQ_EP_INITITALIZED_ENABLED; - return 0; -err: - return 1; -} - -static int fi_bgq_open_command_queues(struct fi_bgq_ep *bgq_ep) -{ - struct fi_bgq_domain *bgq_domain; - - if (!bgq_ep) { - errno = FI_EINVAL; - return -errno; - } - - bgq_domain = bgq_ep->domain; - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"fi_bgq_open_command_queues ofi_send_allowed(bgq_ep->tx.caps) is %016lx ofi_rma_initiate_allowed(bgq_ep->tx.caps) is %016lx ofi_recv_allowed(bgq_ep->rx.caps) is %016lx ofi_rma_target_allowed(bgq_ep->rx.caps) is %016lx\n",ofi_send_allowed(bgq_ep->tx.caps),ofi_rma_initiate_allowed(bgq_ep->tx.caps),ofi_recv_allowed(bgq_ep->rx.caps),ofi_rma_target_allowed(bgq_ep->rx.caps)); - fflush(stderr); -#endif - - if (ofi_send_allowed(bgq_ep->tx.caps) || ofi_rma_initiate_allowed(bgq_ep->tx.caps)) { - - /* verify there is a completion queue associated with the tx context */ - if (!bgq_ep->send_cq) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "No completion queue bound to send context"); - goto err; - } - - /* verify there is a shared tx context associated with the endpoint - if so configured */ - if (FI_SHARED_CONTEXT == bgq_ep->tx.index && !bgq_ep->tx.stx) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "No shared tx context bound to endpoint as configured"); - goto err; - } - - - if (fi_bgq_ep_tx_init(bgq_ep, bgq_domain)) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "Too many tx 
contexts"); - goto err; - } - } - - if (ofi_recv_allowed(bgq_ep->rx.caps) || ofi_rma_target_allowed(bgq_ep->rx.caps)) { - - /* verify there is a completion queue associated with the rx context */ - if (!bgq_ep->recv_cq) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "No completion queue bound to receive context"); - goto err; - } - - if (FI_SHARED_CONTEXT == bgq_ep->rx.index) { - /* verify there is a shared rx context associated with the endpoint */ - if (!bgq_ep->rx.srx) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "No shared rx context bound to endpoint as configured"); - goto err; - } - - } else if (bgq_ep->rx.index >= bgq_domain->rx.max) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "Invalid rx context index (exceeds maximum)"); - goto err; - - } else if (bgq_ep->rx.index < 0) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "Invalid rx context index (exceeds minimum)"); - goto err; - - } else if (NULL != bgq_domain->rx.ctx[bgq_ep->rx.index]) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "Invalid rx context index (existing allocation)"); - goto err; - } - - if (0 != fi_bgq_ep_rx_init(bgq_ep)) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "Error during rx context initialization"); - goto err; - } - - bgq_domain->rx.ctx[bgq_ep->rx.index] = bgq_ep; - } - - return 0; -err: - return -1; -} - -static int fi_bgq_enable_ep(struct fid_ep *ep) -{ - int ret; - struct fi_bgq_ep *bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - ret = fi_bgq_check_ep(bgq_ep); - if (ret) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "ep enable failed\n"); - return -errno; - } - - ret = fi_bgq_open_command_queues(bgq_ep); - if (ret) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "failed to assign command queues\n"); - return -errno; - } - - ret = fi_bgq_enable_msg_ops(bgq_ep); - if (ret) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "failed 
to enable msg ops\n"); - return -errno; - } - - ret = fi_bgq_enable_rma_ops(bgq_ep); - if (ret) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "failed to enable rma ops\n"); - return -errno; - } - - ret = fi_bgq_enable_atomic_ops(bgq_ep); - if (ret) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "failed to enable rma ops\n"); - return -errno; - } - - ret = fi_bgq_enable_tagged_ops(bgq_ep); - if (ret) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "failed to enable rma ops\n"); - return -errno; - } - - bgq_ep->state = FI_BGQ_EP_INITITALIZED_ENABLED; - - /* create an async progress thread for the receive context for FI_PROGRESS_AUTO mode*/ - if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_AUTO) - if (ofi_recv_allowed(bgq_ep->rx.caps)) { - - if (bgq_ep->domain->rx.count == 1) { - - /* - * This is the first endpoint to be assigned to a - * progress thread. The first progress thread is - * started at domain initialization time - */ - - ret = fi_bgq_progress_ep_enable(&bgq_ep->domain->progress.thread[0], bgq_ep); - if (ret) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "failed to enable async progress on endpoint\n"); - return -errno; - } - - } else if (bgq_ep->domain->progress.num_threads_active < bgq_ep->domain->progress.max_threads) { - - const unsigned n = bgq_ep->domain->progress.num_threads_active; - ret = fi_bgq_progress_enable(bgq_ep->domain, n); - if (ret) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "failed to enable progress thread\n"); - return -errno; - } - - ret = fi_bgq_progress_ep_enable(&bgq_ep->domain->progress.thread[n], bgq_ep); - if (ret) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "failed to enable async progress on endpoint\n"); - return -errno; - } - - } else { - - /* - * Assign endpoint to progress thread in a round-robin fashion. 
- * - * TODO - better assignment algorithm - */ - - const unsigned t = bgq_ep->domain->rx.count % bgq_ep->domain->progress.max_threads; - ret = fi_bgq_progress_ep_enable(&bgq_ep->domain->progress.thread[t], bgq_ep); - if (ret) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "failed to enable async progress on endpoint\n"); - return -errno; - } - - } - } - - return 0; -} - -static int fi_bgq_control_ep(fid_t fid, int command, void *arg) -{ - struct fid_ep *ep; - ep = container_of(fid, struct fid_ep, fid); - - switch (command) { - case FI_ENABLE: - return fi_bgq_enable_ep(ep); - default: - return -FI_ENOSYS; - } - - return 0; -} - -static int fi_bgq_getopt_ep(fid_t fid, int level, int optname, - void *optval, size_t *optlen) -{ - struct fi_bgq_ep *bgq_ep = container_of(fid, struct fi_bgq_ep, ep_fid); - - if (level != FI_OPT_ENDPOINT) - return -FI_ENOPROTOOPT; - - switch (optname) { - case FI_OPT_MIN_MULTI_RECV: - *(size_t *)optval = bgq_ep->rx.min_multi_recv; - *optlen = sizeof(size_t); - break; - case FI_OPT_CM_DATA_SIZE: - *(size_t *)optval = 0; - *optlen = sizeof(size_t); - break; - default: - return -FI_ENOPROTOOPT; - } - - return 0; -} - -static int fi_bgq_setopt_ep(fid_t fid, int level, int optname, - const void *optval, size_t optlen) -{ - struct fi_bgq_ep *bgq_ep = container_of(fid, struct fi_bgq_ep, ep_fid); - - if (level != FI_OPT_ENDPOINT) - return -FI_ENOPROTOOPT; - - switch (optname) { - case FI_OPT_MIN_MULTI_RECV: - bgq_ep->rx.min_multi_recv = *(size_t *)optval; - bgq_ep->rx.poll.min_multi_recv = bgq_ep->rx.min_multi_recv; - break; - - default: - return -FI_ENOPROTOOPT; - } - - return 0; -} - -static -ssize_t fi_bgq_cancel(fid_t fid, void *context) -{ - struct fi_bgq_ep *bgq_ep = container_of(fid, struct fi_bgq_ep, ep_fid); - - if (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_MANUAL) { /* TODO - FI_PROGRESS_AUTO + 64 ppn */ - const enum fi_threading threading = bgq_ep->domain->threading; - const int lock_required = - (threading == 
FI_THREAD_FID) || - (threading == FI_THREAD_UNSPEC) || - (threading == FI_THREAD_SAFE); - - int ret; - ret = fi_bgq_lock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - fi_bgq_ep_progress_manual_cancel(bgq_ep, (const uint64_t)context); - - ret = fi_bgq_unlock_if_required(&bgq_ep->lock, lock_required); - if (ret) return ret; - - } else { - - /* context must be 8 byte aligned */ - assert(((uint64_t)context & 0x07ull) == 0); - uint64_t value = (uint64_t)context >> 3; - - struct l2atomic_fifo_producer * fifo = &bgq_ep->rx.post.control; - while (0 != l2atomic_fifo_produce(fifo, value)); - } - - return 0; -} - -static struct fi_ops fi_bgq_fi_ops = { - .size = sizeof(struct fi_ops), - .close = fi_bgq_close_ep, - .bind = fi_bgq_bind_ep, - .control = fi_bgq_control_ep, - .ops_open = fi_no_ops_open -}; - -static struct fi_ops_ep fi_bgq_ep_ops = { - .size = sizeof(struct fi_ops_ep), - .cancel = fi_bgq_cancel, - .getopt = fi_bgq_getopt_ep, - .setopt = fi_bgq_setopt_ep, - .tx_ctx = fi_no_tx_ctx, - .rx_ctx = fi_no_rx_ctx, - .rx_size_left = fi_no_rx_size_left, - .tx_size_left = fi_no_tx_size_left -}; - -int fi_bgq_alloc_default_rx_attr(struct fi_rx_attr **rx_attr) -{ - struct fi_rx_attr *attr; - - attr = calloc(1, sizeof(*attr)); - if (!attr) - goto err; - - attr->caps = FI_MSG | FI_RMA | FI_TAGGED | FI_ATOMIC | FI_RECV | FI_REMOTE_READ | FI_REMOTE_WRITE | FI_NAMED_RX_CTX | FI_DIRECTED_RECV | FI_MULTI_RECV | FI_SOURCE; - attr->mode = FI_ASYNC_IOV; - attr->op_flags = 0; - attr->msg_order = FI_BGQ_DEFAULT_MSG_ORDER; - attr->comp_order = FI_ORDER_NONE; - attr->total_buffered_recv = FI_BGQ_TOTAL_BUFFERED_RECV; - attr->size = FI_BGQ_RX_SIZE; - attr->iov_limit = SIZE_MAX; - - *rx_attr = attr; - - return 0; -err: - errno = FI_EINVAL; - return -errno; -} - -int fi_bgq_check_rx_attr(const struct fi_rx_attr *attr) -{ - /* TODO: more error checking of rx_attr */ -#ifdef TODO - if (attr->total_buffered_recv > FI_BGQ_TOTAL_BUFFERED_RECV) { - FI_LOG(fi_bgq_global.prov, 
FI_LOG_DEBUG, FI_LOG_EP_DATA, - "unavailable [bad total_buffered_recv (%lu)]", - attr->total_buffered_recv); - goto err; - } -#endif - if (attr->comp_order && attr->comp_order == FI_ORDER_STRICT) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "unavailable [bad rx comp_order (%lx)] ", - attr->comp_order); - goto err; - } - - - return 0; -err: - errno = FI_EINVAL; - return -errno; -} - -int fi_bgq_alloc_default_tx_attr(struct fi_tx_attr **tx_attr) -{ - struct fi_tx_attr *attr; - - attr = calloc(1, sizeof(*attr)); - if (!attr) - goto err; - - attr->caps = FI_MSG | FI_RMA | FI_TAGGED | FI_ATOMIC | FI_SEND | FI_READ | FI_WRITE; - attr->mode = FI_CONTEXT | FI_ASYNC_IOV; - attr->op_flags = FI_TRANSMIT_COMPLETE; - attr->msg_order = FI_BGQ_DEFAULT_MSG_ORDER; - attr->comp_order = FI_ORDER_NONE; - attr->inject_size = FI_BGQ_INJECT_SIZE; - attr->size = FI_BGQ_TX_SIZE; - attr->iov_limit = SIZE_MAX; - attr->rma_iov_limit = 1; - - *tx_attr = attr; - - return 0; -err: - errno = FI_EINVAL; - return -errno; -} - -int fi_bgq_check_tx_attr(const struct fi_tx_attr *attr) -{ - if (attr->inject_size > FI_BGQ_INJECT_SIZE) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "unavailable [bad inject_size (%lu)]", - attr->inject_size); - goto err; - } - /* TODO: more error checking of tx_attr */ - - if (attr->comp_order && attr->comp_order == FI_ORDER_STRICT) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "unavailable [bad tx comp_order (%lx)] ", - attr->comp_order); - goto err; - } - - return 0; -err: - errno = FI_EINVAL; - return -errno; -} - -int fi_bgq_alloc_default_ep_attr(struct fi_ep_attr **ep_attr) -{ - struct fi_ep_attr *attr; - - attr = calloc(1, sizeof(*attr)); - if (!attr) - goto err; - - uint32_t ppn = Kernel_ProcessCount(); - - /* - * See: fi_bgq_stx_init() for the number of mu injection fifos - * allocated for each tx context. Each rx context uses one - * mu injection fifo and one mu reception fifo. 
- */ - const unsigned tx_ctx_cnt = (((BGQ_MU_NUM_INJ_FIFO_GROUPS-1) * BGQ_MU_NUM_INJ_FIFOS_PER_GROUP) / 3) / ppn; - - /* - * The number of rx contexts on a node is the minimum of: - * 1. number of mu injection fifos on the node not used by tx contexts - * 2. total number mu reception fifos on the node - */ - const unsigned rx_ctx_cnt = MIN((((BGQ_MU_NUM_INJ_FIFO_GROUPS-1) * BGQ_MU_NUM_INJ_FIFOS_PER_GROUP) - (tx_ctx_cnt * ppn)), ((BGQ_MU_NUM_REC_FIFO_GROUPS-1) * BGQ_MU_NUM_REC_FIFOS_PER_GROUP)) / ppn; - - attr->type = FI_EP_RDM; - attr->protocol = FI_BGQ_PROTOCOL; - attr->protocol_version = FI_BGQ_PROTOCOL_VERSION; - attr->max_msg_size = FI_BGQ_MAX_MSG_SIZE; - attr->msg_prefix_size = 0; - attr->max_order_raw_size= FI_BGQ_MAX_ORDER_RAW_SIZE; - attr->max_order_war_size= FI_BGQ_MAX_ORDER_WAR_SIZE; - attr->max_order_waw_size= FI_BGQ_MAX_ORDER_WAW_SIZE; - attr->mem_tag_format = FI_BGQ_MEM_TAG_FORMAT; - attr->tx_ctx_cnt = tx_ctx_cnt; - attr->rx_ctx_cnt = rx_ctx_cnt; - - *ep_attr = attr; - - return 0; -err: - errno = FI_EINVAL; - return -errno; -} - -int fi_bgq_check_ep_attr(const struct fi_ep_attr *attr) -{ - switch(attr->protocol) { - case FI_PROTO_UNSPEC: - case FI_BGQ_PROTOCOL: - break; - default: - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "unavailable [bad protocol (%u)]", - attr->protocol); - goto err; - } - if (attr->max_msg_size > FI_BGQ_MAX_MSG_SIZE) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "unavailable [bad max_msg_size (%lu)]", - attr->max_msg_size); - goto err; - } - if (attr->max_order_raw_size > FI_BGQ_MAX_ORDER_RAW_SIZE) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "unavailable [bad max_order_raw_size (%lu)", - attr->max_order_raw_size); - goto err; - } - if (attr->max_order_war_size > FI_BGQ_MAX_ORDER_WAR_SIZE) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "unavailable [bad max_order_war_size (%lu)", - attr->max_order_war_size); - goto err; - } - if (attr->max_order_waw_size > 
FI_BGQ_MAX_ORDER_WAW_SIZE) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "unavailable [bad max_order_waw_size (%lu)", - attr->max_order_waw_size); - goto err; - } - if (attr->mem_tag_format && - attr->mem_tag_format & ~FI_BGQ_MEM_TAG_FORMAT) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "unavailable [bad mem_tag_format (%lx)", - attr->mem_tag_format); - goto err; - } - /* TODO: what msg orders do we not support? */ - - return 0; -err: - errno = FI_EINVAL; - return -errno; -} - -int fi_bgq_endpoint_rx_tx (struct fid_domain *dom, struct fi_info *info, - struct fid_ep **ep, void *context, const ssize_t rx_index, const ssize_t tx_index) -{ -#ifdef FI_BGQ_TRACE - fprintf(stderr,"fi_bgq_endpoint_rx_tx called with rx_index %ld tx_index %ld\n",rx_index,tx_index); - fflush(stderr); -#endif - - int ret; - struct fi_bgq_ep *bgq_ep = NULL; - struct fi_bgq_domain *bgq_domain = NULL; - - if (!info || !dom) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "no info/domain supplied\n"); - errno = FI_EINVAL; - goto err; - } - - ret = fi_bgq_fid_check(&dom->fid, FI_CLASS_DOMAIN, "domain"); - if (ret) return ret; - - ret = fi_bgq_check_info(info); - if (ret) - return ret; - - void *mem = NULL; - mem = malloc(sizeof(struct fi_bgq_ep) + FI_BGQ_CACHE_LINE_SIZE); - if (!mem) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, - "no memory for endpoint"); - errno = FI_ENOMEM; - goto err; - } - bgq_ep = (struct fi_bgq_ep *)(((uintptr_t)mem + FI_BGQ_CACHE_LINE_SIZE) & ~(FI_BGQ_CACHE_LINE_SIZE - 1)); - memset(bgq_ep, 0, sizeof(struct fi_bgq_ep)); - bgq_ep->mem = mem; - - bgq_ep->ep_fid.fid.fclass = FI_CLASS_EP; - bgq_ep->ep_fid.fid.context = context; - bgq_ep->ep_fid.fid.ops = &fi_bgq_fi_ops; - bgq_ep->ep_fid.ops = &fi_bgq_ep_ops; - - ret = fi_bgq_init_cm_ops((struct fid_ep *)&(bgq_ep->ep_fid), info); - if (ret) - goto err; - - ret = fi_bgq_init_msg_ops(bgq_ep, info); - if (ret) - goto err; - - ret = fi_bgq_init_rma_ops(bgq_ep, info); - 
if (ret) - goto err; - - ret = fi_bgq_init_tagged_ops(bgq_ep, info); - if (ret) - goto err; - - ret = fi_bgq_init_atomic_ops(bgq_ep, info); - if (ret) - goto err; - - bgq_ep->rx.index = rx_index; - bgq_ep->tx.index = tx_index; - - if (rx_index >= 0) { - bgq_ep->rx.caps = info->rx_attr ? info->rx_attr->caps : info->caps; - bgq_ep->rx.caps |= FI_RECV; - bgq_ep->rx.mode = info->rx_attr ? info->rx_attr->mode : 0; - bgq_ep->rx.op_flags = info->rx_attr ? info->rx_attr->op_flags : 0; - bgq_ep->rx.total_buffered_recv = info->rx_attr ? - info->rx_attr->total_buffered_recv : 0; - } - else { - bgq_ep->rx.caps = 0; - bgq_ep->rx.mode = 0; - bgq_ep->rx.op_flags = 0; - bgq_ep->rx.total_buffered_recv = 0; - } - - if (tx_index >= 0) { - bgq_ep->tx.caps = info->tx_attr ? info->tx_attr->caps : info->caps; - bgq_ep->tx.mode = info->tx_attr ? info->tx_attr->mode : 0; - bgq_ep->tx.op_flags = info->tx_attr ? info->tx_attr->op_flags : 0; - } - else { - bgq_ep->tx.caps = 0; - bgq_ep->tx.mode = 0; - bgq_ep->tx.op_flags = 0; - } - - - bgq_domain = container_of(dom, struct fi_bgq_domain, domain_fid); - bgq_ep->domain = bgq_domain; - fi_bgq_ref_inc(&bgq_domain->ref_cnt, "domain"); - - *ep = &bgq_ep->ep_fid; - - return 0; -err: - fi_bgq_finalize_cm_ops(bgq_ep); - fi_bgq_finalize_msg_ops(bgq_ep); - fi_bgq_finalize_rma_ops(bgq_ep); - fi_bgq_finalize_tagged_ops(bgq_ep); - fi_bgq_finalize_atomic_ops(bgq_ep); - if (bgq_domain) - fi_bgq_ref_dec(&bgq_domain->ref_cnt, "domain"); - if (bgq_ep) - free(bgq_ep->mem); - return -errno; -} - -int fi_bgq_endpoint (struct fid_domain *dom, struct fi_info *info, - struct fid_ep **ep, void *context) -{ - ssize_t rx_index = 0; - ssize_t tx_index = 0; - - if (info && info->ep_attr) { - - if (FI_SHARED_CONTEXT == info->ep_attr->tx_ctx_cnt) - tx_index = FI_SHARED_CONTEXT; - - if (FI_SHARED_CONTEXT == info->ep_attr->rx_ctx_cnt) - rx_index = FI_SHARED_CONTEXT; - } - - return fi_bgq_endpoint_rx_tx(dom, info, ep, context, rx_index, tx_index); -} - -/* 
************************************************************************* */ -/* These functions are only be used in FI_PROGRESS_MANUAL mode */ -/* ************************************************************************* */ - -void fi_bgq_ep_progress_manual_cancel (struct fi_bgq_ep * bgq_ep, const uint64_t cancel_context) { - - if (bgq_ep->rx.caps & FI_MSG) { - cancel_match_queue(bgq_ep, 1, cancel_context); - } - - if (bgq_ep->rx.caps & FI_TAGGED) { - cancel_match_queue(bgq_ep, 0, cancel_context); - } -} - - -int fi_bgq_ep_progress_manual_recv (struct fi_bgq_ep *bgq_ep, - const uint64_t is_msg, - union fi_bgq_context * context, - const uint64_t rx_op_flags, - const uint64_t is_context_ext) { - - assert(bgq_ep->rx.poll.injfifo.muspi_injfifo); - return process_mfifo_context(bgq_ep, is_msg, 0, context, rx_op_flags, is_context_ext, 1); -} - - -int fi_bgq_ep_progress_manual_recv_fast (struct fi_bgq_ep *bgq_ep, - const uint64_t is_msg, - union fi_bgq_context * context) { - - return process_mfifo_context(bgq_ep, is_msg, 0, context, 0, 0, 1); -} - - -int fi_bgq_ep_progress_manual (struct fi_bgq_ep *bgq_ep) { - - - poll_rfifo(bgq_ep, 1); - - return 0; -} diff --git a/prov/bgq/src/fi_bgq_fabric.c b/prov/bgq/src/fi_bgq_fabric.c deleted file mode 100644 index c51ec4c1e27..00000000000 --- a/prov/bgq/src/fi_bgq_fabric.c +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "rdma/bgq/fi_bgq.h" -#include - -#include -#include -#include -#include - -static int fi_bgq_close_fabric(struct fid *fid) -{ - int ret; - struct fi_bgq_fabric *bgq_fabric = - container_of(fid, struct fi_bgq_fabric, fabric_fid); - - ret = fi_bgq_fid_check(fid, FI_CLASS_FABRIC, "fabric"); - if (ret) - return ret; - - ret = fi_bgq_ref_finalize(&bgq_fabric->ref_cnt, "fabric"); - if (ret) - return ret; - - free(bgq_fabric); - return 0; -} - -static struct fi_ops fi_bgq_fi_ops = { - .size = sizeof(struct fi_ops), - .close = fi_bgq_close_fabric, - .bind = fi_no_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open -}; - -static struct fi_ops_fabric fi_bgq_ops_fabric = { - .size = sizeof(struct fi_ops_fabric), - .domain = fi_bgq_domain, - .passive_ep = fi_no_passive_ep, - .eq_open = fi_no_eq_open -}; - -int fi_bgq_check_fabric_attr(const struct fi_fabric_attr *attr) -{ - if (attr->name) { - if (strcmp(attr->name, FI_BGQ_FABRIC_NAME)) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_FABRIC, - "attr->name (%s) doesn't match fabric (%s)\n", - attr->name, FI_BGQ_FABRIC_NAME); - errno = FI_EINVAL; - return -errno; - } - } - if (attr->prov_name) { - if (strcmp(attr->prov_name, FI_BGQ_PROVIDER_NAME)) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_FABRIC, - "attr->prov_name (%s) doesn't match prov (%s)\n", - attr->prov_name, FI_BGQ_PROVIDER_NAME); - errno = FI_EINVAL; - return -errno; - } - } - if (attr->prov_version) { - if (attr->prov_version != FI_BGQ_PROVIDER_VERSION) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_FABRIC, - "attr->prov_version (%u) doesn't match prov (%u) " - "backward/forward compatibility support not implemented\n", - attr->prov_version, FI_BGQ_PROVIDER_VERSION); - errno = FI_ENOSYS; - return -errno; - } - } - return 0; -} - -int fi_bgq_fabric(struct fi_fabric_attr *attr, - struct fid_fabric **fabric, void *context) -{ - int ret; - struct fi_bgq_fabric *bgq_fabric; - - if (attr) { - if (attr->fabric) { - 
FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_FABRIC, - "attr->fabric only valid on getinfo\n"); - errno = FI_EINVAL; - return -errno; - } - - ret = fi_bgq_check_fabric_attr(attr); - if (ret) - return ret; - } - - bgq_fabric = calloc(1, sizeof(*bgq_fabric)); - if (!bgq_fabric) - goto err; - - bgq_fabric->fabric_fid.fid.fclass = FI_CLASS_FABRIC; - bgq_fabric->fabric_fid.fid.context = context; - bgq_fabric->fabric_fid.fid.ops = &fi_bgq_fi_ops; - bgq_fabric->fabric_fid.ops = &fi_bgq_ops_fabric; - - ret = fi_bgq_node_init(&bgq_fabric->node); - if (ret) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_FABRIC, - "error initializing the bgq node manager\n"); - errno = FI_EOTHER; - goto err; - } - - fi_bgq_mu_checks(); - - *fabric = &bgq_fabric->fabric_fid; - - fi_bgq_ref_init(&bgq_fabric->node, &bgq_fabric->ref_cnt, "fabric"); - - return 0; -err: - errno = FI_ENOMEM; - return -errno; -} diff --git a/prov/bgq/src/fi_bgq_info.c b/prov/bgq/src/fi_bgq_info.c deleted file mode 100644 index 35ae7bf34d4..00000000000 --- a/prov/bgq/src/fi_bgq_info.c +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "rdma/bgq/fi_bgq.h" - -int fi_bgq_set_default_info() -{ - struct fi_info *fi; - uint32_t ppn = Kernel_ProcessCount(); - - /* - * See: fi_bgq_stx_init() for the number of mu injection fifos - * allocated for each tx context. Each rx context uses one - * mu injection fifo and one mu reception fifo. - */ - const unsigned tx_ctx_cnt = (((BGQ_MU_NUM_INJ_FIFO_GROUPS-1) * BGQ_MU_NUM_INJ_FIFOS_PER_GROUP) / 3) / ppn; - - /* - * The number of rx contexts on a node is the minimum of: - * 1. number of mu injection fifos on the node not used by tx contexts - * 2. 
total number mu reception fifos on the node - */ - const unsigned rx_ctx_cnt = MIN((((BGQ_MU_NUM_INJ_FIFO_GROUPS-1) * BGQ_MU_NUM_INJ_FIFOS_PER_GROUP) - (tx_ctx_cnt * ppn)), ((BGQ_MU_NUM_REC_FIFO_GROUPS-1) * BGQ_MU_NUM_REC_FIFOS_PER_GROUP)) / ppn; - - fi = fi_dupinfo(NULL); - if (!fi) { - errno = FI_ENOMEM; - return -errno; - } - - fi_bgq_global.info = fi; - - *fi->tx_attr = (struct fi_tx_attr) { - .caps = FI_RMA | FI_ATOMIC | FI_TRANSMIT_COMPLETE, - .mode = FI_ASYNC_IOV, - .op_flags = FI_TRANSMIT_COMPLETE, - .msg_order = FI_ORDER_SAS | OFI_ORDER_WAW_SET | - OFI_ORDER_RAW_SET | OFI_ORDER_RAR_SET, - .comp_order = FI_ORDER_NONE, - .inject_size = FI_BGQ_INJECT_SIZE, - .size = FI_BGQ_TX_SIZE, - .iov_limit = SIZE_MAX, - .rma_iov_limit = 0 - }; - - *fi->rx_attr = (struct fi_rx_attr) { - .caps = FI_RMA | FI_ATOMIC | FI_NAMED_RX_CTX, - .mode = FI_ASYNC_IOV, - .op_flags = 0, - .msg_order = 0, - .comp_order = FI_ORDER_NONE, - .total_buffered_recv = FI_BGQ_TOTAL_BUFFERED_RECV, - .size = FI_BGQ_RX_SIZE, - .iov_limit = SIZE_MAX - }; - - *fi->ep_attr = (struct fi_ep_attr) { - .type = FI_EP_RDM, - .protocol = FI_BGQ_PROTOCOL, - .protocol_version = FI_BGQ_PROTOCOL_VERSION, - .max_msg_size = FI_BGQ_MAX_MSG_SIZE, - .msg_prefix_size = FI_BGQ_MAX_PREFIX_SIZE, - .max_order_raw_size = FI_BGQ_MAX_ORDER_RAW_SIZE, - .max_order_war_size = FI_BGQ_MAX_ORDER_WAR_SIZE, - .max_order_waw_size = FI_BGQ_MAX_ORDER_WAW_SIZE, - .mem_tag_format = FI_BGQ_MEM_TAG_FORMAT, - .tx_ctx_cnt = tx_ctx_cnt, - .rx_ctx_cnt = rx_ctx_cnt, - }; - - *fi->domain_attr = (struct fi_domain_attr) { - .domain = NULL, - .name = NULL, /* TODO: runtime query for name? 
*/ - .threading = FI_THREAD_FID, - .control_progress = FI_PROGRESS_MANUAL, - .data_progress = FI_BGQ_FABRIC_DIRECT_PROGRESS, - .resource_mgmt = FI_RM_DISABLED, - .av_type = FI_AV_MAP, - .mr_mode = FI_BGQ_FABRIC_DIRECT_MR, - .mr_key_size = 2, - .cq_data_size = FI_BGQ_REMOTE_CQ_DATA_SIZE, - .cq_cnt = 128 / ppn, - .ep_cnt = SIZE_MAX, - .tx_ctx_cnt = tx_ctx_cnt, - .rx_ctx_cnt = rx_ctx_cnt, - - .max_ep_tx_ctx = ((BGQ_MU_NUM_INJ_FIFO_GROUPS-1) * BGQ_MU_NUM_INJ_FIFOS_PER_GROUP) / ppn / 2, - .max_ep_rx_ctx = ((BGQ_MU_NUM_REC_FIFO_GROUPS-1) * BGQ_MU_NUM_REC_FIFOS_PER_GROUP) / ppn, - .max_ep_stx_ctx = ((BGQ_MU_NUM_INJ_FIFO_GROUPS-1) * BGQ_MU_NUM_INJ_FIFOS_PER_GROUP) / ppn / 2, - .max_ep_srx_ctx = 0 - }; - - *fi->fabric_attr = (struct fi_fabric_attr) { - .fabric = NULL, - .name = strdup(FI_BGQ_FABRIC_NAME), - .prov_name = strdup(FI_BGQ_PROVIDER_NAME), - .prov_version = FI_BGQ_PROVIDER_VERSION - }; - - fi->caps = FI_BGQ_DEFAULT_CAPS; - fi->mode = FI_ASYNC_IOV; - fi->mode |= (FI_CONTEXT); - fi->mode &= (~FI_LOCAL_MR); - fi->mode &= (~FI_MSG_PREFIX); - - fi->addr_format = FI_ADDR_BGQ; - fi->src_addrlen = 24; // includes null - fi->dest_addrlen = 24; // includes null - fi->dest_addr = NULL; - fi->next = NULL; - - return 0; -} diff --git a/prov/bgq/src/fi_bgq_init.c b/prov/bgq/src/fi_bgq_init.c deleted file mode 100644 index 9680d2f9677..00000000000 --- a/prov/bgq/src/fi_bgq_init.c +++ /dev/null @@ -1,365 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "rdma/bgq/fi_bgq.h" -#include "ofi_prov.h" - -#include -#include -#include -#include - -static int fi_bgq_init; -static int fi_bgq_count; - -int fi_bgq_check_info(const struct fi_info *info) -{ - int ret; - - /* TODO: check caps, mode */ - - if ((info->tx_attr) && ((info->tx_attr->caps | info->caps) != info->caps)) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_FABRIC, - "The tx_attr capabilities (0x%016lx) must be a subset of those requested of the associated endpoint (0x%016lx)", - info->tx_attr->caps, info->caps); - goto err; - } - - if ((info->rx_attr) && ((info->rx_attr->caps | info->caps) != info->caps)) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_FABRIC, - "The rx_attr capabilities (0x%016lx) must be a subset of those requested of the associated endpoint (0x%016lx)", - info->rx_attr->caps, info->caps); - goto err; - } - - - switch (info->addr_format) { - case FI_FORMAT_UNSPEC: - case FI_ADDR_BGQ: - break; - default: - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_FABRIC, - "unavailable [bad info->addr_format (%u)]", - info->addr_format); - goto err; - } - - if (info->tx_attr) { - ret = fi_bgq_check_tx_attr(info->tx_attr); - if (ret) - return ret; - } - - if (info->rx_attr) { - ret = fi_bgq_check_rx_attr(info->rx_attr); - if (ret) - return ret; - } - - if (info->ep_attr) { - ret = fi_bgq_check_ep_attr(info->ep_attr); - if (ret) - return ret; - } - - if (info->domain_attr) { - ret = fi_bgq_check_domain_attr(info->domain_attr); - if (ret) - return ret; - } - if (info->fabric_attr) { - ret = fi_bgq_check_fabric_attr(info->fabric_attr); - if (ret) - return ret; - } - - return 0; - -err: - errno = FI_ENODATA; - return -errno; -} - -static int fi_bgq_fillinfo(struct fi_info *fi, const char *node, - const char* service, const struct fi_info *hints, - uint64_t flags) -{ - int ret; - uint64_t caps; - - if (!fi) - goto err; - - if (!hints && !node && !service) - goto err; - - if (hints->dest_addr) { - FI_LOG(fi_bgq_global.prov, 
FI_LOG_DEBUG, FI_LOG_FABRIC, - "cannot support dest_addr lookups now"); - errno = FI_ENOSYS; - return -errno; - } - - fi->next = NULL; - fi->caps = FI_BGQ_DEFAULT_CAPS; - - /* set the mode that we require */ - fi->mode = FI_ASYNC_IOV; - fi->mode |= (FI_CONTEXT); - - /* clear modes that we do not require */ - fi->mode &= (~FI_LOCAL_MR); - fi->mode &= (~FI_MSG_PREFIX); - - fi->addr_format = FI_ADDR_BGQ; - fi->src_addrlen = 24; // includes null - fi->dest_addrlen = 24; // includes null -#ifdef TODO - if (flags & FI_SOURCE) { - fi->src_addr = strdup(service); - if (!fi->src_addr) { - goto err; - } - } -#endif - fi->dest_addr = NULL; - - /* - * man/fi_fabric.3 - * - * On input to fi_getinfo, a user may set this (fi_fabric_attr::fabric) - * to an opened fabric instance to restrict output to the given fabric. - * On output from fi_getinfo, if no fabric was specified, but the user - * has an opened instance of the named fabric, this (fi_fabric_attr::fabric) - * will reference the first opened instance. If no instance has been - * opened, this field will be NULL. - */ - - fi->fabric_attr->name = strdup(FI_BGQ_FABRIC_NAME); - if (!fi->fabric_attr->name) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_FABRIC, - "memory allocation failed"); - goto err; - } - fi->fabric_attr->prov_version = FI_BGQ_PROVIDER_VERSION; - - memcpy(fi->tx_attr, fi_bgq_global.default_tx_attr, sizeof(*fi->tx_attr)); - - if (hints->tx_attr) { - - /* - * man/fi_endpoint.3 - * - * fi_tx_attr::caps - * - * "... If the caps field is 0 on input to fi_getinfo(3), the - * caps value from the fi_info structure will be used." 
- */ - if (hints->tx_attr->caps) { - fi->tx_attr->caps = hints->tx_attr->caps; - } - - /* adjust parameters down from what requested if required */ - fi->tx_attr->op_flags = hints->tx_attr->op_flags; - } else if (hints->caps) { - fi->tx_attr->caps = hints->caps; - } - - memcpy(fi->rx_attr, fi_bgq_global.default_rx_attr, sizeof(*fi->rx_attr)); - if (hints->rx_attr) { - - /* - * man/fi_endpoint.3 - * - * fi_rx_attr::caps - * - * "... If the caps field is 0 on input to fi_getinfo(3), the - * caps value from the fi_info structure will be used." - */ - if (hints->rx_attr->caps) { - fi->rx_attr->caps = hints->rx_attr->caps; - } - - /* adjust parameters down from what requested if required */ - fi->rx_attr->op_flags = hints->rx_attr->op_flags; - if (hints->rx_attr->total_buffered_recv > 0 && - hints->rx_attr->total_buffered_recv < fi_bgq_global.default_rx_attr->total_buffered_recv) - fi->rx_attr->total_buffered_recv = hints->rx_attr->total_buffered_recv; - } else if (hints->caps) { - fi->rx_attr->caps = hints->caps; - } - - caps = fi->caps | fi->tx_attr->caps | fi->rx_attr->caps; - - /* - * man/fi_domain.3 - * - * On input to fi_getinfo, a user may set this (fi_domain_attr::domain) - * to an opened domain instance to restrict output to the given domain. - * On output from fi_getinfo, if no domain was specified, but the user - * has an opened instance of the named domain, this (fi_domain_attr::domain) - * will reference the first opened instance. If no instance has been - * opened, this field will be NULL. 
- */ - - ret = fi_bgq_choose_domain(caps, fi->domain_attr, hints->domain_attr); - if (ret) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_FABRIC, - "cannot find appropriate domain"); - goto err; - } - - memcpy(fi->ep_attr, fi_bgq_global.default_ep_attr, sizeof(*fi->ep_attr)); - if (hints->ep_attr) { - /* adjust parameters down from what requested if required */ - fi->ep_attr->type = hints->ep_attr->type; - if (hints->ep_attr->max_msg_size > 0 && - hints->ep_attr->max_msg_size <= fi_bgq_global.default_ep_attr->max_msg_size) - fi->ep_attr->max_msg_size = hints->ep_attr->max_msg_size; - - if (0 != hints->ep_attr->tx_ctx_cnt && hints->ep_attr->tx_ctx_cnt <= fi->ep_attr->tx_ctx_cnt) - fi->ep_attr->tx_ctx_cnt = hints->ep_attr->tx_ctx_cnt; /* TODO - check */ - - if (0 != hints->ep_attr->rx_ctx_cnt && hints->ep_attr->rx_ctx_cnt <= fi->ep_attr->rx_ctx_cnt) - fi->ep_attr->rx_ctx_cnt = hints->ep_attr->rx_ctx_cnt; /* TODO - check */ - } - - - - return 0; -err: - if (fi->domain_attr->name) free(fi->domain_attr->name); - if (fi->fabric_attr->name) free(fi->fabric_attr->name); - if (fi->fabric_attr->prov_name) free(fi->fabric_attr->prov_name); - errno = FI_ENODATA; - return -errno; -} - -struct fi_bgq_global_data fi_bgq_global; - -static int fi_bgq_getinfo(uint32_t version, const char *node, - const char *service, uint64_t flags, - const struct fi_info *hints, struct fi_info **info) -{ - - if (!((FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_MANUAL) || (FI_BGQ_FABRIC_DIRECT_PROGRESS == FI_PROGRESS_AUTO))){ - fprintf(stderr,"BGQ Provider must be configured with either auto or manual progresss mode specified\n"); - exit(1); - assert(0); - } - - BG_JobCoords_t jobCoords; - uint32_t jcrc = Kernel_JobCoords(&jobCoords); - if (jobCoords.isSubBlock) { - fprintf(stderr,"BGQ Provider cannot be run in a sub-block.\n"); - fflush(stderr); - exit(1); - } - - int ret; - struct fi_info *fi, *prev_fi, *curr; - - if (!fi_bgq_count) { - errno = FI_ENODATA; - return -errno; - } - - if (hints) { 
- ret = fi_bgq_check_info(hints); - if (ret) { - return ret; - } - if (!(fi = fi_allocinfo())) { - return -FI_ENOMEM; - } - if (fi_bgq_fillinfo(fi, node, service, - hints, flags)) { - return -errno; - } - *info = fi; - } else { - if(node || service) { - errno = FI_ENODATA; - return -errno; - } else { - if (!(fi = fi_dupinfo(fi_bgq_global.info))) { - return -FI_ENOMEM; - } - *info = fi; - } - } - - return 0; -} - -static void fi_bgq_fini() -{ - always_assert(fi_bgq_init == 1, - "BGQ provider finalize called before initialize\n"); - fi_freeinfo(fi_bgq_global.info); -} - -static struct fi_provider fi_bgq_provider = { - .name = FI_BGQ_PROVIDER_NAME, - .version = FI_VERSION(0, 1), - .fi_version = OFI_VERSION_LATEST, - .getinfo = fi_bgq_getinfo, - .fabric = fi_bgq_fabric, - .cleanup = fi_bgq_fini -}; - -BGQ_INI -{ - fi_bgq_count = 1; - fi_bgq_set_default_info(); // TODO: fold into fi_bgq_set_defaults - - if (fi_bgq_alloc_default_domain_attr(&fi_bgq_global.default_domain_attr)) { - return NULL; - } - - if (fi_bgq_alloc_default_ep_attr(&fi_bgq_global.default_ep_attr)) { - return NULL; - } - - if (fi_bgq_alloc_default_tx_attr(&fi_bgq_global.default_tx_attr)) { - return NULL; - } - - if (fi_bgq_alloc_default_rx_attr(&fi_bgq_global.default_rx_attr)) { - return NULL; - } - - fi_bgq_global.prov = &fi_bgq_provider; - - fi_bgq_init = 1; - - return (&fi_bgq_provider); -} diff --git a/prov/bgq/src/fi_bgq_mr.c b/prov/bgq/src/fi_bgq_mr.c deleted file mode 100644 index d4f836fa6c9..00000000000 --- a/prov/bgq/src/fi_bgq_mr.c +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "rdma/bgq/fi_bgq.h" -#include - -static int fi_bgq_close_mr(fid_t fid) -{ - struct fi_bgq_domain *bgq_domain; - struct fi_bgq_mr *bgq_mr = (struct fi_bgq_mr *) fid; - - bgq_domain = bgq_mr->domain; - - if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_SCALABLE) { - int ret; - fi_bgq_domain_bat_clear(bgq_domain, bgq_mr->mr_fid.key); - - ret = fi_bgq_ref_dec(&bgq_domain->ref_cnt, "domain"); - if (ret) return ret; - } - free(bgq_mr); - return 0; -} - -static int fi_bgq_bind_mr(struct fid *fid, - struct fid *bfid, uint64_t flags) -{ - int ret; - struct fi_bgq_mr *bgq_mr = - (struct fi_bgq_mr *) fid; - struct fi_bgq_cntr *bgq_cntr; - - ret = fi_bgq_fid_check(fid, FI_CLASS_MR, "memory region"); - if (ret) - return ret; - - switch (bfid->fclass) { - case FI_CLASS_CNTR: - bgq_cntr = (struct fi_bgq_cntr *) bfid; - bgq_mr->cntr = bgq_cntr; - bgq_mr->cntr_bflags = flags; - break; - default: - errno = FI_ENOSYS; - return -errno; - } - return 0; -} - -static struct fi_ops fi_bgq_fi_ops = { - .size = sizeof(struct fi_ops), - .close = fi_bgq_close_mr, - .bind = fi_bgq_bind_mr, - .control = fi_no_control, - .ops_open = fi_no_ops_open -}; - -static int fi_bgq_mr_reg(struct fid *fid, const void *buf, - size_t len, uint64_t access, uint64_t offset, - uint64_t requested_key, uint64_t flags, - struct fid_mr **mr, void *context) -{ - int ret; - - struct fi_bgq_mr *bgq_mr; - struct fi_bgq_domain *bgq_domain; - - if (!fid || !mr) { - errno = FI_EINVAL; - return -errno; - } - - ret = fi_bgq_fid_check(fid, FI_CLASS_DOMAIN, "domain"); - if (ret) return ret; - - if (flags != 0) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_MR, - "Flags for fi_mr_reg must be 0\n"); - errno = FI_EINVAL; - return -errno; - } - - bgq_domain = (struct fi_bgq_domain *) container_of(fid, struct fid_domain, fid); - - if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_SCALABLE) { - if (requested_key >= bgq_domain->num_mr_keys) { - /* requested key is too large */ - errno = FI_EKEYREJECTED; - return -errno; - } - } - bgq_mr 
= calloc(1, sizeof(*bgq_mr)); - if (!bgq_mr) { - errno = FI_ENOMEM; - return -errno; - } - - bgq_mr->mr_fid.fid.fclass = FI_CLASS_MR; - bgq_mr->mr_fid.fid.context = context; - bgq_mr->mr_fid.fid.ops = &fi_bgq_fi_ops; - if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_SCALABLE) { - bgq_mr->mr_fid.key = requested_key; - } - else if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_BASIC) { - - uint64_t paddr = 0; - - fi_bgq_cnk_vaddr2paddr(buf,1,&paddr); - bgq_mr->mr_fid.key = ((uint64_t)buf - paddr); -#ifdef FI_BGQ_TRACE - fprintf(stderr,"fi_bgq_mr_reg - FI_MR_BASIC virtual addr is 0x%016lx physical addr is 0x%016lx key is %lu \n",(uint64_t)buf,paddr,(uint64_t)((uint64_t)buf - paddr)); -fflush(stderr); - -#endif - - } - bgq_mr->buf = buf; - bgq_mr->len = len; - bgq_mr->offset = offset; - bgq_mr->access = FI_SEND | FI_RECV | FI_READ | FI_WRITE | FI_REMOTE_READ | FI_REMOTE_WRITE; - bgq_mr->flags = flags; - bgq_mr->domain = bgq_domain; - - if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_SCALABLE) { - fi_bgq_domain_bat_write(bgq_domain, requested_key, buf, len); - - fi_bgq_ref_inc(&bgq_domain->ref_cnt, "domain"); - } - - *mr = &bgq_mr->mr_fid; - - return 0; -} - -int fi_bgq_bind_ep_mr(struct fi_bgq_ep *bgq_ep, - struct fi_bgq_mr *bgq_mr, uint64_t flags) -{ - return 0; -} - -static struct fi_ops_mr fi_bgq_mr_ops = { - .size = sizeof(struct fi_ops_mr), - .reg = fi_bgq_mr_reg, - .regv = fi_no_mr_regv, - .regattr = fi_no_mr_regattr -}; - -int fi_bgq_init_mr_ops(struct fi_bgq_domain *bgq_domain, struct fi_info *info) -{ - if (!bgq_domain || !info) { - goto err; - } - - if (info->domain_attr->mr_mode == FI_MR_UNSPEC) goto err; - - bgq_domain->domain_fid.mr = &fi_bgq_mr_ops; - - bgq_domain->mr_mode = info->domain_attr->mr_mode; - - if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_SCALABLE) { - bgq_domain->num_mr_keys = (1<<(8*info->domain_attr->mr_key_size)); - bgq_domain->bat = (struct fi_bgq_bat_entry *) calloc(bgq_domain->num_mr_keys, sizeof(struct fi_bgq_bat_entry)); - - } - return 0; -err: - errno = FI_EINVAL; - return 
-errno; -} - -int fi_bgq_finalize_mr_ops(struct fi_bgq_domain *bgq_domain) -{ - if (FI_BGQ_FABRIC_DIRECT_MR == FI_MR_SCALABLE) { - free((void*)bgq_domain->bat); - bgq_domain->bat = (void*)NULL; - bgq_domain->num_mr_keys = 0; - } - return 0; -} diff --git a/prov/bgq/src/fi_bgq_msg.c b/prov/bgq/src/fi_bgq_msg.c deleted file mode 100644 index 8752ce0663a..00000000000 --- a/prov/bgq/src/fi_bgq_msg.c +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "rdma/bgq/fi_bgq.h" -#include - -ssize_t fi_bgq_sendmsg(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags) -{ - struct fi_bgq_ep * bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - const enum fi_threading threading = bgq_ep->threading; - - return fi_bgq_send_generic_flags(ep, msg->msg_iov, msg->iov_count, - msg->desc, msg->addr, 0, msg->context, msg->data, - (threading != FI_THREAD_ENDPOINT && threading != FI_THREAD_DOMAIN), /* "lock required"? */ - 1 /* is_msg */, - 0 /* is_contiguous */, - 1 /* override the default tx flags */, - flags); -} - -ssize_t fi_bgq_sendv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t dest_addr, - void *context) -{ - struct fi_bgq_ep * bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - const enum fi_threading threading = bgq_ep->threading; - - return fi_bgq_send_generic_flags(ep, iov, count, - desc, dest_addr, 0, context, 0, - (threading != FI_THREAD_ENDPOINT && threading != FI_THREAD_DOMAIN), /* "lock required"? 
*/ - 1 /* is_msg */, - 0 /* is_contiguous */, - 0 /* do not override flags */, - 0); -} - - -ssize_t fi_bgq_senddata(struct fid_ep *ep, const void *buf, size_t len, void *desc, - uint64_t data, void *context) -{ - errno = FI_ENOSYS; - return -errno; -} - -/* "FI_BGQ_MSG_SPECIALIZED_FUNC(0)" is already declared via FABRIC_DIRECT */ -FI_BGQ_MSG_SPECIALIZED_FUNC(1) - -#define FI_BGQ_MSG_OPS_STRUCT_NAME(LOCK) \ - fi_bgq_ops_msg_ ## LOCK - -#define FI_BGQ_MSG_OPS_STRUCT(LOCK) \ -static struct fi_ops_msg \ - FI_BGQ_MSG_OPS_STRUCT_NAME(LOCK) = { \ - .size = sizeof(struct fi_ops_msg), \ - .recv = \ - FI_BGQ_MSG_SPECIALIZED_FUNC_NAME(recv, LOCK), \ - .recvv = fi_no_msg_recvv, \ - .recvmsg = \ - FI_BGQ_MSG_SPECIALIZED_FUNC_NAME(recvmsg, LOCK), \ - .send = \ - FI_BGQ_MSG_SPECIALIZED_FUNC_NAME(send, LOCK), \ - .sendv = fi_bgq_sendv, \ - .sendmsg = fi_bgq_sendmsg, \ - .inject = \ - FI_BGQ_MSG_SPECIALIZED_FUNC_NAME(inject, LOCK), \ - .senddata = \ - FI_BGQ_MSG_SPECIALIZED_FUNC_NAME(senddata, LOCK), \ - .injectdata = \ - FI_BGQ_MSG_SPECIALIZED_FUNC_NAME(injectdata, LOCK), \ -} - -FI_BGQ_MSG_OPS_STRUCT(0); -FI_BGQ_MSG_OPS_STRUCT(1); - -static struct fi_ops_msg fi_bgq_no_msg_ops = { - .size = sizeof(struct fi_ops_msg), - .recv = fi_no_msg_recv, - .recvv = fi_no_msg_recvv, - .recvmsg = fi_no_msg_recvmsg, - .send = fi_no_msg_send, - .sendv = fi_no_msg_sendv, - .sendmsg = fi_no_msg_sendmsg, - .inject = fi_no_msg_inject, - .senddata = fi_no_msg_senddata, - .injectdata = fi_no_msg_injectdata -}; - -int fi_bgq_init_msg_ops(struct fi_bgq_ep *bgq_ep, struct fi_info *info) -{ - if (!info || !bgq_ep) { - errno = FI_EINVAL; - goto err; - } - if (info->caps & FI_MSG || - (info->tx_attr && - (info->tx_attr->caps & FI_MSG))) { - - bgq_ep->rx.min_multi_recv = sizeof(union fi_bgq_mu_packet_payload); - bgq_ep->rx.poll.min_multi_recv = bgq_ep->rx.min_multi_recv; - - } - - return 0; - -err: - return -errno; -} - -int fi_bgq_enable_msg_ops(struct fi_bgq_ep *bgq_ep) -{ - if (!bgq_ep || !bgq_ep->domain) 
- return -FI_EINVAL; - - if (!(bgq_ep->tx.caps & FI_MSG)) { - /* Messaging ops not enabled on this endpoint */ - bgq_ep->ep_fid.msg = - &fi_bgq_no_msg_ops; - return 0; - } - - - switch (bgq_ep->domain->threading) { - case FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - case FI_THREAD_COMPLETION: - bgq_ep->ep_fid.msg = &FI_BGQ_MSG_OPS_STRUCT_NAME(0); - break; - case FI_THREAD_FID: - case FI_THREAD_UNSPEC: - case FI_THREAD_SAFE: - bgq_ep->ep_fid.msg = &FI_BGQ_MSG_OPS_STRUCT_NAME(1); - break; - default: - return -FI_EINVAL; - } - - return 0; -} - -int fi_bgq_finalize_msg_ops(struct fi_bgq_ep *bgq_ep) -{ - if (!bgq_ep) { - return 0; - } - - return 0; -} diff --git a/prov/bgq/src/fi_bgq_node.c b/prov/bgq/src/fi_bgq_node.c deleted file mode 100644 index a1b4ff6c266..00000000000 --- a/prov/bgq/src/fi_bgq_node.c +++ /dev/null @@ -1,486 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include - -#include "rdma/bgq/fi_bgq_hwi.h" -#include "rdma/bgq/fi_bgq_spi.h" - -#include "rdma/bgq/fi_bgq_mu.h" -#include "rdma/bgq/fi_bgq_node.h" - -uint64_t fi_bgq_node_bat_allocate_id (struct fi_bgq_node * node, struct l2atomic_lock * lock, uint64_t index); - -#define FI_BGQ_NODE_COUNTER_SIZE ((1 << 13)) /* 8192 counters per node */ -#define FI_BGQ_NODE_LOCK_SIZE ((1 << 13)) /* 8192 locks per node */ - -enum bat_variable { - BAT_VARIABLE_GLOBAL = 0, - BAT_VARIABLE_COUNTER, - BAT_VARIABLE_ONE, - BAT_VARIABLE_ZERO, - BAT_VARIABLE_BLACKHOLE, - BAT_VARIABLE_NUM -}; - -struct fi_bgq_node_shared { - volatile uint64_t init_counter; - volatile uint64_t is_initialized; - uint64_t pad; - volatile uint64_t global_variables[BAT_VARIABLE_NUM]; - struct { - struct l2atomic_lock_data lock_data; - } mu; - struct { - struct l2atomic_counter_data allocator_data; - struct l2atomic_counter_data counter_data[FI_BGQ_NODE_COUNTER_SIZE]; - } counter; - struct { - struct l2atomic_counter_data allocator_data; - struct l2atomic_lock_data lock_data[FI_BGQ_NODE_LOCK_SIZE]; - } lock; - struct l2atomic_barrier_data barrier_data; - uint32_t leader_tcoord; - volatile uint64_t bat_shadow[FI_BGQ_NODE_BAT_SIZE]; - volatile uint64_t bat_cntr[FI_BGQ_NODE_APPLICATION_BAT_SIZE]; -}; - - -#define FI_BGQ_NODE_SHM_FILENAME "/fi_bgq_node" -#define FI_BGQ_NODE_SHM_FILESIZE (sizeof(struct fi_bgq_node_shared) + L2_CACHE_LINE_SIZE) - -void calculate_local_process_count (uint64_t * local_process_count, uint32_t * leader_tcoord) { - - int cnk_rc __attribute__ ((unused)); - - Personality_t personality; - cnk_rc = Kernel_GetPersonality(&personality, sizeof(Personality_t)); - assert(cnk_rc==0); - - 
BG_CoordinateMapping_t local_coords; - local_coords.a = personality.Network_Config.Acoord; - local_coords.b = personality.Network_Config.Bcoord; - local_coords.c = personality.Network_Config.Ccoord; - local_coords.d = personality.Network_Config.Dcoord; - local_coords.e = personality.Network_Config.Ecoord; - local_coords.t = 0; - local_coords.reserved = 0; - const uint32_t * const local_coords_uint32 = (uint32_t *)&local_coords; - - size_t node_count = personality.Network_Config.Anodes * - personality.Network_Config.Bnodes * - personality.Network_Config.Cnodes * - personality.Network_Config.Dnodes * - personality.Network_Config.Enodes; - - uint32_t ppn = Kernel_ProcessCount(); - - /* - * read the ranks2coords mapping on to the stack - */ - size_t mapsize = node_count * ppn; - BG_CoordinateMapping_t map[mapsize]; - uint64_t numentries = 0; - cnk_rc = Kernel_RanksToCoords(mapsize*sizeof(BG_CoordinateMapping_t), map, &numentries); - assert(cnk_rc==0); - - /* - * scan the mapping for all ranks on the local node - * - * the last rank encountered, the highest global rank, will be the - * node "leader" regardless of its t coordinate - * - * calculate the number of active processes on the local node - */ - *local_process_count = 0; - uint64_t n; - const uint32_t * const map_uint32 = (uint32_t *)map; - for (n = 0; n < numentries; ++n) { - const uint32_t bg_coordinatemapping = map_uint32[n]; - if ((bg_coordinatemapping & 0xBFFFFFC0) == *local_coords_uint32) { - *local_process_count += 1; - *leader_tcoord = map[n].t; - } - } -} - -int fi_bgq_node_init (struct fi_bgq_node * node) { - - /* open and create the shared memory segment */ - int _fd = -1; - _fd = shm_open(FI_BGQ_NODE_SHM_FILENAME, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR); - if (_fd == -1) goto err; - - /* set this shared memory as l2 atomic */ - uint64_t foo=1; - int rc; - rc = ioctl(_fd, FIOBGQATOMIC, &foo); - if (rc) goto err; - - /* set the size of the shared memory segment */ - size_t nbytes = 
FI_BGQ_NODE_SHM_FILESIZE; - if (ftruncate(_fd, nbytes) == -1) goto err; - - /* map the shared memory segment and get the virtual address */ - void * _ptr = MAP_FAILED; - _ptr = mmap(NULL, nbytes, PROT_READ | PROT_WRITE, MAP_SHARED, _fd, 0); - if (_ptr == MAP_FAILED) goto err; - - /* align to L2 cache */ - node->abs_ptr = _ptr; - _ptr = (void *)(((uint64_t)_ptr+L2_CACHE_LINE_SIZE) & ~(L2_CACHE_LINE_SIZE-1)); - node->shm_ptr = _ptr; - - struct fi_bgq_node_shared * shared = (struct fi_bgq_node_shared *) node->shm_ptr; - - uint32_t cnk_rc __attribute__ ((unused)); - cnk_rc = Kernel_L2AtomicsAllocate((void *)shared, FI_BGQ_NODE_SHM_FILESIZE); - assert(cnk_rc==0); - - uint64_t value = L2_AtomicLoadIncrement(&shared->init_counter); - if (value == 0) { - - /* initialize the mu lock */ - L2_AtomicStore(&shared->mu.lock_data.ticket, 0); - L2_AtomicStore(&shared->mu.lock_data.serving, 0); - - /* initialize the counter allocater .. er .. counter */ - l2atomic_counter_initialize(&node->counter.allocator, &shared->counter.allocator_data); - - /* initialize the lock allocator counter */ - l2atomic_counter_initialize(&node->lock.allocator, &shared->lock.allocator_data); - - uint64_t local_process_count = 0; - shared->leader_tcoord = (uint32_t)-1; - calculate_local_process_count(&local_process_count, &shared->leader_tcoord); - - l2atomic_barrier_initialize(&node->barrier, &shared->barrier_data, local_process_count); - -#ifdef TODO - /* verify that the MU and ND are not in reset. they must be out - * of reset in order to set up the MU resources or a machine - * check will occur - this would impact a future job using ofi-bgq. 
*/ - uint64_t val1, val2, val3; - val1 = DCRReadUser(MU_DCR(RESET)); - val2 = DCRReadUser(ND_X2_DCR(RESET)); - val3 = DCRReadUser(ND_500_DCR(RESET)); - if ((MU_DCR__RESET__DCRS_OUT_get(val1)) || - (ND_X2_DCR__RESET__DCRS_OUT_get(val2)) || - (ND_500_DCR__RESET__DCRS_OUT_get(val3))) { - assert(0); - } -#endif - - /* Initialize the Base Address Table shadow */ - { - node->bat.shadow = &shared->bat_shadow[0]; - unsigned index; - for (index=0; indexbat.shadow[index] = 0xFFFFFFFFFFFFFFFFull; - } - } - - /* - * Initialize the FI_BGQ_MU_BAT_ID_GLOBAL entry to the base - * physical address 0x00 which will be used by MU operations - * that specify the actual physical addresss - * - * Initialize the FI_BGQ_MU_BAT_ID_COUNTER entry to a global - * variable which will be used by MU "direct put" operations - * that choose to disregard reception counter completions. - * - * Initialize the FI_BGQ_MU_BAT_ID_ZERO entry to a global - * variable which is set to the constant value 'zero'. - * - * Initialize the FI_BGQ_MU_BAT_ID_ONE entry to a global - * variable which is set to the constant value 'one'. - * - * Initialize the FI_BGQ_MU_BAT_ID_BLACKHOLE entry to a global - * variable which is used as a 'garbage' location to write - * data that is to be ignored. 
- */ - uint64_t rc __attribute__ ((unused)); - - rc = fi_bgq_node_bat_allocate_id(node, NULL, FI_BGQ_MU_BAT_ID_GLOBAL); - assert(rc == 0); - fi_bgq_node_bat_write(node, NULL, FI_BGQ_MU_BAT_ID_GLOBAL, 0); - - rc = fi_bgq_node_bat_allocate_id(node, NULL, FI_BGQ_MU_BAT_ID_COUNTER); - assert(rc == 0); - - rc = fi_bgq_node_bat_allocate_id(node, NULL, FI_BGQ_MU_BAT_ID_ZERO); - assert(rc == 0); - - rc = fi_bgq_node_bat_allocate_id(node, NULL, FI_BGQ_MU_BAT_ID_ONE); - assert(rc == 0); - - rc = fi_bgq_node_bat_allocate_id(node, NULL, FI_BGQ_MU_BAT_ID_BLACKHOLE); - assert(rc == 0); - - uint64_t base_paddr = (uint64_t)-1; - { - void * vaddr = (void *)shared->global_variables; - - Kernel_MemoryRegion_t cnk_mr; - Kernel_CreateMemoryRegion(&cnk_mr, vaddr, sizeof(uint64_t)*5); - uint64_t offset = (uint64_t)vaddr - (uint64_t)cnk_mr.BaseVa; - base_paddr = (uint64_t)cnk_mr.BasePa + offset; - } - - shared->global_variables[BAT_VARIABLE_COUNTER] = 0; - uint64_t mu_atomic_paddr = - MUSPI_GetAtomicAddress(base_paddr + sizeof(uint64_t) * BAT_VARIABLE_COUNTER, - MUHWI_ATOMIC_OPCODE_STORE_ADD); - fi_bgq_node_bat_write(node, NULL, FI_BGQ_MU_BAT_ID_COUNTER, mu_atomic_paddr); - - shared->global_variables[BAT_VARIABLE_ZERO] = 0; - fi_bgq_node_bat_write(node, NULL, FI_BGQ_MU_BAT_ID_ZERO, - base_paddr + sizeof(uint64_t) * BAT_VARIABLE_ZERO); - - shared->global_variables[BAT_VARIABLE_ONE] = 1; - fi_bgq_node_bat_write(node, NULL, FI_BGQ_MU_BAT_ID_ONE, - base_paddr + sizeof(uint64_t) * BAT_VARIABLE_ONE); - - shared->global_variables[BAT_VARIABLE_BLACKHOLE] = 0; - fi_bgq_node_bat_write(node, NULL, FI_BGQ_MU_BAT_ID_BLACKHOLE, - base_paddr + sizeof(uint64_t) * BAT_VARIABLE_BLACKHOLE); - - - /* finally, update the shared state to "initialized" */ - L2_AtomicStore(&shared->is_initialized, 1); - - } else { - /* all other processes will wait until the first process - * updates the shared state to "initialized" */ - while (L2_AtomicLoad(&shared->is_initialized) == 0) { - usleep(1); - } - - /* clone the 
counter allocator counter ... */ - node->counter.allocator.value_l2vaddr = (uintptr_t)&shared->counter.allocator_data.value; - - /* clone the lock allocator counter */ - node->lock.allocator.value_l2vaddr = (uintptr_t)&shared->lock.allocator_data.value; - - /* set the pointer to the shared base address table shadow */ - node->bat.shadow = &shared->bat_shadow[0]; - - l2atomic_barrier_clone(&node->barrier, &shared->barrier_data); - } - - node->leader_tcoord = shared->leader_tcoord; - node->is_leader = node->leader_tcoord == Kernel_MyTcoord(); - - /* get the paddr of the bat counters in l2 atomic shared memory */ - void * vaddr = (void *)&shared->bat_cntr[0]; - Kernel_MemoryRegion_t cnk_mr; - Kernel_CreateMemoryRegion(&cnk_mr, vaddr, sizeof(uint64_t)*FI_BGQ_NODE_APPLICATION_BAT_SIZE); - uint64_t offset = (uint64_t)vaddr - (uint64_t)cnk_mr.BaseVa; - uint64_t paddr = (uint64_t)cnk_mr.BasePa + offset; - unsigned i; - for (i=0; ibat.l2_cntr_paddr[i] = paddr + sizeof(uint64_t)*i; - } - - l2atomic_barrier_enter(&node->barrier); - - return 0; -err: - if (_fd != -1) close(_fd); - return -errno; -} - -int fi_bgq_node_mu_lock_init (struct fi_bgq_node * node, struct l2atomic_lock * lock) { - - struct fi_bgq_node_shared * shared = (struct fi_bgq_node_shared *) node->shm_ptr; - - /* do not use 'l2atomic_lock_initialize()' because it clears the - * 'ticket' and 'serving' values each time */ - lock->ticket_l2vaddr = (uintptr_t)&shared->mu.lock_data.ticket; - lock->serving_l2vaddr = (uintptr_t)&shared->mu.lock_data.serving; - - return 0; -} - -int fi_bgq_node_counter_allocate (struct fi_bgq_node * node, struct l2atomic_counter * counter) { - - uint64_t index = l2atomic_counter_increment(&node->counter.allocator); - if (index == 0x8000000000000000ull) - return -1; - - struct fi_bgq_node_shared * shared = (struct fi_bgq_node_shared *) node->shm_ptr; - - counter->value_l2vaddr = (uintptr_t)&shared->counter.counter_data[index].value; - l2atomic_counter_set(counter, 0); - - return 0; -} - 
-int fi_bgq_node_lock_allocate (struct fi_bgq_node * node, struct l2atomic_lock * lock) { - - uint64_t index = l2atomic_counter_increment(&node->lock.allocator); - if (index == 0x8000000000000000ull) - return -1; - - struct fi_bgq_node_shared * shared = (struct fi_bgq_node_shared *) node->shm_ptr; - l2atomic_lock_initialize(lock, &shared->lock.lock_data[index]); - - return 0; -} - - -void fi_bgq_node_bat_write (struct fi_bgq_node * node, struct l2atomic_lock * lock, uint64_t index, uint64_t offset) { - - assert(index < FI_BGQ_NODE_BAT_SIZE); - - uint32_t requested_bat_id = index & 0x07; - - if (lock) l2atomic_lock_acquire(lock); - - int32_t cnk_rc __attribute__ ((unused)); - cnk_rc = MUSPI_SetBaseAddress(&node->bat.subgroup[index], requested_bat_id, offset); - assert(cnk_rc == 0); - - node->bat.shadow[index] = offset; - - { /* this "l1p flush" hack is only needed to flush *writes* from a processor cache to the memory system */ - volatile uint64_t *mu_register = - (volatile uint64_t *)(BGQ_MU_STATUS_CONTROL_REGS_START_OFFSET(0, 0) + - 0x030 - PHYMAP_PRIVILEGEDOFFSET); - *mu_register = 0; - } - ppc_msync(); - - if (lock) l2atomic_lock_release(lock); -} - -void fi_bgq_node_bat_clear (struct fi_bgq_node * node, struct l2atomic_lock * lock, uint64_t index) { - fi_bgq_node_bat_write(node, lock, index, 0xFFFFFFFFFFFFFFFFull); -} - - -uint64_t fi_bgq_node_bat_allocate (struct fi_bgq_node * node, struct l2atomic_lock * lock) { - - if (lock) l2atomic_lock_acquire(lock); - - uint32_t subgroup_id; - for (subgroup_id = 0; subgroup_id < FI_BGQ_NODE_NUM_USER_SUBGROUPS; ++subgroup_id) { - - uint32_t nbatids; - uint32_t batids[BGQ_MU_NUM_DATA_COUNTERS_PER_SUBGROUP]; - int32_t cnk_rc __attribute__ ((unused)); - cnk_rc = Kernel_QueryBaseAddressTable(subgroup_id, &nbatids, batids); - assert(cnk_rc == 0); - - if (nbatids > 0) { - - uint64_t index = (subgroup_id << 3) | batids[0]; - - cnk_rc = Kernel_AllocateBaseAddressTable(subgroup_id, - &node->bat.subgroup[index], 1, &batids[0], 0); 
- assert(cnk_rc == 0); - - uint64_t bat_offset __attribute__ ((unused)); - bat_offset = fi_bgq_node_bat_read(node, index); - assert(bat_offset == 0xFFFFFFFFFFFFFFFFull); - - if (lock) l2atomic_lock_release(lock); - - return index; - } - } - - if (lock) l2atomic_lock_release(lock); - - return 0xFFFFFFFFFFFFFFFFull; /* error! */ -} - -uint64_t fi_bgq_node_bat_allocate_id (struct fi_bgq_node * node, struct l2atomic_lock * lock, uint64_t index) { - - assert(index < FI_BGQ_NODE_BAT_SIZE); - - uint32_t requested_subgroup_id = index >> 3; - uint32_t requested_bat_id = index & 0x07; - - if (lock) l2atomic_lock_acquire(lock); - - uint32_t nbatids; - uint32_t batids[BGQ_MU_NUM_DATA_COUNTERS_PER_SUBGROUP]; - int32_t cnk_rc __attribute__ ((unused)); - cnk_rc = Kernel_QueryBaseAddressTable(requested_subgroup_id, &nbatids, batids); - assert(cnk_rc == 0); - assert(nbatids > 0); - - unsigned i; - for (i=0; ibat.subgroup[index], 1, &batids[i], 0); - assert(cnk_rc == 0); - - uint64_t bat_offset __attribute__ ((unused)); - bat_offset = fi_bgq_node_bat_read(node, index); - assert(bat_offset == 0xFFFFFFFFFFFFFFFFull); - - if (lock) l2atomic_lock_release(lock); - - return 0; - } - } - - if (lock) l2atomic_lock_release(lock); - - return 0xFFFFFFFFFFFFFFFFull; /* error! */ -} - -void fi_bgq_node_bat_free (struct fi_bgq_node * node, struct l2atomic_lock * lock, uint64_t index) { - - assert(index < FI_BGQ_NODE_APPLICATION_BAT_SIZE); - - if (lock) l2atomic_lock_acquire(lock); - - fi_bgq_node_bat_clear(node, NULL, index); - - uint32_t batid = index & 0x07; - - int32_t cnk_rc __attribute__ ((unused)); - cnk_rc = Kernel_DeallocateBaseAddressTable(&node->bat.subgroup[index], 1, &batid); - assert(cnk_rc == 0); - - if (lock) l2atomic_lock_release(lock); -} diff --git a/prov/bgq/src/fi_bgq_pmi.c b/prov/bgq/src/fi_bgq_pmi.c deleted file mode 100644 index a9c32f63df2..00000000000 --- a/prov/bgq/src/fi_bgq_pmi.c +++ /dev/null @@ -1,364 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include - -#define PMI_TRUE (1) -#define PMI_FALSE (0) - -#define PMI_SUCCESS 0 -#define PMI_FAIL -1 -#define PMI_ERR_INIT 1 -#define PMI_ERR_NOMEM 2 -#define PMI_ERR_INVALID_ARG 3 -#define PMI_ERR_INVALID_KEY 4 -#define PMI_ERR_INVALID_KEY_LENGTH 5 -#define PMI_ERR_INVALID_VAL 6 -#define PMI_ERR_INVALID_VAL_LENGTH 7 -#define PMI_ERR_INVALID_LENGTH 8 -#define PMI_ERR_INVALID_NUM_ARGS 9 -#define PMI_ERR_INVALID_ARGS 10 -#define PMI_ERR_INVALID_NUM_PARSED 11 -#define PMI_ERR_INVALID_KEYVALP 12 -#define PMI_ERR_INVALID_SIZE 13 - -#define BGQ_PMI_NAME_MAXLEN 128 -#define BGQ_PMI_KEY_MAXLEN 128 -#define BGQ_PMI_VALUE_MAXLEN 128 -#define BGQ_SINGLE_ENTRY_NODE_BLOCK_MAX_LEN 16 - -#include "rdma/bgq/fi_bgq_spi.h" - -static inline void node_block_write(char *value, char *current_node_block_value, int current_block_id, int current_node_block_count, int proc_count) { - sprintf(current_node_block_value,",(%d,%d,%d)",current_block_id,current_node_block_count,proc_count); - strcat(value,current_node_block_value); -} - -static inline void * -convert_virtual_address_to_global_virtual_address (void * vaddr, size_t len) -{ - uint64_t paddr = 0; - void * global_vaddr = NULL; - uint32_t cnk_rc __attribute__ ((unused)); - - Kernel_MemoryRegion_t cnk_mr; - cnk_rc = Kernel_CreateMemoryRegion(&cnk_mr, (void *)vaddr, len); - assert(cnk_rc == 0); - - paddr = (uint64_t)cnk_mr.BasePa + ((uint64_t)vaddr - (uint64_t)cnk_mr.BaseVa); - - cnk_rc = Kernel_Physical2GlobalVirtual((void *)paddr, &global_vaddr); - assert(cnk_rc == 0); - - return global_vaddr; -} - -static int pmi_rank = INT_MAX; -static int pmi_size = INT_MAX; -static int pmi_local_size = INT_MAX; -static int bgq_pmi_value_maxlen; -static uint32_t my_bgq_rank_node_id; - -uint32_t *bgq_node_list; - -int PMI_Init (int *spawned) -{ - if (!spawned) return PMI_ERR_INVALID_ARG; - *spawned = PMI_FALSE; - - if (pmi_rank != INT_MAX) return PMI_FAIL; - if (pmi_size != INT_MAX) return PMI_FAIL; - - Personality_t pers; - int rc = 0; - rc = 
Kernel_GetPersonality(&pers, sizeof(pers)); - if (rc) return PMI_FAIL; - - /* calculate the maximum number of ranks from the torus dimensions */ - Personality_Networks_t *net = &pers.Network_Config; - uint32_t max_ranks = net->Anodes * net->Bnodes * net->Cnodes * - net->Dnodes * net->Enodes * Kernel_ProcessCount(); - uint64_t numentries = 0; - - BG_CoordinateMapping_t mapping[max_ranks]; - rc = Kernel_RanksToCoords(sizeof(mapping), mapping, &numentries); - bgq_node_list = (uint32_t *) malloc(sizeof(uint32_t) * max_ranks); - uint32_t tcoord32bitmask = 0xFFFFFFC0; - uint32_t origcoord; - - /* while populating the rank map also determine how many local ranks on - my node - pmi_local_size */ - BG_CoordinateMapping_t my_bgq_coords; - my_bgq_coords.e = net->Ecoord; - my_bgq_coords.reserved = mapping[0].reserved; - my_bgq_coords.a = net->Acoord; - my_bgq_coords.b = net->Bcoord; - my_bgq_coords.c = net->Ccoord; - my_bgq_coords.d = net->Dcoord; - my_bgq_coords.t = 0; - - memcpy(&my_bgq_rank_node_id,&my_bgq_coords,sizeof(BG_CoordinateMapping_t)); - pmi_local_size = 0; - - int i; - for (i=0;iAnodes * net->Bnodes * net->Cnodes * - net->Dnodes * net->Enodes * BGQ_SINGLE_ENTRY_NODE_BLOCK_MAX_LEN; - if (bgq_pmi_value_maxlen < BGQ_PMI_VALUE_MAXLEN) - bgq_pmi_value_maxlen = BGQ_PMI_VALUE_MAXLEN; - return PMI_SUCCESS; -} - - -int PMI_Initialized (int *initialized) -{ - if (!initialized) return PMI_ERR_INVALID_ARG; - - return (pmi_size != INT_MAX && pmi_rank != INT_MAX) ? 
PMI_TRUE : PMI_FALSE; -} - - -int PMI_Finalize () -{ - free(bgq_node_list); - if (pmi_rank == INT_MAX) return PMI_ERR_INIT; - if (pmi_size == INT_MAX) return PMI_ERR_INIT; - - pmi_rank = INT_MAX; - pmi_size = INT_MAX; - return PMI_SUCCESS; -} - - -int PMI_Get_size (int *size) -{ - if (pmi_size == INT_MAX) return PMI_ERR_INIT; - - if (!size) return PMI_ERR_INVALID_ARG; - *size = pmi_size; - return PMI_SUCCESS; -} - - -int PMI_Get_rank (int *rank) -{ - if (pmi_rank == INT_MAX) return PMI_ERR_INIT; - - if (!rank) return PMI_ERR_INVALID_ARG; - *rank = pmi_rank; - return PMI_SUCCESS; -} - -int PMI_Get_universe_size (int *size) -{ - return PMI_Get_size(size); -} - -int PMI_Get_appnum (int *appnum) -{ - if (pmi_rank == INT_MAX) return PMI_ERR_INIT; - if (pmi_size == INT_MAX) return PMI_ERR_INIT; - - if (!appnum) return PMI_ERR_INVALID_ARG; - *appnum = 0; - return PMI_SUCCESS; -} - -int PMI_Publish_name (const char service_name[], const char port[]) -{ - return PMI_FAIL; -} - -int PMI_Unpublish_name (const char service_name[]) -{ - return PMI_FAIL; -} - -int PMI_Lookup_name (const char service_name[], char port[]) -{ - return PMI_FAIL; -} - -int PMI_Barrier () -{ -// abort(); -// return PMI_FAIL; - return PMI_SUCCESS; -} - -int PMI_Abort (int exit_code, const char error_msg[]) -{ - abort(); - return PMI_SUCCESS; -} - -int PMI_KVS_Get_my_name (char kvsname[], int length) -{ - /* obtain the name of the keyval space the local process group has access to */ - kvsname[0] = 0; - return PMI_SUCCESS; -} - -int PMI_KVS_Get_name_length_max (int *length) -{ - /* obtain the length necessary to store a kvsname */ - if (!length) return PMI_ERR_INVALID_ARG; - *length = BGQ_PMI_NAME_MAXLEN; - - return PMI_SUCCESS; -} - -int PMI_KVS_Get_key_length_max (int *length) -{ - /* obtain the length necessary to store a key */ - if (!length) return PMI_ERR_INVALID_ARG; - *length = BGQ_PMI_KEY_MAXLEN; - - return PMI_SUCCESS; -} - -int PMI_KVS_Get_value_length_max (int *length) -{ - /* obtain the 
length necessary to store a value */ - if (!length) return PMI_ERR_INVALID_ARG; - *length = bgq_pmi_value_maxlen; - - return PMI_SUCCESS; -} - - -int PMI_KVS_Put (const char kvsname[], const char key[], const char value[]) -{ - /* unimplemented put -- we are not really maintaining a keyval space */ - return PMI_SUCCESS; -} - -int PMI_KVS_Commit (const char kvsname[]) -{ - /* unimplemented commit -- we are not really maintaining a keyval space */ - return PMI_SUCCESS; -} - -int PMI_KVS_Get (const char kvsname[], const char key[], char value[], int length) -{ - /* get a key/value pair from a keyval space */ - if (0 == strncmp(key, "PMI_local_size", 14)) { - snprintf(value, length, "%d", pmi_local_size); - return PMI_SUCCESS; - } - else if (0 == strncmp(key, "PMI_process_mapping", 19)) { - - /* Build the pmi node block list from the bgq node list computed - in the PMI_Init. */ - char current_node_block_value[BGQ_SINGLE_ENTRY_NODE_BLOCK_MAX_LEN*2]; - strcpy(value,"(vector"); - - /* These are the variables holding values written out as a tuple - for a node block. */ - int current_block_id = 0; - int current_proc_count = 0; - int prev_proc_count = -1; - int current_node_block_count = 1; - - /* This is the current node being analyzed. 
*/ - uint32_t current_node_id = bgq_node_list[0]; - - int i; - for (i=0;iconsumer; - uint64_t value_rsh3b = 0; - - /* Check if another endpoint should be managed by this progress thread - */ - if (l2atomic_fifo_consume(consumer, &value_rsh3b) == 0) { - struct fi_bgq_ep *bgq_ep = (struct fi_bgq_ep *)(value_rsh3b << 3); - - assert(L2_AtomicLoad(&bgq_ep->async.enabled) != 0); - assert(L2_AtomicLoad(&bgq_ep->async.active) == 0); - - progress->all_ep[(progress->all_ep_count)++] = bgq_ep; - - if (bgq_ep->rx.caps & FI_TAGGED) { - progress->tag_ep[(progress->tag_ep_count)++] = bgq_ep; - } - if (bgq_ep->rx.caps & FI_MSG) { - progress->msg_ep[(progress->msg_ep_count)++] = bgq_ep; - } - L2_AtomicStore(&bgq_ep->async.active, 1); - } - - /* - * Advance control code path for each endpoint once and check - * each endpoint if async progress is disabled - */ - unsigned i = 0; - while (i < progress->all_ep_count) { - - struct fi_bgq_ep *bgq_ep = progress->all_ep[i]; - poll_cfifo(bgq_ep, 0); - - if (L2_AtomicLoad(&bgq_ep->async.enabled) == 0) { - L2_AtomicStore(&bgq_ep->async.active, 0); - - if (bgq_ep->rx.caps & FI_MSG) { - unsigned n = 0; - while (progress->msg_ep[n] != bgq_ep) ++n; - progress->msg_ep[n] = progress->msg_ep[--(progress->msg_ep_count)]; - } - - if (bgq_ep->rx.caps & FI_TAGGED) { - unsigned n = 0; - while (progress->tag_ep[n] != bgq_ep) ++n; - progress->tag_ep[n] = progress->tag_ep[--(progress->tag_ep_count)]; - } - - progress->all_ep[i] = progress->all_ep[--(progress->all_ep_count)]; - } else { - ++i; - } - } - - return; -} - -/* internal function */ -void poll_noinline (struct fi_bgq_ep *bgq_ep, const unsigned poll_msg, const uint64_t cancel_context) { - - poll_mfifo(bgq_ep, poll_msg, cancel_context, 0); - poll_rfifo(bgq_ep, 0); -} - -/* internal function */ -void * progress_fn (void *arg) { - - struct fi_bgq_progress * progress = (struct fi_bgq_progress *)arg; - - struct fi_bgq_ep ** tag_ep = progress->tag_ep; - struct fi_bgq_ep ** msg_ep = progress->msg_ep; - 
struct fi_bgq_ep ** all_ep = progress->all_ep; - - struct l2atomic_fifo_consumer * consumer = &progress->consumer; - struct l2atomic_fifo_producer * producer = &progress->producer; - uint64_t value_rsh3b = 0; - - const unsigned tag_loop = 16; - const unsigned msg_loop = 4; - - unsigned m, j, i; - - /* first, enable the progress thread control fifo by setting the - * HEAD and TAIL to zero and setting the BOUNDS to FIFO_SIZE-1 - */ - l2atomic_fifo_enable(consumer, producer); - - - progress->active = 1; - fi_bgq_msync(FI_BGQ_MSYNC_TYPE_WO); - - while (progress->enabled) { - - /* Advance performance critical code path for each endpoint multiple times */ - - const unsigned tag_ep_count = progress->tag_ep_count; - const unsigned msg_ep_count = progress->msg_ep_count; - - for (m=0; mall_ep[(progress->all_ep_count)++] = (struct fi_bgq_ep *)(value_rsh3b << 3); - } - - struct fi_bgq_domain *bgq_domain = progress->bgq_domain; - const unsigned max_threads = bgq_domain->progress.max_threads; - - for (i=0; iall_ep_count; ++i) { - - value_rsh3b = ((uint64_t)(all_ep[i])) >> 3; - - unsigned p; - for (p=0; pprogress.thread[p].producer, value_rsh3b)) { - all_ep[i] = NULL; - break; - } - } - - if (all_ep[i] != NULL) { - /* No active progress threads; disable async progress on this endpoint */ - L2_AtomicStore(&all_ep[i]->async.enabled, 0); - L2_AtomicStore(&all_ep[i]->async.active, 0); - all_ep[i] = NULL; - /* TODO - is this an error or something? 
*/ - } - } - - /* Deactivate this progress thread and exit */ - progress->active = 0; - fi_bgq_msync(FI_BGQ_MSYNC_TYPE_WO); - - return NULL; -}; - -int fi_bgq_progress_init (struct fi_bgq_domain *bgq_domain, const uint64_t max_threads) { - - assert(max_threads < (64/Kernel_ProcessCount())); - - bgq_domain->progress.max_threads = max_threads; - bgq_domain->progress.num_threads_active = 0; - bgq_domain->progress.memptr = NULL; - - if (0 == max_threads) return 0; - - size_t i, j; - - const size_t bytes = sizeof(union fi_bgq_progress_data) * max_threads; - const size_t alignment = 128; - - void * memptr = malloc(bytes+alignment); - uint32_t cnk_rc = 0; - cnk_rc = Kernel_L2AtomicsAllocate(memptr, bytes+alignment); - assert(0==cnk_rc); - if (cnk_rc != 0) { - /* Error allocating l2atomic memory */ - free(memptr); - bgq_domain->progress.memptr = NULL; - return -1; - } - - union fi_bgq_progress_data *data = (union fi_bgq_progress_data *)(((uintptr_t)memptr + alignment) & (~(alignment-1))); - const uint64_t npackets = sizeof(data[0].data) / sizeof(uint64_t); - const size_t nep = sizeof(bgq_domain->progress.thread[0].tag_ep) / sizeof(struct fi_bgq_ep *); - for (i=0; iprogress.thread[i].consumer, - &bgq_domain->progress.thread[i].producer, - &data[i].l2atomic, npackets); - - bgq_domain->progress.thread[i].tag_ep_count = 0; - bgq_domain->progress.thread[i].msg_ep_count = 0; - bgq_domain->progress.thread[i].all_ep_count = 0; - bgq_domain->progress.thread[i].pthread = 0; - bgq_domain->progress.thread[i].enabled = 0; - bgq_domain->progress.thread[i].active = 0; - - fi_bgq_ref_inc(&bgq_domain->ref_cnt, "domain"); - bgq_domain->progress.thread[i].bgq_domain = bgq_domain; - - for (j=0; jprogress.thread[i].tag_ep[j] = NULL; - bgq_domain->progress.thread[i].msg_ep[j] = NULL; - bgq_domain->progress.thread[i].all_ep[j] = NULL; - } - } - - bgq_domain->progress.memptr = memptr; - - return 0; -} - -int fi_bgq_progress_enable (struct fi_bgq_domain *bgq_domain, const unsigned id) { - - 
assert(id < (64/Kernel_ProcessCount()-1)); - assert(id < bgq_domain->progress.max_threads); - - fi_bgq_msync(FI_BGQ_MSYNC_TYPE_RO); - if (bgq_domain->progress.thread[id].enabled) { - assert(bgq_domain->progress.thread[id].active); - return 0; - } - - bgq_domain->progress.thread[id].enabled = 1; - bgq_domain->progress.thread[id].active = 0; - fi_bgq_msync(FI_BGQ_MSYNC_TYPE_WO); - - int rc = 0; - rc = pthread_create(&bgq_domain->progress.thread[id].pthread, NULL, progress_fn, (void *)&bgq_domain->progress.thread[id]); - if (rc) { - /* Error starting this progress thread */ - bgq_domain->progress.thread[id].enabled = 0; - bgq_domain->progress.thread[id].active = 0; - return -1; - } - - /* Wait until the progress thread is active */ - while (0 == bgq_domain->progress.thread[id].active) { - fi_bgq_msync(FI_BGQ_MSYNC_TYPE_RO); - } - - ++(bgq_domain->progress.num_threads_active); - fi_bgq_msync(FI_BGQ_MSYNC_TYPE_WO); - - return 0; -} - -int fi_bgq_progress_disable (struct fi_bgq_domain *bgq_domain, const unsigned id) { - - assert(id < (64/Kernel_ProcessCount()-1)); - assert(id < bgq_domain->progress.max_threads); - - fi_bgq_msync(FI_BGQ_MSYNC_TYPE_RO); - if (0 == bgq_domain->progress.thread[id].enabled) { - assert(0 == bgq_domain->progress.thread[id].active); - return 0; - } - - bgq_domain->progress.thread[id].enabled = 0; - fi_bgq_msync(FI_BGQ_MSYNC_TYPE_WO); - - /* Wait until the progress thread is active */ - while (bgq_domain->progress.thread[id].active) { - fi_bgq_msync(FI_BGQ_MSYNC_TYPE_RO); - } - - int rc __attribute__ ((unused)); - void *retval = NULL; - rc = pthread_join(bgq_domain->progress.thread[id].pthread, &retval); - assert(0 == rc); - bgq_domain->progress.thread[id].pthread = 0; - - l2atomic_fifo_disable(&bgq_domain->progress.thread[id].consumer, - &bgq_domain->progress.thread[id].producer); - - --(bgq_domain->progress.num_threads_active); - - return 0; -} - -int fi_bgq_progress_fini (struct fi_bgq_domain *bgq_domain) { - - 
assert(0==bgq_domain->progress.num_threads_active); - - int i; - for (i=0; iprogress.max_threads; ++i) { - assert(0 == bgq_domain->progress.thread[i].enabled); - assert(0 == bgq_domain->progress.thread[i].active); - //l2atomic_fifo_finalize(&bgq_domain->progress.thread[i].consumer, - // &bgq_domain->progress.thread[i].producer); - bgq_domain->progress.thread[i].bgq_domain = NULL; - fi_bgq_ref_dec(&bgq_domain->ref_cnt, "domain"); - } - - free(bgq_domain->progress.memptr); - bgq_domain->progress.memptr = NULL; - - return 0; -} - -int fi_bgq_progress_ep_enable (struct fi_bgq_progress *thread, struct fi_bgq_ep *bgq_ep) { - - bgq_ep->async.active = 0; - bgq_ep->async.enabled = 1; - fi_bgq_msync(FI_BGQ_MSYNC_TYPE_WO); - - uint64_t value_rsh3b = ((uint64_t)bgq_ep) >> 3; - if (0 == l2atomic_fifo_produce(&thread->producer, value_rsh3b)) { - - /* Wait until async progress on the endpoint is activated */ - while (0 == bgq_ep->async.active) { - fi_bgq_msync(FI_BGQ_MSYNC_TYPE_RW); - } - - } else { - - /* "fifo is full" means that the progress thread has been disabled */ - assert(0); - return -1; - } - - return 0; -} - -int fi_bgq_progress_ep_disable (struct fi_bgq_ep *bgq_ep) { - - bgq_ep->async.enabled = 0; - fi_bgq_msync(FI_BGQ_MSYNC_TYPE_WO); - - /* Wait until async progress on the endpoint is deactivated */ - while (0 != bgq_ep->async.active) { - fi_bgq_msync(FI_BGQ_MSYNC_TYPE_RO); - } - - return 0; -} diff --git a/prov/bgq/src/fi_bgq_rma.c b/prov/bgq/src/fi_bgq_rma.c deleted file mode 100644 index ab18aafbefe..00000000000 --- a/prov/bgq/src/fi_bgq_rma.c +++ /dev/null @@ -1,257 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "rdma/bgq/fi_bgq.h" -#include -#include - -/* "FI_BGQ_RMA_SPECIALIZED_FUNC(0)" is already declared via FABRIC_DIRECT */ -FI_BGQ_RMA_SPECIALIZED_FUNC(1) - -#define FI_BGQ_RMA_OPS_STRUCT_NAME(LOCK) \ - fi_bgq_ops_rma_ ## LOCK - -#define FI_BGQ_RMA_OPS_STRUCT(LOCK) \ -static struct fi_ops_rma \ - FI_BGQ_RMA_OPS_STRUCT_NAME(LOCK) = { \ - .size = sizeof(struct fi_ops_rma), \ - .read = FI_BGQ_RMA_SPECIALIZED_FUNC_NAME(read, LOCK), \ - .readv = fi_no_rma_readv, \ - .readmsg = FI_BGQ_RMA_SPECIALIZED_FUNC_NAME(readmsg, \ - LOCK), \ - .write = FI_BGQ_RMA_SPECIALIZED_FUNC_NAME(write, \ - LOCK), \ - .inject = FI_BGQ_RMA_SPECIALIZED_FUNC_NAME(inject_write,\ - LOCK), \ - .writev = FI_BGQ_RMA_SPECIALIZED_FUNC_NAME(writev, \ - LOCK), \ - .writemsg = FI_BGQ_RMA_SPECIALIZED_FUNC_NAME(writemsg, \ - LOCK), \ - .writedata = fi_no_rma_writedata, \ -} - -FI_BGQ_RMA_OPS_STRUCT(0); -FI_BGQ_RMA_OPS_STRUCT(1); - -static inline ssize_t fi_bgq_rma_read(struct fid_ep *ep, - void *buf, size_t len, void *desc, - fi_addr_t src_addr, uint64_t addr, - uint64_t key, void *context) -{ - int lock_required; - struct fi_bgq_ep *bgq_ep; - - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - switch (bgq_ep->threading) { - case FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - lock_required = 0; - default: - lock_required = 1; - } - - return fi_bgq_read_generic(ep, buf, len, desc, src_addr, - addr, key, context, lock_required); -} - -static inline ssize_t fi_bgq_rma_readmsg(struct fid_ep *ep, - const struct fi_msg_rma *msg, uint64_t flags) -{ - int lock_required; - struct fi_bgq_ep *bgq_ep; - - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - switch (bgq_ep->threading) { - case FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - lock_required = 0; - default: - lock_required = 1; - } - - return fi_bgq_readmsg_generic(ep, msg, flags, - lock_required); -} - -static inline ssize_t fi_bgq_rma_inject_write(struct fid_ep *ep, - const void *buf, size_t len, - fi_addr_t dst_addr, uint64_t addr, 
uint64_t key) -{ - int lock_required; - struct fi_bgq_ep *bgq_ep; - - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - switch (bgq_ep->threading) { - case FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - lock_required = 0; - default: - lock_required = 1; - } - - return fi_bgq_inject_write_generic(ep, buf, len, dst_addr, - addr, key, lock_required); -} - -static inline ssize_t fi_bgq_rma_write(struct fid_ep *ep, - const void *buf, size_t len, void *desc, - fi_addr_t dst_addr, uint64_t addr, - uint64_t key, void *context) -{ - int lock_required; - struct fi_bgq_ep *bgq_ep; - - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - switch (bgq_ep->threading) { - case FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - lock_required = 0; - default: - lock_required = 1; - } - - return fi_bgq_write_generic(ep, buf, len, desc, dst_addr, - addr, key, context, lock_required); -} - -static inline ssize_t fi_bgq_rma_writev(struct fid_ep *ep, - const struct iovec *iov, void **desc, - size_t count, fi_addr_t dest_addr, uint64_t addr, - uint64_t key, void *context) -{ - int lock_required; - struct fi_bgq_ep *bgq_ep; - - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - switch (bgq_ep->threading) { - case FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - lock_required = 0; - default: - lock_required = 1; - } - - return fi_bgq_writev_generic(ep, iov, desc, count, dest_addr, addr, - key, context, lock_required); -} - -static inline ssize_t fi_bgq_rma_writemsg(struct fid_ep *ep, - const struct fi_msg_rma *msg, uint64_t flags) -{ - int lock_required; - struct fi_bgq_ep *bgq_ep; - - bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - - switch (bgq_ep->threading) { - case FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - lock_required = 0; - default: - lock_required = 1; - } - - return fi_bgq_writemsg_generic(ep, msg, flags, - lock_required); -} - -static struct fi_ops_rma fi_bgq_ops_rma_default = { - .size = sizeof(struct fi_ops_rma), - .read = fi_bgq_rma_read, - .readv = 
fi_no_rma_readv, - .readmsg = fi_bgq_rma_readmsg, - .write = fi_bgq_rma_write, - .inject = fi_bgq_rma_inject_write, - .writev = fi_bgq_rma_writev, - .writemsg = fi_bgq_rma_writemsg, - .writedata = fi_no_rma_writedata, -}; - -int fi_bgq_init_rma_ops(struct fi_bgq_ep *bgq_ep, struct fi_info *info) -{ - if (!bgq_ep || !info) { - errno = FI_EINVAL; - goto err; - } - - return 0; -err: - return -errno; -} - -int fi_bgq_enable_rma_ops(struct fi_bgq_ep *bgq_ep) -{ - if (!bgq_ep || !bgq_ep->domain) { - errno = FI_EINVAL; - goto err; - } - - if (!(bgq_ep->tx.caps & FI_RMA)) { - /* rma ops not enabled on this endpoint */ - return 0; - } - - switch (bgq_ep->domain->threading) { - case FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - case FI_THREAD_COMPLETION: - bgq_ep->ep_fid.rma = &FI_BGQ_RMA_OPS_STRUCT_NAME(0); - break; - case FI_THREAD_FID: - case FI_THREAD_UNSPEC: - case FI_THREAD_SAFE: - bgq_ep->ep_fid.rma = &FI_BGQ_RMA_OPS_STRUCT_NAME(1); - break; - default: - /*bgq_ep->ep_fid.rma = &fi_bgq_ops_rma_default;*/ - errno = FI_EINVAL; - goto err; - } - - - return 0; -err: - return -errno; -} - -int fi_bgq_finalize_rma_ops(struct fi_bgq_ep *bgq_ep) -{ - if (!bgq_ep) { - return 0; - } - - return 0; -} diff --git a/prov/bgq/src/fi_bgq_sep.c b/prov/bgq/src/fi_bgq_sep.c deleted file mode 100644 index 68782637b75..00000000000 --- a/prov/bgq/src/fi_bgq_sep.c +++ /dev/null @@ -1,469 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "rdma/bgq/fi_bgq.h" - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -/* forward declaration */ -int fi_bgq_endpoint_rx_tx (struct fid_domain *dom, struct fi_info *info, - struct fid_ep **ep, void *context, const int rx_index, const int tx_index); - -static int fi_bgq_close_sep(fid_t fid) -{ - int ret; - struct fi_bgq_sep *bgq_sep = container_of(fid, struct fi_bgq_sep, ep_fid); - - ret = fi_bgq_fid_check(fid, FI_CLASS_SEP, "scalable endpoint"); - if (ret) - return ret; - - ret = fi_bgq_ref_dec(&bgq_sep->av->ref_cnt, "address vector"); - if (ret) - return ret; - - ret = fi_bgq_ref_finalize(&bgq_sep->ref_cnt, "scalable endpoint"); - if (ret) - return ret; - - ret = fi_bgq_ref_dec(&bgq_sep->domain->ref_cnt, "domain"); - if (ret) - return ret; - - free(bgq_sep->info->ep_attr); - free(bgq_sep->info); - void * memptr = bgq_sep->memptr; - free(memptr); - - return 0; -} - -static int fi_bgq_control_sep(fid_t fid, int command, void *arg) -{ - struct fid_ep *ep __attribute__ ((unused)); - ep = container_of(fid, struct fid_ep, fid); - return 0; -} - -static int fi_bgq_tx_ctx(struct fid_ep *sep, int index, - struct fi_tx_attr *attr, struct fid_ep **tx_ep, - void *context) -{ - int ret; - struct fi_info info = {0}; - struct fi_tx_attr tx_attr = {0}; - struct fi_ep_attr ep_attr = {0}; - struct fi_domain_attr dom_attr = {0}; - struct fi_fabric_attr fab_attr = {0}; - struct fi_bgq_sep *bgq_sep; - struct fi_bgq_ep *bgq_tx_ep; - - if (!sep || !attr || !tx_ep) { - errno = FI_EINVAL; - return -errno; - } - - bgq_sep = container_of(sep, struct fi_bgq_sep, ep_fid); - - uint64_t caps = attr->caps; /* TODO - "By default, a transmit context inherits the properties of its associated endpoint. However, applications may request context specific attributes through the attr parameter." 
*/ - - if ((caps & FI_MSG || caps & FI_TAGGED) && (caps & FI_RECV)) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN, - "FI_MSG|FI_TAGGED with FI_RECV capability specified for a TX context\n"); - caps &= ~FI_RECV; - } - - if ((caps & FI_RMA || caps & FI_ATOMIC) && (caps & FI_REMOTE_READ || caps & FI_REMOTE_WRITE)) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN, - "FI_RMA|FI_ATOMIC with FI_REMOTE_READ|FI_REMOTE_WRITE capability specified for a TX context\n"); - caps &= ~FI_REMOTE_READ; - caps &= ~FI_REMOTE_WRITE; - } - - if (caps & FI_MSG || caps & FI_TAGGED) { - caps |= FI_SEND; - } - - if (caps & FI_RMA || caps & FI_ATOMIC) { - caps |= FI_READ; - caps |= FI_WRITE; - } - - if (ofi_recv_allowed(caps) || ofi_rma_target_allowed(caps)) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN, - "RX capabilities specified for TX context\n"); - errno = FI_EINVAL; - return -errno; - } - - if (!ofi_send_allowed(caps) && !ofi_rma_initiate_allowed(caps)) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN, - "TX capabilities not specified for TX context\n"); - errno = FI_EINVAL; - return -errno; - } - - if (bgq_sep->domain->tx.count >= fi_bgq_domain_get_tx_max(bgq_sep->domain)) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN, - "TX ctx count exceeded (max %lu, created %lu)\n", - fi_bgq_domain_get_tx_max(bgq_sep->domain), bgq_sep->domain->tx.count); - errno = FI_EINVAL; - return -errno; - } - - info.caps = caps; - info.mode = attr->mode; - - info.tx_attr = &tx_attr; - memcpy(info.tx_attr, attr, sizeof(*info.tx_attr)); - - info.ep_attr = &ep_attr; - memcpy(info.ep_attr, bgq_sep->info->ep_attr, sizeof(*info.ep_attr)); - - info.domain_attr = &dom_attr; - memcpy(info.domain_attr, bgq_sep->info->domain_attr, sizeof(*info.domain_attr)); - - info.fabric_attr = &fab_attr; - memcpy(info.fabric_attr, bgq_sep->info->fabric_attr, sizeof(*info.fabric_attr)); -#ifdef FI_BGQ_TRACE - fprintf(stderr,"fi_bgq_tx_ctx calling fi_bgq_endpoint_rx_tx with tx 
index %d\n",index); -#endif - - ret = fi_bgq_endpoint_rx_tx((struct fid_domain *)bgq_sep->domain, - &info, tx_ep, context, -1, index); - if (ret) { - goto err; - } - - bgq_tx_ep = container_of(*tx_ep, struct fi_bgq_ep, ep_fid); - bgq_tx_ep->ep_fid.fid.fclass = FI_CLASS_TX_CTX; - - bgq_tx_ep->av = bgq_sep->av; - fi_bgq_ref_inc(&bgq_tx_ep->av->ref_cnt, "address vector"); - - bgq_tx_ep->sep = container_of(sep, struct fi_bgq_sep, ep_fid); - - ++ bgq_sep->domain->tx.count; - - fi_bgq_ref_inc(&bgq_sep->ref_cnt, "scalable endpoint"); - - attr->caps = caps; - - return 0; - -err: - return -errno; -} - -static int fi_bgq_rx_ctx(struct fid_ep *sep, int index, - struct fi_rx_attr *attr, struct fid_ep **rx_ep, - void *context) -{ - int ret; - struct fi_info info = {0}; - struct fi_bgq_sep *bgq_sep; - struct fi_bgq_ep *bgq_rx_ep; - - if (!sep || !attr || !rx_ep) { - errno = FI_EINVAL; - return -errno; - } - - bgq_sep = container_of(sep, struct fi_bgq_sep, ep_fid); - - uint64_t caps = attr->caps; /* TODO - "By default, a receive context inherits the properties of its associated endpoint. However, applications may request context specific attributes through the attr parameter." 
*/ - - if ((caps & FI_MSG || caps & FI_TAGGED) && (caps & FI_SEND)) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN, - "FI_MSG|FI_TAGGED with FI_SEND capability specified for a RX context\n"); - caps &= ~FI_SEND; - } - - if ((caps & FI_RMA || caps & FI_ATOMIC) && (caps & FI_READ || caps & FI_WRITE)) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN, - "FI_RMA|FI_ATOMIC with FI_READ|FI_WRITE capability specified for a RX context\n"); - caps &= ~FI_READ; - caps &= ~FI_WRITE; - } - - if (caps & FI_MSG || caps & FI_TAGGED) { - caps |= FI_RECV; - } - - if (caps & FI_RMA || caps & FI_ATOMIC) { - caps |= FI_REMOTE_READ; - caps |= FI_REMOTE_WRITE; - } - - if (ofi_send_allowed(caps) || ofi_rma_initiate_allowed(caps)) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN, - "TX capabilities specified for RX context\n"); - errno = FI_EINVAL; - return -errno; - } - - if (!ofi_recv_allowed(caps) && !ofi_rma_target_allowed(caps)) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN, - "RX capabilities not specified for RX context\n"); - errno = FI_EINVAL; - return -errno; - } - - if (bgq_sep->domain->rx.count >= fi_bgq_domain_get_rx_max(bgq_sep->domain)) { - FI_LOG(fi_bgq_global.prov, FI_LOG_DEBUG, FI_LOG_DOMAIN, - "RX ctx count exceeded (max %lu, created %lu)\n", - fi_bgq_domain_get_rx_max(bgq_sep->domain), bgq_sep->domain->rx.count); - errno = FI_EINVAL; - return -errno; - } - - info.caps = caps; - info.mode = attr->mode; - - info.rx_attr = calloc(1, sizeof(*info.rx_attr)); - if (!info.rx_attr) { - errno = FI_ENOMEM; - goto err; - } - - info.rx_attr->caps = caps; - info.rx_attr->mode = attr->mode; - info.rx_attr->op_flags = attr->op_flags; - info.rx_attr->msg_order = attr->msg_order; - info.rx_attr->total_buffered_recv = attr->total_buffered_recv; - info.rx_attr->iov_limit = attr->iov_limit; - - info.ep_attr = calloc(1, sizeof(*info.ep_attr)); - if (!info.ep_attr) { - errno = FI_ENOMEM; - goto err; - } - memcpy(info.ep_attr, 
bgq_sep->info->ep_attr, - sizeof(*info.ep_attr)); - - info.domain_attr = calloc(1, sizeof(*info.domain_attr)); - if (!info.domain_attr) { - errno = FI_ENOMEM; - goto err; - } - memcpy(info.domain_attr, bgq_sep->info->domain_attr, - sizeof(*info.domain_attr)); - - info.fabric_attr = calloc(1, sizeof(*info.fabric_attr)); - if (!info.fabric_attr) { - errno = FI_ENOMEM; - goto err; - } - memcpy(info.fabric_attr, bgq_sep->info->fabric_attr, - sizeof(*info.fabric_attr)); - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"fi_bgq_tx_ctx calling fi_bgq_endpoint_rx_tx with rx index %d\n",index); -#endif - ret = fi_bgq_endpoint_rx_tx(&bgq_sep->domain->domain_fid, &info, - rx_ep, context, index, -1); - if (ret) { - goto err; - } - - bgq_rx_ep = container_of(*rx_ep, struct fi_bgq_ep, ep_fid); - bgq_rx_ep->ep_fid.fid.fclass = FI_CLASS_RX_CTX; - - bgq_rx_ep->sep = container_of(sep, struct fi_bgq_sep, ep_fid); - - bgq_rx_ep->av = bgq_sep->av; - fi_bgq_ref_inc(&bgq_rx_ep->av->ref_cnt, "address vector"); - - ++ bgq_sep->domain->rx.count; - - fi_bgq_ref_inc(&bgq_sep->ref_cnt, "scalable endpoint"); - - return 0; - -err: - if (info.fabric_attr) - free(info.fabric_attr); - if (info.domain_attr) - free(info.domain_attr); - if (info.ep_attr) - free(info.ep_attr); - if (info.tx_attr) - free(info.tx_attr); - return -errno; -} - -static int fi_bgq_bind_sep(struct fid *fid, struct fid *bfid, - uint64_t flags) -{ - int ret = 0; - struct fi_bgq_sep *bgq_sep = container_of(fid, struct fi_bgq_sep, ep_fid); - struct fi_bgq_av *bgq_av; - - if (!fid || !bfid) { - errno = FI_EINVAL; - return -errno; - } - - switch (bfid->fclass) { - case FI_CLASS_AV: - bgq_av = container_of(bfid, struct fi_bgq_av, av_fid); - fi_bgq_ref_inc(&bgq_av->ref_cnt, "address vector"); - bgq_sep->av = bgq_av; - break; - default: - errno = FI_ENOSYS; - return -errno; - } - - return ret; -} - -static struct fi_ops fi_bgq_fi_ops = { - .size = sizeof(struct fi_ops), - .close = fi_bgq_close_sep, - .bind = fi_bgq_bind_sep, - .control = 
fi_bgq_control_sep, - .ops_open = fi_no_ops_open -}; - -static struct fi_ops_ep fi_bgq_sep_ops = { - .size = sizeof(struct fi_ops_ep), - .cancel = fi_no_cancel, - .getopt = fi_no_getopt, - .setopt = fi_no_setopt, - .tx_ctx = fi_bgq_tx_ctx, - .rx_ctx = fi_bgq_rx_ctx, - .rx_size_left = fi_no_rx_size_left, - .tx_size_left = fi_no_tx_size_left -}; - -int fi_bgq_scalable_ep (struct fid_domain *domain, - struct fi_info *info, - struct fid_ep **sep, - void *context) -{ - struct fi_bgq_sep *bgq_sep = NULL; - - if (!info || !domain) { - errno = FI_EINVAL; - goto err; - } - - void * memptr = NULL; - memptr = malloc(sizeof(struct fi_bgq_sep)+L2_CACHE_LINE_SIZE); - if (!memptr) { - errno = FI_ENOMEM; - goto err; - } - memset(memptr, 0, sizeof(struct fi_bgq_sep)+L2_CACHE_LINE_SIZE); - bgq_sep = (struct fi_bgq_sep *)(((uintptr_t)memptr+L2_CACHE_LINE_SIZE) & ~(L2_CACHE_LINE_SIZE-1)); - bgq_sep->memptr = memptr; - memptr = NULL; - - bgq_sep->domain = (struct fi_bgq_domain *) domain; - - bgq_sep->ep_fid.fid.fclass = FI_CLASS_SEP; - bgq_sep->ep_fid.fid.context = context; - bgq_sep->ep_fid.fid.ops = &fi_bgq_fi_ops; - bgq_sep->ep_fid.ops = &fi_bgq_sep_ops; - - int ret = fi_bgq_init_cm_ops((struct fid_ep *)&(bgq_sep->ep_fid), info); - if (ret) - goto err; - - bgq_sep->info = calloc(1, sizeof (struct fi_info)); - if (!bgq_sep->info) { - errno = FI_ENOMEM; - goto err; - } - memcpy(bgq_sep->info, info, sizeof (struct fi_info)); - bgq_sep->info->next = NULL; - bgq_sep->info->ep_attr = calloc(1, sizeof(struct fi_ep_attr)); - if (!bgq_sep->info->ep_attr) { - errno = FI_ENOMEM; - goto err; - } - memcpy(bgq_sep->info->ep_attr, info->ep_attr, sizeof(struct fi_ep_attr)); - -#ifdef FI_BGQ_TRACE - fprintf(stderr,"fi_bgq_scalable_ep - called with %ld tx %ld rx\n",bgq_sep->info->ep_attr->tx_ctx_cnt,bgq_sep->info->ep_attr->rx_ctx_cnt); -#endif - /* - * fi_endpoint.3 - * - * "tx_ctx_cnt - Transmit Context Count - * Number of transmit contexts to associate with the endpoint. 
If - * not specified (0), 1 context will be assigned if the endpoint - * supports outbound transfers." - */ - if (0 == bgq_sep->info->ep_attr->tx_ctx_cnt) { - bgq_sep->info->ep_attr->tx_ctx_cnt = 1; - } - - /* - * fi_endpoint.3 - * - * "rx_ctx_cnt - Receive Context Count - * Number of receive contexts to associate with the endpoint. If - * not specified, 1 context will be assigned if the endpoint - * supports inbound transfers." - */ - if (0 == bgq_sep->info->ep_attr->rx_ctx_cnt) { - bgq_sep->info->ep_attr->rx_ctx_cnt = 1; - } - - fi_bgq_ref_init(&bgq_sep->domain->fabric->node, &bgq_sep->ref_cnt, "scalable endpoint"); - fi_bgq_ref_inc(&bgq_sep->domain->ref_cnt, "domain"); - - *sep = &bgq_sep->ep_fid; - - return 0; -err: - if (bgq_sep) { - if (bgq_sep->info) { - if (bgq_sep->info->ep_attr) - free(bgq_sep->info->ep_attr); - free(bgq_sep->info); - } - memptr = bgq_sep->memptr; - free(memptr); - } - return -errno; -} diff --git a/prov/bgq/src/fi_bgq_spi.c b/prov/bgq/src/fi_bgq_spi.c deleted file mode 100644 index 0c2feb5e397..00000000000 --- a/prov/bgq/src/fi_bgq_spi.c +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "rdma/bgq/fi_bgq_spi.h" - -#include - -/* internal function */ -int fi_bgq_spi_injfifo_subgrp_init (struct fi_bgq_spi_injfifo *f, - MUSPI_InjFifoSubGroup_t *subgrp, - unsigned num_fifos_to_allocate, - const size_t injfifo_size, - const unsigned immediate_payload_sizeof, - const unsigned is_remote_get, - const int subgrp_id) { - - assert(num_fifos_to_allocate > 0); - assert(is_remote_get == 0 || is_remote_get == 1); - - uint32_t available; - uint32_t fifo_ids[BGQ_MU_NUM_INJ_FIFOS_PER_SUBGROUP]; - Kernel_MemoryRegion_t mregion; - int rc = 0; - - memset((void*)f, 0x00, sizeof(*f)); - f->node_scoped_fifo_id = ~(0ull); - - Kernel_InjFifoAttributes_t attr; - memset(&attr, 0x00, sizeof(attr)); - attr.RemoteGet = is_remote_get; - attr.System = 0; - - rc = Kernel_QueryInjFifos(subgrp_id, &available, fifo_ids); - if (rc) { - goto err; - } - if (!available) { - return 0; - } - - uint32_t subgrp_fifo_id = fifo_ids[0]; - - if ((rc = Kernel_AllocateInjFifos(subgrp_id, - subgrp, - 1, - &subgrp_fifo_id, - &attr)) != 0) { - goto err; - } - - size_t bytes = injfifo_size * sizeof(MUHWI_Descriptor_t); - if (posix_memalign((void**) &f->memory, 64, bytes + 64 /* force alignment */)) { - errno = ENOMEM; - goto err; - } - - /* FORCE a 64-byte alignment (?!?!) 
*/ - void * injfifo_memory = (void *)(((uintptr_t)f->memory+64) & (~63)); - - if (Kernel_CreateMemoryRegion(&mregion, injfifo_memory, bytes)) { - goto err; - } - - if (Kernel_InjFifoInit(subgrp, subgrp_fifo_id, &mregion, - (uint64_t)injfifo_memory - (uint64_t)mregion.BaseVa, - bytes-1)) { - goto err; - } - - if (Kernel_InjFifoActivate(subgrp, 1, &subgrp_fifo_id, - KERNEL_INJ_FIFO_ACTIVATE)) { - goto err; - } - - f->muspi_injfifo = MUSPI_IdToInjFifo(subgrp_fifo_id, subgrp); - f->sw_freeSpace = &f->muspi_injfifo->freeSpace; - f->sw_tailva = (uint64_t*)&f->muspi_injfifo->_fifo.va_tail; - f->hw_injfifo = f->muspi_injfifo->hw_injfifo; - f->node_scoped_fifo_id = subgrp_id * BGQ_MU_NUM_INJ_FIFO_SUBGROUPS + subgrp_fifo_id; - - - if (!is_remote_get && immediate_payload_sizeof > 0) { - - f->immediate_payload_sizeof = immediate_payload_sizeof; - - bytes = injfifo_size * immediate_payload_sizeof; - if (posix_memalign((void**) &f->immediate_payload_memory, 64, bytes + 64 /* force alignment */)) { - errno = ENOMEM; - goto err; - } - - /* FORCE a 64-byte alignment (?!?!) 
*/ - f->immediate_payload_base_vaddr = (uintptr_t)f->immediate_payload_memory & ~63ull; - - Kernel_MemoryRegion_t cnk_mr; - uint32_t cnk_rc = 0; - cnk_rc = Kernel_CreateMemoryRegion(&cnk_mr, (void*)f->immediate_payload_base_vaddr, bytes); - if (cnk_rc) { - goto err; - } - - f->immediate_payload_base_paddr = - (uint64_t)cnk_mr.BasePa + - ((uint64_t)f->immediate_payload_base_vaddr - (uint64_t)cnk_mr.BaseVa); - - f->va_start = (uintptr_t)f->muspi_injfifo->_fifo.va_start; - - } - - return 1; -err: - - if (f->memory) free(f->memory); - if (f->immediate_payload_memory) free(f->immediate_payload_memory); - memset((void*)f, 0x00, sizeof(*f)); - f->node_scoped_fifo_id = ~(0ull); - return 0; -} - -int fi_bgq_spi_injfifo_init (struct fi_bgq_spi_injfifo *f, - MUSPI_InjFifoSubGroup_t *injfifo_subgroup, - unsigned num_fifos_to_allocate, - const size_t injfifo_size, - const unsigned immediate_payload_sizeof, - const unsigned is_remote_get, - const unsigned is_top_down) { - - assert(num_fifos_to_allocate > 0); - assert(is_top_down == 0 || is_top_down == 1); - - int subgrp_id; - - unsigned n, total_fifos_allocated = 0; - if (is_top_down) { - for (subgrp_id = BGQ_MU_NUM_FIFO_SUBGROUPS_PER_NODE-BGQ_MU_NUM_FIFO_SUBGROUPS-1; (subgrp_id >= 0) && (num_fifos_to_allocate > 0); --subgrp_id) { - n = fi_bgq_spi_injfifo_subgrp_init(f, - injfifo_subgroup, - num_fifos_to_allocate, - injfifo_size, - immediate_payload_sizeof, - is_remote_get, - subgrp_id); - num_fifos_to_allocate -= n; - total_fifos_allocated += n; - } - } else { - for (subgrp_id = 0; (subgrp_id < (BGQ_MU_NUM_FIFO_SUBGROUPS_PER_NODE-BGQ_MU_NUM_FIFO_SUBGROUPS)) && (num_fifos_to_allocate > 0); ++subgrp_id) { - n = fi_bgq_spi_injfifo_subgrp_init(f, - injfifo_subgroup, - num_fifos_to_allocate, - injfifo_size, - immediate_payload_sizeof, - is_remote_get, - subgrp_id); - num_fifos_to_allocate -= n; - total_fifos_allocated += n; - } - } - - return total_fifos_allocated; -} - -void fi_bgq_spi_injfifo_clone (struct fi_bgq_spi_injfifo *dst, 
struct fi_bgq_spi_injfifo *src) { - - assert(dst); - assert(src); - - /* TODO - set an "is clone" variable to remeber not to free this */ - *dst = *src; -} - -int fi_bgq_spi_injfifo_fini (struct fi_bgq_spi_injfifo *f) { - - /* TODO ..... */ - - if (f->memory) free(f->memory); - if (f->immediate_payload_memory) free(f->immediate_payload_memory); - memset((void*)f, 0x00, sizeof(*f)); - f->node_scoped_fifo_id = ~(0ull); - - return 0; -} diff --git a/prov/bgq/src/fi_bgq_tagged.c b/prov/bgq/src/fi_bgq_tagged.c deleted file mode 100644 index 84f9583e0c0..00000000000 --- a/prov/bgq/src/fi_bgq_tagged.c +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "rdma/bgq/fi_bgq.h" - -#include - -ssize_t fi_bgq_trecvmsg(struct fid_ep *ep, - const struct fi_msg_tagged *msg, uint64_t flags) -{ - struct fi_bgq_ep * bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - const enum fi_threading threading = bgq_ep->domain->threading; - const int lock_required = - (threading == FI_THREAD_FID) || - (threading == FI_THREAD_UNSPEC) || - (threading == FI_THREAD_SAFE); - - return fi_bgq_trecvmsg_generic(ep, msg, flags, lock_required); -} - -ssize_t fi_bgq_tsendmsg(struct fid_ep *ep, - const struct fi_msg_tagged *msg, uint64_t flags) -{ - const size_t niov = msg->iov_count; - - if (niov > 32) { - - /* --------------------------------------------------------- - * a single torus packet payload can only transfer 32 - * 'struct fi_bgq_mu_iov' elements - this is the current - * limit for non-contiguous rendezvous operations - * - * TODO - support >32 iov elements? 
- * --------------------------------------------------------- */ - return -FI_EINVAL; - - } else { - - struct fi_bgq_ep * bgq_ep = container_of(ep, struct fi_bgq_ep, ep_fid); - const enum fi_threading threading = bgq_ep->threading; - - return fi_bgq_send_generic_flags(ep, msg->msg_iov, niov, - msg->desc, msg->addr, msg->tag, msg->context, msg->data, - (threading != FI_THREAD_ENDPOINT && threading != FI_THREAD_DOMAIN), - 0 /* is_msg */, - 0 /* is_contiguous */, - 1 /* override flags */, - flags); - } -} - -ssize_t fi_bgq_tsenddata(struct fid_ep *ep, const void *buf, size_t len, - void *desc, uint64_t data, fi_addr_t dest_addr, uint64_t tag, - void *context) -{ - errno = FI_ENOSYS; - return -errno; -} - -/* "FI_BGQ_TAGGED_SPECIALIZED_FUNC(0)" is already declared via FABRIC_DIRECT */ -FI_BGQ_TAGGED_SPECIALIZED_FUNC(1) - -#define FI_BGQ_TAGGED_OPS_STRUCT_NAME(LOCK) \ - fi_bgq_ops_tagged_ ## LOCK - -#define FI_BGQ_TAGGED_OPS_STRUCT(LOCK) \ -static struct fi_ops_tagged \ - FI_BGQ_TAGGED_OPS_STRUCT_NAME(LOCK) = { \ - .size = sizeof(struct fi_ops_tagged), \ - .recv = \ - FI_BGQ_TAGGED_SPECIALIZED_FUNC_NAME(trecv, LOCK), \ - .recvv = fi_no_tagged_recvv, \ - .recvmsg = \ - FI_BGQ_TAGGED_SPECIALIZED_FUNC_NAME(trecvmsg, LOCK), \ - .send = \ - FI_BGQ_TAGGED_SPECIALIZED_FUNC_NAME(tsend, LOCK), \ - .sendv = fi_no_tagged_sendv, \ - .sendmsg = fi_bgq_tsendmsg, \ - .inject = \ - FI_BGQ_TAGGED_SPECIALIZED_FUNC_NAME(tinject, LOCK), \ - .senddata = \ - FI_BGQ_TAGGED_SPECIALIZED_FUNC_NAME(tsenddata, LOCK), \ - .injectdata = \ - FI_BGQ_TAGGED_SPECIALIZED_FUNC_NAME(tinjectdata, LOCK), \ -} - -FI_BGQ_TAGGED_OPS_STRUCT(0); -FI_BGQ_TAGGED_OPS_STRUCT(1); - -ssize_t fi_bgq_tsearch(struct fid_ep *ep, uint64_t *tag, - uint64_t ignore, uint64_t flags, - fi_addr_t *src_addr, size_t *len, void *context) -{ - errno = FI_ENOSYS; - return -errno; -} - -static struct fi_ops_tagged fi_bgq_no_tagged_ops = { - .size = sizeof(struct fi_ops_tagged), - .recv = fi_no_tagged_recv, - .recvv = 
fi_no_tagged_recvv, - .recvmsg = fi_no_tagged_recvmsg, - .send = fi_no_tagged_send, - .sendv = fi_no_tagged_sendv, - .sendmsg = fi_no_tagged_sendmsg, - .inject = fi_no_tagged_inject, - .senddata = fi_no_tagged_senddata, - .injectdata = fi_no_tagged_injectdata -}; - -int fi_bgq_init_tagged_ops(struct fi_bgq_ep *bgq_ep, struct fi_info *info) -{ - if (!info || !bgq_ep) - goto err; - - if (info->caps & FI_TAGGED || - (info->tx_attr && - (info->tx_attr->caps & FI_TAGGED))) { - } - - return 0; - -err: - errno = FI_EINVAL; - return -errno; -} - -int fi_bgq_enable_tagged_ops(struct fi_bgq_ep *bgq_ep) -{ - if (!bgq_ep || !bgq_ep->domain) - goto err; - - if (!(bgq_ep->tx.caps & FI_TAGGED)) { - /* Tagged ops not enabled on this endpoint */ - bgq_ep->ep_fid.tagged = - &fi_bgq_no_tagged_ops; - return 0; - } - - switch (bgq_ep->domain->threading) { - case FI_THREAD_ENDPOINT: - case FI_THREAD_DOMAIN: - case FI_THREAD_COMPLETION: - bgq_ep->ep_fid.tagged = &FI_BGQ_TAGGED_OPS_STRUCT_NAME(0); - break; - case FI_THREAD_FID: - case FI_THREAD_UNSPEC: - case FI_THREAD_SAFE: - bgq_ep->ep_fid.tagged = &FI_BGQ_TAGGED_OPS_STRUCT_NAME(1); - break; - default: - bgq_ep->ep_fid.tagged = &fi_bgq_no_tagged_ops; - FI_WARN(fi_bgq_global.prov, FI_LOG_EP_DATA, - "Tagged ops not enabled on EP\n"); - break; - } - - return 0; -err: - errno = FI_EINVAL; - return -errno; -} - -int fi_bgq_finalize_tagged_ops(struct fi_bgq_ep *bgq_ep) -{ - if (!bgq_ep) { - return 0; - } - - return 0; -} diff --git a/prov/bgq/src/test/Makefile.include b/prov/bgq/src/test/Makefile.include deleted file mode 100644 index 21745871a7b..00000000000 --- a/prov/bgq/src/test/Makefile.include +++ /dev/null @@ -1,139 +0,0 @@ -# -# Copyright (C) 2016 by Argonne National Laboratory. -# -# This software is available to you under a choice of one of two -# licenses. 
You may choose to be licensed under the terms of the GNU -# General Public License (GPL) Version 2, available from the file -# COPYING in the main directory of this source tree, or the -# BSD license below: -# -# Redistribution and use in source and binary forms, with or -# without modification, are permitted provided that the following -# conditions are met: -# -# - Redistributions of source code must retain the above -# copyright notice, this list of conditions and the following -# disclaimer. -# -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials -# provided with the distribution. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
-# - -testdir = @bindir@/test -test_PROGRAMS = - -#bin_PROGRAMS += test_l2alloc_simple -#test_l2alloc_simple_SOURCES = prov/bgq/src/test/l2alloc_simple.c \ -# prov/bgq/src/l2atomic.c -#test_l2alloc_simple_CPPFLAGS = $(AM_CPPFLAGS) $(bgq_CPPFLAGS) -#test_l2alloc_simple_LDFLAGS = \ -# -module -avoid-version -export-dynamic $(bgq_LDFLAGS) - -#bin_PROGRAMS += test_l2alloc_single -#test_l2alloc_single_SOURCES = prov/bgq/src/test/l2alloc_single.c \ -# prov/bgq/src/l2atomic.c -#test_l2alloc_single_CPPFLAGS = $(AM_CPPFLAGS) $(bgq_CPPFLAGS) -#test_l2alloc_single_LDFLAGS = \ -# -module -avoid-version -export-dynamic $(bgq_LDFLAGS) - -#bin_PROGRAMS += test_l2alloc_func -#test_l2alloc_func_SOURCES = prov/bgq/src/test/l2alloc_func.c \ -# prov/bgq/src/l2atomic.c -#test_l2alloc_func_CPPFLAGS = $(AM_CPPFLAGS) $(bgq_CPPFLAGS) -#test_l2alloc_func_LDFLAGS = \ -# -module -avoid-version -export-dynamic $(bgq_LDFLAGS) - -#bin_PROGRAMS += test_l2lock_init -#test_l2lock_init_SOURCES = prov/bgq/src/test/l2lock_init.c \ -# prov/bgq/src/l2atomic.c -#test_l2lock_init_CPPFLAGS = $(AM_CPPFLAGS) $(bgq_CPPFLAGS) -#test_l2lock_init_LDFLAGS = \ -# -module -avoid-version -export-dynamic $(bgq_LDFLAGS) - -#bin_PROGRAMS += test_l2lock_func -#test_l2lock_func_SOURCES = prov/bgq/src/test/l2lock_func.c \ -# prov/bgq/src/l2atomic.c -#test_l2lock_func_CPPFLAGS = $(AM_CPPFLAGS) $(bgq_CPPFLAGS) -#test_l2lock_func_LDFLAGS = \ -# -module -avoid-version -export-dynamic $(bgq_LDFLAGS) - -#bin_PROGRAMS += test_l2barrier_func -#test_l2barrier_func_SOURCES = prov/bgq/src/test/l2barrier_func.c \ -# prov/bgq/src/l2atomic.c -#test_l2barrier_func_CPPFLAGS = $(AM_CPPFLAGS) $(bgq_CPPFLAGS) -#test_l2barrier_func_LDFLAGS = \ -# -module -avoid-version -export-dynamic $(bgq_LDFLAGS) - -#bin_PROGRAMS += test_cq_mfifo_init -#test_cq_mfifo_init_SOURCES = prov/bgq/src/test/cq_mfifo_init.c \ -# prov/bgq/src/l2atomic.c \ -# prov/bgq/src/fi_bgq_memfifo.c -#test_cq_mfifo_init_CPPFLAGS = $(AM_CPPFLAGS) $(bgq_CPPFLAGS) 
-#test_cq_mfifo_init_LDFLAGS = \ -# -module -avoid-version -export-dynamic $(bgq_LDFLAGS) - -#bin_PROGRAMS += test_cq_mfifo_overflow -#test_cq_mfifo_overflow_SOURCES = prov/bgq/src/test/cq_mfifo_overflow.c \ -# prov/bgq/src/l2atomic.c \ -# prov/bgq/src/fi_bgq_memfifo.c -#test_cq_mfifo_overflow_CPPFLAGS = $(AM_CPPFLAGS) $(bgq_CPPFLAGS) -#test_cq_mfifo_overflow_LDFLAGS = \ -# -module -avoid-version -export-dynamic $(bgq_LDFLAGS) - -#bin_PROGRAMS += test_cq_mfifo_multithreaded -#test_cq_mfifo_multithreaded_SOURCES = prov/bgq/src/test/cq_mfifo_multithreaded.c \ -# prov/bgq/src/l2atomic.c \ -# prov/bgq/src/fi_bgq_memfifo.c -#test_cq_mfifo_multithreaded_CPPFLAGS = $(AM_CPPFLAGS) $(bgq_CPPFLAGS) -#test_cq_mfifo_multithreaded_LDFLAGS = \ -# -module -avoid-version -export-dynamic $(bgq_LDFLAGS) - -#bin_PROGRAMS += test_cq_mfifo_multithreaded_perf -#test_cq_mfifo_multithreaded_perf_SOURCES = prov/bgq/src/test/cq_mfifo_multithreaded_perf.c \ -# prov/bgq/src/l2atomic.c \ -# prov/bgq/src/fi_bgq_memfifo.c -#test_cq_mfifo_multithreaded_perf_CPPFLAGS = $(AM_CPPFLAGS) $(bgq_CPPFLAGS) -#test_cq_mfifo_multithreaded_perf_LDFLAGS = \ -# -module -avoid-version -export-dynamic $(bgq_LDFLAGS) - - - -test_PROGRAMS += l2atomic_fifo_perf -l2atomic_fifo_perf_SOURCES = prov/bgq/src/test/l2atomic_fifo_perf.c -l2atomic_fifo_perf_CPPFLAGS = $(AM_CPPFLAGS) $(bgq_CPPFLAGS) -l2atomic_fifo_perf_LDFLAGS = \ - -module -avoid-version -export-dynamic $(bgq_LDFLAGS) - -test_PROGRAMS += spi_pingpong -spi_pp_files = prov/bgq/src/test/spi_pingpong.c \ - prov/bgq/src/fi_bgq_spi.c -spi_pp_files_nodist = prov/bgq/external/memory_impl.c - -spi_pingpong_SOURCES = $(spi_pp_files) -nodist_spi_pingpong_SOURCES = $(spi_pp_files_nodist) -spi_pingpong_CPPFLAGS = $(AM_CPPFLAGS) $(bgq_CPPFLAGS) -spi_pingpong_LDFLAGS = \ - -module -avoid-version -export-dynamic $(bgq_LDFLAGS) - - -#bin_PROGRAMS += test_cq_agent_init -#test_cq_agent_init_SOURCES = prov/bgq/src/test/cq_agent_init.c \ -# prov/bgq/src/l2atomic.c \ -# 
prov/bgq/src/cq_agent_client.c \ -# prov/bgq/src/agent/cq_agent.c \ -# prov/bgq/src/fi_bgq_memfifo.c -#test_cq_agent_init_CPPFLAGS = $(AM_CPPFLAGS) $(bgq_CPPFLAGS) -#test_cq_agent_init_LDFLAGS = \ -# -module -avoid-version -export-dynamic $(bgq_LDFLAGS) -#test_cq_agent_init_LDADD = $(bgq_LIBS) diff --git a/prov/bgq/src/test/cq_agent_init.c b/prov/bgq/src/test/cq_agent_init.c deleted file mode 100644 index 0e8c745ae53..00000000000 --- a/prov/bgq/src/test/cq_agent_init.c +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include - -#include "fi_bgq_spi.h" - - -#include "cq_agent.h" -#include "l2atomic.h" -#include "fi_bgq_memfifo.h" - - -int cq_agent_main_test (struct l2atomic_barrier * barrier); - -void test_init_fn (void *buffer, uintptr_t cookie) { - - uint64_t *ptr = (uint64_t *) buffer; - *ptr = cookie; -} - -int main (int argc, char *argv[]) { - - struct l2atomic l2atomic; - - int rc, lineno; - rc = l2atomic_init(&l2atomic); lineno = __LINE__; - if (rc) goto err; - - uint32_t ppn = Kernel_ProcessCount(); lineno = __LINE__; - if (ppn==1) { - /* check for ofi agent environment variable */ - char * envvar = NULL; - envvar = getenv("BG_APPAGENT"); lineno = __LINE__; - if (!envvar) { fprintf(stderr, "Required environment variable 'BG_APPAGENT' is not set\n"); goto err; } - } - - struct l2atomic_barrier barrier; - rc = l2atomic_barrier_alloc_generic(&l2atomic, &barrier, 2, "agent_barrier"); lineno = __LINE__; - if (rc) goto err; - - uint32_t tcoord = Kernel_MyTcoord(); - if (tcoord==1) { - - //struct cq_agent_internal internal; - rc = cq_agent_main_test(&barrier); lineno = __LINE__; - if (rc) goto err; - - - } else if (tcoord==0) { - struct cq_agent_client client; - rc = cq_agent_client_init(&client, &l2atomic); lineno = __LINE__; - if (rc) goto err; - - union fi_bgq_addr self; - fi_bgq_create_addr_self_cx(&self.fi, 0); - - struct memfifo mfifo; - MUHWI_Descriptor_t model; - rc = cq_agent_client_register(&client, &l2atomic, &self, &mfifo, 8192, &model, 1); lineno = __LINE__; - if (rc) goto err; - - struct cq_agent_client_test_mu test; - rc = cq_agent_client_test_mu_setup(&test); lineno = __LINE__; - if (rc) goto err; - - uint16_t entry_id = 1234; - rc = cq_agent_client_test_mu_inject(&test, &model, entry_id, 1); lineno = __LINE__; - if (rc) goto err; - - if (ppn>1) l2atomic_barrier(&barrier); - - /* spin until something is received from the mfifo */ - uint16_t id = (uint16_t)-1; - while (0 != memfifo_consume16(&mfifo.consumer, &id)); - - if (ppn>1) 
l2atomic_barrier(&barrier); - fprintf (stdout, "TEST SUCCESSFUL\n"); - } - - return 0; -err: - fprintf(stderr, "%s : Error at line %d (rc=%d)\n", __FILE__, lineno, rc); - abort(); -} diff --git a/prov/bgq/src/test/cq_mfifo_init.c b/prov/bgq/src/test/cq_mfifo_init.c deleted file mode 100644 index ba2a0356428..00000000000 --- a/prov/bgq/src/test/cq_mfifo_init.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "fi_bgq_spi.h" - -#include "cq_agent.h" -#include "fi_bgq_memfifo.h" - -#define ITERATIONS 10 - -void test_init_fn (void * buffer, uintptr_t cookie) { - - uint64_t * ptr = (uint64_t *) buffer; - *ptr = cookie; -} - -int main (int argc, char *argv[]) { - - struct l2atomic l2atomic; - memset((void*)&l2atomic, 0, sizeof(l2atomic)); - - int rc, lineno; - rc = l2atomic_init(&l2atomic); lineno = __LINE__; - if (rc) goto err; - - struct memfifo mfifo; - rc = memfifo_initialize(&l2atomic, "some name", &mfifo, 0); lineno = __LINE__; - if (rc) goto err; - - unsigned i=0; - for (i=0; i - -#include "fi_bgq_spi.h" - -#include "cq_agent.h" -#include "fi_bgq_memfifo.h" - -#define N_PRODUCERS 16 -#define N_PACKETS 10000 - -struct memfifo mfifo; -struct memfifo_producer producer[N_PRODUCERS]; -pthread_t info[N_PRODUCERS]; - -static -void * producer_fn (void *arg) { - - uint16_t id = (uint16_t)((uintptr_t)arg); - - unsigned production_count = N_PACKETS; - while (production_count > 0) { - if (0 == memfifo_produce16(&producer[id], id+1)) --production_count; - } - - return NULL; -} - -void test_init_fn (void *buffer, uintptr_t cookie) { - - uint64_t *ptr = (uint64_t *) buffer; - *ptr = cookie; -} - -int main (int argc, char *argv[]) { - - struct l2atomic l2atomic; - memset((void*)&l2atomic, 0, sizeof(l2atomic)); - - int rc, lineno; - rc = l2atomic_init(&l2atomic); lineno = __LINE__; - if (rc) goto err; - - struct memfifo mfifo; - rc = memfifo_initialize(&l2atomic, "some name", &mfifo, 0); lineno = __LINE__; - if (rc) goto err; - - /* create 'producer' threads */ - uintptr_t pid; - unsigned production_count[N_PRODUCERS]; - for (pid = 0; pid < N_PRODUCERS; ++pid) { - producer[pid] = mfifo.producer; - production_count[pid] = 0; - if (pthread_create(&info[pid], NULL, &producer_fn, (void*)pid)) { lineno = __LINE__; goto err; } - } - - unsigned consumption_count = 0; - unsigned expected_packet_count = N_PRODUCERS * N_PACKETS; - uint16_t id; - while (consumption_count < 
expected_packet_count) { - if (0 == memfifo_consume16(&mfifo.consumer, &id)) { - ++production_count[id-1]; - ++consumption_count; - } - } - - for (id = 0; id < N_PRODUCERS; ++id) { -//fprintf(stderr, "%s:%d, production_count[%d]=%d (%d)\n", __FILE__, __LINE__, id, production_count[id], N_PACKETS); - if (production_count[id] != N_PACKETS) { lineno = __LINE__; goto err; } - } - - - fprintf (stdout, "TEST SUCCESSFUL\n"); - return 0; -err: - fprintf (stderr, "%s: Error at line %d\n", __FILE__, lineno); - return 1; -} diff --git a/prov/bgq/src/test/cq_mfifo_multithreaded_perf.c b/prov/bgq/src/test/cq_mfifo_multithreaded_perf.c deleted file mode 100644 index 15e457bb745..00000000000 --- a/prov/bgq/src/test/cq_mfifo_multithreaded_perf.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include - -#include "fi_bgq_spi.h" -#include "cq_agent.h" -#include "fi_bgq_memfifo.h" - -#define N_PRODUCERS 16 -#define N_PACKETS (0x01 << 16) - -struct memfifo mfifo; -struct memfifo_producer producer[N_PRODUCERS]; -pthread_t info[N_PRODUCERS]; - -struct l2atomic_barrier barrier; - -static -void * producer_fn (void *arg) { - - uint16_t id = (uint16_t)((uintptr_t)arg); - - unsigned i; - for (i=0; i<2; i++) { - l2atomic_barrier(&barrier); - unsigned production_count = N_PACKETS; - while (production_count > 0) { - if (0 == memfifo_produce16(&producer[id], id+1)) --production_count; - } - } - - return NULL; -} - -void test_init_fn (void *buffer, uintptr_t cookie) { - - uint64_t *ptr = (uint64_t *) buffer; - *ptr = cookie; -} - -int main (int argc, char *argv[]) { - - struct l2atomic l2atomic; - memset((void*)&l2atomic, 0, sizeof(l2atomic)); - - int rc, lineno; - rc = l2atomic_init(&l2atomic); lineno = __LINE__; - if (rc) goto err; - - struct memfifo mfifo; - rc = memfifo_initialize(&l2atomic, "some name", &mfifo, 0); lineno = __LINE__; - if (rc) goto err; - - rc = l2atomic_barrier_alloc_generic(&l2atomic, &barrier, N_PRODUCERS+1, "barrier_test"); lineno = __LINE__; - if (rc) goto err; - /* create 'producer' threads */ - uintptr_t pid; - unsigned production_count[N_PRODUCERS]; - for (pid = 0; pid < N_PRODUCERS; ++pid) { - producer[pid] = mfifo.producer; - production_count[pid] = 0; - if (pthread_create(&info[pid], NULL, &producer_fn, (void*)pid)) { lineno = __LINE__; goto err; } - } - unsigned expected_packet_count = N_PRODUCERS * N_PACKETS; - uint16_t id; - unsigned i; - uint64_t total_time; - for (i=0; i<2; i++) { - l2atomic_barrier(&barrier); - uint64_t start_time = GetTimeBase(); - 
unsigned consumption_count = 0; - while (consumption_count < expected_packet_count) { - if (0 == memfifo_consume16(&mfifo.consumer, &id)) { - ++consumption_count; - } - } - uint64_t end_time = GetTimeBase(); - total_time = end_time - start_time; - } - - fprintf(stdout, "total cycles: %lu\n", total_time); - - double cycles_per_producer = (double)total_time / (double)N_PRODUCERS; - fprintf(stdout, "cycles/producer: %f\n", cycles_per_producer); - - double cycles_per_write = cycles_per_producer / (double)N_PACKETS; - fprintf(stdout, "write: %d (cycles)\n", (unsigned)cycles_per_write); - fprintf(stdout, "write: %0.04f (usec)\n", cycles_per_write / 1600.0); - - fprintf (stdout, "TEST SUCCESSFUL\n"); - return 0; -err: - fprintf (stderr, "%s: Error at line %d\n", __FILE__, lineno); - return 1; -} diff --git a/prov/bgq/src/test/cq_mfifo_overflow.c b/prov/bgq/src/test/cq_mfifo_overflow.c deleted file mode 100644 index 96770852608..00000000000 --- a/prov/bgq/src/test/cq_mfifo_overflow.c +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "fi_bgq_spi.h" - -#include "cq_agent.h" -#include "fi_bgq_memfifo.h" - -#define ITERATIONS 10 - -void test_init_fn (void * buffer, uintptr_t cookie) { - - uint64_t * ptr = (uint64_t *) buffer; - *ptr = cookie; -} - -int main (int argc, char *argv[]) { - - struct l2atomic l2atomic; - memset((void*)&l2atomic, 0, sizeof(l2atomic)); - - int rc, lineno; - rc = l2atomic_init(&l2atomic); lineno = __LINE__; - if (rc) goto err; - - struct memfifo mfifo; - rc = memfifo_initialize(&l2atomic, "some name", &mfifo, 0); lineno = __LINE__; - if (rc) goto err; - - unsigned production_count = 0; - while (0 == memfifo_produce16(&mfifo.producer, production_count)) production_count++; - if (production_count != CQ_MFIFO_SIZE) { lineno = __LINE__; goto err; } - - uint16_t entry_id; - unsigned consumption_count = 0; - while (0 == memfifo_consume16(&mfifo.consumer, &entry_id)) { - if (entry_id != consumption_count++) { lineno = __LINE__; goto err; } - } - if (consumption_count != CQ_MFIFO_SIZE) { lineno = __LINE__; goto err; } - - while (0 == memfifo_produce16(&mfifo.producer, production_count)) production_count++; - if (production_count != (CQ_MFIFO_SIZE*2)) { lineno = __LINE__; goto err; } - - rc = memfifo_consume16(&mfifo.consumer, &entry_id); lineno = __LINE__; - if (rc) goto err; - if (entry_id != (0x7FFF & consumption_count)) { lineno = __LINE__; goto err; } - consumption_count++; - - rc = memfifo_produce16(&mfifo.producer, production_count++); lineno = __LINE__; - if (rc) goto 
err; - - rc = memfifo_consume16(&mfifo.consumer, &entry_id); lineno = __LINE__; - if (rc) goto err; - if (entry_id != (0x7FFF & consumption_count)) { lineno = __LINE__; goto err; } - consumption_count++; - - - fprintf (stdout, "TEST SUCCESSFUL\n"); - return 0; -err: - fprintf (stderr, "%s: Error at line %d\n", __FILE__, lineno); - return 1; -} diff --git a/prov/bgq/src/test/l2alloc_func.c b/prov/bgq/src/test/l2alloc_func.c deleted file mode 100644 index 1829ed28a78..00000000000 --- a/prov/bgq/src/test/l2alloc_func.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "fi_bgq_spi.h" - -#include "l2atomic.h" - -#define ITERATIONS 1000000 - -void test_init_fn (void * buffer, uintptr_t cookie) { - - uint64_t * ptr = (uint64_t *) buffer; - ptr[0] = cookie; - ptr[1] = cookie; - ptr[2] = cookie; -} - -int main (int argc, char *argv[]) { - - struct l2atomic l2atomic; - memset((void*)&l2atomic, 0, sizeof(l2atomic)); - - uint32_t tcoord = Kernel_MyTcoord(); - int rc, lineno; - - rc = l2atomic_init(&l2atomic); lineno = __LINE__; - if (rc) goto err; - - volatile uint64_t * buffer = NULL; - rc = l2atomic_alloc(&l2atomic, "simple", sizeof(uint64_t)*3, (void**)&buffer, test_init_fn, 0); lineno = __LINE__; - if (rc) goto err; - - volatile uint64_t * entered = buffer+1; - volatile uint64_t * exited = buffer+2; - L2_AtomicLoadIncrement(entered); - - uint32_t i; - if (tcoord % 2 == 0) { - for (i=0; i -#include - -#define L2ATOMIC_FIFO_NPACKETS N_PACKETS -#define L2ATOMIC_FIFO_MSYNC_CONSUMER -#define L2ATOMIC_FIFO_PRODUCER_STORE_FAST -#define L2ATOMIC_FIFO_CONSUMER_MULTIPLE -#include "rdma/bgq/fi_bgq_l2atomic.h" - -struct my_fifo { - struct l2atomic_fifo_consumer consumer; - uint64_t pad_0[16]; - struct l2atomic_fifo_data data; - uint64_t element[N_PACKETS]; -} __attribute__((__aligned__(32))); - -struct global { - struct my_fifo fifo[N_THREADS]; - struct l2atomic_barrier_data barrier_data; - struct { - struct l2atomic_fifo_producer producer[N_THREADS]; - struct l2atomic_barrier barrier; - } worker[N_THREADS]; -} __attribute__((__aligned__(32))); - -struct results { - double mmps; - uint16_t n_consumers; - uint16_t n_producers; - uint16_t is_consumer; - uint16_t is_producer; - uint64_t count; -}; - -struct results result[N_THREADS][N_THREADS*N_THREADS]; - -struct global *global; - -pthread_t info[N_THREADS]; - - -static uint64_t producer_fn (uintptr_t pid, struct l2atomic_fifo_producer *producer, unsigned n_consumers) { - - uint64_t i, full_count = 0; - do { - /* choose fifo */ - const uint64_t f = (i+pid)%n_consumers; - 
l2atomic_fifo_produce_wait(&producer[f], i); - - } while (++i<(n_consumers*N_MESSAGES)); - - return full_count; -} - -static uint64_t consumer_fn (uintptr_t pid, struct l2atomic_fifo_consumer *consumer, unsigned n_producers) { - - const unsigned t_messages = N_MESSAGES * n_producers; - uint64_t data[32], empty_count = 0, i = 0; - do { - i += l2atomic_fifo_consume16(consumer, data); - } while (i < t_messages); - - return empty_count; -} - - -static void * worker_fn (void * arg) { - - uintptr_t pid = (uintptr_t)arg; - - struct l2atomic_barrier *barrier = &global->worker[pid].barrier; - struct l2atomic_fifo_consumer *consumer = &global->fifo[pid].consumer; - struct l2atomic_fifo_producer *producer = &global->worker[pid].producer[0]; - - unsigned n_consumers, n_producers, test = 0; - for (n_consumers = 1; n_consumers <= N_THREADS; ++n_consumers) { - - const unsigned max_producers = N_THREADS - n_consumers; - - for (n_producers = 1; n_producers <= max_producers; ++n_producers) { - - uint64_t count = 0; - - l2atomic_barrier_enter(barrier); - uint64_t start_time = GetTimeBase(); - - if (pid < n_consumers) { - count = consumer_fn(pid, consumer, n_producers); - } else if (pid < (n_consumers + n_producers)) { - count = producer_fn(pid, producer, n_consumers); - } - - uint64_t end_time = GetTimeBase(); - l2atomic_barrier_enter(barrier); - - uint64_t total_time = end_time - start_time; - double total_usec = (double)total_time / 1600.0; - - result[pid][test].n_consumers = n_consumers; - result[pid][test].n_producers = n_producers; - result[pid][test].count = count; - - if (pid < n_consumers) { - result[pid][test].is_consumer = 1; - result[pid][test].is_producer = 0; - - result[pid][test].mmps = ((double)(N_MESSAGES*n_producers)) / total_usec; /* same as million messages per second */ - } else if (pid < (n_producers+n_consumers)) { - result[pid][test].is_consumer = 0; - result[pid][test].is_producer = 1; - - result[pid][test].mmps = ((double)(N_MESSAGES*n_consumers)) / 
total_usec; /* same as million messages per second */ - } else { - result[pid][test].is_consumer = 0; - result[pid][test].is_producer = 0; - result[pid][test].mmps = 0.0; - } - - l2atomic_barrier_enter(barrier); ppc_msync(); - - if (pid == 0) { - char out[1024*10]; - char * ptr = &out[0]; - unsigned n; - unsigned i; - - double all_consumer_mmps = 0.0; - double all_producer_mmps = 0.0; - for (i = 0; i < N_THREADS; ++i) { - - if (i>0 && result[i-1][test].is_consumer && !result[i][test].is_consumer) { - n = sprintf(ptr, ":: "); ptr += n; - } - - if (result[i][test].is_consumer) - all_consumer_mmps += result[i][test].mmps; - - if (result[i][test].is_producer) - all_producer_mmps += result[i][test].mmps; -#ifdef DISPLAY_EMPTY_FULL_COUNT - n = sprintf(ptr, "%5.2f(%5.2f) ", result[i][test].mmps, (double)result[i][test].count/10000000.0); -#else - n = sprintf(ptr, "%5.2f ", result[i][test].mmps); -#endif - ptr += n; - } - fprintf(stdout, "consumers=%-2u (%5.2f) producers=%-2u (%5.2f) :: %s\n", n_consumers, all_consumer_mmps, n_producers, all_producer_mmps, out); - } - - ++test; - - l2atomic_barrier_enter(barrier); - } - } - - return NULL; -} - -int main (int argc, char *argv[]) { - - int lineno, retval = 0; - - const size_t bytes_to_allocate = 128 * 2 + sizeof(struct global); - void *memptr = calloc(bytes_to_allocate, 1); - global = (struct global *)(((uintptr_t)memptr + 128) & ~127); - - uintptr_t pid, w; - for (pid = 0; pid < N_THREADS; ++pid) { - l2atomic_fifo_initialize(&global->fifo[pid].consumer, - &global->worker[0].producer[pid], &global->fifo[pid].data, N_PACKETS); - - for (w = 1; w < N_THREADS; ++w) { - global->worker[w].producer[pid] = global->worker[0].producer[pid]; - } - } - - l2atomic_barrier_initialize(&global->worker[0].barrier, &global->barrier_data, N_THREADS); - - for (pid=1; pidworker[pid].barrier, &global->barrier_data); - if (pthread_create(&info[pid], NULL, &worker_fn, (void*)pid)) { lineno = __LINE__; goto err; } - } - - worker_fn((void *)0); - 
-ret: - return retval; - -err: - fprintf(stderr, "%s:%s():%d\n", __FILE__, __func__, lineno); retval = -1; - goto ret; - -}; diff --git a/prov/bgq/src/test/l2barrier_func.c b/prov/bgq/src/test/l2barrier_func.c deleted file mode 100644 index bb3d25009e6..00000000000 --- a/prov/bgq/src/test/l2barrier_func.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include - -#include "rdma/bgq/fi_bgq_hwi.h" -#include "rdma/bgq/fi_bgq_spi.h" - -#include "l2atomic.h" - -#define ITERATIONS 1000000 - - -void test_init_fn (void * buffer, uintptr_t cookie) { - uint64_t * ptr = (uint64_t *) buffer; - *ptr = cookie; -} - -int main (int argc, char *argv[]) { - - struct l2atomic l2atomic; - memset((void*)&l2atomic, 0, sizeof(l2atomic)); - - uint32_t tcoord = Kernel_MyTcoord(); - - int rc, lineno; - rc = l2atomic_init(&l2atomic); lineno = __LINE__; - if (rc) goto err; - - /* race condition! how to determine the number of *active* ranks on the node - * without using Kernel_RanksToCoords() ? */ - usleep(5000); - ppc_msync(); - int participants = l2atomic.shared->counter; - fprintf(stderr, "%s:%d participants=%d\n", __FILE__, __LINE__, participants); - /* end: race */ - - struct l2atomic_barrier barrier; - rc = l2atomic_barrier_alloc_generic(&l2atomic, &barrier, participants, "barrier_test"); lineno = __LINE__; - if (rc) goto err; - uint64_t start_time = GetTimeBase(); - l2atomic_barrier(&barrier); - if (tcoord == 0) usleep(1); - l2atomic_barrier(&barrier); - uint64_t end_time = GetTimeBase(); - - fprintf(stdout, "barrier cycles: %lu\n", end_time - start_time); - - if (tcoord==0) fprintf(stdout, "TEST SUCCESSFUL\n"); - return 0; -err: - fprintf(stderr, "Error at line %d\n", lineno); - return 1; -} diff --git a/prov/bgq/src/test/l2lock_func.c b/prov/bgq/src/test/l2lock_func.c deleted file mode 100644 index dabcf37ed7d..00000000000 --- a/prov/bgq/src/test/l2lock_func.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (C) 2016 by Argonne National Laboratory. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "fi_bgq_spi.h" - -#include "l2atomic.h" - -#define ITERATIONS 100000 - - -void test_init_fn (void * buffer, uintptr_t cookie) { - uint64_t * ptr = (uint64_t *) buffer; - *ptr = cookie; -} - -int main (int argc, char *argv[]) { - - struct l2atomic l2atomic; - memset((void*)&l2atomic, 0, sizeof(l2atomic)); - - uint32_t tcoord = Kernel_MyTcoord(); - - int rc, lineno; - rc = l2atomic_init(&l2atomic); lineno = __LINE__; - if (rc) goto err; - - L2_Lock_t * lock; - rc = l2atomic_lock_alloc_generic (&l2atomic, &lock, "lock"); lineno = __LINE__; - if (rc) goto err; - - - uint64_t * buffer = NULL; - rc = l2atomic_alloc(&l2atomic, "simple", 128, (void**)&buffer, test_init_fn, 0); lineno = __LINE__; - if (rc) goto err; - - /* race condition */ - usleep(5000); - ppc_msync(); - int participants = l2atomic.shared->counter; - /* race condition */ - - struct l2atomic_barrier barrier; - rc = l2atomic_barrier_alloc_generic(&l2atomic, &barrier, participants, "lock_barrier_test"); lineno = __LINE__; - if (rc) goto err; - l2atomic_barrier(&barrier); - - uint32_t i; - for (i=0; i 0); - - cnk_rc = Kernel_AllocateBaseAddressTable(0, bat_subgroup, nbatids, &batids[0], 0); - assert(cnk_rc == 0); -} - -static inline void bat_write (MUSPI_BaseAddressTableSubGroup_t * bat_subgroup, uint64_t index, uint64_t offset) -{ - - int32_t cnk_rc __attribute__ ((unused)); - cnk_rc = MUSPI_SetBaseAddress(bat_subgroup, index, offset); - assert(cnk_rc == 0); - -} - - -static inline void init_gi_barrier (MUSPI_GIBarrier_t * GIBarrier) -{ - int rc; - rc = MUSPI_GIBarrierInit(GIBarrier, 0); - if (rc) exit(1); -} - - -static inline void do_gi_barrier (MUSPI_GIBarrier_t * GIBarrier) -{ - int rc; - uint64_t gi_timeout = 1600000000; /* about 1 sec at 16 mhz */ - gi_timeout *= 30; - - rc = MUSPI_GIBarrierEnter(GIBarrier); - if (rc) exit(1); - - rc = MUSPI_GIBarrierPollWithTimeout(GIBarrier, gi_timeout); - if (rc) exit(1); -} - - -static inline void do_gi_barrier_no_timeout (MUSPI_GIBarrier_t * 
GIBarrier) -{ - int rc; - rc = MUSPI_GIBarrierEnter(GIBarrier); - if (rc) exit(1); - - rc = MUSPI_GIBarrierPoll(GIBarrier); - if (rc) exit(1); -} - - -static inline MUSPI_RecFifo_t * allocate_reception_fifo (MUSPI_RecFifoSubGroup_t * rfifo_subgroup) -{ - - int rc __attribute__ ((unused)); - uint8_t * memptr; - - size_t nbytes = 8 * 1024 * 1024; - rc = posix_memalign((void**)&memptr, 32, nbytes); - assert(0 == rc); - - Kernel_MemoryRegion_t mregion; - rc = Kernel_CreateMemoryRegion(&mregion, (void*)memptr, nbytes); - assert(0 == rc); - - uint32_t free_fifo_num; - uint32_t free_fifo_ids[BGQ_MU_NUM_REC_FIFOS_PER_SUBGROUP]; - rc = Kernel_QueryRecFifos(0, &free_fifo_num, free_fifo_ids); - assert(0 == rc); - assert(0 < free_fifo_num); - assert(0 == free_fifo_ids[0]); - - Kernel_RecFifoAttributes_t attr; - memset(&attr, 0x00, sizeof(attr)); - rc = Kernel_AllocateRecFifos(0, rfifo_subgroup, 1, free_fifo_ids, &attr); - assert(0 == rc); - - rc = Kernel_RecFifoInit(rfifo_subgroup, 0, &mregion, - ((uint64_t)memptr) - (uint64_t)mregion.BaseVa, - nbytes-1); - assert(0 == rc); - - rc = Kernel_RecFifoEnable(0, 0x08000ull); - assert(0 == rc); - - assert(rfifo_subgroup->_recfifos[0]._fifo.hwfifo); - - return &rfifo_subgroup->_recfifos[0]; -} - - -static inline void inject (struct fi_bgq_spi_injfifo * ififo, MUHWI_Descriptor_t * model) -{ - - MUSPI_InjFifo_t * muspi_injfifo = ififo->muspi_injfifo; - MUHWI_Descriptor_t * d = fi_bgq_spi_injfifo_tail_wait(ififo); - *d = *model; - - MUSPI_InjFifoAdvanceDesc(muspi_injfifo); - return; -} - - -/* return the number of *chunks* consumed */ -static inline uint64_t receive (MUSPI_RecFifo_t * recfifo) -{ - MUSPI_Fifo_t * fifo = (MUSPI_Fifo_t *)recfifo; - - const uintptr_t pa_start = MUSPI_getStartPa(fifo); - const uintptr_t va_head = (uintptr_t) MUSPI_getHeadVa(fifo); - const uintptr_t va_start = (uintptr_t) MUSPI_getStartVa(fifo); - const uintptr_t offset_head = va_head - va_start; - - uintptr_t offset_tail = MUSPI_getHwTail(fifo) - pa_start; - 
- /* - * wait until the head does not equal the tail; this signifies that - * a packet has been received - */ - while (offset_head == offset_tail) { - offset_tail = MUSPI_getHwTail(fifo) - pa_start; - } - - uint64_t bytes_consumed; - if (offset_head < offset_tail) { - - MUSPI_setHeadVa(fifo, (void*)(va_start + offset_tail)); - MUSPI_setHwHead(fifo, offset_tail); - - bytes_consumed = offset_tail - offset_head; - - } else { - - MUSPI_setHeadVa(fifo, (void*)(va_start)); - MUSPI_setHwHead(fifo, 0); - - const uintptr_t va_end = (uintptr_t) fifo->va_end; - bytes_consumed = va_end - va_head; - } - - return bytes_consumed >> 5; /* each chunk is 32 bytes */ -} - - - - -int main (int argc, char **argv) -{ - Personality_t pers; - Kernel_GetPersonality(&pers, sizeof(pers)); - - BG_CoordinateMapping_t local; - local.a = pers.Network_Config.Acoord; - local.b = pers.Network_Config.Bcoord; - local.c = pers.Network_Config.Ccoord; - local.d = pers.Network_Config.Dcoord; - local.e = pers.Network_Config.Ecoord; - local.t = Kernel_PhysicalProcessorID(); - - int is_root = 0; - int is_neighbor = 0; - if (local.t == 0 && local.a == 0 && local.b == 0 && local.c == 0 && local.d == 0) { - - is_root = (local.e == 0); - is_neighbor = (local.e == 1); - } - - - MUSPI_BaseAddressTableSubGroup_t bat_subgroup; - bat_allocate(&bat_subgroup); - - volatile uint64_t byte_counter __attribute__((__aligned__(64))); - byte_counter = 0; - - uint64_t byte_counter_paddr = 0; - fi_bgq_cnk_vaddr2paddr((const void *)&byte_counter, sizeof(uint64_t), &byte_counter_paddr); - uint64_t atomic_byte_counter_paddr = MUSPI_GetAtomicAddress(byte_counter_paddr, MUHWI_ATOMIC_OPCODE_STORE_ADD); - bat_write(&bat_subgroup, BYTE_COUNTER_BAT_ID, atomic_byte_counter_paddr); - - uint64_t rbuf_paddr = 0; - fi_bgq_cnk_vaddr2paddr((const void *)&rbuf[0], MAX_MESSAGE_SIZE, &rbuf_paddr); - bat_write(&bat_subgroup, RECEIVE_BUFFER_BAT_ID, rbuf_paddr); - - - struct fi_bgq_spi_injfifo rget_ififo; - MUSPI_InjFifoSubGroup_t 
rget_ififo_subgroup; - fi_bgq_spi_injfifo_init(&rget_ififo, &rget_ififo_subgroup, 1, NUM_LOOPS, 0, 1, 0); - - struct fi_bgq_spi_injfifo ififo; - MUSPI_InjFifoSubGroup_t ififo_subgroup; - fi_bgq_spi_injfifo_init(&ififo, &ififo_subgroup, 1, NUM_LOOPS, 0, 0, 1); - - MUSPI_RecFifoSubGroup_t rfifo_subgroup; - MUSPI_RecFifo_t * recfifo = allocate_reception_fifo(&rfifo_subgroup); - - - /* - * Create the 'memory fifo' descriptor - used for eager-style transfers - * and rendezvous-style RTS messages. - */ - MUHWI_Descriptor_t fifo_model __attribute__((__aligned__(64))); - MUSPI_DescriptorZeroOut(&fifo_model); - - fifo_model.Half_Word0.Prefetch_Only = MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; - fifo_model.Half_Word1.Interrupt = MUHWI_DESCRIPTOR_DO_NOT_INTERRUPT_ON_PACKET_ARRIVAL; - fi_bgq_cnk_vaddr2paddr((const void *)&sbuf[0], MAX_MESSAGE_SIZE+64, &fifo_model.Pa_Payload); - fifo_model.Message_Length = 0; - fifo_model.PacketHeader.NetworkHeader.pt2pt.Data_Packet_Type = MUHWI_PT2PT_DATA_PACKET_TYPE; - fifo_model.PacketHeader.NetworkHeader.pt2pt.Byte3.Byte3 = MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; - fifo_model.PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = MUHWI_PACKET_TYPE_FIFO; - fifo_model.PacketHeader.messageUnitHeader.Packet_Types.Memory_FIFO.Rec_FIFO_Id = 0; - fifo_model.PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - - fifo_model.PacketHeader.NetworkHeader.pt2pt.Destination.Destination.A_Destination = 0; - fifo_model.PacketHeader.NetworkHeader.pt2pt.Destination.Destination.B_Destination = 0; - fifo_model.PacketHeader.NetworkHeader.pt2pt.Destination.Destination.C_Destination = 0; - fifo_model.PacketHeader.NetworkHeader.pt2pt.Destination.Destination.D_Destination = 0; - - if (is_root) { - - fifo_model.Torus_FIFO_Map = MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_EP; - fifo_model.PacketHeader.NetworkHeader.pt2pt.Destination.Destination.E_Destination = 1; - - } else if (is_neighbor) { - - fifo_model.Torus_FIFO_Map = MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_EM; - 
fifo_model.PacketHeader.NetworkHeader.pt2pt.Destination.Destination.E_Destination = 0; - } - - - MUHWI_Descriptor_t dput __attribute__((__aligned__(64))); - MUSPI_DescriptorZeroOut(&dput); - - dput = fifo_model; - dput.Torus_FIFO_Map = - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_AM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_AP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_BM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_BP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_CM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_CP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_DM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_DP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_EM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_EP; - dput.PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = MUHWI_PACKET_TYPE_PUT; - dput.PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Pacing = MUHWI_PACKET_DIRECT_PUT_IS_NOT_PACED; - MUSPI_SetRecPayloadBaseAddressInfo(&dput, RECEIVE_BUFFER_BAT_ID, 0); - dput.PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Rec_Counter_Base_Address_Id = BYTE_COUNTER_BAT_ID; - dput.PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Counter_Offset = 0; - - dput.Message_Length = 0; /* updated during the test */ - dput.PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Put_Offset_MSB = 0; - dput.PacketHeader.messageUnitHeader.Packet_Types.Direct_Put.Put_Offset_LSB = 0; - - dput.PacketHeader.NetworkHeader.pt2pt.Destination.Destination.A_Destination = local.a; - dput.PacketHeader.NetworkHeader.pt2pt.Destination.Destination.B_Destination = local.b; - dput.PacketHeader.NetworkHeader.pt2pt.Destination.Destination.C_Destination = local.c; - dput.PacketHeader.NetworkHeader.pt2pt.Destination.Destination.D_Destination = local.d; - dput.PacketHeader.NetworkHeader.pt2pt.Destination.Destination.E_Destination = local.e; - dput.PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - - - MUHWI_Descriptor_t rget_model __attribute__((__aligned__(64))); - MUSPI_DescriptorZeroOut(&rget_model); - - rget_model = fifo_model; - rget_model.Torus_FIFO_Map = - 
MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_AM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_AP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_BM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_BP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_CM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_CP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_DM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_DP | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_EM | - MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_EP; - rget_model.PacketHeader.NetworkHeader.pt2pt.Byte8.Byte8 = MUHWI_PACKET_TYPE_GET; - fi_bgq_cnk_vaddr2paddr((const void *)&dput, sizeof(MUHWI_Descriptor_t), &rget_model.Pa_Payload); - rget_model.Message_Length = sizeof(MUHWI_Descriptor_t); - - rget_model.PacketHeader.NetworkHeader.pt2pt.Byte8.Size = 16; - - - /* - * Barrier - */ - MUSPI_GIBarrier_t GIBarrier; - init_gi_barrier(&GIBarrier); - do_gi_barrier(&GIBarrier); - - - if (is_root) { - - fprintf(stdout, "# eager rendezvous\n"); - fprintf(stdout, "# %10s %10s %9s %10s %9s\n", "bytes", "cycles", "usec", "cycles", "usec"); - fprintf(stdout, "# ====================================================\n"); - - } - - const uint64_t num_loops = NUM_LOOPS; - - uint64_t i = 0; - uint64_t msg_size = 0; - while (msg_size <= MAX_MESSAGE_SIZE) { - - fifo_model.Message_Length = msg_size; - dput.Message_Length = msg_size; - - uint64_t eager_cycles = 0; - uint64_t rendezvous_cycles = 0; - - /* - * each torus chunk is 32 bytes and the first 32 bytes in each - * packet is header - */ - const uint64_t npackets = msg_size == 0 ? 
1 : (msg_size / 512) + (msg_size % 512 != 0); - const uint64_t nchunks = npackets + (msg_size / 32) + (msg_size % 32 != 0); - if (is_root) { - - Delay(500000); /* make sure receiver is ready */ - - const unsigned long long t0 = GetTimeBase(); - for (i=0; i 0) { - n -= receive(recfifo); - } - } - const unsigned long long t1 = GetTimeBase(); - eager_cycles = t1 - t0; - - } else if (is_neighbor) { - for (i=0; i 0) { - n -= receive(recfifo); - } - - inject(&ififo, &fifo_model); - } - - Delay(500000); /* make sure sender is finished */ - } - - - /* - * rendezvous - */ - if (msg_size > 0) { - - fifo_model.Message_Length = 0; - - if (is_root) { - - Delay(500000); /* make sure receiver is ready */ - - const unsigned long long t0 = GetTimeBase(); - for (i=0; i 0) { - unsigned chunks = receive(recfifo); - n -= chunks; - } - - /* transfer the 'pong' data */ - byte_counter = msg_size; - inject(&ififo, &rget_model); - - /* wait until all 'pong' data is delivered */ - while (byte_counter > 0); - } - const unsigned long long t1 = GetTimeBase(); - rendezvous_cycles = t1 - t0; - - } else if (is_neighbor) { - - for (i=0; i 0) { - unsigned chunks = receive(recfifo); - n -= chunks; - } - - /* transfer the 'ping' data */ - byte_counter = msg_size; - inject(&ififo, &rget_model); - - /* wait until all 'ping' data is delivered */ - while (byte_counter > 0); - - /* inject the 'pong' rts */ - inject(&ififo, &fifo_model); - } - - Delay(500000); /* make sure sender is finished */ - } - } - - if (is_root) { - - /* report half pingpong */ - fprintf(stdout, " %10lu %10lu %9.2f %10lu %9.2f\n", msg_size, - (eager_cycles)/(num_loops*2), (((eager_cycles)*1.0)/1600.0) / (num_loops * 2.0), - (rendezvous_cycles)/(num_loops*2), (((rendezvous_cycles)*1.0)/1600.0) / (num_loops * 2.0)); - } - - msg_size = msg_size == 0 ? 
1 : msg_size*2; - } - - return 0; -} - diff --git a/prov/psm3/configure.ac b/prov/psm3/configure.ac index 1f4659218d6..a986a5fbdc1 100644 --- a/prov/psm3/configure.ac +++ b/prov/psm3/configure.ac @@ -864,8 +864,6 @@ AC_DEFINE([HAVE_SYNAPSEAI], 0, [Ignore HAVE_SYNAPSEAI]) AC_DEFINE([HAVE_UFFD_MONITOR], 0, [Ignore HAVE_UFFD_MONITOR]) dnl Provider-specific checks dnl FI_PROVIDER_INIT -AC_DEFINE([HAVE_BGQ], 0, [Ignore HAVE_BGQ]) -AC_DEFINE([HAVE_BGQ_DL], 0, [Ignore HAVE_BGQ_DL]) AC_DEFINE([HAVE_EFA], 0, [Ignore HAVE_EFA]) AC_DEFINE([HAVE_EFA_DL], 0, [Ignore HAVE_EFA_DL]) AC_DEFINE([HAVE_GNI], 0, [Ignore HAVE_GNI]) diff --git a/src/common.c b/src/common.c index cac720c54ff..88df4e465c1 100644 --- a/src/common.c +++ b/src/common.c @@ -429,9 +429,6 @@ const char *ofi_straddr(char *buf, size_t *len, size = snprintf(buf, *len, "fi_addr_gni://%" PRIx64, *(uint64_t *)addr); break; - case FI_ADDR_BGQ: - size = snprintf(buf, *len, "fi_addr_bgq://%p", addr); - break; case FI_ADDR_OPX: size = snprintf(buf, *len, "fi_addr_opx://%016lx", *(uint64_t *)addr); break; @@ -494,8 +491,6 @@ uint32_t ofi_addr_format(const char *str) return FI_ADDR_PSMX3; else if (!strcasecmp(fmt, "fi_addr_gni")) return FI_ADDR_GNI; - else if (!strcasecmp(fmt, "fi_addr_bgq")) - return FI_ADDR_BGQ; else if (!strcasecmp(fmt, "fi_addr_opx")) return FI_ADDR_OPX; else if (!strcasecmp(fmt, "fi_addr_efa")) @@ -905,7 +900,6 @@ int ofi_str_toaddr(const char *str, uint32_t *addr_format, case FI_SOCKADDR_IB: return ofi_str_to_sib(str, addr, len); case FI_ADDR_GNI: - case FI_ADDR_BGQ: case FI_ADDR_MLX: case FI_ADDR_UCX: default: diff --git a/src/fabric.c b/src/fabric.c index 53f567a27d1..1ceb4219cde 100644 --- a/src/fabric.c +++ b/src/fabric.c @@ -445,7 +445,7 @@ static struct fi_provider *ofi_get_hook(const char *name) static void ofi_ordered_provs_init(void) { char *ordered_prov_names[] = { - "efa", "psm2", "opx", "usnic", "gni", "bgq", "verbs", + "efa", "psm2", "opx", "usnic", "gni", "verbs", "netdir", "psm3", 
"ucx", "ofi_rxm", "ofi_rxd", "shm", /* Initialize the socket based providers last of the @@ -891,7 +891,6 @@ void fi_ini(void) ofi_register_provider(PSM2_INIT, NULL); ofi_register_provider(USNIC_INIT, NULL); ofi_register_provider(GNI_INIT, NULL); - ofi_register_provider(BGQ_INIT, NULL); ofi_register_provider(NETDIR_INIT, NULL); ofi_register_provider(SHM_INIT, NULL); ofi_register_provider(SM2_INIT, NULL); diff --git a/src/fi_tostr.c b/src/fi_tostr.c index ed2a286ed5c..5f9e5032f80 100644 --- a/src/fi_tostr.c +++ b/src/fi_tostr.c @@ -118,7 +118,6 @@ static void ofi_tostr_addr_format(char *buf, size_t len, uint32_t addr_format) CASEENUMSTRN(FI_SOCKADDR_IB, len); CASEENUMSTRN(FI_ADDR_PSMX2, len); CASEENUMSTRN(FI_ADDR_GNI, len); - CASEENUMSTRN(FI_ADDR_BGQ, len); CASEENUMSTRN(FI_ADDR_MLX, len); CASEENUMSTRN(FI_ADDR_UCX, len); CASEENUMSTRN(FI_ADDR_STR, len); diff --git a/util/info.c b/util/info.c index fe7844c5624..80bae9fd273 100644 --- a/util/info.c +++ b/util/info.c @@ -193,7 +193,6 @@ static int str2addr_format(char *inputstr, uint32_t *value) ORCASE(FI_SOCKADDR_IN6); ORCASE(FI_SOCKADDR_IB); ORCASE(FI_ADDR_GNI); - ORCASE(FI_ADDR_BGQ); ORCASE(FI_ADDR_MLX); ORCASE(FI_ADDR_STR); ORCASE(FI_ADDR_PSMX2); From e2317581ff6f5999496d33445884cb1740cf1d57 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 18 Sep 2023 16:24:18 -0700 Subject: [PATCH 02/34] prov/usnic: Remove provider Provider only supported by v1.x series Signed-off-by: Sean Hefty --- .github/workflows/coverity.yml | 1 - .github/workflows/pr-ci.yml | 1 - .travis.yml | 7 +- Makefile.am | 1 - README.md | 32 - config/cron-make-nightly-tarball.pl | 2 +- configure.ac | 3 - contrib/buildrpm/README | 2 +- contrib/intel/jenkins/common.py | 1 - fabtests/Makefile.am | 2 - fabtests/test_configs/usnic/all.test | 40 - fabtests/test_configs/usnic/quick.test | 41 - include/ofi_prov.h | 11 - man/fi_pingpong.1.md | 12 +- man/fi_usnic.7.md | 330 --- man/man7/fi_usnic.7 | 382 ---- prov/psm2/build-psm2.sh | 4 +- 
prov/psm3/configure.ac | 2 - prov/usnic/Makefile.include | 164 -- prov/usnic/configure.m4 | 366 ---- prov/usnic/libfabric-usnic.spec.in | 52 - prov/usnic/src/fi_ext_usnic.h | 157 -- prov/usnic/src/usdf.h | 525 ----- prov/usnic/src/usdf_av.c | 887 -------- prov/usnic/src/usdf_av.h | 97 - prov/usnic/src/usdf_cm.c | 321 --- prov/usnic/src/usdf_cm.h | 85 - prov/usnic/src/usdf_cq.c | 1333 ------------ prov/usnic/src/usdf_cq.h | 55 - prov/usnic/src/usdf_dgram.c | 805 -------- prov/usnic/src/usdf_dgram.h | 104 - prov/usnic/src/usdf_domain.c | 427 ---- prov/usnic/src/usdf_endpoint.c | 123 -- prov/usnic/src/usdf_endpoint.h | 53 - prov/usnic/src/usdf_ep_dgram.c | 943 --------- prov/usnic/src/usdf_eq.c | 660 ------ prov/usnic/src/usdf_ext.c | 248 --- prov/usnic/src/usdf_fabric.c | 1057 ---------- prov/usnic/src/usdf_fake_ibv.c | 126 -- prov/usnic/src/usdf_mem.c | 159 -- prov/usnic/src/usdf_pep.c | 838 -------- prov/usnic/src/usdf_poll.c | 293 --- prov/usnic/src/usdf_poll.h | 56 - prov/usnic/src/usdf_progress.c | 161 -- prov/usnic/src/usdf_progress.h | 52 - prov/usnic/src/usdf_rudp.h | 109 - prov/usnic/src/usdf_socket.c | 68 - prov/usnic/src/usdf_socket.h | 43 - prov/usnic/src/usdf_timer.c | 266 --- prov/usnic/src/usdf_timer.h | 74 - prov/usnic/src/usdf_wait.c | 346 ---- prov/usnic/src/usdf_wait.h | 65 - prov/usnic/src/usnic_direct/cq_desc.h | 136 -- prov/usnic/src/usnic_direct/cq_enet_desc.h | 269 --- prov/usnic/src/usnic_direct/kcompat.h | 108 - prov/usnic/src/usnic_direct/kcompat_priv.h | 95 - prov/usnic/src/usnic_direct/libnl1_utils.h | 112 -- prov/usnic/src/usnic_direct/libnl3_utils.h | 97 - prov/usnic/src/usnic_direct/libnl_utils.h | 64 - .../src/usnic_direct/libnl_utils_common.c | 465 ----- prov/usnic/src/usnic_direct/linux/delay.h | 48 - prov/usnic/src/usnic_direct/linux/slab.h | 48 - prov/usnic/src/usnic_direct/linux_types.h | 67 - prov/usnic/src/usnic_direct/rq_enet_desc.h | 84 - prov/usnic/src/usnic_direct/usd.h | 324 --- prov/usnic/src/usnic_direct/usd_caps.c | 67 
- prov/usnic/src/usnic_direct/usd_caps.h | 48 - prov/usnic/src/usnic_direct/usd_dest.c | 595 ------ prov/usnic/src/usnic_direct/usd_dest.h | 73 - prov/usnic/src/usnic_direct/usd_device.c | 689 ------- prov/usnic/src/usnic_direct/usd_device.h | 49 - prov/usnic/src/usnic_direct/usd_enum.c | 133 -- prov/usnic/src/usnic_direct/usd_event.c | 84 - prov/usnic/src/usnic_direct/usd_ib_cmd.c | 1032 ---------- prov/usnic/src/usnic_direct/usd_ib_cmd.h | 69 - prov/usnic/src/usnic_direct/usd_ib_sysfs.c | 383 ---- prov/usnic/src/usnic_direct/usd_ib_sysfs.h | 71 - prov/usnic/src/usnic_direct/usd_mem.c | 220 -- prov/usnic/src/usnic_direct/usd_poll.c | 293 --- prov/usnic/src/usnic_direct/usd_post.c | 122 -- prov/usnic/src/usnic_direct/usd_post.h | 227 --- .../src/usnic_direct/usd_post_ud_pio_udp.c | 251 --- prov/usnic/src/usnic_direct/usd_post_ud_raw.c | 75 - prov/usnic/src/usnic_direct/usd_post_ud_udp.c | 325 --- prov/usnic/src/usnic_direct/usd_queue.h | 55 - prov/usnic/src/usnic_direct/usd_queues.c | 1370 ------------- prov/usnic/src/usnic_direct/usd_socket.c | 121 -- prov/usnic/src/usnic_direct/usd_socket.h | 54 - prov/usnic/src/usnic_direct/usd_time.h | 66 - prov/usnic/src/usnic_direct/usd_util.h | 148 -- prov/usnic/src/usnic_direct/usd_vnic.c | 213 -- prov/usnic/src/usnic_direct/usd_vnic.h | 53 - prov/usnic/src/usnic_direct/usnic_abi.h | 361 ---- prov/usnic/src/usnic_direct/usnic_direct.h | 719 ------- prov/usnic/src/usnic_direct/usnic_ib_abi.h | 153 -- prov/usnic/src/usnic_direct/usnic_ip_utils.c | 199 -- prov/usnic/src/usnic_direct/usnic_ip_utils.h | 52 - .../usnic/src/usnic_direct/usnic_user_utils.h | 116 -- prov/usnic/src/usnic_direct/vnic_cq.c | 128 -- prov/usnic/src/usnic_direct/vnic_cq.h | 154 -- prov/usnic/src/usnic_direct/vnic_dev.c | 1787 ----------------- prov/usnic/src/usnic_direct/vnic_dev.h | 214 -- prov/usnic/src/usnic_direct/vnic_devcmd.h | 1413 ------------- prov/usnic/src/usnic_direct/vnic_enet.h | 86 - prov/usnic/src/usnic_direct/vnic_intr.c | 123 -- 
prov/usnic/src/usnic_direct/vnic_intr.h | 140 -- prov/usnic/src/usnic_direct/vnic_resource.h | 119 -- prov/usnic/src/usnic_direct/vnic_rq.c | 272 --- prov/usnic/src/usnic_direct/vnic_rq.h | 296 --- prov/usnic/src/usnic_direct/vnic_stats.h | 99 - prov/usnic/src/usnic_direct/vnic_wq.c | 288 --- prov/usnic/src/usnic_direct/vnic_wq.h | 302 --- prov/usnic/src/usnic_direct/wq_enet_desc.h | 122 -- prov/util/src/util_attr.c | 7 +- prov/verbs/src/verbs_info.c | 1 - src/fabric.c | 3 +- 116 files changed, 14 insertions(+), 27935 deletions(-) delete mode 100644 fabtests/test_configs/usnic/all.test delete mode 100644 fabtests/test_configs/usnic/quick.test delete mode 100644 man/fi_usnic.7.md delete mode 100644 man/man7/fi_usnic.7 delete mode 100644 prov/usnic/Makefile.include delete mode 100644 prov/usnic/configure.m4 delete mode 100644 prov/usnic/libfabric-usnic.spec.in delete mode 100644 prov/usnic/src/fi_ext_usnic.h delete mode 100644 prov/usnic/src/usdf.h delete mode 100644 prov/usnic/src/usdf_av.c delete mode 100644 prov/usnic/src/usdf_av.h delete mode 100644 prov/usnic/src/usdf_cm.c delete mode 100644 prov/usnic/src/usdf_cm.h delete mode 100644 prov/usnic/src/usdf_cq.c delete mode 100644 prov/usnic/src/usdf_cq.h delete mode 100644 prov/usnic/src/usdf_dgram.c delete mode 100644 prov/usnic/src/usdf_dgram.h delete mode 100644 prov/usnic/src/usdf_domain.c delete mode 100644 prov/usnic/src/usdf_endpoint.c delete mode 100644 prov/usnic/src/usdf_endpoint.h delete mode 100644 prov/usnic/src/usdf_ep_dgram.c delete mode 100644 prov/usnic/src/usdf_eq.c delete mode 100644 prov/usnic/src/usdf_ext.c delete mode 100644 prov/usnic/src/usdf_fabric.c delete mode 100644 prov/usnic/src/usdf_fake_ibv.c delete mode 100644 prov/usnic/src/usdf_mem.c delete mode 100644 prov/usnic/src/usdf_pep.c delete mode 100644 prov/usnic/src/usdf_poll.c delete mode 100644 prov/usnic/src/usdf_poll.h delete mode 100644 prov/usnic/src/usdf_progress.c delete mode 100644 prov/usnic/src/usdf_progress.h delete mode 
100644 prov/usnic/src/usdf_rudp.h delete mode 100644 prov/usnic/src/usdf_socket.c delete mode 100644 prov/usnic/src/usdf_socket.h delete mode 100644 prov/usnic/src/usdf_timer.c delete mode 100644 prov/usnic/src/usdf_timer.h delete mode 100644 prov/usnic/src/usdf_wait.c delete mode 100644 prov/usnic/src/usdf_wait.h delete mode 100644 prov/usnic/src/usnic_direct/cq_desc.h delete mode 100644 prov/usnic/src/usnic_direct/cq_enet_desc.h delete mode 100644 prov/usnic/src/usnic_direct/kcompat.h delete mode 100644 prov/usnic/src/usnic_direct/kcompat_priv.h delete mode 100644 prov/usnic/src/usnic_direct/libnl1_utils.h delete mode 100644 prov/usnic/src/usnic_direct/libnl3_utils.h delete mode 100644 prov/usnic/src/usnic_direct/libnl_utils.h delete mode 100644 prov/usnic/src/usnic_direct/libnl_utils_common.c delete mode 100644 prov/usnic/src/usnic_direct/linux/delay.h delete mode 100644 prov/usnic/src/usnic_direct/linux/slab.h delete mode 100644 prov/usnic/src/usnic_direct/linux_types.h delete mode 100644 prov/usnic/src/usnic_direct/rq_enet_desc.h delete mode 100644 prov/usnic/src/usnic_direct/usd.h delete mode 100644 prov/usnic/src/usnic_direct/usd_caps.c delete mode 100644 prov/usnic/src/usnic_direct/usd_caps.h delete mode 100644 prov/usnic/src/usnic_direct/usd_dest.c delete mode 100644 prov/usnic/src/usnic_direct/usd_dest.h delete mode 100644 prov/usnic/src/usnic_direct/usd_device.c delete mode 100644 prov/usnic/src/usnic_direct/usd_device.h delete mode 100644 prov/usnic/src/usnic_direct/usd_enum.c delete mode 100644 prov/usnic/src/usnic_direct/usd_event.c delete mode 100644 prov/usnic/src/usnic_direct/usd_ib_cmd.c delete mode 100644 prov/usnic/src/usnic_direct/usd_ib_cmd.h delete mode 100644 prov/usnic/src/usnic_direct/usd_ib_sysfs.c delete mode 100644 prov/usnic/src/usnic_direct/usd_ib_sysfs.h delete mode 100644 prov/usnic/src/usnic_direct/usd_mem.c delete mode 100644 prov/usnic/src/usnic_direct/usd_poll.c delete mode 100644 prov/usnic/src/usnic_direct/usd_post.c delete 
mode 100644 prov/usnic/src/usnic_direct/usd_post.h delete mode 100644 prov/usnic/src/usnic_direct/usd_post_ud_pio_udp.c delete mode 100644 prov/usnic/src/usnic_direct/usd_post_ud_raw.c delete mode 100644 prov/usnic/src/usnic_direct/usd_post_ud_udp.c delete mode 100644 prov/usnic/src/usnic_direct/usd_queue.h delete mode 100644 prov/usnic/src/usnic_direct/usd_queues.c delete mode 100644 prov/usnic/src/usnic_direct/usd_socket.c delete mode 100644 prov/usnic/src/usnic_direct/usd_socket.h delete mode 100644 prov/usnic/src/usnic_direct/usd_time.h delete mode 100644 prov/usnic/src/usnic_direct/usd_util.h delete mode 100644 prov/usnic/src/usnic_direct/usd_vnic.c delete mode 100644 prov/usnic/src/usnic_direct/usd_vnic.h delete mode 100644 prov/usnic/src/usnic_direct/usnic_abi.h delete mode 100644 prov/usnic/src/usnic_direct/usnic_direct.h delete mode 100644 prov/usnic/src/usnic_direct/usnic_ib_abi.h delete mode 100644 prov/usnic/src/usnic_direct/usnic_ip_utils.c delete mode 100644 prov/usnic/src/usnic_direct/usnic_ip_utils.h delete mode 100644 prov/usnic/src/usnic_direct/usnic_user_utils.h delete mode 100644 prov/usnic/src/usnic_direct/vnic_cq.c delete mode 100644 prov/usnic/src/usnic_direct/vnic_cq.h delete mode 100644 prov/usnic/src/usnic_direct/vnic_dev.c delete mode 100644 prov/usnic/src/usnic_direct/vnic_dev.h delete mode 100644 prov/usnic/src/usnic_direct/vnic_devcmd.h delete mode 100644 prov/usnic/src/usnic_direct/vnic_enet.h delete mode 100644 prov/usnic/src/usnic_direct/vnic_intr.c delete mode 100644 prov/usnic/src/usnic_direct/vnic_intr.h delete mode 100644 prov/usnic/src/usnic_direct/vnic_resource.h delete mode 100644 prov/usnic/src/usnic_direct/vnic_rq.c delete mode 100644 prov/usnic/src/usnic_direct/vnic_rq.h delete mode 100644 prov/usnic/src/usnic_direct/vnic_stats.h delete mode 100644 prov/usnic/src/usnic_direct/vnic_wq.c delete mode 100644 prov/usnic/src/usnic_direct/vnic_wq.h delete mode 100644 prov/usnic/src/usnic_direct/wq_enet_desc.h diff --git 
a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 642a25e3153..74b1abe6d21 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -34,7 +34,6 @@ env: --enable-shm --enable-tcp --enable-udp - --enable-usnic --enable-verbs=rdma-core/build --enable-sm2 RDMA_CORE_PATH: 'rdma-core/build' diff --git a/.github/workflows/pr-ci.yml b/.github/workflows/pr-ci.yml index ae0b04d3f04..c4652c698ee 100644 --- a/.github/workflows/pr-ci.yml +++ b/.github/workflows/pr-ci.yml @@ -34,7 +34,6 @@ env: --enable-shm --enable-tcp --enable-udp - --enable-usnic --enable-verbs=$PWD/rdma-core/build RDMA_CORE_PATH: '$PWD/rdma-core/build' RDMA_CORE_VERSION: v34.1 diff --git a/.travis.yml b/.travis.yml index 2089160f468..42443fedc8c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -39,7 +39,7 @@ addons: name: "ofiwg/libfabric" description: "Libfabric project coverity scans" notification_email: sean.hefty@intel.com - build_command_prepend: "./autogen.sh; ./configure --enable-efa=$RDMA_CORE_PATH --enable-psm2 --enable-psm3=$RDMA_CORE_PATH --enable-usnic --enable-verbs=$RDMA_CORE_PATH" + build_command_prepend: "./autogen.sh; ./configure --enable-efa=$RDMA_CORE_PATH --enable-psm2 --enable-psm3=$RDMA_CORE_PATH --enable-verbs=$RDMA_CORE_PATH" build_command: "make -j2" branch_pattern: main @@ -75,7 +75,7 @@ install: git clone --depth 1 -b $RDMA_CORE_BRANCH https://github.com/linux-rdma/rdma-core.git && cd rdma-core && bash build.sh && cd -; RDMA_CORE_PATH=$PWD/rdma-core/build ; export LD_LIBRARY_PATH="$RDMA_CORE_PATH/lib:$LD_LIBRARY_PATH" ; - LIBFABRIC_CONFIGURE_ARGS="$LIBFABRIC_CONFIGURE_ARGS --enable-usnic + LIBFABRIC_CONFIGURE_ARGS="$LIBFABRIC_CONFIGURE_ARGS --enable-psm3=$RDMA_CORE_PATH --enable-verbs=$RDMA_CORE_PATH --enable-efa=$RDMA_CORE_PATH"; @@ -101,7 +101,6 @@ install: --disable-shm --disable-tcp --disable-udp - --disable-usnic --disable-verbs - make -j2 $MAKE_FLAGS - make install @@ -123,7 +122,7 @@ install: make dist; 
config_options="--enable-efa=$RDMA_CORE_PATH --enable-psm3=$RDMA_CORE_PATH - --enable-verbs=$RDMA_CORE_PATH --enable-usnic"; + --enable-verbs=$RDMA_CORE_PATH"; LDFLAGS=-Wl,--build-id rpmbuild -ta --define "configopts $config_options" libfabric-*.tar.bz2; fi diff --git a/Makefile.am b/Makefile.am index d916654bd58..7b2941c9283 100644 --- a/Makefile.am +++ b/Makefile.am @@ -452,7 +452,6 @@ include prov/sockets/Makefile.include include prov/udp/Makefile.include include prov/verbs/Makefile.include include prov/efa/Makefile.include -include prov/usnic/Makefile.include include prov/psm2/Makefile.include include prov/psm3/Makefile.include include prov/gni/Makefile.include diff --git a/README.md b/README.md index 7378f3ad552..c4ec349fb31 100644 --- a/README.md +++ b/README.md @@ -239,38 +239,6 @@ libfabric features over any hardware. See the `fi_udp(7)` man page for more details. -### usnic - -*** - -The `usnic` provider is designed to run over the Cisco VIC (virtualized NIC) -hardware on Cisco UCS servers. It utilizes the Cisco usnic (userspace NIC) -capabilities of the VIC to enable ultra low latency and other offload -capabilities on Ethernet networks. - -See the `fi_usnic(7)` man page for more details. - -#### Dependencies - -- The `usnic` provider depends on library files from either `libnl` version 1 - (sometimes known as `libnl` or `libnl1`) or version 3 (sometimes known as - `libnl3`). If you are compiling libfabric from source and want to enable - usNIC support, you will also need the matching `libnl` header files (e.g., - if you are building with `libnl` version 3, you need both the header and - library files from version 3). - -#### Configure options - -``` ---with-libnl= -``` - -If specified, look for libnl support. If it is not found, the `usnic` -provider will not be built. If `` is specified, then check in the -directory and check for `libnl` version 3. If version 3 is not found, then -check for version 1. 
If no `` argument is specified, then this -option is redundant with `--with-usnic`. - ### verbs *** diff --git a/config/cron-make-nightly-tarball.pl b/config/cron-make-nightly-tarball.pl index 2db6d241480..c8c22ecea2a 100755 --- a/config/cron-make-nightly-tarball.pl +++ b/config/cron-make-nightly-tarball.pl @@ -279,7 +279,7 @@ sub submit_to_coverity { # Run the coverity script if requested if (defined($libfabric_coverity_token_arg) && $rebuilt_libfabric) { submit_to_coverity("ofiwg%2Flibfabric", $libfabric_version, - "--enable-sockets --enable-udp --enable-verbs --enable-usnic", + "--enable-sockets --enable-udp --enable-verbs", $libfabric_coverity_token_arg); } if (defined($fabtests_coverity_token_arg) && $rebuilt_fabtests) { diff --git a/configure.ac b/configure.ac index a4cca43be4e..8552ed0bced 100644 --- a/configure.ac +++ b/configure.ac @@ -950,9 +950,6 @@ FI_PROVIDER_SETUP([psm3]) FI_PROVIDER_SETUP([sockets]) FI_PROVIDER_SETUP([verbs]) FI_PROVIDER_SETUP([efa]) -dnl The usnic provider must be setup after the verbs provider. See -dnl prov/usnic/configure.m4 for details. -FI_PROVIDER_SETUP([usnic]) FI_PROVIDER_SETUP([gni]) FI_PROVIDER_SETUP([udp]) FI_PROVIDER_SETUP([tcp]) diff --git a/contrib/buildrpm/README b/contrib/buildrpm/README index 40db242278e..01fb28d35a2 100644 --- a/contrib/buildrpm/README +++ b/contrib/buildrpm/README @@ -87,5 +87,5 @@ General parameters: Print usage message and exit. 
Example usages of the script: - buildrpmLibfabric.sh -omsv -i usnic -e sockets -e verbs -e psm3 libfabric-1.4.1.tar.bz2 + buildrpmLibfabric.sh -omsv -e sockets -e verbs -e psm3 libfabric-1.4.1.tar.bz2 buildrpmLibfabric.sh -omsv -c "--disable-silent-rules" libfabric-1.4.1.tar.bz2 diff --git a/contrib/intel/jenkins/common.py b/contrib/intel/jenkins/common.py index b4c1918498c..d456578a33f 100755 --- a/contrib/intel/jenkins/common.py +++ b/contrib/intel/jenkins/common.py @@ -129,7 +129,6 @@ def run(self): 'shm' ] common_disable_list = [ - 'usnic', 'efa', 'perf', 'rstream', diff --git a/fabtests/Makefile.am b/fabtests/Makefile.am index c743d570284..025cabd8d7a 100644 --- a/fabtests/Makefile.am +++ b/fabtests/Makefile.am @@ -110,8 +110,6 @@ nobase_dist_config_DATA = \ test_configs/verbs/all.test \ test_configs/verbs/quick.test \ test_configs/verbs/verbs.exclude \ - test_configs/usnic/all.test \ - test_configs/usnic/quick.test \ test_configs/psm2/all.test \ test_configs/psm2/verify.test \ test_configs/psm2/psm2.exclude \ diff --git a/fabtests/test_configs/usnic/all.test b/fabtests/test_configs/usnic/all.test deleted file mode 100644 index 7c93a0ce53a..00000000000 --- a/fabtests/test_configs/usnic/all.test +++ /dev/null @@ -1,40 +0,0 @@ -#: "Suite of tests for the usnic provider" -{ - prov_name: usnic, - test_type: [ - FT_TEST_LATENCY, - FT_TEST_BANDWIDTH, - ], - class_function: [ - FT_FUNC_SEND, - FT_FUNC_SENDV, - FT_FUNC_SENDMSG, - FT_FUNC_INJECT, - ], - ep_type: [ - FI_EP_DGRAM, - FI_EP_RDM, - FI_EP_MSG - ], - av_type: [ - FI_AV_MAP, - ], - comp_type: [ - FT_COMP_QUEUE, - ], - eq_wait_obj: [ - FI_WAIT_NONE, - FI_WAIT_UNSPEC, - FI_WAIT_FD, - ], - cq_wait_obj: [ - FI_WAIT_NONE, - FI_WAIT_UNSPEC, - ], - mode: [ - FI_CONTEXT, FI_RX_CQ_DATA, - ], - test_class: [ - FT_CAP_MSG, - ], -}, diff --git a/fabtests/test_configs/usnic/quick.test b/fabtests/test_configs/usnic/quick.test deleted file mode 100644 index 225d4cd77a5..00000000000 --- 
a/fabtests/test_configs/usnic/quick.test +++ /dev/null @@ -1,41 +0,0 @@ -#: "Suite of tests for the usnic provider" -{ - prov_name: usnic, - test_type: [ - FT_TEST_LATENCY, - FT_TEST_BANDWIDTH, - ], - class_function: [ - FT_FUNC_SEND, - FT_FUNC_SENDV, - FT_FUNC_SENDMSG, - FT_FUNC_INJECT, - ], - ep_type: [ - FI_EP_DGRAM, - FI_EP_RDM, - FI_EP_MSG, - ], - av_type: [ - FI_AV_MAP, - ], - comp_type: [ - FT_COMP_QUEUE, - ], - eq_wait_obj: [ - FI_WAIT_NONE, - FI_WAIT_UNSPEC, - FI_WAIT_FD, - ], - cq_wait_obj: [ - FI_WAIT_NONE, - FI_WAIT_UNSPEC, - ], - mode: [ - FI_CONTEXT, FI_RX_CQ_DATA, - ], - test_class: [ - FT_CAP_MSG, - ], - test_flags: FT_FLAG_QUICKTEST -}, diff --git a/include/ofi_prov.h b/include/ofi_prov.h index cdfb50794ae..38ee97f74df 100644 --- a/include/ofi_prov.h +++ b/include/ofi_prov.h @@ -123,17 +123,6 @@ SOCKETS_INI ; # define SOCKETS_INIT NULL #endif -#if (HAVE_USNIC) && (HAVE_USNIC_DL) -# define USNIC_INI FI_EXT_INI -# define USNIC_INIT NULL -#elif (HAVE_USNIC) -# define USNIC_INI INI_SIG(fi_usnic_ini) -# define USNIC_INIT fi_usnic_ini() -USNIC_INI ; -#else -# define USNIC_INIT NULL -#endif - #if (HAVE_UDP) && (HAVE_UDP_DL) # define UDP_INI FI_EXT_INI # define UDP_INIT NULL diff --git a/man/fi_pingpong.1.md b/man/fi_pingpong.1.md index e72c5631903..ab59d45e028 100644 --- a/man/fi_pingpong.1.md +++ b/man/fi_pingpong.1.md @@ -72,7 +72,7 @@ given domains cannot communicate, then the application will fail. ## Fabric Filtering *-p \* -: The name of the underlying fabric provider (e.g., sockets, psm3, usnic, etc.). +: The name of the underlying fabric provider (e.g., sockets, psm3, etc.). If a provider is not specified via the -p switch, the test will pick one from the list of available providers (as returned by fi_getinfo(3)). @@ -119,15 +119,15 @@ given domains cannot communicate, then the application will fail. 
## An example with various options ### Server: -`server$ fi_pingpong -p usnic -I 1000 -S 1024` +`server$ fi_pingpong -p tcp -I 1000 -S 1024` ### Client: -`client$ fi_pingpong -p usnic -I 1000 -S 1024 192.168.0.123` +`client$ fi_pingpong -p tcp -I 1000 -S 1024 192.168.0.123` Specifically, this will run a pingpong test with: -- usNIC provider +- tcp provider - 1000 iterations - 1024 bytes message size - server node as 192.168.0.123 @@ -135,10 +135,10 @@ Specifically, this will run a pingpong test with: ## A longer test ### Server: -`server$ fi_pingpong -p usnic -I 10000 -S all` +`server$ fi_pingpong -p tcp -I 10000 -S all` ### Client: -`client$ fi_pingpong -p usnic -I 10000 -S all 192.168.0.123` +`client$ fi_pingpong -p tcp -I 10000 -S all 192.168.0.123` # DEFAULTS diff --git a/man/fi_usnic.7.md b/man/fi_usnic.7.md deleted file mode 100644 index 88855fc35fb..00000000000 --- a/man/fi_usnic.7.md +++ /dev/null @@ -1,330 +0,0 @@ ---- -layout: page -title: fi_usnic(7) -tagline: Libfabric Programmer's Manual ---- -{% include JB/setup %} - -# NAME - -fi_usnic \- The usNIC Fabric Provider - -# OVERVIEW - -The *usnic* provider is designed to run over the Cisco VIC -(virtualized NIC) hardware on Cisco UCS servers. It utilizes the -Cisco usNIC (userspace NIC) capabilities of the VIC to enable ultra -low latency and other offload capabilities on Ethernet networks. - -# RELEASE NOTES - -* The *usnic* libfabric provider requires the use of the "libnl" - library. - - There are two versions of libnl generally available: v1 and v3; - the usnic provider can use either version. - - If you are building libfabric/the usnic provider from source, you - will need to have the libnl header files available (e.g., if you - are installing libnl from RPM or other packaging system, install - the "-devel" versions of the package). 
- - If you have libnl (either v1 or v3) installed in a non-standard - location (e.g., not in /usr/lib or /usr/lib64), you may need to - tell libfabric's configure where to find libnl via the - `--with-libnl=DIR` command line option (where DIR is the - installation prefix of the libnl package). -* The most common way to use the libfabric usnic provider is via an - MPI implementation that uses libfabric (and the usnic provider) as a - lower layer transport. MPI applications do not need to know - anything about libfabric or usnic in this use case -- the MPI - implementation hides all these details from the application. -* If you are writing applications directly to the libfabric API: - - *FI_EP_DGRAM* endpoints are the best supported method of utilizing - the usNIC interface. Specifically, the *FI_EP_DGRAM* endpoint - type has been extensively tested as the underlying layer for Open - MPI's *usnic* BTL. - - *FI_EP_MSG* and *FI_EP_RDM* endpoints are implemented, but are - only lightly tested. It is likely that there are still some bugs - in these endpoint types. In particular, there are known bugs in RDM - support in the presence of congestion or packet loss (issue 1621). - RMA is not yet supported. - - [`fi_provider`(7)](fi_provider.7.html) lists requirements for all - providers. The following limitations exist in the *usnic* - provider: - * multicast operations are not supported on *FI_EP_DGRAM* and - *FI_EP_RDM* endpoints. - * *FI_EP_MSG* endpoints only support connect, accept, and getname - CM operations. - * Passive endpoints only support listen, setname, and getname CM - operations. - * *FI_EP_DGRAM* endpoints support `fi_sendmsg()` and - `fi_recvmsg()`, but some flags are ignored. `fi_sendmsg()` - supports `FI_INJECT` and `FI_COMPLETION`. `fi_recvmsg()` - supports `FI_MORE`. - * Address vectors only support `FI_AV_MAP`. - * No counters are supported. - * The tag matching interface is not supported. 
- * *FI_MSG_PREFIX* is only supported on *FI_EP_DGRAM* and usage - is limited to releases 1.1 and beyond. - * fi_control with FI_GETWAIT may only be used on CQs that have been - bound to an endpoint. If fi_control is used on an unbound CQ, it will - return -FI_EOPBADSTATE. - * There is limited support for data returned as part of an erroneous - asynchronous operation. EQs will return error data for CM operations, - CQs do not support returning error data. - * As of 1.5, usNIC supports fi_mr_regv, and fi_mr_regattr. Support is - limited to a single iov. - * Atomic operations are not supported. - - Resource management is not supported. The application is responsible for - resource protection. - - The usnic libfabric provider supports extensions that provide - information and functionality beyond the standard libfabric - interface. See the "USNIC EXTENSIONS" section, below. - -# USNIC EXTENSIONS - -The usnic libfabric provider exports extensions for additional VIC, -usNIC, and Ethernet capabilities not provided by the standard -libfabric interface. - -These extensions are available via the "fi_ext_usnic.h" header file. - -## Fabric Extension: getinfo - -Version 2 of the "fabric getinfo" extension was introduced in Libfabric release -v1.3.0 and can be used to retrieve IP and SR-IOV information about a usNIC -device obtained from the [`fi_getinfo`(3)](fi_getinfo.3.html) function. - -The "fabric getinfo" extension is obtained by calling `fi_open_ops` and -requesting `FI_USNIC_FABRIC_OPS_1` to get the usNIC fabric extension -operations. The `getinfo` function accepts a version parameter that can be -used to select different versions of the extension. The information returned by -the "fabric getinfo" extension is accessible through a `fi_usnic_info` struct -that uses a version tagged union. The accessed union member must correspond -with the requested version. 
It is recommended that applications explicitly -request a version rather than using the header provided -`FI_EXT_USNIC_INFO_VERSION`. Although there is a version 1 of the extension, -its use is discouraged, and it may not be available in future releases. - -### Compatibility issues - -The addition of version 2 of the extension caused an alignment issue that -could lead to invalid data in the v1 portion of the structure. This means that -the alignment difference manifests when an application using v1 of the -extension is compiled with Libfabric v1.1.x or v1.2.x, but then runs with -Libfabric.so that is v1.3.x or higher (and vice versa). - -The v1.4.0 release of Libfabric introduced a padding field to explicitly -maintain compatibility with the v1.3.0 release. If the issue is encountered, -then it is recommended that you upgrade to a release containing version 2 of -the extension, or recompile with a patched version of an older release. - - -```c -#include - -struct fi_usnic_info { - uint32_t ui_version; - uint8_t ui_pad0[4]; - union { - struct fi_usnic_info_v1 v1; - struct fi_usnic_info_v2 v2; - } ui; -} __attribute__((packed)); - -int getinfo(uint32_t version, struct fid_fabric *fabric, - struct fi_usnic_info *info); -``` - -*version* -: Version of getinfo to be used - -*fabric* -: Fabric descriptor - -*info* -: Upon successful return, this parameter will contain information about the -fabric. 
- -- Version 2 - -```c -struct fi_usnic_cap { - const char *uc_capability; - int uc_present; -} __attribute__((packed)); - -struct fi_usnic_info_v2 { - uint32_t ui_link_speed; - uint32_t ui_netmask_be; - char ui_ifname[IFNAMSIZ]; - unsigned ui_num_vf; - unsigned ui_qp_per_vf; - unsigned ui_cq_per_vf; - - char ui_devname[FI_EXT_USNIC_MAX_DEVNAME]; - uint8_t ui_mac_addr[6]; - - uint8_t ui_pad0[2]; - - uint32_t ui_ipaddr_be; - uint32_t ui_prefixlen; - uint32_t ui_mtu; - uint8_t ui_link_up; - - uint8_t ui_pad1[3]; - - uint32_t ui_vendor_id; - uint32_t ui_vendor_part_id; - uint32_t ui_device_id; - char ui_firmware[64]; - - unsigned ui_intr_per_vf; - unsigned ui_max_cq; - unsigned ui_max_qp; - - unsigned ui_max_cqe; - unsigned ui_max_send_credits; - unsigned ui_max_recv_credits; - - const char *ui_nicname; - const char *ui_pid; - - struct fi_usnic_cap **ui_caps; -} __attribute__((packed)); -``` - -- Version 1 - -```c -struct fi_usnic_info_v1 { - uint32_t ui_link_speed; - uint32_t ui_netmask_be; - char ui_ifname[IFNAMSIZ]; - - uint32_t ui_num_vf; - uint32_t ui_qp_per_vf; - uint32_t ui_cq_per_vf; -} __attribute__((packed)); -``` - -Version 1 of the "fabric getinfo" extension can be used by explicitly -requesting it in the call to `getinfo` and accessing the `v1` portion of the -`fi_usnic_info.ui` union. Use of version 1 is not recommended and it may be -removed from future releases. - - -The following is an example of how to utilize version 2 of the usnic "fabric -getinfo" extension. 
- -```c -#include -#include - -/* The usNIC extensions are all in the - rdma/fi_ext_usnic.h header */ -#include - -int main(int argc, char *argv[]) { - struct fi_info *info; - struct fi_info *info_list; - struct fi_info hints = {0}; - struct fi_ep_attr ep_attr = {0}; - struct fi_fabric_attr fabric_attr = {0}; - - fabric_attr.prov_name = "usnic"; - ep_attr.type = FI_EP_DGRAM; - - hints.caps = FI_MSG; - hints.mode = FI_LOCAL_MR | FI_MSG_PREFIX; - hints.addr_format = FI_SOCKADDR; - hints.ep_attr = &ep_attr; - hints.fabric_attr = &fabric_attr; - - /* Find all usnic providers */ - fi_getinfo(FI_VERSION(1, 0), NULL, 0, 0, &hints, &info_list); - - for (info = info_list; NULL != info; info = info->next) { - /* Open the fabric on the interface */ - struct fid_fabric *fabric; - fi_fabric(info->fabric_attr, &fabric, NULL); - - /* Pass FI_USNIC_FABRIC_OPS_1 to get usnic ops - on the fabric */ - struct fi_usnic_ops_fabric *usnic_fabric_ops; - fi_open_ops(&fabric->fid, FI_USNIC_FABRIC_OPS_1, 0, - (void **) &usnic_fabric_ops, NULL); - - /* Now use the returned usnic ops structure to call - usnic extensions. The following extension queries - some IP and SR-IOV characteristics about the - usNIC device. */ - struct fi_usnic_info usnic_info; - - /* Explicitly request version 2. */ - usnic_fabric_ops->getinfo(2, fabric, &usnic_info); - - printf("Fabric interface %s is %s:\n" - "\tNetmask: 0x%08x\n\tLink speed: %d\n" - "\tSR-IOV VFs: %d\n\tQPs per SR-IOV VF: %d\n" - "\tCQs per SR-IOV VF: %d\n", - info->fabric_attr->name, - usnic_info.ui.v2.ui_ifname, - usnic_info.ui.v2.ui_netmask_be, - usnic_info.ui.v2.ui_link_speed, - usnic_info.ui.v2.ui_num_vf, - usnic_info.ui.v2.ui_qp_per_vf, - usnic_info.ui.v2.ui_cq_per_vf); - - fi_close(&fabric->fid); - } - - fi_freeinfo(info_list); - return 0; -} -``` - -## Adress Vector Extension: get_distance - -The "address vector get_distance" extension was introduced in Libfabric release -v1.0.0 and can be used to retrieve the network distance of an address. 
- -The "get_distance" extension is obtained by calling `fi_open_ops` and -requesting `FI_USNIC_AV_OPS_1` to get the usNIC address vector extension -operations. - -```c -int get_distance(struct fid_av *av, void *addr, int *metric); -``` - -*av* -: Address vector - -*addr* -: Destination address - -*metric* -: On output this will contain `-1` if the destination host is unreachable, `0` -is the destination host is locally connected, and `1` otherwise. - -See fi_ext_usnic.h for more details. - -# VERSION DIFFERENCES - -## New naming convention for fabric/domain starting with libfabric v1.4 - -The release of libfabric v1.4 introduced a new naming convention for fabric and domain. However the usNIC provider -remains backward compatible with applications supporting the old scheme and decides which one to use based on -the version passed to `fi_getinfo`: - -* When `FI_VERSION(1,4)` or higher is used: - - fabric name is the network address with the CIDR notation (i.e., `a.b.c.d/e`) - - domain name is the usNIC Linux interface name (i.e., `usnic_X`) - -* When a lower version number is used, like `FI_VERSION(1, 3)`, it follows the same behavior the usNIC provider exhibited in libfabric <= v1.3: - - fabric name is the usNIC Linux interface name (i.e., `usnic_X`) - - domain name is `NULL` - -# SEE ALSO - -[`fabric`(7)](fabric.7.html), -[`fi_open_ops`(3)](fi_open_ops.3.html), -[`fi_provider`(7)](fi_provider.7.html), diff --git a/man/man7/fi_usnic.7 b/man/man7/fi_usnic.7 deleted file mode 100644 index cf03f28a0f7..00000000000 --- a/man/man7/fi_usnic.7 +++ /dev/null @@ -1,382 +0,0 @@ -.\" Automatically generated by Pandoc 2.9.2.1 -.\" -.TH "fi_usnic" "7" "2022\-12\-09" "Libfabric Programmer\[cq]s Manual" "#VERSION#" -.hy -.SH NAME -.PP -fi_usnic - The usNIC Fabric Provider -.SH OVERVIEW -.PP -The \f[I]usnic\f[R] provider is designed to run over the Cisco VIC -(virtualized NIC) hardware on Cisco UCS servers. 
-It utilizes the Cisco usNIC (userspace NIC) capabilities of the VIC to -enable ultra low latency and other offload capabilities on Ethernet -networks. -.SH RELEASE NOTES -.IP \[bu] 2 -The \f[I]usnic\f[R] libfabric provider requires the use of the -\[lq]libnl\[rq] library. -.RS 2 -.IP \[bu] 2 -There are two versions of libnl generally available: v1 and v3; the -usnic provider can use either version. -.IP \[bu] 2 -If you are building libfabric/the usnic provider from source, you will -need to have the libnl header files available (e.g., if you are -installing libnl from RPM or other packaging system, install the -\[lq]-devel\[rq] versions of the package). -.IP \[bu] 2 -If you have libnl (either v1 or v3) installed in a non-standard location -(e.g., not in /usr/lib or /usr/lib64), you may need to tell -libfabric\[cq]s configure where to find libnl via the -\f[C]--with-libnl=DIR\f[R] command line option (where DIR is the -installation prefix of the libnl package). -.RE -.IP \[bu] 2 -The most common way to use the libfabric usnic provider is via an MPI -implementation that uses libfabric (and the usnic provider) as a lower -layer transport. -MPI applications do not need to know anything about libfabric or usnic -in this use case \[en] the MPI implementation hides all these details -from the application. -.IP \[bu] 2 -If you are writing applications directly to the libfabric API: -.RS 2 -.IP \[bu] 2 -\f[I]FI_EP_DGRAM\f[R] endpoints are the best supported method of -utilizing the usNIC interface. -Specifically, the \f[I]FI_EP_DGRAM\f[R] endpoint type has been -extensively tested as the underlying layer for Open MPI\[cq]s -\f[I]usnic\f[R] BTL. -.IP \[bu] 2 -\f[I]FI_EP_MSG\f[R] and \f[I]FI_EP_RDM\f[R] endpoints are implemented, -but are only lightly tested. -It is likely that there are still some bugs in these endpoint types. -In particular, there are known bugs in RDM support in the presence of -congestion or packet loss (issue 1621). -RMA is not yet supported. 
-.IP \[bu] 2 -\f[C]fi_provider\f[R](7) lists requirements for all providers. -The following limitations exist in the \f[I]usnic\f[R] provider: -.RS 2 -.IP \[bu] 2 -multicast operations are not supported on \f[I]FI_EP_DGRAM\f[R] and -\f[I]FI_EP_RDM\f[R] endpoints. -.IP \[bu] 2 -\f[I]FI_EP_MSG\f[R] endpoints only support connect, accept, and getname -CM operations. -.IP \[bu] 2 -Passive endpoints only support listen, setname, and getname CM -operations. -.IP \[bu] 2 -\f[I]FI_EP_DGRAM\f[R] endpoints support \f[C]fi_sendmsg()\f[R] and -\f[C]fi_recvmsg()\f[R], but some flags are ignored. -\f[C]fi_sendmsg()\f[R] supports \f[C]FI_INJECT\f[R] and -\f[C]FI_COMPLETION\f[R]. -\f[C]fi_recvmsg()\f[R] supports \f[C]FI_MORE\f[R]. -.IP \[bu] 2 -Address vectors only support \f[C]FI_AV_MAP\f[R]. -.IP \[bu] 2 -No counters are supported. -.IP \[bu] 2 -The tag matching interface is not supported. -.IP \[bu] 2 -\f[I]FI_MSG_PREFIX\f[R] is only supported on \f[I]FI_EP_DGRAM\f[R] and -usage is limited to releases 1.1 and beyond. -.IP \[bu] 2 -fi_control with FI_GETWAIT may only be used on CQs that have been bound -to an endpoint. -If fi_control is used on an unbound CQ, it will return -FI_EOPBADSTATE. -.IP \[bu] 2 -There is limited support for data returned as part of an erroneous -asynchronous operation. -EQs will return error data for CM operations, CQs do not support -returning error data. -.IP \[bu] 2 -As of 1.5, usNIC supports fi_mr_regv, and fi_mr_regattr. -Support is limited to a single iov. -.IP \[bu] 2 -Atomic operations are not supported. -.RE -.IP \[bu] 2 -Resource management is not supported. -The application is responsible for resource protection. -.IP \[bu] 2 -The usnic libfabric provider supports extensions that provide -information and functionality beyond the standard libfabric interface. -See the \[lq]USNIC EXTENSIONS\[rq] section, below. 
-.RE -.SH USNIC EXTENSIONS -.PP -The usnic libfabric provider exports extensions for additional VIC, -usNIC, and Ethernet capabilities not provided by the standard libfabric -interface. -.PP -These extensions are available via the \[lq]fi_ext_usnic.h\[rq] header -file. -.SS Fabric Extension: getinfo -.PP -Version 2 of the \[lq]fabric getinfo\[rq] extension was introduced in -Libfabric release v1.3.0 and can be used to retrieve IP and SR-IOV -information about a usNIC device obtained from the -\f[C]fi_getinfo\f[R](3) function. -.PP -The \[lq]fabric getinfo\[rq] extension is obtained by calling -\f[C]fi_open_ops\f[R] and requesting \f[C]FI_USNIC_FABRIC_OPS_1\f[R] to -get the usNIC fabric extension operations. -The \f[C]getinfo\f[R] function accepts a version parameter that can be -used to select different versions of the extension. -The information returned by the \[lq]fabric getinfo\[rq] extension is -accessible through a \f[C]fi_usnic_info\f[R] struct that uses a version -tagged union. -The accessed union member must correspond with the requested version. -It is recommended that applications explicitly request a version rather -than using the header provided \f[C]FI_EXT_USNIC_INFO_VERSION\f[R]. -Although there is a version 1 of the extension, its use is discouraged, -and it may not be available in future releases. -.SS Compatibility issues -.PP -The addition of version 2 of the extension caused an alignment issue -that could lead to invalid data in the v1 portion of the structure. -This means that the alignment difference manifests when an application -using v1 of the extension is compiled with Libfabric v1.1.x or v1.2.x, -but then runs with Libfabric.so that is v1.3.x or higher (and vice -versa). -.PP -The v1.4.0 release of Libfabric introduced a padding field to explicitly -maintain compatibility with the v1.3.0 release. 
-If the issue is encountered, then it is recommended that you upgrade to -a release containing version 2 of the extension, or recompile with a -patched version of an older release. -.IP -.nf -\f[C] -#include - -struct fi_usnic_info { - uint32_t ui_version; - uint8_t ui_pad0[4]; - union { - struct fi_usnic_info_v1 v1; - struct fi_usnic_info_v2 v2; - } ui; -} __attribute__((packed)); - -int getinfo(uint32_t version, struct fid_fabric *fabric, - struct fi_usnic_info *info); -\f[R] -.fi -.TP -\f[I]version\f[R] -Version of getinfo to be used -.TP -\f[I]fabric\f[R] -Fabric descriptor -.TP -\f[I]info\f[R] -Upon successful return, this parameter will contain information about -the fabric. -.IP \[bu] 2 -Version 2 -.IP -.nf -\f[C] -struct fi_usnic_cap { - const char *uc_capability; - int uc_present; -} __attribute__((packed)); - -struct fi_usnic_info_v2 { - uint32_t ui_link_speed; - uint32_t ui_netmask_be; - char ui_ifname[IFNAMSIZ]; - unsigned ui_num_vf; - unsigned ui_qp_per_vf; - unsigned ui_cq_per_vf; - - char ui_devname[FI_EXT_USNIC_MAX_DEVNAME]; - uint8_t ui_mac_addr[6]; - - uint8_t ui_pad0[2]; - - uint32_t ui_ipaddr_be; - uint32_t ui_prefixlen; - uint32_t ui_mtu; - uint8_t ui_link_up; - - uint8_t ui_pad1[3]; - - uint32_t ui_vendor_id; - uint32_t ui_vendor_part_id; - uint32_t ui_device_id; - char ui_firmware[64]; - - unsigned ui_intr_per_vf; - unsigned ui_max_cq; - unsigned ui_max_qp; - - unsigned ui_max_cqe; - unsigned ui_max_send_credits; - unsigned ui_max_recv_credits; - - const char *ui_nicname; - const char *ui_pid; - - struct fi_usnic_cap **ui_caps; -} __attribute__((packed)); -\f[R] -.fi -.IP \[bu] 2 -Version 1 -.IP -.nf -\f[C] -struct fi_usnic_info_v1 { - uint32_t ui_link_speed; - uint32_t ui_netmask_be; - char ui_ifname[IFNAMSIZ]; - - uint32_t ui_num_vf; - uint32_t ui_qp_per_vf; - uint32_t ui_cq_per_vf; -} __attribute__((packed)); -\f[R] -.fi -.PP -Version 1 of the \[lq]fabric getinfo\[rq] extension can be used by -explicitly requesting it in the call to 
\f[C]getinfo\f[R] and accessing -the \f[C]v1\f[R] portion of the \f[C]fi_usnic_info.ui\f[R] union. -Use of version 1 is not recommended and it may be removed from future -releases. -.PP -The following is an example of how to utilize version 2 of the usnic -\[lq]fabric getinfo\[rq] extension. -.IP -.nf -\f[C] -#include -#include - -/* The usNIC extensions are all in the - rdma/fi_ext_usnic.h header */ -#include - -int main(int argc, char *argv[]) { - struct fi_info *info; - struct fi_info *info_list; - struct fi_info hints = {0}; - struct fi_ep_attr ep_attr = {0}; - struct fi_fabric_attr fabric_attr = {0}; - - fabric_attr.prov_name = \[dq]usnic\[dq]; - ep_attr.type = FI_EP_DGRAM; - - hints.caps = FI_MSG; - hints.mode = FI_LOCAL_MR | FI_MSG_PREFIX; - hints.addr_format = FI_SOCKADDR; - hints.ep_attr = &ep_attr; - hints.fabric_attr = &fabric_attr; - - /* Find all usnic providers */ - fi_getinfo(FI_VERSION(1, 0), NULL, 0, 0, &hints, &info_list); - - for (info = info_list; NULL != info; info = info->next) { - /* Open the fabric on the interface */ - struct fid_fabric *fabric; - fi_fabric(info->fabric_attr, &fabric, NULL); - - /* Pass FI_USNIC_FABRIC_OPS_1 to get usnic ops - on the fabric */ - struct fi_usnic_ops_fabric *usnic_fabric_ops; - fi_open_ops(&fabric->fid, FI_USNIC_FABRIC_OPS_1, 0, - (void **) &usnic_fabric_ops, NULL); - - /* Now use the returned usnic ops structure to call - usnic extensions. The following extension queries - some IP and SR-IOV characteristics about the - usNIC device. */ - struct fi_usnic_info usnic_info; - - /* Explicitly request version 2. 
*/ - usnic_fabric_ops->getinfo(2, fabric, &usnic_info); - - printf(\[dq]Fabric interface %s is %s:\[rs]n\[dq] - \[dq]\[rs]tNetmask: 0x%08x\[rs]n\[rs]tLink speed: %d\[rs]n\[dq] - \[dq]\[rs]tSR-IOV VFs: %d\[rs]n\[rs]tQPs per SR-IOV VF: %d\[rs]n\[dq] - \[dq]\[rs]tCQs per SR-IOV VF: %d\[rs]n\[dq], - info->fabric_attr->name, - usnic_info.ui.v2.ui_ifname, - usnic_info.ui.v2.ui_netmask_be, - usnic_info.ui.v2.ui_link_speed, - usnic_info.ui.v2.ui_num_vf, - usnic_info.ui.v2.ui_qp_per_vf, - usnic_info.ui.v2.ui_cq_per_vf); - - fi_close(&fabric->fid); - } - - fi_freeinfo(info_list); - return 0; -} -\f[R] -.fi -.SS Adress Vector Extension: get_distance -.PP -The \[lq]address vector get_distance\[rq] extension was introduced in -Libfabric release v1.0.0 and can be used to retrieve the network -distance of an address. -.PP -The \[lq]get_distance\[rq] extension is obtained by calling -\f[C]fi_open_ops\f[R] and requesting \f[C]FI_USNIC_AV_OPS_1\f[R] to get -the usNIC address vector extension operations. -.IP -.nf -\f[C] -int get_distance(struct fid_av *av, void *addr, int *metric); -\f[R] -.fi -.TP -\f[I]av\f[R] -Address vector -.TP -\f[I]addr\f[R] -Destination address -.TP -\f[I]metric\f[R] -On output this will contain \f[C]-1\f[R] if the destination host is -unreachable, \f[C]0\f[R] is the destination host is locally connected, -and \f[C]1\f[R] otherwise. -.PP -See fi_ext_usnic.h for more details. -.SH VERSION DIFFERENCES -.SS New naming convention for fabric/domain starting with libfabric v1.4 -.PP -The release of libfabric v1.4 introduced a new naming convention for -fabric and domain. 
-However the usNIC provider remains backward compatible with applications -supporting the old scheme and decides which one to use based on the -version passed to \f[C]fi_getinfo\f[R]: -.IP \[bu] 2 -When \f[C]FI_VERSION(1,4)\f[R] or higher is used: -.RS 2 -.IP \[bu] 2 -fabric name is the network address with the CIDR notation (i.e., -\f[C]a.b.c.d/e\f[R]) -.IP \[bu] 2 -domain name is the usNIC Linux interface name (i.e., \f[C]usnic_X\f[R]) -.RE -.IP \[bu] 2 -When a lower version number is used, like \f[C]FI_VERSION(1, 3)\f[R], it -follows the same behavior the usNIC provider exhibited in libfabric <= -v1.3: -.RS 2 -.IP \[bu] 2 -fabric name is the usNIC Linux interface name (i.e., \f[C]usnic_X\f[R]) -.IP \[bu] 2 -domain name is \f[C]NULL\f[R] -.RE -.SH SEE ALSO -.PP -\f[C]fabric\f[R](7), \f[C]fi_open_ops\f[R](3), \f[C]fi_provider\f[R](7), -.SH AUTHORS -OpenFabrics. diff --git a/prov/psm2/build-psm2.sh b/prov/psm2/build-psm2.sh index a3333c484b6..6883959b3f6 100755 --- a/prov/psm2/build-psm2.sh +++ b/prov/psm2/build-psm2.sh @@ -10,7 +10,7 @@ # # Please run the script from the top level directory of the source repo. # -# The "psm", "usnic", and "verbs" providers are disabled to reduce the +# The "verbs" providers are disabled to reduce the # building time. They can be enabled as needed. 
# # Please check that the following variables are either set to appropriate @@ -56,8 +56,6 @@ eval ../configure \ $cflags $ldflags $options \ --prefix=${PREFIX:-$HOME/install/ofi} \ --enable-psm2=${PSM2_HOME:-yes} \ - --disable-psm \ - --disable-usnic \ --disable-verbs && \ make && make install diff --git a/prov/psm3/configure.ac b/prov/psm3/configure.ac index a986a5fbdc1..decca9b4a85 100644 --- a/prov/psm3/configure.ac +++ b/prov/psm3/configure.ac @@ -900,8 +900,6 @@ AC_DEFINE([HAVE_UCX], 0, [Ignore HAVE_UCX]) AC_DEFINE([HAVE_UCX_DL], 0, [Ignore HAVE_UCX_DL]) AC_DEFINE([HAVE_UDP], 0, [Ignore HAVE_UDP]) AC_DEFINE([HAVE_UDP_DL], 0, [Ignore HAVE_UDP_DL]) -AC_DEFINE([HAVE_USNIC], 0, [Ignore HAVE_USNIC]) -AC_DEFINE([HAVE_USNIC_DL], 0, [Ignore HAVE_USNIC_DL]) AC_DEFINE([HAVE_VERBS], 0, [Ignore HAVE_VERBS]) AC_DEFINE([HAVE_VERBS_DL], 0, [Ignore HAVE_VERBS_DL]) dnl FI_PROVIDER_FINI diff --git a/prov/usnic/Makefile.include b/prov/usnic/Makefile.include deleted file mode 100644 index 74ff3d6c02b..00000000000 --- a/prov/usnic/Makefile.include +++ /dev/null @@ -1,164 +0,0 @@ -# -# Copyright (c) 2014-2019, Cisco Systems, Inc. All rights reserved. -# -# This software is available to you under a choice of one of two -# licenses. You may choose to be licensed under the terms of the GNU -# General Public License (GPL) Version 2, available from the file -# COPYING in the main directory of this source tree, or the -# BSD license below: -# -# Redistribution and use in source and binary forms, with or -# without modification, are permitted provided that the following -# conditions are met: -# -# - Redistributions of source code must retain the above -# copyright notice, this list of conditions and the following -# disclaimer. -# -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials -# provided with the distribution. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -# - -if HAVE_USNIC -libusnic_direct_sources = \ - prov/usnic/src/usnic_direct/cq_desc.h \ - prov/usnic/src/usnic_direct/cq_enet_desc.h \ - prov/usnic/src/usnic_direct/kcompat.h \ - prov/usnic/src/usnic_direct/kcompat_priv.h \ - prov/usnic/src/usnic_direct/libnl1_utils.h \ - prov/usnic/src/usnic_direct/libnl3_utils.h \ - prov/usnic/src/usnic_direct/libnl_utils_common.c \ - prov/usnic/src/usnic_direct/libnl_utils.h \ - prov/usnic/src/usnic_direct/linux/delay.h \ - prov/usnic/src/usnic_direct/linux/slab.h \ - prov/usnic/src/usnic_direct/linux_types.h \ - prov/usnic/src/usnic_direct/rq_enet_desc.h \ - prov/usnic/src/usnic_direct/usd_caps.c \ - prov/usnic/src/usnic_direct/usd_caps.h \ - prov/usnic/src/usnic_direct/usd_dest.c \ - prov/usnic/src/usnic_direct/usd_dest.h \ - prov/usnic/src/usnic_direct/usd_device.c \ - prov/usnic/src/usnic_direct/usd_device.h \ - prov/usnic/src/usnic_direct/usd_event.c \ - prov/usnic/src/usnic_direct/usd_enum.c \ - prov/usnic/src/usnic_direct/usd.h \ - prov/usnic/src/usnic_direct/usd_ib_cmd.c \ - prov/usnic/src/usnic_direct/usd_ib_cmd.h \ - prov/usnic/src/usnic_direct/usd_ib_sysfs.c \ - prov/usnic/src/usnic_direct/usd_ib_sysfs.h \ - 
prov/usnic/src/usnic_direct/usd_mem.c \ - prov/usnic/src/usnic_direct/usd_poll.c \ - prov/usnic/src/usnic_direct/usd_post.c \ - prov/usnic/src/usnic_direct/usd_post.h \ - prov/usnic/src/usnic_direct/usd_post_ud_raw.c \ - prov/usnic/src/usnic_direct/usd_post_ud_udp.c \ - prov/usnic/src/usnic_direct/usd_post_ud_pio_udp.c \ - prov/usnic/src/usnic_direct/usd_queue.h \ - prov/usnic/src/usnic_direct/usd_queues.c \ - prov/usnic/src/usnic_direct/usd_socket.c \ - prov/usnic/src/usnic_direct/usd_socket.h \ - prov/usnic/src/usnic_direct/usd_time.h \ - prov/usnic/src/usnic_direct/usd_util.h \ - prov/usnic/src/usnic_direct/usd_vnic.c \ - prov/usnic/src/usnic_direct/usd_vnic.h \ - prov/usnic/src/usnic_direct/usnic_abi.h \ - prov/usnic/src/usnic_direct/usnic_direct.h \ - prov/usnic/src/usnic_direct/usnic_ib_abi.h \ - prov/usnic/src/usnic_direct/usnic_ip_utils.c \ - prov/usnic/src/usnic_direct/usnic_ip_utils.h \ - prov/usnic/src/usnic_direct/usnic_user_utils.h \ - prov/usnic/src/usnic_direct/vnic_cq.c \ - prov/usnic/src/usnic_direct/vnic_cq.h \ - prov/usnic/src/usnic_direct/vnic_dev.c \ - prov/usnic/src/usnic_direct/vnic_devcmd.h \ - prov/usnic/src/usnic_direct/vnic_dev.h \ - prov/usnic/src/usnic_direct/vnic_enet.h \ - prov/usnic/src/usnic_direct/vnic_resource.h \ - prov/usnic/src/usnic_direct/vnic_rq.c \ - prov/usnic/src/usnic_direct/vnic_rq.h \ - prov/usnic/src/usnic_direct/vnic_stats.h \ - prov/usnic/src/usnic_direct/vnic_wq.c \ - prov/usnic/src/usnic_direct/vnic_wq.h \ - prov/usnic/src/usnic_direct/vnic_intr.c \ - prov/usnic/src/usnic_direct/vnic_intr.h \ - prov/usnic/src/usnic_direct/wq_enet_desc.h - -_usnic_files = \ - $(libusnic_direct_sources) \ - prov/usnic/src/fi_ext_usnic.h \ - prov/usnic/src/usdf.h \ - prov/usnic/src/usdf_av.c \ - prov/usnic/src/usdf_av.h \ - prov/usnic/src/usdf_cm.c \ - prov/usnic/src/usdf_cm.h \ - prov/usnic/src/usdf_cq.c \ - prov/usnic/src/usdf_cq.h \ - prov/usnic/src/usdf_dgram.c \ - prov/usnic/src/usdf_dgram.h \ - prov/usnic/src/usdf_domain.c \ - 
prov/usnic/src/usdf_endpoint.c \ - prov/usnic/src/usdf_endpoint.h \ - prov/usnic/src/usdf_ep_dgram.c \ - prov/usnic/src/usdf_eq.c \ - prov/usnic/src/usdf_fabric.c \ - prov/usnic/src/usdf_mem.c \ - prov/usnic/src/usdf_pep.c \ - prov/usnic/src/usdf_progress.c \ - prov/usnic/src/usdf_progress.h \ - prov/usnic/src/usdf_rudp.h \ - prov/usnic/src/usdf_timer.c \ - prov/usnic/src/usdf_timer.h \ - prov/usnic/src/usdf_poll.c \ - prov/usnic/src/usdf_poll.h \ - prov/usnic/src/usdf_ext.c \ - prov/usnic/src/usdf_wait.h \ - prov/usnic/src/usdf_wait.c - -if USNIC_BUILD_FAKE_VERBS_DRIVER -_usnic_files += prov/usnic/src/usdf_fake_ibv.c -endif - -_usnic_cppflags = \ - -D__LIBUSNIC__ -DWANT_DEBUG_MSGS=0 \ - -DHAVE_LIBNL3=$(HAVE_LIBNL3) $(usnic_CPPFLAGS) \ - -I$(top_srcdir)/prov/usnic/src/usnic_direct - -rdmainclude_HEADERS += \ - prov/usnic/src/fi_ext_usnic.h - -if HAVE_USNIC_DL -pkglib_LTLIBRARIES += libusnic-fi.la -libusnic_fi_la_CPPFLAGS = $(AM_CPPFLAGS) $(_usnic_cppflags) -libusnic_fi_la_SOURCES = $(_usnic_files) $(common_srcs) -libusnic_fi_la_LDFLAGS = \ - $(usnic_ln_LDFLAGS) \ - -module -avoid-version -shared -export-dynamic -libusnic_fi_la_LIBADD = $(linkback) $(usnic_LIBS) -libusnic_fi_la_DEPENDENCIES = $(linkback) -else !HAVE_USNIC_DL -src_libfabric_la_SOURCES += $(_usnic_files) -src_libfabric_la_CPPFLAGS += $(_usnic_cppflags) -src_libfabric_la_LDFLAGS += $(usnic_LDFLAGS) -src_libfabric_la_LIBADD += $(usnic_LIBS) -endif !HAVE_USNIC_DL - -prov_install_man_pages += man/man7/fi_usnic.7 - -endif HAVE_USNIC - -prov_dist_man_pages += man/man7/fi_usnic.7 diff --git a/prov/usnic/configure.m4 b/prov/usnic/configure.m4 deleted file mode 100644 index f31b40309af..00000000000 --- a/prov/usnic/configure.m4 +++ /dev/null @@ -1,366 +0,0 @@ -dnl -dnl Copyright (c) 2015-2017, Cisco Systems, Inc. All rights reserved. -dnl -dnl This software is available to you under a choice of one of two -dnl licenses. 
You may choose to be licensed under the terms of the GNU -dnl General Public License (GPL) Version 2, available from the file -dnl COPYING in the main directory of this source tree, or the -dnl BSD license below: -dnl -dnl Redistribution and use in source and binary forms, with or -dnl without modification, are permitted provided that the following -dnl conditions are met: -dnl -dnl - Redistributions of source code must retain the above -dnl copyright notice, this list of conditions and the following -dnl disclaimer. -dnl -dnl - Redistributions in binary form must reproduce the above -dnl copyright notice, this list of conditions and the following -dnl disclaimer in the documentation and/or other materials -dnl provided with the distribution. -dnl -dnl THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -dnl "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -dnl LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -dnl FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -dnl COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -dnl INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -dnl BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -dnl LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -dnl CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -dnl LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -dnl ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -dnl POSSIBILITY OF SUCH DAMAGE. -dnl - -dnl Configury specific to the libfabric usNIC provider - -dnl libnl is sadness, but we have to use it. The majority of this -dnl configure.m4 is just to deal with libnl. :-( - -dnl libnl has two versions: libnl (i.e., version 1) and libnl3. - -dnl These two versions have many of the same symbols, but they are -dnl incompatible with each other. 
We can handle this in the C code, but -dnl we must know which version to compile file (i.e., configure must -dnl figure this out). Additionally, if both versions get linked into -dnl the same process, they will disrupt each other's global state, and -dnl Random Bad Things happen. We can't always prevent this -- e.g., if we -dnl link against libnl vX and some other middleware links against libnl vY -dnl (and X != Y), prepare for unpleasentness. You have been warned. - -dnl As of this writing (March 2015), most Linux distros seem to be -dnl encouraging packages to prefer libnl v3 over libnl v1. - -dnl libnl wants us to use pkg-config to find CPPFLAGS and LDFLAGS and -dnl LIBS, but pkg-config isn't always available. So we have to test here. -dnl It gets more complicated because libnl changed several things between v1 -dnl and v3: - -dnl v1: -dnl - Header files (e.g., are in $prefix/include -dnl - Library is in $prefix/lib[64] -dnl - Library is named libnl. - -dnl v3: -dnl - Header files (e.g., are in $prefix/include/libnl3 -dnl *** NOTE: This means that a -I switch is REQUIRED to find -dnl the libnl3 headers (!) -dnl - Library is in $prefix/lib[64] -dnl - Library is named libnl-3. -dnl - We *also* need the libnl-route-3 library - -dnl These differing requirements make the configure/m4 tests a bit of -dnl a nightmare. :-( - -dnl --------------------------------------------------------------------------- - -dnl This configure.m4 script supports the following CLI options: - -dnl --with-libnl[=dir] -dnl If specified, look for libnl support. If it is not found, -dnl error/abort configure. If dir is specified, look in that -dnl directory (configure will first look for libnl v3 in that tree, and if -dnl it is not found, look for libnl v1 in that tree). If no dir is -dnl specified, this option is redundant with --with-usnic. - -dnl --without-libnl -dnl Do not look for libnl support. 
This means that the usnic provider -dnl will not be built (since the usnic provider *requires* libnl support). - -dnl --------------------------------------------------------------------------- - -dnl Called to configure this provider -dnl -dnl Arguments: -dnl -dnl $1: action if configured successfully -dnl $2: action if not configured successfully -dnl -AC_DEFUN([FI_USNIC_CONFIGURE],[ - # Determine if we can support the usnic provider - usnic_happy=0 - usnic_build_fake_driver=0 - AS_IF([test "x$enable_usnic" != "xno"], - [AC_CHECK_HEADER([infiniband/verbs.h], [usnic_happy=1]) - AS_IF([test $usnic_happy -eq 1], - [USNIC_CHECK_IF_NEED_FAKE_USNIC - USNIC_CHECK_LIBNL_SADNESS]) - ]) - - # AM_CONDITIONALs must always be defined - AM_CONDITIONAL([USNIC_BUILD_FAKE_VERBS_DRIVER], - [test $usnic_build_fake_driver -eq 1]) -]) - -dnl -dnl Helper function to parse --with-libnl* options -dnl -dnl $1: variable name -dnl $2: --with- value -dnl -AC_DEFUN([USNIC_PARSE_WITH],[ - case "$2" in - no) - # Nope, don't want it - usnic_want_$1=no - ;; - yes) - # Yes, definitely want it - usnic_want_$1=yes - ;; - default) - # Default case -- try and see if we can find it - usnic_want_$1=default - usnic_$1_location=/usr - ;; - *) - # Yes, definitely want it -- at a specific location - usnic_want_$1=yes - usnic_$1_location="$2" - ;; - esac -]) - -dnl -dnl Check for ibv_register_driver -dnl -dnl If libibverbs is available and is old enough, we need to install a -dnl "fake" usnic verbs driver to keep it from complaining to stderr -dnl that there is no usnic verbs provider. Newer versions of -dnl libibverbs won't complain. If we can detect a new-enough -dnl libibverbs, don't bother to compile the fake usnic verbs driver. -dnl -dnl Per -dnl https://github.com/ofiwg/libfabric/pull/2684#issuecomment-276462368, -dnl the logic boils down to: -dnl -dnl Compile the fake usnic verbs provider if -dnl exists and do not contain a prototype for verbs_register_driver(). 
-dnl -AC_DEFUN([USNIC_CHECK_IF_NEED_FAKE_USNIC],[ - AC_CHECK_HEADER([infiniband/driver.h], - [AC_CHECK_DECL([verbs_register_driver], - [], - [usnic_build_fake_driver=1], - [#include - ])]) - - AC_MSG_CHECKING([if building usnic fake verbs driver]) - AS_IF([test $usnic_build_fake_driver -eq 1], - [AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no])]) - AC_DEFINE_UNQUOTED([USNIC_BUILD_FAKE_VERBS_DRIVER], - [$usnic_build_fake_driver], - [Whether to build the fake usNIC verbs provider or not]) -]) - -dnl -dnl Shared macro -dnl -AC_DEFUN([USNIC_CHECK_LIBNL_SADNESS],[ - AC_ARG_WITH([libnl], - [AS_HELP_STRING([--with-libnl(=DIR)], - [Directory prefix for libnl (typically only necessary if libnl is installed in a location that the compiler/linker will not search by default)])], - [], [with_libnl=default]) - - # The --with options carry two pieces of information: 1) do - # you want a specific version of libnl, and 2) where that - # version of libnl lives. For simplicity, let's separate - # those two pieces of information. - USNIC_PARSE_WITH([libnl], [$with_libnl]) - - # Default to a numeric value (this value gets AC_DEFINEd) - HAVE_LIBNL3=0 - - ################################################### - # NOTE: We *must* check for libnl3 before libnl. 
- ################################################### - - AS_IF([test "$usnic_want_libnl" != "no"], - [USNIC_CHECK_LIBNL3([$usnic_libnl_location], [usnic_nl])]) - AS_IF([test "$usnic_want_libnl" != "no" && - test "$usnic_nl_LIBS" = ""], - [USNIC_CHECK_LIBNL([$usnic_libnl_location], [usnic_nl])]) - - AS_IF([test "$usnic_want_libnl" = "yes" && - test "$usnic_nl_LIBS" = ""], - [AC_MSG_WARN([--with-libnl specified, but not found]) - AC_MSG_ERROR([Cannot continue])]) - - # Final result - AC_SUBST([HAVE_LIBNL3]) - AC_DEFINE_UNQUOTED([HAVE_LIBNL3], [$HAVE_LIBNL3], - [Whether we have libl or libnl3]) - - usnic_CPPFLAGS=$usnic_nl_CPPFLAGS - usnic_LDFLAGS=$usnic_nl_LDFLAGS - usnic_LIBS=$usnic_nl_LIBS - - # If we're building the usNIC fake verbs provider, we need to - # -libverbs, so put it in usnic_LIBS (so that it will also get - # properly substituted into the pkg-config data files). - usnic_verbs_lib= - AS_IF([test $usnic_build_fake_driver -eq 1], - [usnic_verbs_lib="-libverbs"]) - usnic_LIBS="$usnic_LIBS $usnic_verbs_lib" - - AC_SUBST([usnic_CPPFLAGS]) - AC_SUBST([usnic_LDFLAGS]) - AC_SUBST([usnic_LIBS]) - - AS_IF([test "$usnic_nl_LIBS" = ""], - [usnic_happy=0]) -]) - -dnl -dnl Check for libnl-3. -dnl -dnl Inputs: -dnl -dnl $1: prefix where to look for libnl-3 -dnl $2: var name prefix of _CPPFLAGS and _LDFLAGS and _LIBS -dnl -dnl Outputs: -dnl -dnl - Set $2_CPPFLAGS necessary to compile with libnl-3 -dnl - Set $2_LDFLAGS necessary to link with libnl-3 -dnl - Set $2_LIBS necessary to link with libnl-3 -dnl - Set HAVE_LIBNL3 1 if libnl-3 will be used -dnl -AC_DEFUN([USNIC_CHECK_LIBNL3],[ - AC_MSG_NOTICE([checking for libnl3]) - - AC_MSG_CHECKING([for libnl3 prefix]) - AC_MSG_RESULT([$1]) - AC_MSG_CHECKING([for $1/include/libnl3]) - AS_IF([test -d "$1/include/libnl3"], - [usnic_libnl3_happy=1 - AC_MSG_RESULT([found])], - [usnic_libnl3_happy=0 - AC_MSG_RESULT([not found])]) - - # Random note: netlink/version.h is only in libnl3 - it is not in libnl. 
- # Also, nl_socket_set_peer_groups is only in libnl3. - CPPFLAGS_save=$CPPFLAGS - usnic_tmp_CPPFLAGS="-I$1/include/libnl3" - CPPFLAGS="$usnic_tmp_CPPFLAGS $CPPFLAGS" - AS_IF([test $usnic_libnl3_happy -eq 1], - [FI_CHECK_PACKAGE([$2], - [netlink/version.h], - [nl-3], - [nl_socket_set_peer_groups], - [], - [$1], - [], - [usnic_libnl3_happy=1], - [usnic_libnl3_happy=0]) - - # Note that FI_CHECK_PACKAGE is going to add - # -I$dir/include into $2_CPPFLAGS. But because libnl3 - # puts the headers in $dir/libnl3, we need to - # overwrite $2_CPPFLAGS with -I$dir/libnl3. We can do - # this unconditionally; we don't have to check for - # success (checking for success occurs below). - $2_CPPFLAGS=$usnic_tmp_CPPFLAGS]) - - # If we found libnl-3, we *also* need libnl-route-3 - LIBS_save=$LIBS - LDFLAGS_save=$LDFLAGS - AS_IF([test "$$2_LDFLAGS" != ""], - [LDFLAGS="$$2_LDFLAGS $LDFLAGS"]) - AS_IF([test $usnic_libnl3_happy -eq 1], - [AC_SEARCH_LIBS([nl_rtgen_request], - [nl-route-3], - [usnic_libnl3_happy=1], - [usnic_libnl3_happy=0])]) - LIBS=$LIBS_save - LDFLAGS=$LDFLAGS_save - - # Just because libnl* is evil, double check that the - # netlink/version.h we found was for libnl3. As far as we - # know, netlink/version.h only first appeared in version - # 3... but let's really be sure. - AS_IF([test $usnic_libnl3_happy -eq 1], - [AC_MSG_CHECKING([to ensure these really are libnl3 headers]) - CPPFLAGS="$$2_CPPFLAGS $CPPFLAGS" - AC_COMPILE_IFELSE( - [AC_LANG_PROGRAM([[ -#include -#include -#ifndef LIBNL_VER_MAJ -#error "LIBNL_VER_MAJ not defined!" 
-#endif -/* to the best of our knowledge, version.h only exists in libnl3 */ -#if LIBNL_VER_MAJ != 3 -#error "LIBNL_VER_MAJ != 3, I am sad" -#endif - ]])], - [AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no]) - usnic_libnl3_happy=0] - )]) - CPPFLAGS=$CPPFLAGS_save - - # If we found everything - AS_IF([test $usnic_libnl3_happy -eq 1], - [$2_LIBS="-lnl-3 -lnl-route-3" - HAVE_LIBNL3=1], - [$2_CPPFLAGS= - $2_LDFLAGS= - $2_LIBS=]) -]) - -dnl -dnl Check for libnl. -dnl -dnl Inputs: -dnl -dnl $1: prefix where to look for libnl -dnl $2: var name prefix of _CPPFLAGS and _LDFLAGS and _LIBS -dnl -dnl Outputs: -dnl -dnl - Set $2_CPPFLAGS necessary to compile with libnl -dnl - Set $2_LDFLAGS necessary to link with libnl -dnl - Set $2_LIBS necessary to link with libnl -dnl - Set HAVE_LIBNL3 0 if libnl will be used -dnl -AC_DEFUN([USNIC_CHECK_LIBNL],[ - AC_MSG_NOTICE([checking for libnl]) - - FI_CHECK_PACKAGE([$2], - [netlink/netlink.h], - [nl], - [nl_connect], - [-lm], - [$1], - [], - [usnic_libnl_happy=1], - [usnic_libnl_happy=0]) - - AS_IF([test $usnic_libnl_happy -eq 1], - [$2_LIBS="-lnl -lm" - HAVE_LIBNL3=0]) -]) diff --git a/prov/usnic/libfabric-usnic.spec.in b/prov/usnic/libfabric-usnic.spec.in deleted file mode 100644 index 0deada89b1a..00000000000 --- a/prov/usnic/libfabric-usnic.spec.in +++ /dev/null @@ -1,52 +0,0 @@ -%{!?configopts: %global configopts LDFLAGS=-Wl,--build-id} -%{!?provider: %define provider usnic} -%{!?provider_formal: %define provider_formal usNIC} - -Name: libfabric-%{provider} -Version: @VERSION@ -Release: 1%{?dist} -Summary: Dynamic %{provider_formal} provider for user-space RDMA Fabric Interfaces -Group: System Environment/Libraries -License: GPLv2 or BSD -Url: http://www.github.com/ofiwg/libfabric -Source: http://www.github.org/ofiwg/%{name}/releases/download/v{%version}/libfabric-%{version}.tar.bz2 -BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) -Requires: libfabric -BuildRequires: libfabric - -%description -libfabric 
provides a user-space API to access high-performance fabric -services, such as RDMA. - -This RPM provides the %{provider_formal} provider as a "plugin" to an existing -Libfabric installation. This plugin will override any existing %{provider_formal} -provider functionality in the existing Libfabric installation. - -%prep -%setup -q -n libfabric-%{version} - -%build -%configure %{configopts} --enable-%{provider}=dl -make %{?_smp_mflags} - -%install -rm -rf %{buildroot} -%makeinstall installdirs - -%clean -rm -rf %{buildroot} - -%files -%defattr(-,root,root,-) -%{_libdir}/libfabric/*.so - -%exclude %{_libdir}/libfabric.* -%exclude %{_libdir}/libfabric/*.la -%exclude %{_libdir}/pkgconfig -%exclude %{_bindir} -%exclude %{_mandir} -%exclude %{_includedir} - -%changelog -* Wed May 24 2017 Open Fabrics Interfaces Working Group -- First release of specfile for packaging a single dl provider. diff --git a/prov/usnic/src/fi_ext_usnic.h b/prov/usnic/src/fi_ext_usnic.h deleted file mode 100644 index db2cb6a1ae9..00000000000 --- a/prov/usnic/src/fi_ext_usnic.h +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Copyright (c) 2013-2014 Intel Corporation. All rights reserved. - * Copyright (c) 2015-2016 Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _FI_EXT_USNIC_H_ -#define _FI_EXT_USNIC_H_ - -/* - * See the fi_usnic.7 man page for information about the usnic provider - * extensions provided in this header. - */ - -#include -#include - -#define FI_PROTO_RUDP (100U | FI_PROV_SPECIFIC) - -#define FI_EXT_USNIC_INFO_VERSION 2 - -#define FI_EXT_USNIC_MAX_DEVNAME 16 - -/* - * usNIC specific info - */ -/* Packed in 1.4, maintains the same alignment as <= 1.3.0 */ -struct fi_usnic_cap { - const char *uc_capability; - int uc_present; -} __attribute__((packed)); - -/* Packed in 1.4, maintains the same alignment as <= 1.3.0 */ -struct fi_usnic_info_v1 { - uint32_t ui_link_speed; - uint32_t ui_netmask_be; - char ui_ifname[IFNAMSIZ]; - - uint32_t ui_num_vf; - uint32_t ui_qp_per_vf; - uint32_t ui_cq_per_vf; -} __attribute__((packed)); - -struct fi_usnic_info_v2 { - /* Put all of the v1 fields at the start to provide some backward - * compatibility. 
- */ - uint32_t ui_link_speed; - uint32_t ui_netmask_be; - char ui_ifname[IFNAMSIZ]; - unsigned ui_num_vf; - unsigned ui_qp_per_vf; - unsigned ui_cq_per_vf; - - char ui_devname[FI_EXT_USNIC_MAX_DEVNAME]; - uint8_t ui_mac_addr[6]; - - /* Explicit padding to match 1.3 alignment */ - uint8_t ui_pad0[2]; - - uint32_t ui_ipaddr_be; - uint32_t ui_prefixlen; - uint32_t ui_mtu; - uint8_t ui_link_up; - - /* Explicit padding to match 1.3 alignment */ - uint8_t ui_pad1[3]; - - uint32_t ui_vendor_id; - uint32_t ui_vendor_part_id; - uint32_t ui_device_id; - char ui_firmware[64]; - - unsigned ui_intr_per_vf; - unsigned ui_max_cq; - unsigned ui_max_qp; - - unsigned ui_max_cqe; - unsigned ui_max_send_credits; - unsigned ui_max_recv_credits; - - const char *ui_nicname; - const char *ui_pid; - - struct fi_usnic_cap **ui_caps; -} __attribute__((packed)); - -/* In API version 1.2 and below, the v1 structure did not contain any 64-bit - * data types and therefore had a 4-byte alignment. Once v2 of the extension API - * was introduced in version 1.3, the extra pointers mandated an 8-byte - * alignment thus changing the offset of the v1 structure. This means that the - * alignment difference manifests when an application using v1 of the extension - * is compiled with Libfabric v1.1.x or v1.2.x, but then runs with libfabric.so - * that is v1.3.x or higher (and vice versa). Make the alignment explicit and - * consistent by adding an extra 32-bit padding (4 uint8_t). 
- */ -struct fi_usnic_info { - uint32_t ui_version; - uint8_t ui_pad0[4]; - union { - struct fi_usnic_info_v1 v1; - struct fi_usnic_info_v2 v2; - } ui; -} __attribute__((packed)); - -/* - * usNIC-specific fabric ops - */ -#define FI_USNIC_FABRIC_OPS_1 "fabric_ops 1" -struct fi_usnic_ops_fabric { - size_t size; - int (*getinfo)(uint32_t version, struct fid_fabric *fabric, - struct fi_usnic_info *info); -}; - -/* - * usNIC-specific AV ops - */ -#define FI_USNIC_AV_OPS_1 "av_ops 1" -struct fi_usnic_ops_av { - size_t size; - int (*get_distance)(struct fid_av *av, void *addr, int *metric); -}; - -int usdf_fabric_ops_open(struct fid *fid, const char *ops_name, uint64_t flags, - void **ops, void *context); -int usdf_av_ops_open(struct fid *fid, const char *ops_name, uint64_t flags, - void **ops, void *context); - -#endif /* _FI_EXT_USNIC_H_ */ diff --git a/prov/usnic/src/usdf.h b/prov/usnic/src/usdf.h deleted file mode 100644 index ba6a2cfb48e..00000000000 --- a/prov/usnic/src/usdf.h +++ /dev/null @@ -1,525 +0,0 @@ -/* - * Copyright (c) 2014-2019, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ -#ifndef _USDF_H_ -#define _USDF_H_ - -#include - -#include -#include - -#include -#include - -#include "usdf_progress.h" -#include "usd.h" - - -#define USDF_PROV_NAME "usnic" -#define USDF_MAJOR_VERS 1 -#define USDF_MINOR_VERS 0 -#define USDF_PROV_VERSION FI_VERSION(USDF_MAJOR_VERS, USDF_MINOR_VERS) - -extern struct fi_provider usdf_ops; - -#define USDF_WARN_SYS(subsys, ...) \ - FI_WARN(&usdf_ops, FI_LOG_ ## subsys, __VA_ARGS__) -#define USDF_TRACE_SYS(subsys, ...) \ - FI_TRACE(&usdf_ops, FI_LOG_ ## subsys, __VA_ARGS__) -#define USDF_INFO_SYS(subsys, ...) \ - FI_INFO(&usdf_ops, FI_LOG_ ## subsys, __VA_ARGS__) -#define USDF_DBG_SYS(subsys, ...) \ - FI_DBG(&usdf_ops, FI_LOG_ ## subsys, __VA_ARGS__) - -/* default to "FI_LOG_FABRIC" */ -#define USDF_WARN(...) USDF_WARN_SYS(FABRIC, __VA_ARGS__) -#define USDF_TRACE(...) USDF_TRACE_SYS(FABRIC, __VA_ARGS__) -#define USDF_INFO(...) USDF_INFO_SYS(FABRIC, __VA_ARGS__) -#define USDF_DBG(...) 
USDF_DBG_SYS(FABRIC, __VA_ARGS__) - -#define USDF_HDR_BUF_ENTRY 64 -#define USDF_EP_CAP_PIO (1ULL << 63) - -#define USDF_MAX_PEERS (16 * 1024) - -/* usdf event flags */ -#define USDF_EVENT_FLAG_ERROR (1ULL << 62) -#define USDF_EVENT_FLAG_FREE_BUF (1ULL << 63) - -/* usdf domain capability: no loopback */ -#define USDF_DOM_CAPS (FI_REMOTE_COMM) - -#define USDF_MR_IOV_LIMIT 1 -#define USDF_MR_CNT (65535) -#define USDF_ADDR_STR_LEN (INET6_ADDRSTRLEN+8) - -/* - * TAILQ stuff that should exist - */ -#define TAILQ_REMOVE_MARK(head, elm, link) \ - do { \ - TAILQ_REMOVE(head, elm, link); \ - (elm)->link.tqe_prev = NULL; \ - } while (0) - -#define TAILQ_ON_LIST(elm, link) ((elm)->link.tqe_prev != NULL) - -struct usdf_domain; - -struct usdf_dev_entry { - struct usd_device *ue_dev; - struct usd_device_attrs ue_dattr; - int ue_dev_ok; -}; -struct usdf_usnic_info { - int uu_num_devs; - struct usd_device_entry uu_devs[USD_MAX_DEVICES]; - struct usdf_dev_entry uu_info[USD_MAX_DEVICES]; -}; -extern struct usdf_usnic_info *__usdf_devinfo; - -struct usdf_fabric { - struct fid_fabric fab_fid; - struct fi_fabric_attr fab_attr; - struct usd_device_attrs *fab_dev_attrs; - int fab_arp_sockfd; - ofi_atomic32_t fab_refcnt; - ofi_atomic32_t num_blocked_waiting; - LIST_HEAD(,usdf_domain) fab_domain_list; - - /* progression */ - pthread_t fab_thread; - int fab_exit; - ofi_epoll_t fab_epollfd; - int fab_eventfd; - struct usdf_poll_item fab_poll_item; - - /* timer vars */ - uint32_t fab_active_timer_count; - LIST_HEAD(usdf_timer_bucket, usdf_timer_entry) *fab_timer_buckets; - uint64_t fab_cur_bucket_ms; - uint32_t fab_cur_bucket; - pthread_spinlock_t fab_timer_lock; -}; -#define fab_ftou(FAB) container_of(FAB, struct usdf_fabric, fab_fid) -#define fab_utof(FP) (&(FP)->fab_fid) -#define fab_fidtou(FID) container_of(FID, struct usdf_fabric, fab_fid.fid) - -struct usdf_domain { - struct fid_domain dom_fid; - struct usdf_fabric *dom_fabric; - struct fi_info *dom_info; - ofi_atomic32_t dom_refcnt; - 
struct usdf_eq *dom_eq; - struct usd_device *dom_dev; - - pthread_spinlock_t dom_progress_lock; - TAILQ_HEAD(,usdf_tx) dom_tx_ready; - TAILQ_HEAD(,usdf_cq_hard) dom_hcq_list; - - /* used only by connected endpoints */ - struct usdf_ep **dom_peer_tab; - uint32_t dom_next_peer; - - LIST_ENTRY(usdf_domain) dom_link; -}; -#define dom_ftou(FDOM) container_of(FDOM, struct usdf_domain, dom_fid) -#define dom_utof(DOM) (&(DOM)->dom_fid) -#define dom_fidtou(FID) container_of(FID, struct usdf_domain, dom_fid.fid) - -enum usdf_pep_state { - USDF_PEP_UNBOUND, - USDF_PEP_BOUND, - USDF_PEP_LISTENING, - - /* A "ROBBED" PEP has had its socket stolen. The only valid operation - * to call on a ROBBED PEP is fi_close(). */ - USDF_PEP_ROBBED -}; - -struct usdf_pep { - struct fid_pep pep_fid; - ofi_atomic32_t pep_refcnt; - struct usdf_fabric *pep_fabric; - struct usdf_eq *pep_eq; - int pep_sock; - union { - struct sockaddr_in sin; - char addr_str[USDF_ADDR_STR_LEN]; - } pep_src_addr; - enum usdf_pep_state pep_state; - struct usdf_poll_item pep_pollitem; - struct fi_info *pep_info; - - pthread_spinlock_t pep_cr_lock; - size_t pep_cr_max_data; - uint32_t pep_backlog; - uint32_t pep_cr_alloced; - TAILQ_HEAD(,usdf_connreq) pep_cr_free; - TAILQ_HEAD(,usdf_connreq) pep_cr_pending; -}; -#define pep_ftou(FPEP) container_of(FPEP, struct usdf_pep, pep_fid) -#define pep_fidtou(FID) container_of(FID, struct usdf_pep, pep_fid.fid) -#define pep_utof(PEP) (&(PEP)->pep_fid) -#define pep_utofid(PEP) (&(PEP)->pep_fid.fid) - -struct usdf_tx { - struct fid_stx tx_fid; - ofi_atomic32_t tx_refcnt; - struct usdf_domain *tx_domain; - TAILQ_ENTRY(usdf_tx) tx_link; - - struct fi_tx_attr tx_attr; - struct usd_qp *tx_qp; - void (*tx_progress)(struct usdf_tx *tx); - - union { - struct { - struct usdf_cq_hard *tx_hcq; - - uint8_t *tx_inject_bufs; - struct usdf_msg_qe *tx_wqe_buf; - TAILQ_HEAD(,usdf_msg_qe) tx_free_wqe; - TAILQ_HEAD(,usdf_ep) tx_ep_ready; - TAILQ_HEAD(,usdf_ep) tx_ep_have_acks; - size_t 
tx_num_free_wqe; - } msg; - struct { - struct usdf_cq_hard *tx_hcq; - - ofi_atomic32_t tx_next_msg_id; - struct usdf_rdm_qe *tx_wqe_buf; - uint8_t *tx_inject_bufs; - TAILQ_HEAD(,usdf_rdm_qe) tx_free_wqe; - TAILQ_HEAD(,usdf_rdm_connection) tx_rdc_ready; - TAILQ_HEAD(,usdf_rdm_connection) tx_rdc_have_acks; - size_t tx_num_free_wqe; - } rdm; - } t; -}; -#define tx_ftou(FEP) container_of(FEP, struct usdf_tx, tx_fid) -#define tx_fidtou(FID) container_of(FID, struct usdf_tx, tx_fid) -#define tx_utof(RX) (&(RX)->tx_fid) -#define tx_utofid(RX) (&(RX)->tx_fid.fid) - -struct usdf_rx { - struct fid_ep rx_fid; - ofi_atomic32_t rx_refcnt; - struct usdf_domain *rx_domain; - - struct fi_rx_attr rx_attr; - struct usd_qp *rx_qp; - - union { - struct { - struct usdf_cq_hard *rx_hcq; - - uint8_t *rx_bufs; - struct usdf_msg_qe *rx_rqe_buf; - TAILQ_HEAD(,usdf_msg_qe) rx_free_rqe; - TAILQ_HEAD(,usdf_msg_qe) rx_posted_rqe; - size_t rx_num_free_rqe; - } msg; - struct { - int rx_sock; - struct usdf_cq_hard *rx_hcq; - struct usdf_tx *rx_tx; - - uint8_t *rx_bufs; - struct usdf_rdm_qe *rx_rqe_buf; - TAILQ_HEAD(,usdf_rdm_qe) rx_free_rqe; - TAILQ_HEAD(,usdf_rdm_qe) rx_posted_rqe; - size_t rx_num_free_rqe; - } rdm; - } r; -}; -#define rx_ftou(FEP) container_of(FEP, struct usdf_rx, rx_fid) -#define rx_fidtou(FID) container_of(FID, struct usdf_rx, rx_fid) -#define rx_utof(RX) (&(RX)->rx_fid) -#define rx_utofid(RX) (&(RX)->rx_fid.fid) - -enum { - USDF_EP_ENABLED = (1 << 0) -}; - -struct usdf_ep { - struct fid_ep ep_fid; - struct usdf_domain *ep_domain; - ofi_atomic32_t ep_refcnt; - uint64_t ep_caps; - uint64_t ep_mode; - - uint8_t ep_tx_dflt_signal_comp; - uint8_t ep_rx_dflt_signal_comp; - - uint8_t ep_tx_completion; - uint8_t ep_rx_completion; - - uint32_t flags; - - uint32_t ep_wqe; /* requested queue sizes */ - uint32_t ep_rqe; - - struct usd_qp_attrs ep_qp_attrs; - - struct usdf_eq *ep_eq; - - struct usdf_tx *ep_tx; - struct usdf_rx *ep_rx; - - size_t max_msg_size; - - union { - struct { - 
struct usd_qp *ep_qp; - struct usdf_cq *ep_wcq; - struct usdf_cq *ep_rcq; - - int ep_sock; - struct usdf_av *ep_av; - - /* TODO: Remove in favor of accessing op flags through - * ep_tx and ep_rx. Update once tx/rx context support - * is added to dgram */ - uint64_t tx_op_flags; - uint64_t rx_op_flags; - - size_t tx_iov_limit; - size_t rx_iov_limit; - - void *ep_hdr_buf; - struct usd_udp_hdr **ep_hdr_ptr; - } dg; - struct { - struct usdf_connreq *ep_connreq; - int ep_cm_sock; - struct sockaddr_in ep_lcl_addr; - struct usd_dest *ep_dest; - uint32_t ep_rem_peer_id; - uint32_t ep_lcl_peer_id; - - TAILQ_HEAD(,usdf_msg_qe) ep_posted_wqe; - TAILQ_HEAD(usdf_msg_qe_head ,usdf_msg_qe) ep_sent_wqe; - uint32_t ep_fairness_credits; - uint32_t ep_seq_credits; - uint16_t ep_next_tx_seq; - uint16_t ep_last_rx_ack; - int ep_send_nak; - - struct usdf_msg_qe *ep_cur_recv; - uint16_t ep_next_rx_seq; - TAILQ_ENTRY(usdf_ep) ep_ack_link; - - struct usdf_timer_entry *ep_ack_timer; - - TAILQ_ENTRY(usdf_ep) ep_link; - } msg; - struct { - int ep_sock; - struct usdf_av *ep_av; - - } rdm; - } e; -}; -#define ep_ftou(FEP) container_of(FEP, struct usdf_ep, ep_fid) -#define ep_fidtou(FID) container_of(FID, struct usdf_ep, ep_fid.fid) -#define ep_utof(EP) (&(EP)->ep_fid) -#define ep_utofid(EP) (&(EP)->ep_fid.fid) - -struct usdf_mr { - struct fid_mr mr_fid; - struct usd_mr *mr_mr; -}; - -struct usdf_cq_hard { - struct usdf_cq *cqh_cq; - struct usd_cq *cqh_ucq; - ofi_atomic32_t cqh_refcnt; - void (*cqh_progress)(struct usdf_cq_hard *hcq); - void (*cqh_post)(struct usdf_cq_hard *hcq, void *context, size_t len, - int prov_errno, uint64_t flags); - TAILQ_ENTRY(usdf_cq_hard) cqh_link; - TAILQ_ENTRY(usdf_cq_hard) cqh_dom_link; -}; - -struct usdf_cq_soft_entry { - void *cse_context; - uint64_t cse_flags; - size_t cse_len; - void *cse_buf; - uint64_t cse_data; - int cse_prov_errno; -}; - -struct usdf_cq { - struct fid_cq cq_fid; - ofi_atomic32_t cq_refcnt; - struct usdf_domain *cq_domain; - struct 
fi_cq_attr cq_attr; - uint8_t cq_is_soft; - uint8_t cq_waiting; - - union { - int fd; - struct fi_mutex_cond mutex_cond; - } object; - - union { - struct { - struct usd_cq *cq_cq; - } hard; - struct { - struct usdf_cq_soft_entry *cq_comps; - struct usdf_cq_soft_entry *cq_end; - struct usdf_cq_soft_entry *cq_head; - struct usdf_cq_soft_entry *cq_tail; - /* Last operation used to distinguish full vs empty. */ - uint8_t cq_last_op; - TAILQ_HEAD(,usdf_cq_hard) cq_list; - } soft; - } c; - struct usd_completion cq_comp; - struct fi_ops_cq cq_ops; -}; - -enum { - USDF_SOFT_CQ_READ, - USDF_SOFT_CQ_WRITE -}; - -#define cq_ftou(FCQ) container_of(FCQ, struct usdf_cq, cq_fid) -#define cq_fidtou(FID) container_of(FID, struct usdf_cq, cq_fid.fid) -#define cq_utof(CQ) (&(CQ)->cq_fid) - -struct usdf_err_data_entry { - struct slist_entry entry; - uint8_t seen; - uint8_t err_data[]; -}; - -struct usdf_event { - uint32_t ue_event; - void *ue_buf; - size_t ue_len; - uint64_t ue_flags; -}; - -struct usdf_eq { - struct fid_eq eq_fid; - struct usdf_fabric *eq_fabric; - ofi_atomic32_t eq_refcnt; - - pthread_spinlock_t eq_lock; - - struct fi_eq_err_entry *eq_ev_buf; - struct usdf_event *eq_ev_ring; - struct usdf_event *eq_ev_head; - struct usdf_event *eq_ev_tail; - struct usdf_event *eq_ev_end; - int eq_ev_ring_size; - ofi_atomic32_t eq_num_events; - - /* various ways to wait */ - struct fi_eq_attr eq_attr; - union { - int eq_fd; - }; - - struct slist eq_err_data; - struct fi_ops_eq eq_ops_data; -}; -#define eq_ftou(FEQ) container_of(FEQ, struct usdf_eq, eq_fid) -#define eq_fidtou(FID) container_of(FID, struct usdf_eq, eq_fid.fid) -#define eq_utof(EQ) (&(EQ)->eq_fid) - -/* - * Prototypes - */ - -ssize_t usdf_eq_write_internal(struct usdf_eq *eq, uint32_t event, - const void *buf, size_t len, uint64_t flags); - -/* fi_ops_fabric */ -int usdf_domain_open(struct fid_fabric *fabric, struct fi_info *info, - struct fid_domain **domain, void *context); -int usdf_eq_open(struct fid_fabric *fabric, 
struct fi_eq_attr *attr, - struct fid_eq **eq, void *context); -int usdf_pep_open(struct fid_fabric *fabric, struct fi_info *info, - struct fid_pep **pep_p, void *context); - -/* fi_ops_domain */ -int usdf_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, - struct fid_cq **cq_o, void *context); -int usdf_endpoint_open(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **ep, void *context); -int usdf_av_open(struct fid_domain *domain, struct fi_av_attr *attr, - struct fid_av **av_o, void *context); -int usdf_query_atomic(struct fid_domain *domain, enum fi_datatype datatype, - enum fi_op op, struct fi_atomic_attr *attr, uint64_t flags); - -/* Domain name functionality */ -int usdf_domain_getname(uint32_t version, struct usd_device_attrs *dap, - char **name); -bool usdf_domain_checkname(uint32_t version, struct usd_device_attrs *dap, - const char *hint); - -/* fi_ops_mr */ -int usdf_reg_mr(struct fid *fid, const void *buf, size_t len, - uint64_t access, uint64_t offset, uint64_t requested_key, - uint64_t flags, struct fid_mr **mr_o, void *context); -int usdf_regv_mr(struct fid *fid, const struct iovec *iov, - size_t count, uint64_t access, - uint64_t offset, uint64_t requested_key, - uint64_t flags, struct fid_mr **mr, void *context); -int usdf_regattr(struct fid *fid, const struct fi_mr_attr *attr, - uint64_t flags, struct fid_mr **mr); - -/* Fake IBV provider */ -void usdf_setup_fake_ibv_provider(void); - -/* passive endpoint functions */ -int usdf_pep_steal_socket(struct usdf_pep *pep, int *is_bound, int *sock_o); - -/* Utility functions */ -int usdf_catch_dom_attr(uint32_t version, const struct fi_info *hints, - struct fi_domain_attr *dom_attr); -int usdf_catch_tx_attr(uint32_t version, const struct fi_tx_attr *tx_attr); -int usdf_catch_rx_attr(uint32_t version, const struct fi_rx_attr *rx_attr); -struct sockaddr_in *usdf_format_to_sin(const struct fi_info *info, const void *addr); -void *usdf_sin_to_format(const struct fi_info *info, void 
*addr, size_t *len); -void usdf_free_sin_if_needed(const struct fi_info *info, struct sockaddr_in *sin); - -#endif /* _USDF_H_ */ diff --git a/prov/usnic/src/usdf_av.c b/prov/usnic/src/usdf_av.c deleted file mode 100644 index 0c41e17e846..00000000000 --- a/prov/usnic/src/usdf_av.c +++ /dev/null @@ -1,887 +0,0 @@ -/* - * Copyright (c) 2014-2019, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "rdma/fi_errno.h" -#include "ofi_enosys.h" -#include "ofi.h" - -#include "usnic_direct.h" -#include "usnic_ip_utils.h" -#include "libnl_utils.h" -#include "usd.h" -#include "usd_queue.h" - -#include "usdf.h" -#include "usdf_av.h" -#include "usdf_cm.h" -#include "usdf_timer.h" - -#include "fi_ext_usnic.h" - -static int usdf_av_alloc_dest(struct usdf_dest **dest_o) -{ - struct usdf_dest *dest; - - dest = calloc(1, sizeof(**dest_o)); - if (dest == NULL) - return -errno; - - *dest_o = dest; - return 0; -} - -static void usdf_av_free_dest(struct usdf_dest *dest) -{ - LIST_REMOVE(dest, ds_addresses_entry); - - free(dest); -} - -static int usdf_av_close_(struct usdf_av *av) -{ - struct usdf_dest *entry; - - USDF_TRACE_SYS(AV, "\n"); - - pthread_spin_lock(&av->av_lock); - - if (av->av_eq) - ofi_atomic_dec32(&av->av_eq->eq_refcnt); - - ofi_atomic_dec32(&av->av_domain->dom_refcnt); - - while (!LIST_EMPTY(&av->av_addresses)) { - entry = LIST_FIRST(&av->av_addresses); - usdf_av_free_dest(entry); - } - - pthread_spin_destroy(&av->av_lock); - free(av); - - USDF_DBG_SYS(AV, "AV successfully destroyed\n"); - - return 0; -} - -static int usdf_av_close(struct fid *fid) -{ - struct usdf_av *av; - int pending; - - USDF_TRACE_SYS(AV, "\n"); - - av = container_of(fid, struct 
usdf_av, av_fid.fid); - if (ofi_atomic_get32(&av->av_refcnt) > 0) - return -FI_EBUSY; - - pending = ofi_atomic_get32(&av->av_active_inserts); - assert(pending >= 0); - - if (pending) { - USDF_DBG_SYS(AV, "%d pending inserts, defer closing\n", - pending); - ofi_atomic_set32(&av->av_closing, 1); - } else { - usdf_av_close_(av); - } - - return 0; -} - -static void -usdf_av_insert_async_complete(struct usdf_av_insert *insert) -{ - struct fi_eq_entry entry; - struct usdf_av *av; - int pending; - int closing; - - av = insert->avi_av; - - entry.fid = &av->av_fid.fid; - entry.context = insert->avi_context; - entry.data = insert->avi_successes; - usdf_eq_write_internal(av->av_eq, - FI_AV_COMPLETE, &entry, sizeof(entry), 0); - - usdf_timer_free(av->av_domain->dom_fabric, insert->avi_timer); - - pending = ofi_atomic_dec32(&av->av_active_inserts); - USDF_DBG_SYS(AV, "new active insert value: %d\n", pending); - assert(pending >= 0); - - closing = ofi_atomic_get32(&av->av_closing); - - if (!pending && closing) - usdf_av_close_(av); - - free(insert); -} - -/* - * A request failed, post an error event to the EQ - */ -static void -usdf_post_insert_request_error(struct usdf_av_insert *insert, - struct usdf_av_req *req) -{ - struct fi_eq_err_entry err_entry; - struct usdf_av *av; - - av = insert->avi_av; - - *req->avr_fi_addr = FI_ADDR_NOTAVAIL; - free(req->avr_dest); - - err_entry.fid = &av->av_fid.fid; - err_entry.context = insert->avi_context; - err_entry.data = req - (struct usdf_av_req *)(insert + 1); - err_entry.err = -req->avr_status; - err_entry.err_data = NULL; - err_entry.err_data_size = 0; - - usdf_eq_write_internal(av->av_eq, 0, - &err_entry, sizeof(err_entry), - USDF_EVENT_FLAG_ERROR); -} - -/* - * Called by progression thread to look for AV completions on this domain - */ -static void -usdf_av_insert_progress(void *v) -{ - int ret; - struct usdf_av_insert *insert; - struct usdf_fabric *fp; - struct usdf_dest *dest; - struct usdf_av_req *req; - struct usdf_av_req 
*tmpreq; - struct usd_device_attrs *dap; - uint64_t now; - uint8_t *eth; - - insert = v; - fp = insert->avi_av->av_domain->dom_fabric; - dap = fp->fab_dev_attrs; - - TAILQ_FOREACH_SAFE(req, tmpreq, &insert->avi_req_list, avr_link) { - - dest = req->avr_dest; - eth = &dest->ds_dest.ds_dest.ds_udp.u_hdr.uh_eth.ether_dhost[0]; - ret = usnic_arp_lookup(dap->uda_ifname, - req->avr_daddr_be, fp->fab_arp_sockfd, eth); - - /* anything besides EAGAIN means request is completed */ - if (ret != EAGAIN) { - TAILQ_REMOVE(&insert->avi_req_list, req, avr_link); - req->avr_status = -ret; - - if (ret == 0) { - ++insert->avi_successes; - *(struct usdf_dest **)req->avr_fi_addr = dest; - - LIST_INSERT_HEAD(&insert->avi_av->av_addresses, - dest, ds_addresses_entry); - } else { - usdf_post_insert_request_error(insert, req); - } - } - } - - /* Time for a new ARP? */ - now = usdf_get_ms(); - if (now - insert->avi_last_arp_time > USDF_AV_ARP_INTERVAL) { - - /* If no more ARP requests left, fail all remaining requests */ - if (insert->avi_arps_left == 0) { - TAILQ_FOREACH(req, &insert->avi_req_list, avr_link) { - req->avr_status = -FI_EHOSTUNREACH; - usdf_post_insert_request_error(insert, req); - } - TAILQ_INIT(&insert->avi_req_list); - - /* Trigger an ARP request for all pending requests */ - } else { - TAILQ_FOREACH_SAFE(req, tmpreq, - &insert->avi_req_list, avr_link) { - ret = usnic_arp_request(req->avr_daddr_be, - fp->fab_arp_sockfd); - if (ret != 0) { - req->avr_status = -ret; - TAILQ_REMOVE(&insert->avi_req_list, - req, avr_link); - usdf_post_insert_request_error(insert, - req); - } - } - - insert->avi_last_arp_time = now; - --insert->avi_arps_left; - } - } - - /* If no more pending requests, all done! 
*/ - if (TAILQ_EMPTY(&insert->avi_req_list)) { - usdf_av_insert_async_complete(insert); - } else { - /* retry in 1 ms */ - usdf_timer_set(fp, insert->avi_timer, 1); - } - -} - -static int -usdf_am_insert_async(struct fid_av *fav, const void *addr, size_t count, - fi_addr_t *fi_addr, uint64_t flags, void *context) -{ - const struct sockaddr_in *sin; - const char **addr_str; - struct sockaddr_in *cur_sin; - struct usd_device_attrs *dap; - struct usdf_av_insert *insert; - struct usdf_av_req *req; - struct usdf_av *av; - struct usdf_fabric *fp; - struct usd_dest *u_dest; - struct fi_info *info; - int ret; - size_t i; - bool addr_format_str; - - USDF_TRACE_SYS(AV, "\n"); - - if ((flags & ~(FI_MORE)) != 0) - return -FI_EBADFLAGS; - - av = av_ftou(fav); - fp = av->av_domain->dom_fabric; - dap = fp->fab_dev_attrs; - info = av->av_domain->dom_info; - addr_format_str = (info->addr_format == FI_ADDR_STR); - - if (av->av_eq == NULL) { - return -FI_ENOEQ; - } - - sin = addr; - addr_str = (const char **)addr; - - /* allocate an insert record and N requests */ - insert = calloc(1, sizeof(*insert) + count * sizeof(*req)); - if (insert == NULL) { - return -errno; - } - insert->avi_av = av; - insert->avi_context = context; - ret = usdf_timer_alloc(usdf_av_insert_progress, insert, - &insert->avi_timer); - if (ret != 0) { - goto fail; - } - TAILQ_INIT(&insert->avi_req_list); - insert->avi_arps_left = USDF_AV_MAX_ARPS; - - ret = ofi_atomic_inc32(&av->av_active_inserts); - USDF_DBG_SYS(AV, "new active insert value: %d\n", ret); - - /* If no addresses, complete now */ - if (count == 0) { - usdf_av_insert_async_complete(insert); - return 0; - } - - req = (struct usdf_av_req *)(insert + 1); - - for (i = 0; i < count; i++) { - req->avr_fi_addr = &fi_addr[i]; - - if (addr_format_str) { - usdf_str_toaddr(addr_str[i], &cur_sin); - if (NULL == cur_sin) { - ret = -FI_ENOMEM; - goto fail; - } - sin = cur_sin; - } - - /* find the address we actually need to look up */ - ret = 
usnic_nl_rt_lookup(dap->uda_ipaddr_be, - sin->sin_addr.s_addr, dap->uda_ifindex, - &req->avr_daddr_be); - if (ret != 0) { - if (ret == EHOSTUNREACH) { - req->avr_status = -FI_EHOSTUNREACH; - usdf_post_insert_request_error(insert, req); - } else { - ret = -ret; - goto fail; - } - - } else { - if (req->avr_daddr_be == 0) { - req->avr_daddr_be = sin->sin_addr.s_addr; - } - req->avr_dest = calloc(1, sizeof(*req->avr_dest)); - if (req->avr_dest == NULL) { - ret = -FI_ENOMEM; - goto fail; - } - u_dest = &req->avr_dest->ds_dest; - usd_fill_udp_dest(u_dest, dap, - sin->sin_addr.s_addr, sin->sin_port); - u_dest->ds_dest.ds_udp.u_hdr.uh_ip.frag_off |= - htons(IP_DF); - - TAILQ_INSERT_TAIL(&insert->avi_req_list, req, avr_link); - } - - if (addr_format_str) { - free(cur_sin); - cur_sin = NULL; - } else { - ++sin; - } - - ++req; - } - - /* resolve all addresses we can */ - usdf_av_insert_progress(insert); - - return 0; - -fail: - if (insert != NULL) { - if (insert->avi_timer != NULL) { - usdf_timer_free(fp, insert->avi_timer); - } - free(insert); - } - return ret; -} - -static int -usdf_am_insert_sync(struct fid_av *fav, const void *addr, size_t count, - fi_addr_t *fi_addr, uint64_t flags, void *context) -{ - const struct sockaddr_in *sin; - const char **addr_str; - struct sockaddr_in *cur_sin; - struct usdf_av *av; - struct usd_dest *u_dest; - struct usdf_dest *dest; - struct fi_info *info; - int ret_count; - int ret; - int *errors; - uint32_t api_version; - size_t i; - bool addr_format_str; - - USDF_TRACE_SYS(AV, "\n"); - - ret_count = 0; - av = av_ftou(fav); - api_version = av->av_domain->dom_fabric->fab_attr.fabric->api_version; - info = av->av_domain->dom_info; - addr_format_str = (info->addr_format == FI_ADDR_STR); - errors = context; - - /* Screen out unsupported flags. */ - if ((flags & ~(FI_MORE|FI_SYNC_ERR)) != 0) - return -FI_EBADFLAGS; - - /* If user set FI_SYNC_ERR, we have to report back to user's buffer. 
*/ - if (flags & FI_SYNC_ERR) { - if (FI_VERSION_LT(api_version, FI_VERSION(1, 5))) - return -FI_EBADFLAGS; - - memset(errors, 0, sizeof(int) * count); - } - - sin = addr; - addr_str = (const char **)addr; - - /* XXX parallelize, this will also eliminate u_dest silliness */ - for (i = 0; i < count; i++) { - - if (addr_format_str) { - usdf_str_toaddr(addr_str[i], &cur_sin); - if (NULL == cur_sin) { - if (flags & FI_SYNC_ERR) - errors[i] = -ENOMEM; - - return ret_count; - } - sin = cur_sin; - } - - dest = NULL; - u_dest = NULL; - ret = usdf_av_alloc_dest(&dest); - if (ret == 0) { - USDF_DBG_SYS(AV, "usd_create_dest(addr=0x%x, port=0x%x)\n", - ntohl(sin->sin_addr.s_addr), ntohs(sin->sin_port)); - ret = usd_create_dest(av->av_domain->dom_dev, - sin->sin_addr.s_addr, sin->sin_port, - &u_dest); - } - if (ret == 0) { - u_dest->ds_dest.ds_udp.u_hdr.uh_ip.frag_off |= - htons(IP_DF); - dest->ds_dest = *u_dest; - fi_addr[i] = (fi_addr_t)dest; - LIST_INSERT_HEAD(&av->av_addresses, dest, - ds_addresses_entry); - ++ret_count; - } else { - if (flags & FI_SYNC_ERR) - errors[i] = -ret; - - fi_addr[i] = FI_ADDR_NOTAVAIL; - free(dest); - } - free(u_dest); - - if (addr_format_str) { - free(cur_sin); - cur_sin = NULL; - } else { - ++sin; - } - } - - return ret_count; -} - -static int usdf_resolve_addr(const char *node, const char *service, - struct sockaddr_in *in) -{ - struct addrinfo *ai; - int ret; - - struct addrinfo hints = { - .ai_family = AF_INET, - }; - - if (!node || !service || !in) - return -FI_EINVAL; - - ret = getaddrinfo(node, service, &hints, &ai); - if (ret) { - USDF_DBG("getaddrinfo: %s\n", gai_strerror(ret)); - return -FI_EINVAL; - } - - *in = *(struct sockaddr_in *) ai->ai_addr; - - assert(ai->ai_family == AF_INET); - assert(in->sin_family == AF_INET); - - freeaddrinfo(ai); - return ret; -} - -static int usdf_av_insertsvc(struct fid_av *fav, const char *node, - const char *service, fi_addr_t *fi_addr, uint64_t flags, - void *context) -{ - struct sockaddr_in addr; - 
struct usdf_av *av; - struct fi_info *info; - int ret; - bool addr_format_str; - - USDF_TRACE_SYS(AV, "\n"); - - av = av_ftou(fav); - info = av->av_domain->dom_info; - addr_format_str = (info->addr_format == FI_ADDR_STR); - - if (!fav) - return -FI_EINVAL; - - if (addr_format_str) { - /* string format should not come with service param. */ - if (service) - return -FI_EINVAL; - - ret = fav->ops->insert(fav, &node, 1, fi_addr, flags, context); - } else { - ret = usdf_resolve_addr(node, service, &addr); - if (ret) - goto fail; - - ret = fav->ops->insert(fav, &addr, 1, fi_addr, flags, context); - } - -fail: - return ret; -} - -static int -usdf_am_remove(struct fid_av *fav, fi_addr_t *fi_addr, size_t count, - uint64_t flags) -{ - struct usdf_dest *dest; - size_t i; - - USDF_TRACE_SYS(AV, "\n"); - - for (i = 0; i < count; ++i) { - if (fi_addr[i] != FI_ADDR_NOTAVAIL) { - dest = (struct usdf_dest *)(uintptr_t)fi_addr[i]; - usdf_av_free_dest(dest); - - /* Mark invalid by setting to FI_ADDR_NOTAVAIL*/ - fi_addr[i] = FI_ADDR_NOTAVAIL; - } - } - - return 0; -} - -static int -usdf_am_lookup(struct fid_av *fav, fi_addr_t fi_addr, void *addr, - size_t *addrlen) -{ - struct usdf_dest *dest; - struct usdf_av *av; - struct fi_info *info; - struct sockaddr_in sin = { 0 }; - size_t copylen; - bool addr_format_str; - - USDF_TRACE_SYS(AV, "\n"); - - av = av_ftou(fav); - info = av->av_domain->dom_info; - addr_format_str = (info->addr_format == FI_ADDR_STR); - - if (fi_addr == FI_ADDR_NOTAVAIL) { - USDF_WARN_SYS(AV, "invalid address, can't lookup\n"); - return -FI_EINVAL; - } - - dest = (struct usdf_dest *)(uintptr_t)fi_addr; - if (*addrlen < sizeof(sin)) { - copylen = *addrlen; - } else { - copylen = sizeof(sin); - } - - sin.sin_family = AF_INET; - usd_expand_dest(&dest->ds_dest, &sin.sin_addr.s_addr, &sin.sin_port); - - if (addr_format_str) - usdf_addr_tostr(&sin, addr, addrlen); - else { - memcpy(addr, &sin, copylen); - *addrlen = sizeof(sin); - } - return 0; -} - -static const char * 
-usdf_av_straddr(struct fid_av *fav, const void *addr, - char *buf, size_t *len) -{ - struct fi_info *info; - struct usdf_av *av; - - if (!len || !addr || !buf) - return NULL; - - av = av_fidtou(fav); - info = av->av_domain->dom_info; - - return ofi_straddr(buf, len, info->addr_format, addr); -} - -static int -usdf_av_bind(struct fid *fid, struct fid *bfid, uint64_t flags) -{ - struct usdf_av *av; - - USDF_TRACE_SYS(AV, "\n"); - - av = av_fidtou(fid); - - switch (bfid->fclass) { - case FI_CLASS_EQ: - if (av->av_eq != NULL) { - return -FI_EINVAL; - } - av->av_eq = eq_fidtou(bfid); - ofi_atomic_inc32(&av->av_eq->eq_refcnt); - break; - default: - return -FI_EINVAL; - } - - return 0; -} - -static struct fi_ops usdf_av_fi_ops = { - .size = sizeof(struct fi_ops), - .close = usdf_av_close, - .bind = usdf_av_bind, - .control = fi_no_control, - .ops_open = usdf_av_ops_open, -}; - -static struct fi_ops_av usdf_am_ops_async = { - .size = sizeof(struct fi_ops_av), - .insert = usdf_am_insert_async, - .insertsvc = usdf_av_insertsvc, - .insertsym = fi_no_av_insertsym, - .remove = usdf_am_remove, - .lookup = usdf_am_lookup, - .straddr = usdf_av_straddr -}; - -static struct fi_ops_av usdf_am_ops_sync = { - .size = sizeof(struct fi_ops_av), - .insert = usdf_am_insert_sync, - .insertsvc = usdf_av_insertsvc, - .insertsym = fi_no_av_insertsym, - .remove = usdf_am_remove, - .lookup = usdf_am_lookup, - .straddr = usdf_av_straddr -}; - -static int usdf_av_process_attr(struct fi_av_attr *attr) -{ - USDF_TRACE_SYS(AV, "\n"); - - if (attr == NULL) { - USDF_WARN_SYS(AV, "NULL AV attribute structure is invalid\n"); - return -FI_EINVAL; - } - - if (attr->name || attr->map_addr || (attr->flags & FI_READ)) { - USDF_WARN_SYS(AV, "named AVs are not supported\n"); - return -FI_ENOSYS; - } - - if (attr->flags & ~FI_EVENT) { - USDF_WARN_SYS(AV, "invalid flag, only FI_EVENT is supported\n"); - return -FI_EINVAL; - } - - if (attr->rx_ctx_bits) { - USDF_WARN_SYS(AV, "scalable endpoints not supported\n"); 
- return -FI_EINVAL; - } - - if (attr->ep_per_node > 1) - USDF_WARN_SYS(AV, "ep_per_node not supported, ignoring\n"); - - switch (attr->type) { - case FI_AV_UNSPEC: - USDF_DBG_SYS(AV, "no AV type specified, using FI_AV_MAP\n"); - case FI_AV_MAP: - break; - case FI_AV_TABLE: - USDF_DBG_SYS(AV, "FI_AV_TABLE is unsupported\n"); - return -FI_ENOSYS; - default: - USDF_WARN_SYS(AV, "unknown AV type %d, not supported", - attr->type); - return -FI_EINVAL; - } - - return FI_SUCCESS; -} - -int -usdf_av_open(struct fid_domain *domain, struct fi_av_attr *attr, - struct fid_av **av_o, void *context) -{ - struct usdf_domain *udp; - struct usdf_av *av; - int ret; - - USDF_TRACE_SYS(AV, "\n"); - - if (!av_o) { - USDF_WARN_SYS(AV, "provided AV pointer can not be NULL\n"); - return -FI_EINVAL; - } - - ret = usdf_av_process_attr(attr); - if (ret) - return ret; - - udp = dom_ftou(domain); - - av = calloc(1, sizeof(*av)); - if (av == NULL) { - return -FI_ENOMEM; - } - - if (attr->flags & FI_EVENT) { - av->av_fid.ops = &usdf_am_ops_async; - } else { - av->av_fid.ops = &usdf_am_ops_sync; - } - - LIST_INIT(&av->av_addresses); - - av->av_fid.fid.fclass = FI_CLASS_AV; - av->av_fid.fid.context = context; - av->av_fid.fid.ops = &usdf_av_fi_ops; - av->av_flags = attr->flags; - - pthread_spin_init(&av->av_lock, PTHREAD_PROCESS_PRIVATE); - ofi_atomic_initialize32(&av->av_active_inserts, 0); - ofi_atomic_initialize32(&av->av_closing, 0); - - ofi_atomic_initialize32(&av->av_refcnt, 0); - ofi_atomic_inc32(&udp->dom_refcnt); - av->av_domain = udp; - - *av_o = av_utof(av); - return 0; -} - -/* Look up if the sin address has been already inserted. - * if match, return the address of the dest pointer. otherwise, - * returns FI_ADDR_NOTAVAIL. 
- */ -fi_addr_t usdf_av_lookup_addr(struct usdf_av *av, - const struct sockaddr_in *sin) -{ - struct usdf_dest *cur; - struct usd_udp_hdr u_hdr; - - for (cur = av->av_addresses.lh_first; cur; - cur = cur->ds_addresses_entry.le_next) { - u_hdr = cur->ds_dest.ds_dest.ds_udp.u_hdr; - if (sin->sin_addr.s_addr == u_hdr.uh_ip.daddr && - sin->sin_port == u_hdr.uh_udp.dest) - return (fi_addr_t)(uintptr_t)cur; - } - return FI_ADDR_NOTAVAIL; -} - -/* Return sockaddr_in pointer. Must be used with usdf_free_sin_if_needed() - * to cleanup properly. - */ -struct sockaddr_in *usdf_format_to_sin(const struct fi_info *info, const void *addr) -{ - struct sockaddr_in *sin; - - if (!info) - return (struct sockaddr_in *)addr; - - switch (info->addr_format) { - case FI_FORMAT_UNSPEC: - case FI_SOCKADDR: - case FI_SOCKADDR_IN: - return (struct sockaddr_in *)addr; - case FI_ADDR_STR: - usdf_str_toaddr(addr, &sin); - return sin; - default: - return NULL; - } -} - -/* Utility function to free the sockaddr_in allocated from usdf_format_to_sin() - */ -void usdf_free_sin_if_needed(const struct fi_info *info, struct sockaddr_in *sin) -{ - if (info && info->addr_format == FI_ADDR_STR) - free(sin); -} - -/* Convert sockaddr_in pointer to appropriate format. - * If conversion happens, destroy the origin. 
(to minimize cleaning up code) - */ -void *usdf_sin_to_format(const struct fi_info *info, void *addr, size_t *len) -{ - size_t addr_strlen; - char *addrstr; - - if (!info) - return addr; - - switch (info->addr_format) { - case FI_FORMAT_UNSPEC: - case FI_SOCKADDR: - case FI_SOCKADDR_IN: - if (len) - *len = sizeof(struct sockaddr_in); - return addr; - case FI_ADDR_STR: - addrstr = calloc(1, USDF_ADDR_STR_LEN); - if (addrstr == NULL) { - USDF_DBG_SYS(AV, "memory allocation failed\n"); - return NULL; - } - - addr_strlen = USDF_ADDR_STR_LEN; - usdf_addr_tostr(addr, addrstr, &addr_strlen); - - if (len) - *len = addr_strlen; - - free(addr); - return addrstr; - default: - return NULL; - } - -} diff --git a/prov/usnic/src/usdf_av.h b/prov/usnic/src/usdf_av.h deleted file mode 100644 index 24e3cd511e6..00000000000 --- a/prov/usnic/src/usdf_av.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2014-2019, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ -#ifndef _USDF_AV_H_ -#define _USDF_AV_H_ - -#include "usd_dest.h" - -#define USDF_AV_MAX_ARPS 3 -#define USDF_AV_ARP_INTERVAL 1000 - -struct usdf_rdm_connection; - -/* - * libfabric version of dest - */ -struct usdf_dest { - struct usd_dest ds_dest; - - LIST_ENTRY(usdf_dest) ds_addresses_entry; -}; - -/* struct used to track async insert requests */ -struct usdf_av_req { - fi_addr_t *avr_fi_addr; - struct usdf_dest *avr_dest; - int avr_status; - - uint32_t avr_daddr_be; - - TAILQ_ENTRY(usdf_av_req) avr_link; -}; - -struct usdf_av_insert { - struct usdf_av *avi_av; - void *avi_context; - - struct usdf_timer_entry *avi_timer; - - uint32_t avi_successes; - TAILQ_HEAD(,usdf_av_req) avi_req_list; - uint32_t avi_arps_left; - uint64_t avi_last_arp_time; -}; - -struct usdf_av { - struct fid_av av_fid; - struct usdf_domain *av_domain; - uint64_t av_flags; - struct usdf_eq *av_eq; - ofi_atomic32_t av_refcnt; - ofi_atomic32_t av_closing; - ofi_atomic32_t av_active_inserts; - pthread_spinlock_t av_lock; - LIST_HEAD(, usdf_dest) av_addresses; -}; - -#define av_ftou(FAV) container_of(FAV, struct usdf_av, av_fid) -#define av_fidtou(FID) container_of(FID, struct usdf_av, av_fid.fid) 
-#define av_utof(AV) (&(AV)->av_fid) - -fi_addr_t usdf_av_lookup_addr(struct usdf_av *av, - const struct sockaddr_in *sin); - -#endif /* _USDF_AV_H_ */ diff --git a/prov/usnic/src/usdf_cm.c b/prov/usnic/src/usdf_cm.c deleted file mode 100644 index cc2198e9b7a..00000000000 --- a/prov/usnic/src/usdf_cm.c +++ /dev/null @@ -1,321 +0,0 @@ -/* - * Copyright (c) 2014-2019, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include "ofi.h" -#include "ofi_file.h" - -#include "usnic_direct.h" -#include "usdf.h" -#include "usdf_endpoint.h" -#include "usdf_dgram.h" -#include "usdf_av.h" -#include "usdf_cm.h" - -/* Given a connection request structure containing data, make a copy of the data - * that can be accessed in error entries on the EQ. The return value is the size - * of the data stored in the error entry. If the return value is a non-negative - * value, then the function has suceeded and the size and output data can be - * assumed to be valid. If the function fails, then the data will be NULL and - * the size will be a negative error value. - */ -static int usdf_cm_generate_err_data(struct usdf_eq *eq, - struct usdf_connreq *crp, void **data) -{ - struct usdf_err_data_entry *err_data_entry; - struct usdf_connreq_msg *reqp; - size_t entry_size; - size_t data_size; - - if (!eq || !crp || !data) { - USDF_DBG_SYS(EP_CTRL, - "eq, crp, or data is NULL.\n"); - return -FI_EINVAL; - } - - /* Initialize to NULL so data can't be used in the error case. */ - *data = NULL; - - reqp = (struct usdf_connreq_msg *) crp->cr_data; - - /* This is a normal case, maybe there was no data. 
*/ - if (!reqp || !reqp->creq_datalen) - return 0; - - data_size = reqp->creq_datalen; - - entry_size = sizeof(*err_data_entry) + data_size; - - err_data_entry = calloc(1, entry_size); - if (!err_data_entry) { - USDF_WARN_SYS(EP_CTRL, - "failed to allocate err data entry\n"); - return -FI_ENOMEM; - } - - /* This data should be copied and owned by the provider. Keep - * track of it in the EQ, this will be freed in the next EQ read - * call after it has been read. - */ - memcpy(err_data_entry->err_data, reqp->creq_data, data_size); - slist_insert_tail(&err_data_entry->entry, &eq->eq_err_data); - - *data = err_data_entry->err_data; - - return data_size; -} - -/* Report a connection management related failure. Sometimes there is connection - * event data that should be copied into the generated event. If the copy_data - * parameter evaluates to true, then the data will be copied. - * - * If data is to be generated for the error entry, then the connection request - * is assumed to have the data size in host order. If something fails during - * processing of the error data, then the EQ entry will still be generated - * without the error data. - */ -void usdf_cm_report_failure(struct usdf_connreq *crp, int error, bool copy_data) -{ - struct fi_eq_err_entry err = {0}; - struct usdf_pep *pep; - struct usdf_ep *ep; - struct usdf_eq *eq; - fid_t fid; - int ret; - - USDF_DBG_SYS(EP_CTRL, "error=%d (%s)\n", error, fi_strerror(error)); - - pep = crp->cr_pep; - ep = crp->cr_ep; - - if (ep != NULL) { - fid = ep_utofid(ep); - eq = ep->ep_eq; - ep->ep_domain->dom_peer_tab[ep->e.msg.ep_rem_peer_id] = NULL; - } else { - fid = pep_utofid(pep); - eq = pep->pep_eq; - } - - /* Try to generate the space necessary for the error data. If the - * function returns a number greater than or equal to 0, then it was a - * success. The return value is the size of the data. 
- */ - if (copy_data) { - ret = usdf_cm_generate_err_data(eq, crp, &err.err_data); - if (ret >= 0) - err.err_data_size = ret; - } - - err.fid = fid; - err.err = -error; - - usdf_eq_write_internal(eq, 0, &err, sizeof(err), USDF_EVENT_FLAG_ERROR); -} - -/* A wrapper to core function to translate string address to - * sockaddr_in type. We are expecting a NULL sockaddr_in**. - * The core function will allocated it for us. The caller HAS TO FREE it. - */ -int usdf_str_toaddr(const char *str, struct sockaddr_in **outaddr) -{ - uint32_t type; - size_t size; - int ret; - - type = FI_SOCKADDR_IN; - - /* call the core function. The core always allocate the addr for us. */ - ret = ofi_str_toaddr(str, &type, (void **)outaddr, &size); - -#if ENABLE_DEBUG - char outstr[USDF_ADDR_STR_LEN]; - size_t out_size = USDF_ADDR_STR_LEN; - - inet_ntop(AF_INET, &((*outaddr)->sin_addr), outstr, out_size); - USDF_DBG_SYS(EP_CTRL, - "%s(string) converted to addr :%s:%u(inet)\n", - str, outstr, ntohs((*outaddr)->sin_port)); -#endif - - return ret; -} - -/* A wrapper to core function to translate sockaddr_in address to - * string. This function is not allocating any memory. We are expected - * an allocated buffer. 
- */ -const char *usdf_addr_tostr(const struct sockaddr_in *sin, - char *addr_str, size_t *size) -{ - const char *ret; - - ret = ofi_straddr(addr_str, size, FI_SOCKADDR_IN, sin); - -#if ENABLE_DEBUG - char outstr[USDF_ADDR_STR_LEN]; - size_t out_size = USDF_ADDR_STR_LEN; - - inet_ntop(AF_INET, &sin->sin_addr, outstr, out_size); - USDF_DBG_SYS(EP_CTRL, - "%s:%d(inet) converted to %s(string)\n", - outstr, ntohs(sin->sin_port), addr_str); -#endif - - return ret; -} - -/* - * Return local address of an EP - */ -static int usdf_cm_copy_name(struct fi_info *info, struct sockaddr_in *sin, - void *addr, size_t *addrlen) -{ - int ret; - char addr_str[USDF_ADDR_STR_LEN]; - size_t len; - - USDF_TRACE_SYS(EP_CTRL, "\n"); - - ret = FI_SUCCESS; - switch (info->addr_format) { - case FI_ADDR_STR: - len = USDF_ADDR_STR_LEN; - usdf_addr_tostr(sin, addr_str, &len); - snprintf(addr, MIN(len, *addrlen), "%s", addr_str); - break; - case FI_SOCKADDR: - case FI_SOCKADDR_IN: - len = sizeof(*sin); - memcpy(addr, sin, MIN(len, *addrlen)); - break; - default: - return -FI_EINVAL; - } - - /* If the buffer is too small, tell the user. */ - if (*addrlen < len) - ret = -FI_ETOOSMALL; - - /* Always return the actual size. 
*/ - *addrlen = len; - return ret; -} - -int usdf_cm_dgram_getname(fid_t fid, void *addr, size_t *addrlen) -{ - int ret; - struct usdf_ep *ep; - struct sockaddr_in sin; - struct fi_info *info; - socklen_t slen; - - USDF_TRACE_SYS(EP_CTRL, "\n"); - - ep = ep_fidtou(fid); - info = ep->ep_domain->dom_info; - - memset(&sin, 0, sizeof(sin)); - if (ep->e.dg.ep_qp == NULL) { - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = - ep->ep_domain->dom_fabric->fab_dev_attrs->uda_ipaddr_be; - sin.sin_port = 0; - } else { - slen = sizeof(sin); - ret = getsockname(ep->e.dg.ep_sock, (struct sockaddr *)&sin, &slen); - if (ret == -1) { - return -errno; - } - assert(((struct sockaddr *)&sin)->sa_family == AF_INET); - assert(slen == sizeof(sin)); - assert(sin.sin_addr.s_addr == - ep->ep_domain->dom_fabric->fab_dev_attrs->uda_ipaddr_be); - } - - return usdf_cm_copy_name(info, &sin, addr, addrlen); -} - -/* Checks that the given address is actually a sockaddr_in of appropriate - * length. "addr_format" is an FI_ constant like FI_SOCKADDR_IN indicating the - * claimed type of the given address. - * - * Returns true if address is actually a sockaddr_in, false otherwise. - * - * Upon successful return, "addr" can be safely cast to either - * "struct sockaddr_in *" or "struct sockaddr *". - * - * "addr" should not be NULL. 
- */ -bool usdf_cm_addr_is_valid_sin(void *addr, size_t addrlen, uint32_t addr_format) -{ - assert(addr != NULL); - - switch (addr_format) { - case FI_SOCKADDR_IN: - case FI_SOCKADDR: - if (addrlen != sizeof(struct sockaddr_in)) { - USDF_WARN("addrlen is incorrect\n"); - return false; - } - if (((struct sockaddr *)addr)->sa_family != AF_INET) { - USDF_WARN("unknown/unsupported addr_format\n"); - return false; - } - return true; - default: - USDF_WARN("unknown/unsupported addr_format\n"); - return false; - } -} diff --git a/prov/usnic/src/usdf_cm.h b/prov/usnic/src/usdf_cm.h deleted file mode 100644 index d361818055b..00000000000 --- a/prov/usnic/src/usdf_cm.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2014-2019, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ -#ifndef _USDF_CM_H_ -#define _USDF_CM_H_ - -#include -#include -#include -#include -#include - -#define USDF_MAX_CONN_DATA 256 - -struct usdf_connreq_msg { - uint32_t creq_peer_id; - uint32_t creq_ipaddr; - uint32_t creq_port; - uint32_t creq_result; - uint32_t creq_reason; - uint32_t creq_datalen; - uint8_t creq_data[]; -} __attribute__((packed)); - -struct usdf_connreq { - struct fid handle; - int cr_sockfd; - struct usdf_pep *cr_pep; - struct usdf_ep *cr_ep; - TAILQ_ENTRY(usdf_connreq) cr_link; - - struct usdf_poll_item cr_pollitem; - - uint8_t *cr_ptr; - size_t cr_resid; - - size_t cr_datalen; - uint8_t cr_data[]; -}; - -void usdf_cm_report_failure(struct usdf_connreq *crp, int error, - bool skip_data); - -int usdf_cm_dgram_getname(fid_t fid, void *addr, size_t *addrlen); - -bool usdf_cm_addr_is_valid_sin(void *addr, size_t addrlen, - uint32_t addr_format); - -int usdf_str_toaddr(const char *str, struct sockaddr_in **outaddr); -const char *usdf_addr_tostr(const struct sockaddr_in *sin, - char *addr_str, size_t *size); - -#endif /* _USDF_CM_H_ */ diff --git a/prov/usnic/src/usdf_cq.c b/prov/usnic/src/usdf_cq.c deleted file mode 100644 index 604ca15ad6f..00000000000 --- a/prov/usnic/src/usdf_cq.c +++ /dev/null @@ -1,1333 +0,0 @@ -/* - * Copyright (c) 2014-2017, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include "ofi.h" -#include "ofi_enosys.h" - -#include "usnic_direct.h" -#include "usd.h" -#include "usdf.h" -#include "usdf_av.h" -#include "usdf_progress.h" -#include "usdf_cq.h" -#include "usd_ib_cmd.h" -#include "usdf_wait.h" - -static inline int usdf_cqe_to_flags(struct usd_completion *comp) -{ - switch (comp->uc_type) { - case USD_COMPTYPE_SEND: - return (FI_MSG | FI_SEND); - case USD_COMPTYPE_RECV: - return (FI_MSG | FI_RECV); - default: - USDF_DBG_SYS(CQ, "WARNING: unknown completion type! (%d)\n", - comp->uc_type); - return 0; - } - -} - -static ssize_t -usdf_cq_readerr(struct fid_cq *fcq, struct fi_cq_err_entry *entry, - uint64_t flags) -{ - struct usdf_cq *cq; - uint32_t api_version; - - USDF_TRACE_SYS(CQ, "\n"); - - cq = container_of(fcq, struct usdf_cq, cq_fid); - api_version = cq->cq_domain->dom_fabric->fab_attr.fabric->api_version; - - // The return values are analogous to sockets cq_readerr - if (cq->cq_comp.uc_status == 0) { - return -FI_EAGAIN; - } - - entry->op_context = cq->cq_comp.uc_context; - entry->flags = 0; - switch (cq->cq_comp.uc_status) { - case USD_COMPSTAT_SUCCESS: - entry->prov_errno = FI_SUCCESS; - break; - case USD_COMPSTAT_ERROR_CRC: - entry->prov_errno = FI_ECRC; - break; - case USD_COMPSTAT_ERROR_TRUNC: - entry->prov_errno = FI_ETRUNC; - break; - case USD_COMPSTAT_ERROR_TIMEOUT: - entry->prov_errno = FI_ETIMEDOUT; - break; - case USD_COMPSTAT_ERROR_INTERNAL: - default: - entry->prov_errno = FI_EOTHER; - break; - } - entry->err = entry->prov_errno; - - cq->cq_comp.uc_status = 0; - - /* We don't have err_data to give back to the user. 
*/ - if (FI_VERSION_GE(api_version, FI_VERSION(1, 5))) - entry->err_data_size = 0; - - return 1; -} - -static ssize_t -usdf_cq_readerr_soft(struct fid_cq *fcq, struct fi_cq_err_entry *entry, - uint64_t flags) -{ - struct usdf_cq *cq; - struct usdf_cq_soft_entry *tail; - - USDF_TRACE_SYS(CQ, "\n"); - - cq = container_of(fcq, struct usdf_cq, cq_fid); - - tail = cq->c.soft.cq_tail; - - entry->op_context = tail->cse_context; - entry->flags = 0; - entry->prov_errno = tail->cse_prov_errno; - entry->err = entry->prov_errno; - - tail++; - if (tail == cq->c.soft.cq_end) { - tail = cq->c.soft.cq_comps; - } - cq->c.soft.cq_tail = tail; - - return 1; -} - -/* Completion lengths should reflect the length given by the application to the - * send/recv call. This means we need to update the lengths for both prefix and - * non-prefix send paths. - * - * RECEIVE COMPLETIONS - * - * Non-prefix: the application isn't aware of the usd_udp_hdr struct. Default - * completion semantics include this in the completion length since it is part - * of the send. - * - * Prefix: the application has allocated a buffer that includes the advertised - * prefix size. For performance reasons our advertised prefix size is not the - * same size as hour headers. To reflect the correct size we need to add the - * size of the padding. - * - * SEND COMPLETIONS - * The send completions are dependent upon the wp_len value that is set by the - * library when using the underscore prefixed variants of the usd functions or - * by the usd library when using the non-underscore prefixed variants. - * Currently all send functions have been unified to report wp_len as the - * length of the payload. This means that adjustments need to be made when in - * libfabric prefix mode. 
- */ -static inline void usdf_cq_adjust_len(struct usd_completion *src, - size_t *len) -{ - struct usdf_ep *ep = src->uc_qp->uq_context; - - if (src->uc_type == USD_COMPTYPE_RECV) { - if (ep->ep_mode & FI_MSG_PREFIX) - *len += (USDF_HDR_BUF_ENTRY - - sizeof(struct usd_udp_hdr)); - else - *len -= sizeof(struct usd_udp_hdr); - } else { - if (ep->ep_mode & FI_MSG_PREFIX) - *len += USDF_HDR_BUF_ENTRY; - } -} - -static inline ssize_t -usdf_cq_copy_cq_entry(void *dst, struct usd_completion *src, - enum fi_cq_format format) -{ - struct fi_cq_entry *ctx_entry; - struct fi_cq_msg_entry *msg_entry; - struct fi_cq_data_entry *data_entry; - - switch (format) { - case FI_CQ_FORMAT_CONTEXT: - ctx_entry = (struct fi_cq_entry *)dst; - ctx_entry->op_context = src->uc_context; - break; - case FI_CQ_FORMAT_MSG: - msg_entry = (struct fi_cq_msg_entry *)dst; - msg_entry->op_context = src->uc_context; - msg_entry->flags = usdf_cqe_to_flags(src); - msg_entry->len = src->uc_bytes; - - usdf_cq_adjust_len(src, &msg_entry->len); - - break; - case FI_CQ_FORMAT_DATA: - data_entry = (struct fi_cq_data_entry *)dst; - data_entry->op_context = src->uc_context; - data_entry->flags = usdf_cqe_to_flags(src); - data_entry->len = src->uc_bytes; - data_entry->buf = 0; /* XXX */ - data_entry->data = 0; - - usdf_cq_adjust_len(src, &data_entry->len); - - break; - default: - USDF_WARN("unexpected CQ format, internal error\n"); - return -FI_EOPNOTSUPP; - } - - return FI_SUCCESS; -} - -/* - * poll a hard CQ - * Since this routine is an inline and is always called with format as - * a constant, I am counting on the compiler optimizing away all the switches - * on format. 
- */ -static inline ssize_t -usdf_cq_read_common(struct fid_cq *fcq, void *buf, size_t count, - enum fi_cq_format format) -{ - struct usdf_cq *cq; - struct usdf_fabric *fab; - size_t copylen; - size_t copied; - uint8_t *dest; - ssize_t ret; - - cq = cq_ftou(fcq); - fab = cq->cq_domain->dom_fabric; - - if (cq->cq_comp.uc_status != USD_COMPSTAT_SUCCESS) - return -FI_EAVAIL; - - switch (format) { - case FI_CQ_FORMAT_CONTEXT: - copylen = sizeof(struct fi_cq_entry); - break; - case FI_CQ_FORMAT_MSG: - copylen = sizeof(struct fi_cq_msg_entry); - break; - case FI_CQ_FORMAT_DATA: - copylen = sizeof(struct fi_cq_data_entry); - break; - default: - USDF_WARN_SYS(CQ, "unexpected CQ format, internal error\n"); - return -FI_EOPNOTSUPP; - } - - dest = buf; - - for (copied = 0; copied < count; copied++) { - ret = usd_poll_cq(cq->c.hard.cq_cq, &cq->cq_comp); - if (ret == -EAGAIN) - break; - - if (cq->cq_comp.uc_status != USD_COMPSTAT_SUCCESS) { - if (copied == 0) - return -FI_EAVAIL; - - break; - } - - ret = usdf_cq_copy_cq_entry(dest, &cq->cq_comp, format); - if (ret < 0) - return ret; - - dest += copylen; - } - - if (cq->cq_waiting) { - cq->cq_waiting = false; - ofi_atomic_dec32(&fab->num_blocked_waiting); - } - - return copied > 0 ? 
copied : -FI_EAGAIN; -} - -static ssize_t -usdf_cq_read_context(struct fid_cq *fcq, void *buf, size_t count) -{ - return usdf_cq_read_common(fcq, buf, count, FI_CQ_FORMAT_CONTEXT); -} - -static ssize_t -usdf_cq_read_msg(struct fid_cq *fcq, void *buf, size_t count) -{ - return usdf_cq_read_common(fcq, buf, count, FI_CQ_FORMAT_MSG); -} - -static ssize_t -usdf_cq_read_data(struct fid_cq *fcq, void *buf, size_t count) -{ - return usdf_cq_read_common(fcq, buf, count, FI_CQ_FORMAT_DATA); -} - -static ssize_t -usdf_cq_readfrom_context(struct fid_cq *fcq, void *buf, size_t count, - fi_addr_t *src_addr) -{ - struct usdf_cq *cq; - struct usd_cq_impl *ucq; - struct fi_cq_entry *entry; - struct fi_cq_entry *last; - ssize_t ret; - struct cq_desc *cq_desc; - struct usdf_ep *ep; - struct sockaddr_in sin; - struct usd_udp_hdr *hdr; - uint16_t index; - - cq = cq_ftou(fcq); - if (cq->cq_comp.uc_status != 0) { - return -FI_EAVAIL; - } - ucq = to_cqi(cq->c.hard.cq_cq); - - ret = 0; - entry = buf; - last = entry + count; - while (entry < last) { - cq_desc = (struct cq_desc *)((uint8_t *)ucq->ucq_desc_ring + - (ucq->ucq_next_desc << 4)); - - ret = usd_poll_cq(cq->c.hard.cq_cq, &cq->cq_comp); - if (ret == -EAGAIN) { - ret = 0; - break; - } - if (cq->cq_comp.uc_status != 0) { - ret = -FI_EAVAIL; - break; - } - - if (cq->cq_comp.uc_type == USD_COMPTYPE_RECV) { - index = le16_to_cpu(cq_desc->completed_index) & - CQ_DESC_COMP_NDX_MASK; - ep = cq->cq_comp.uc_qp->uq_context; - hdr = ep->e.dg.ep_hdr_ptr[index]; - memset(&sin, 0, sizeof(sin)); - - sin.sin_addr.s_addr = hdr->uh_ip.saddr; - sin.sin_port = hdr->uh_udp.source; - - *src_addr = usdf_av_lookup_addr(ep->e.dg.ep_av, &sin); - ++src_addr; - } - - - entry->op_context = cq->cq_comp.uc_context; - - entry++; - } - - if (entry > (struct fi_cq_entry *)buf) { - return entry - (struct fi_cq_entry *)buf; - } else { - return ret; - } -} - -/***************************************************************** - * "soft" CQ support - 
*****************************************************************/ - -void -usdf_progress_hard_cq(struct usdf_cq_hard *hcq) -{ - int ret; - struct usd_completion comp; - struct usdf_cq_soft_entry *entry; - struct usdf_cq *cq; - - cq = hcq->cqh_cq; - - do { - ret = usd_poll_cq(hcq->cqh_ucq, &comp); - if (ret == 0) { - entry = cq->c.soft.cq_head; - - /* If the current entry is equal to the tail and the - * last operation was a write, then we have filled the - * queue and we just drop whatever there isn't space - * for. - */ - if ((entry == cq->c.soft.cq_tail) && - (cq->c.soft.cq_last_op == - USDF_SOFT_CQ_WRITE)) - return; - - entry->cse_context = cq->cq_comp.uc_context; - entry->cse_flags = 0; - entry->cse_len = cq->cq_comp.uc_bytes; - entry->cse_buf = 0; /* XXX TODO */ - entry->cse_data = 0; - - /* update with wrap */ - entry++; - if (entry != cq->c.soft.cq_end) { - cq->c.soft.cq_head = entry; - } else { - cq->c.soft.cq_head = cq->c.soft.cq_comps; - } - - cq->c.soft.cq_last_op = USDF_SOFT_CQ_WRITE; - } - } while (ret != -EAGAIN); -} - -void -usdf_cq_post_soft(struct usdf_cq_hard *hcq, void *context, size_t len, - int prov_errno, uint64_t flags) -{ - int ret; - struct usdf_cq_soft_entry *entry; - struct usdf_cq *cq; - uint64_t val = 1; - - cq = hcq->cqh_cq; - - entry = cq->c.soft.cq_head; - - /* If the current entry is equal to the tail and the - * last operation was a write, then we have filled the - * queue and we just drop whatever there isn't space - * for. 
- */ - if ((entry == cq->c.soft.cq_tail) && - (cq->c.soft.cq_last_op == USDF_SOFT_CQ_WRITE)) - return; - - entry->cse_context = context; - entry->cse_len = len; - entry->cse_prov_errno = prov_errno; - entry->cse_flags = flags; - - /* update with wrap */ - entry++; - if (entry != cq->c.soft.cq_end) { - cq->c.soft.cq_head = entry; - } else { - cq->c.soft.cq_head = cq->c.soft.cq_comps; - } - - cq->c.soft.cq_last_op = USDF_SOFT_CQ_WRITE; - - if (cq->cq_attr.wait_obj == FI_WAIT_SET || - cq->cq_attr.wait_obj == FI_WAIT_FD) - while (1) { - ret = write(cq->object.fd, &val, sizeof(val)); - assert(ret == sizeof(val) || - (ret == -1 && errno == EINTR)); - if (ret == sizeof(val)) - return; - else if (ret == -1 && errno == EINTR) - continue; - - /* If the write() fails, there will be no user - * notification. Best we can do is emit a - * debug notice... - */ - USDF_WARN_SYS(CQ, "error while writing to wake CQ\n"); - return; - } -} - -static inline ssize_t -usdf_cq_copy_soft_entry(void *dst, const struct usdf_cq_soft_entry *src, - enum fi_cq_format dst_format) -{ - struct fi_cq_entry *ctx_entry; - struct fi_cq_msg_entry *msg_entry; - struct fi_cq_data_entry *data_entry; - - switch (dst_format) { - case FI_CQ_FORMAT_CONTEXT: - ctx_entry = (struct fi_cq_entry *)dst; - ctx_entry->op_context = src->cse_context; - break; - case FI_CQ_FORMAT_MSG: - msg_entry = (struct fi_cq_msg_entry *)dst; - msg_entry->op_context = src->cse_context; - msg_entry->flags = src->cse_flags; - msg_entry->len = src->cse_len; - break; - case FI_CQ_FORMAT_DATA: - data_entry = (struct fi_cq_data_entry *)dst; - data_entry->op_context = src->cse_context; - data_entry->flags = src->cse_flags; - data_entry->len = src->cse_len; - data_entry->buf = src->cse_buf; - data_entry->data = src->cse_data; - break; - default: - USDF_WARN("unexpected CQ format, internal error\n"); - return -FI_EOPNOTSUPP; - } - - return FI_SUCCESS; -} - -static ssize_t usdf_cq_sread(struct fid_cq *fcq, void *buf, size_t count, - const void 
*cond, int timeout_ms) -{ - struct usdf_cq *cq; - size_t sleep_time_us; - size_t time_spent_us = 0; - ssize_t ret; - - cq = cq_ftou(fcq); - - if (cq->cq_attr.wait_obj == FI_WAIT_NONE) - return -FI_EOPNOTSUPP; - - sleep_time_us = SREAD_INIT_SLEEP_TIME_US; - - while (1) { - ret = fi_cq_read(fcq, buf, count); - if (ret != -FI_EAGAIN) - return ret; - - if (timeout_ms >= 0) { - if (time_spent_us >= (1000 * timeout_ms)) - break; - } - - usleep(sleep_time_us); - time_spent_us += sleep_time_us; - - /* exponentially back off up to a limit */ - if (sleep_time_us < SREAD_MAX_SLEEP_TIME_US) - sleep_time_us *= SREAD_EXP_BASE; - sleep_time_us = MIN(sleep_time_us, SREAD_MAX_SLEEP_TIME_US); - } - - return -FI_EAGAIN; -} - -static ssize_t usdf_cq_sread_fd(struct fid_cq *fcq, void *buf, size_t count, - const void *cond, int timeout_ms) -{ - struct usdf_cq *cq; - struct usdf_fabric *fabric; - int ret; - - cq = cq_ftou(fcq); - fabric = cq->cq_domain->dom_fabric; - - ret = usdf_cq_trywait(&fcq->fid); - if (ret == FI_SUCCESS) { - ofi_atomic_inc32(&fabric->num_blocked_waiting); - - ret = usdf_fabric_wake_thread(fabric); - if (ret) { - USDF_DBG_SYS(CQ, - "error while waking progress thread\n"); - goto err; - } - - ret = fi_poll_fd(cq->object.fd, timeout_ms); - if (ret == 0) { - ret = -FI_EAGAIN; - goto err; - } else if (ret < 0) { - USDF_DBG_SYS(CQ, "poll failed: %s\n", strerror(-ret)); - goto err; - } - - ofi_atomic_dec32(&fabric->num_blocked_waiting); - } else if ((ret < 0) && (ret != -FI_EAGAIN)) { - return ret; - } - - return fi_cq_read(fcq, buf, count); - -err: - ofi_atomic_dec32(&fabric->num_blocked_waiting); - return ret; -} - -/* - * poll a soft CQ - * This will loop over all the hard CQs within, collecting results. - * Since this routine is an inline and is always called with format as - * a constant, I am counting on the compiler optimizing away all the switches - * on format. 
- */ -static inline ssize_t -usdf_cq_read_common_soft(struct fid_cq *fcq, void *buf, size_t count, - enum fi_cq_format format) -{ - struct usdf_cq *cq; - uint8_t *dest; - struct usdf_cq_soft_entry *tail; - size_t copylen; - size_t copied; - ssize_t ret; - - cq = cq_ftou(fcq); - - if (cq->cq_comp.uc_status != USD_COMPSTAT_SUCCESS) - return -FI_EAVAIL; - - /* progress... */ - usdf_domain_progress(cq->cq_domain); - - switch (format) { - case FI_CQ_FORMAT_CONTEXT: - copylen = sizeof(struct fi_cq_entry); - break; - case FI_CQ_FORMAT_MSG: - copylen = sizeof(struct fi_cq_msg_entry); - break; - case FI_CQ_FORMAT_DATA: - copylen = sizeof(struct fi_cq_data_entry); - break; - default: - USDF_WARN_SYS(CQ, "unexpected CQ format, internal error\n"); - return -FI_EOPNOTSUPP; - } - - dest = buf; - tail = cq->c.soft.cq_tail; - - for (copied = 0; copied < count; copied++) { - if (tail == cq->c.soft.cq_head) { - /* If the tail and head match and the last operation was - * a read then we have an empty queue. - */ - if (cq->c.soft.cq_last_op == USDF_SOFT_CQ_READ) - break; - } - - if (tail->cse_prov_errno != FI_SUCCESS) { - /* If this is the first read, then just return EAVAIL. - * Although we already checked above, this last read may - * have contained an error. If this isn't the first read - * then break and return the count read. The next read - * will yield an error. - */ - if (copied == 0) - return -FI_EAVAIL; - - break; - } - - ret = usdf_cq_copy_soft_entry(dest, tail, format); - if (ret < 0) - return ret; - - dest += copylen; - - tail++; - if (tail == cq->c.soft.cq_end) - tail = cq->c.soft.cq_comps; - - cq->c.soft.cq_last_op = USDF_SOFT_CQ_READ; - } - - cq->c.soft.cq_tail = tail; - - return copied > 0 ? 
copied : -FI_EAGAIN; -} - -static ssize_t -usdf_cq_read_context_soft(struct fid_cq *fcq, void *buf, size_t count) -{ - return usdf_cq_read_common_soft(fcq, buf, count, FI_CQ_FORMAT_CONTEXT); -} - -static ssize_t -usdf_cq_read_msg_soft(struct fid_cq *fcq, void *buf, size_t count) -{ - return usdf_cq_read_common_soft(fcq, buf, count, FI_CQ_FORMAT_MSG); -} - -static ssize_t -usdf_cq_read_data_soft(struct fid_cq *fcq, void *buf, size_t count) -{ - return usdf_cq_read_common_soft(fcq, buf, count, FI_CQ_FORMAT_DATA); -} - -/***************************************************************** - * common CQ support - *****************************************************************/ - -static const char * -usdf_cq_strerror(struct fid_cq *eq, int prov_errno, const void *err_data, - char *buf, size_t len) -{ - if (buf && len) { - strncpy(buf, fi_strerror(prov_errno), len); - buf[len-1] = '\0'; - return buf; - } - return fi_strerror(prov_errno); -} - -/* Handle the associated wait object when closing a CQ. 
- * - Remove the FD from the wait set epoll context - * - Decrement the ref count on the wait set - * - Remove the CQ from the CQ list attached to the wait set - */ -static int usdf_cq_unbind_wait(struct usdf_cq *cq) -{ - int ret; - struct usdf_wait *wait_priv; - - if (!cq->cq_attr.wait_set) { - USDF_DBG_SYS(CQ, "can't unbind from non-existent wait set\n"); - return -FI_EINVAL; - } - - wait_priv = wait_ftou(cq->cq_attr.wait_set); - - ret = ofi_epoll_del(wait_priv->object.epfd, cq->object.fd); - if (ret) { - USDF_WARN_SYS(CQ, "failed to remove FD from wait set\n"); - return ret; - } - - fid_list_remove(&wait_priv->list, &wait_priv->lock, &cq->cq_fid.fid); - - ofi_atomic_dec32(&wait_priv->wait_refcnt); - - USDF_DBG_SYS(CQ, - "dissasociated CQ FD %d from epoll FD %d using FID: %p\n", - cq->object.fd, wait_priv->object.epfd, &cq->cq_fid.fid); - - return FI_SUCCESS; -} - -static int -usdf_cq_close(fid_t fid) -{ - int ret; - struct usdf_cq *cq; - struct usdf_fabric *fab; - struct usdf_cq_hard *hcq; - - USDF_TRACE_SYS(CQ, "\n"); - - cq = container_of(fid, struct usdf_cq, cq_fid.fid); - fab = cq->cq_domain->dom_fabric; - - if (ofi_atomic_get32(&cq->cq_refcnt) > 0) { - return -FI_EBUSY; - } - - if (cq->cq_attr.wait_obj == FI_WAIT_SET) { - ret = usdf_cq_unbind_wait(cq); - if (ret) - return ret; - } - - if (cq->cq_is_soft) { - while (!TAILQ_EMPTY(&cq->c.soft.cq_list)) { - hcq = TAILQ_FIRST(&cq->c.soft.cq_list); - if (ofi_atomic_get32(&hcq->cqh_refcnt) > 0) { - return -FI_EBUSY; - } - TAILQ_REMOVE(&cq->c.soft.cq_list, hcq, cqh_link); - TAILQ_REMOVE(&cq->cq_domain->dom_hcq_list, hcq, - cqh_dom_link); - if (hcq->cqh_ucq != NULL) { - ret = usd_destroy_cq(hcq->cqh_ucq); - if (ret != 0) { - return ret; - } - } - free(hcq); - } - } else { - if (cq->c.hard.cq_cq) { - ret = usd_destroy_cq(cq->c.hard.cq_cq); - if (ret != 0) { - return ret; - } - } - } - - if (cq->cq_waiting) - ofi_atomic_dec32(&fab->num_blocked_waiting); - - free(cq); - return 0; -} - -static int 
usdf_cq_get_wait(struct usdf_cq *cq, void *arg) -{ - USDF_TRACE_SYS(CQ, "\n"); - - switch (cq->cq_attr.wait_obj) { - case FI_WAIT_FD: - if (cq->object.fd == -1) { - USDF_WARN_SYS(CQ, - "CQ must be bound before FD can be retrieved\n"); - return -FI_EOPBADSTATE; - } - - *(int *) arg = cq->object.fd; - break; - default: - USDF_WARN_SYS(CQ, "unsupported wait type\n"); - return -FI_EINVAL; - } - - return FI_SUCCESS; -} - -static int usdf_wait_control(struct fid *fcq, int command, void *arg) -{ - struct usdf_cq *cq; - - USDF_TRACE_SYS(CQ, "\n"); - - if (!fcq || !arg) { - USDF_WARN_SYS(CQ, "CQ fid and arg can't be NULL\n"); - return -FI_EINVAL; - } - - cq = cq_fidtou(fcq); - - switch (command) { - case FI_GETWAIT: - break; - default: - USDF_WARN_SYS(CQ, "unsupported control command\n"); - return -FI_EINVAL; - } - - return usdf_cq_get_wait(cq, arg); -} - -static struct fi_ops_cq usdf_cq_context_ops = { - .size = sizeof(struct fi_ops_cq), - .read = usdf_cq_read_context, - .readfrom = usdf_cq_readfrom_context, - .readerr = usdf_cq_readerr, - .sread = usdf_cq_sread, - .sreadfrom = fi_no_cq_sreadfrom, - .signal = fi_no_cq_signal, - .strerror = usdf_cq_strerror, -}; - -static struct fi_ops_cq usdf_cq_context_soft_ops = { - .size = sizeof(struct fi_ops_cq), - .read = usdf_cq_read_context_soft, - .readfrom = fi_no_cq_readfrom, - .readerr = usdf_cq_readerr_soft, - .sread = usdf_cq_sread, - .sreadfrom = fi_no_cq_sreadfrom, - .signal = fi_no_cq_signal, - .strerror = usdf_cq_strerror, -}; - -static struct fi_ops_cq usdf_cq_msg_ops = { - .size = sizeof(struct fi_ops_cq), - .read = usdf_cq_read_msg, - .readfrom = fi_no_cq_readfrom, /* XXX */ - .readerr = usdf_cq_readerr, - .sread = usdf_cq_sread, - .sreadfrom = fi_no_cq_sreadfrom, - .signal = fi_no_cq_signal, - .strerror = usdf_cq_strerror, -}; - -static struct fi_ops_cq usdf_cq_msg_soft_ops = { - .size = sizeof(struct fi_ops_cq), - .read = usdf_cq_read_msg_soft, - .readfrom = fi_no_cq_readfrom, /* XXX */ - .readerr = 
usdf_cq_readerr_soft, - .sread = usdf_cq_sread, - .sreadfrom = fi_no_cq_sreadfrom, - .signal = fi_no_cq_signal, - .strerror = usdf_cq_strerror, -}; - -static struct fi_ops_cq usdf_cq_data_ops = { - .size = sizeof(struct fi_ops_cq), - .read = usdf_cq_read_data, - .readfrom = fi_no_cq_readfrom, /* XXX */ - .readerr = usdf_cq_readerr, - .sread = usdf_cq_sread, - .sreadfrom = fi_no_cq_sreadfrom, - .signal = fi_no_cq_signal, - .strerror = usdf_cq_strerror, -}; - -static struct fi_ops_cq usdf_cq_data_soft_ops = { - .size = sizeof(struct fi_ops_cq), - .read = usdf_cq_read_data_soft, - .readfrom = fi_no_cq_readfrom, /* XXX */ - .readerr = usdf_cq_readerr_soft, - .sread = usdf_cq_sread, - .sreadfrom = fi_no_cq_sreadfrom, - .signal = fi_no_cq_signal, - .strerror = usdf_cq_strerror, -}; - -static struct fi_ops usdf_cq_fi_ops = { - .size = sizeof(struct fi_ops), - .close = usdf_cq_close, - .bind = fi_no_bind, - .control = usdf_wait_control, - .ops_open = fi_no_ops_open, -}; - -int -usdf_cq_make_soft(struct usdf_cq *cq) -{ - struct fi_ops_cq *soft_ops; - struct usdf_cq_hard *hcq; - struct usd_cq *ucq; - - switch (cq->cq_attr.format) { - case FI_CQ_FORMAT_CONTEXT: - soft_ops = &usdf_cq_context_soft_ops; - break; - case FI_CQ_FORMAT_MSG: - soft_ops = &usdf_cq_msg_soft_ops; - break; - case FI_CQ_FORMAT_DATA: - soft_ops = &usdf_cq_data_soft_ops; - break; - default: - return 0; - } - - if (!cq->cq_is_soft) { - - /* save the CQ before we trash the union */ - ucq = cq->c.hard.cq_cq; - - /* fill in the soft part of union */ - TAILQ_INIT(&cq->c.soft.cq_list); - cq->c.soft.cq_comps = calloc(cq->cq_attr.size, - sizeof(struct usdf_cq_soft_entry)); - if (cq->c.soft.cq_comps == NULL) { - return -FI_ENOMEM; - } - cq->c.soft.cq_end = cq->c.soft.cq_comps + cq->cq_attr.size; - cq->c.soft.cq_head = cq->c.soft.cq_comps; - cq->c.soft.cq_tail = cq->c.soft.cq_comps; - - /* need to add hard queue to list? 
*/ - if (ucq != NULL) { - hcq = malloc(sizeof(*hcq)); - if (hcq == NULL) { - free(cq->c.soft.cq_comps); - cq->c.hard.cq_cq = ucq; /* restore */ - return -FI_ENOMEM; - } - - hcq->cqh_cq = cq; - hcq->cqh_ucq = ucq; - hcq->cqh_progress = usdf_progress_hard_cq; - - ofi_atomic_initialize32(&hcq->cqh_refcnt, - ofi_atomic_get32(&cq->cq_refcnt)); - TAILQ_INSERT_HEAD(&cq->c.soft.cq_list, hcq, cqh_link); - } - - cq->cq_is_soft = 1; - cq->cq_ops = *soft_ops; - } - return 0; -} - -int usdf_check_empty_soft_cq(struct usdf_cq *cq) -{ - if (cq->c.soft.cq_tail == cq->c.soft.cq_head) - return cq->c.soft.cq_last_op == USDF_SOFT_CQ_READ; - - return 0; -} - -int usdf_check_empty_hard_cq(struct usdf_cq *cq) -{ - struct usd_cq_impl *cqi; - struct cq_desc *cq_desc; - struct cq_desc *base; - uint8_t last_color; - uint8_t current_color; - - cqi = to_cqi(cq->c.hard.cq_cq); - - base = cqi->ucq_desc_ring; - cq_desc = &base[cqi->ucq_next_desc]; - - last_color = cqi->ucq_last_color; - current_color = cq_desc->type_color >> CQ_DESC_COLOR_SHIFT; - - return current_color == last_color; -} - -static int -usdf_cq_process_attr(struct fi_cq_attr *attr, struct usdf_domain *udp) -{ - if (!attr || !udp) - return -FI_EINVAL; - - switch (attr->wait_obj) { - case FI_WAIT_NONE: - case FI_WAIT_UNSPEC: - break; - case FI_WAIT_FD: - case FI_WAIT_SET: - if (!usd_get_cap(udp->dom_dev, USD_CAP_GRP_INTR)) { - USDF_WARN_SYS(CQ, "FD request invalid.\n"); - USDF_WARN_SYS(CQ, "group interrupts not supported.\n"); - return -FI_EINVAL; - } - break; - default: - return -FI_ENOSYS; - } - - /* bound and default size */ - if (attr->size > udp->dom_fabric->fab_dev_attrs->uda_max_cqe) { - return -FI_EINVAL; - } - if (attr->size == 0) { - attr->size = udp->dom_fabric->fab_dev_attrs->uda_max_cqe; - } - - /* default format is FI_CQ_FORMAT_CONTEXT */ - if (attr->format == FI_CQ_FORMAT_UNSPEC) { - - attr->format = FI_CQ_FORMAT_CONTEXT; - } - return 0; -} - -static int usdf_cq_fd_set_nonblock(int fd) -{ - int flags; - - flags = 
fcntl(fd, F_GETFL, 0); - if (flags == -1) { - USDF_WARN_SYS(CQ, "fcntl getfl failed[%d]\n", errno); - return -errno; - } - - if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0) { - USDF_WARN_SYS(CQ, "fcntl setfl failed[%d]\n", errno); - return -errno; - } - - return FI_SUCCESS; -} - -static int usdf_cq_create_fd(struct usdf_cq *cq) -{ - int ret; - - ret = usd_ib_cmd_create_comp_channel(cq->cq_domain->dom_dev, - &cq->object.fd); - if (ret) { - USDF_WARN_SYS(CQ, "failed to create comp channel\n"); - return -FI_EINVAL; - } - - USDF_DBG_SYS(CQ, "successfully created comp channel with fd %d\n", - cq->object.fd); - - /* Going to need this assuming edge-triggered semantics. - */ - return usdf_cq_fd_set_nonblock(cq->object.fd); -} - -int usdf_cq_trywait(struct fid *fcq) -{ - struct usdf_cq *cq; - struct usdf_fabric *fab; - uint64_t ev; - int empty; - int ret; - - cq = cq_fidtou(fcq); - fab = cq->cq_domain->dom_fabric; - - switch (cq->cq_attr.wait_obj) { - case FI_WAIT_UNSPEC: - return FI_SUCCESS; - case FI_WAIT_FD: - case FI_WAIT_SET: - break; - default: - USDF_WARN_SYS(CQ, "unsupported wait object type\n"); - return -FI_EINVAL; - } - - while (1) { - ret = read(cq->object.fd, &ev, sizeof(ev)); - if (ret == 0) { - USDF_WARN_SYS(CQ, - "FD read returned 0, is it closed?\n"); - return -FI_EINVAL; - } - - if (ret < 0) { - if (errno == EAGAIN) - break; - else - return -errno; - } - } - - cq->cq_waiting = true; - ofi_atomic_inc32(&fab->num_blocked_waiting); - ret = usdf_fabric_wake_thread(fab); - if (ret) { - USDF_DBG_SYS(FABRIC, "error while waking progress thread\n"); - ofi_atomic_dec32(&fab->num_blocked_waiting); - } - - if (cq->cq_is_soft) { - empty = usdf_check_empty_soft_cq(cq); - } else { - usd_poll_req_notify(cq->c.hard.cq_cq); - empty = usdf_check_empty_hard_cq(cq); - } - - if (empty) - return FI_SUCCESS; - - return -FI_EAGAIN; -} - - -static int usdf_cq_bind_wait(struct usdf_cq *cq) -{ - int ret; - struct usdf_wait *wait_priv; - - if (!cq->cq_attr.wait_set) { - 
USDF_DBG_SYS(CQ, "can't bind to non-existent wait set\n"); - return -FI_EINVAL; - } - - /* Wait set ref count doesn't need to be incremented here since it was - * already incremented during CQ open. It's incremented in CQ open - * because the CQ isn't actually created until bind time, and we want - * to make sure that the wait object is not closed in between open and - * bind. - */ - wait_priv = wait_ftou(cq->cq_attr.wait_set); - - ret = fid_list_insert(&wait_priv->list, &wait_priv->lock, - &cq->cq_fid.fid); - if (ret) { - USDF_WARN_SYS(CQ, - "failed to associate cq with wait fid list\n"); - return ret; - } - - ret = ofi_epoll_add(wait_priv->object.epfd, cq->object.fd, - OFI_EPOLL_IN, cq); - if (ret) { - USDF_WARN_SYS(CQ, "failed to associate FD with wait set\n"); - goto err; - } - - USDF_DBG_SYS(CQ, "associated CQ FD %d with epoll FD %d using fid %p\n", - cq->object.fd, wait_priv->object.epfd, &cq->cq_fid.fid); - - return ret; - -err: - fid_list_remove(&wait_priv->list, &wait_priv->lock, &cq->cq_fid.fid); - return ret; -} - -/* If cq->cq_attr.wait_obj == (FI_WAIT_FD | FI_WAIT_SET), then use an FD with - * the CQ. If create_fd evaluates to true, then it will create a hardware - * completion channel. - * - * If create_fd does not evaluate to true, then it is assumed that a valid file - * descriptor is available in cq->object.fd. - */ -int usdf_cq_create_cq(struct usdf_cq *cq, struct usd_cq **ucq, int create_fd) -{ - int ret; - struct usd_cq_init_attr attr = {0}; - - if (!cq || !cq->cq_domain || !cq->cq_domain->dom_dev) { - USDF_DBG_SYS(CQ, "Invalid input.\n"); - return -FI_EINVAL; - } - - attr.num_entries = cq->cq_attr.size; - attr.comp_fd = -1; - - /* For hard queues we will need to create an FD for CQs configured to - * use both wait sets and FDs. For a wait set this FD will get added to - * the epoll structure used by the waitset. 
- * - * For soft queues (emulated endpoints) we will not be creating an FD, - * but will need to set the appropriate functions and bind to the wait - * object, if any. - */ - if ((cq->cq_attr.wait_obj == FI_WAIT_FD) || - (cq->cq_attr.wait_obj == FI_WAIT_SET)) { - cq->cq_ops.sread = usdf_cq_sread_fd; - - if (create_fd) { - ret = usdf_cq_create_fd(cq); - if (ret) - return ret; - - attr.comp_fd = cq->object.fd; - - /* usd_create_cq will only set - * USNIC_CQ_COMP_SIGNAL_VERBS if an ibv_cq is present, - * but we don't have one. Just shove the cq in. - */ - attr.ibv_cq = &ucq; - } - - if (cq->cq_attr.wait_obj == FI_WAIT_SET) { - cq->cq_ops.sread = fi_no_cq_sread; - ret = usdf_cq_bind_wait(cq); - if (ret) - return ret; - } - } - - ret = usd_create_cq(cq->cq_domain->dom_dev, &attr, ucq); - if (ret && cq->cq_attr.wait_obj == FI_WAIT_SET) - usdf_cq_unbind_wait(cq); - return ret; -} - -int -usdf_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, - struct fid_cq **cq_o, void *context) -{ - struct usdf_cq *cq; - struct usdf_domain *udp; - struct usdf_wait *wait_priv; - int ret; - - USDF_TRACE_SYS(CQ, "\n"); - - udp = dom_ftou(domain); - ret = usdf_cq_process_attr(attr, udp); - if (ret != 0) { - return ret; - } - - cq = calloc(1, sizeof(*cq)); - if (cq == NULL) { - return -FI_ENOMEM; - } - - /* Do this here because we don't actually create the CQ until bind - * time. At open time the CQ should be associated with the wait set - * using the ref count so the app can't delete the wait set out from - * under the CQ. 
- */ - if (attr->wait_obj == FI_WAIT_SET) { - wait_priv = wait_ftou(attr->wait_set); - ofi_atomic_inc32(&wait_priv->wait_refcnt); - } - - cq->object.fd = -1; - cq->cq_domain = udp; - cq->cq_fid.fid.fclass = FI_CLASS_CQ; - cq->cq_fid.fid.context = context; - cq->cq_fid.fid.ops = &usdf_cq_fi_ops; - ofi_atomic_initialize32(&cq->cq_refcnt, 0); - - switch (attr->format) { - case FI_CQ_FORMAT_CONTEXT: - cq->cq_ops = usdf_cq_context_ops; - break; - case FI_CQ_FORMAT_MSG: - cq->cq_ops = usdf_cq_msg_ops; - break; - case FI_CQ_FORMAT_DATA: - cq->cq_ops = usdf_cq_data_ops; - break; - default: - ret = -FI_ENOSYS; - goto fail; - } - - cq->cq_fid.ops = &cq->cq_ops; - - cq->cq_attr = *attr; - *cq_o = &cq->cq_fid; - return 0; - -fail: - if (cq != NULL) { - if (cq->c.hard.cq_cq != NULL) { - usd_destroy_cq(cq->c.hard.cq_cq); - } - free(cq); - } - return ret; -} diff --git a/prov/usnic/src/usdf_cq.h b/prov/usnic/src/usdf_cq.h deleted file mode 100644 index bad6d742668..00000000000 --- a/prov/usnic/src/usdf_cq.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2014-2016, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ -#ifndef _USDF_CQ_H_ -#define _USDF_CQ_H_ - -/* exponential backoff settings for fi_cq_sread */ -#define SREAD_EXP_BASE 2 -#define SREAD_INIT_SLEEP_TIME_US 1 -#define SREAD_MAX_SLEEP_TIME_US 5000 - -int usdf_cq_make_soft(struct usdf_cq *cq); -int usdf_cq_create_cq(struct usdf_cq *cq, struct usd_cq **ucq, int create_fd); -int usdf_check_empty_hard_cq(struct usdf_cq *cq); -int usdf_check_empty_soft_cq(struct usdf_cq *cq); -int usdf_cq_trywait(struct fid *fcq); - -void usdf_progress_hard_cq(struct usdf_cq_hard *hcq); - -void usdf_cq_post_soft(struct usdf_cq_hard *hcq, void *context, - size_t len, int prov_errno, uint64_t flags); - -#endif /* _USDF_CQ_H_ */ diff --git a/prov/usnic/src/usdf_dgram.c b/prov/usnic/src/usdf_dgram.c deleted file mode 100644 index 905d94be6af..00000000000 --- a/prov/usnic/src/usdf_dgram.c +++ /dev/null @@ -1,805 +0,0 @@ -/* - * Copyright (c) 2014-2016, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include "ofi.h" - -#include "usd.h" -#include "usd_post.h" - -#include "usdf.h" -#include "usdf_dgram.h" -#include "usdf_av.h" - -static inline size_t _usdf_iov_len(const struct iovec *iov, size_t count) -{ - size_t len; - size_t i; - - for (i = 0, len = 0; i < count; i++) - len += iov[i].iov_len; - - return len; -} - -static inline struct usd_udp_hdr *_usdf_find_hdr(struct usd_wq *wq) -{ - uint8_t *copybuf; - - copybuf = wq->uwq_copybuf + (wq->uwq_post_index * USD_SEND_MAX_COPY); - - return (struct usd_udp_hdr *) copybuf; -} - -static inline void _usdf_adjust_hdr(struct usd_udp_hdr *hdr, - struct usd_qp_impl *qp, size_t len) -{ - hdr->uh_ip.tot_len = htons(len + sizeof(struct usd_udp_hdr) - - sizeof(struct ether_header)); - hdr->uh_udp.len = htons(len + sizeof(struct usd_udp_hdr) - - sizeof(struct ether_header) - - sizeof(struct iphdr)); - hdr->uh_udp.source = - qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port; -} - -static inline void _usdf_adjust_prefix_hdr(struct usd_udp_hdr *hdr, - struct usd_qp_impl *qp, size_t len, size_t padding) -{ - - hdr->uh_ip.tot_len = htons(len - padding - sizeof(struct ether_header)); - hdr->uh_udp.len = htons(len - padding - sizeof(struct ether_header) - - sizeof(struct iphdr)); - hdr->uh_udp.source = - qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port; -} - -static inline void _usdf_adjust_post_info(struct usd_wq *wq, uint32_t last_post, - void *context, size_t len) -{ - struct usd_wq_post_info *info; - - info = &wq->uwq_post_info[last_post]; - info->wp_context = context; - info->wp_len = len; -} - -ssize_t -usdf_dgram_recv(struct fid_ep *fep, void *buf, size_t len, - void *desc, fi_addr_t src_addr, void *context) -{ - struct usdf_ep *ep; - struct usd_qp_impl *qp; - struct usd_recv_desc rxd; - uint32_t index; - - ep = ep_ftou(fep); 
- qp = to_qpi(ep->e.dg.ep_qp); - - index = qp->uq_rq.urq_post_index; - rxd.urd_context = context; - rxd.urd_iov[0].iov_base = (uint8_t *)ep->e.dg.ep_hdr_buf + - (index * USDF_HDR_BUF_ENTRY) + - (USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr)); - rxd.urd_iov[0].iov_len = sizeof(struct usd_udp_hdr); - rxd.urd_iov[1].iov_base = buf; - rxd.urd_iov[1].iov_len = len; - rxd.urd_iov_cnt = 2; - rxd.urd_next = NULL; - - ep->e.dg.ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base; - index = (index + 1) & qp->uq_rq.urq_post_index_mask; - ep->e.dg.ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base; - - return usd_post_recv(ep->e.dg.ep_qp, &rxd); -} - -ssize_t -usdf_dgram_recvv(struct fid_ep *fep, const struct iovec *iov, void **desc, - size_t count, fi_addr_t src_addr, void *context) -{ - struct usdf_ep *ep; - struct usd_recv_desc rxd; - struct usd_qp_impl *qp; - uint32_t index; - size_t i; - - ep = ep_ftou(fep); - qp = to_qpi(ep->e.dg.ep_qp); - - rxd.urd_context = context; - rxd.urd_iov[0].iov_base = ((uint8_t *)ep->e.dg.ep_hdr_buf) + - qp->uq_rq.urq_post_index * USDF_HDR_BUF_ENTRY; - rxd.urd_iov[0].iov_len = sizeof(struct usd_udp_hdr); - memcpy(&rxd.urd_iov[1], iov, sizeof(*iov) * count); - rxd.urd_iov_cnt = count + 1; - rxd.urd_next = NULL; - - index = qp->uq_rq.urq_post_index; - for (i = 0; i < count; ++i) { - ep->e.dg.ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base; - index = (index + 1) & qp->uq_rq.urq_post_index_mask; - } - - return usd_post_recv(ep->e.dg.ep_qp, &rxd); -} - -ssize_t -usdf_dgram_recvmsg(struct fid_ep *fep, const struct fi_msg *msg, uint64_t flags) -{ - struct usdf_ep *ep; - struct usd_qp_impl *qp; - struct usd_rq *rq; - struct vnic_rq *vrq; - struct rq_enet_desc *desc; - const struct iovec *iovp; - uint8_t *hdr_ptr; - uint32_t index; - unsigned i; - - ep = ep_ftou(fep); - qp = to_qpi(ep->e.dg.ep_qp); - rq = &qp->uq_rq; - vrq = &rq->urq_vnic_rq; - desc = rq->urq_next_desc; - index = rq->urq_post_index; - - iovp = msg->msg_iov; - rq->urq_context[index] = msg->context; - 
hdr_ptr = ((uint8_t *)ep->e.dg.ep_hdr_buf) + - (index * USDF_HDR_BUF_ENTRY); - rq_enet_desc_enc(desc, (dma_addr_t) hdr_ptr, - RQ_ENET_TYPE_ONLY_SOP, sizeof(struct usd_udp_hdr)); - ep->e.dg.ep_hdr_ptr[index] = (struct usd_udp_hdr *) hdr_ptr; - - index = (index + 1) & rq->urq_post_index_mask; - desc = (struct rq_enet_desc *) - ((uintptr_t)rq->urq_desc_ring + (index << 4)); - - for (i = 0; i < msg->iov_count; ++i) { - rq->urq_context[index] = msg->context; - rq_enet_desc_enc(desc, (dma_addr_t) iovp[i].iov_base, - RQ_ENET_TYPE_NOT_SOP, iovp[i].iov_len); - ep->e.dg.ep_hdr_ptr[index] = (struct usd_udp_hdr *) hdr_ptr; - - index = (index + 1) & rq->urq_post_index_mask; - desc = (struct rq_enet_desc *) - ((uintptr_t)rq->urq_desc_ring + (index << 4)); - } - - if ((flags & FI_MORE) == 0) { - wmb(); - iowrite32(index, &vrq->ctrl->posted_index); - } - - rq->urq_next_desc = desc; - rq->urq_post_index = index; - rq->urq_recv_credits -= msg->iov_count + 1; - - return 0; -} - -ssize_t -usdf_dgram_send(struct fid_ep *fep, const void *buf, size_t len, void *desc, - fi_addr_t dest_addr, void *context) -{ - struct usdf_dest *dest; - struct usdf_ep *ep; - uint32_t flags; - - ep = ep_ftou(fep); - dest = (struct usdf_dest *)(uintptr_t) dest_addr; - flags = (ep->ep_tx_completion) ? 
USD_SF_SIGNAL : 0; - - assert(len <= ep->max_msg_size); - - if (len + sizeof(struct usd_udp_hdr) <= USD_SEND_MAX_COPY) { - return usd_post_send_one_copy(ep->e.dg.ep_qp, &dest->ds_dest, - buf, len, flags, - context); - } else if (ep->e.dg.tx_op_flags & FI_INJECT) { - USDF_DBG_SYS(EP_DATA, - "given inject length (%zu) exceeds max inject length (%d)\n", - len + sizeof(struct usd_udp_hdr), - USD_SEND_MAX_COPY); - return -FI_ENOSPC; - } - - return usd_post_send_one(ep->e.dg.ep_qp, &dest->ds_dest, buf, len, - flags, context); -} - -static ssize_t -_usdf_dgram_send_iov_copy(struct usdf_ep *ep, struct usd_dest *dest, - const struct iovec *iov, size_t count, void *context, - uint8_t cq_entry) -{ - struct usd_wq *wq; - struct usd_qp_impl *qp; - struct usd_udp_hdr *hdr; - uint32_t last_post; - size_t len; - unsigned i; - - qp = to_qpi(ep->e.dg.ep_qp); - wq = &qp->uq_wq; - - hdr = _usdf_find_hdr(wq); - memcpy(hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*hdr)); - - len = 0; - for (i = 0; i < count; i++) { - memcpy((char *) hdr + sizeof(*hdr) + len, iov[i].iov_base, - iov[i].iov_len); - len += iov[i].iov_len; - } - - assert(len <= ep->max_msg_size); - - _usdf_adjust_hdr(hdr, qp, len); - - last_post = _usd_post_send_one(wq, hdr, len + sizeof(*hdr), cq_entry); - - _usdf_adjust_post_info(wq, last_post, context, len); - - return 0; -} - -static ssize_t _usdf_dgram_send_iov(struct usdf_ep *ep, struct usd_dest *dest, - const struct iovec *iov, size_t count, void *context, uint8_t - cq_entry) -{ - struct iovec send_iov[USDF_DGRAM_MAX_SGE]; - struct usd_udp_hdr *hdr; - struct usd_qp_impl *qp; - struct usd_wq *wq; - uint32_t last_post; - size_t len; - - qp = to_qpi(ep->e.dg.ep_qp); - wq = &qp->uq_wq; - - len = _usdf_iov_len(iov, count); - hdr = _usdf_find_hdr(wq); - memcpy(hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*hdr)); - _usdf_adjust_hdr(hdr, qp, len); - - assert(len <= ep->max_msg_size); - - send_iov[0].iov_base = hdr; - send_iov[0].iov_len = sizeof(*hdr); - memcpy(&send_iov[1], iov, 
sizeof(struct iovec) * count); - - last_post = _usd_post_send_iov(wq, send_iov, count + 1, - cq_entry); - _usdf_adjust_post_info(wq, last_post, context, len); - - return FI_SUCCESS; -} - -ssize_t -usdf_dgram_sendv(struct fid_ep *fep, const struct iovec *iov, void **desc, - size_t count, fi_addr_t dest_addr, void *context) -{ - struct usd_dest *dest; - struct usdf_ep *ep; - size_t len; - - ep = ep_ftou(fep); - len = sizeof(struct usd_udp_hdr); - dest = (struct usd_dest *)(uintptr_t) dest_addr; - - len += _usdf_iov_len(iov, count); - assert(len <= ep->max_msg_size); - - if (len <= USD_SEND_MAX_COPY) { - return _usdf_dgram_send_iov_copy(ep, dest, iov, count, context, - ep->ep_tx_completion); - } else if (ep->e.dg.tx_op_flags & FI_INJECT) { - USDF_DBG_SYS(EP_DATA, - "given inject length (%zu) exceeds max inject length (%d)\n", - len, USD_SEND_MAX_COPY); - return -FI_ENOSPC; - } - - if (count > ep->e.dg.tx_iov_limit) { - USDF_DBG_SYS(EP_DATA, "max iov count exceeded: %zu\n", count); - return -FI_ENOSPC; - } - - return _usdf_dgram_send_iov(ep, dest, iov, count, context, - ep->ep_tx_completion); -} - -ssize_t -usdf_dgram_sendmsg(struct fid_ep *fep, const struct fi_msg *msg, uint64_t flags) -{ - struct usd_dest *dest; - struct usdf_ep *ep; - uint8_t completion; - size_t len; - - ep = ep_ftou(fep); - len = sizeof(struct usd_udp_hdr); - dest = (struct usd_dest *)(uintptr_t) msg->addr; - completion = ep->ep_tx_dflt_signal_comp || (flags & FI_COMPLETION); - - len += _usdf_iov_len(msg->msg_iov, msg->iov_count); - assert(len <= ep->max_msg_size); - - if (len <= USD_SEND_MAX_COPY) { - return _usdf_dgram_send_iov_copy(ep, dest, msg->msg_iov, - msg->iov_count, - msg->context, - completion); - } else if (flags & FI_INJECT) { - USDF_DBG_SYS(EP_DATA, - "given inject length (%zu) exceeds max inject length (%d)\n", - len, USD_SEND_MAX_COPY); - return -FI_ENOSPC; - } - - if (msg->iov_count > ep->e.dg.tx_iov_limit) { - USDF_DBG_SYS(EP_DATA, "max iov count exceeded: %zu\n", - 
msg->iov_count); - return -FI_ENOSPC; - } - - return _usdf_dgram_send_iov(ep, dest, msg->msg_iov, msg->iov_count, - msg->context, completion); -} - -ssize_t -usdf_dgram_inject(struct fid_ep *fep, const void *buf, size_t len, - fi_addr_t dest_addr) -{ - struct usdf_dest *dest; - struct usdf_ep *ep; - - ep = ep_ftou(fep); - dest = (struct usdf_dest *)(uintptr_t) dest_addr; - - if (len + sizeof(struct usd_udp_hdr) > USD_SEND_MAX_COPY) { - USDF_DBG_SYS(EP_DATA, - "given inject length (%zu) exceeds max inject length (%d)\n", - len + sizeof(struct usd_udp_hdr), - USD_SEND_MAX_COPY); - return -FI_ENOSPC; - } - - /* - * fi_inject never generates a completion - */ - return usd_post_send_one_copy(ep->e.dg.ep_qp, &dest->ds_dest, buf, len, - 0, NULL); -} - -ssize_t usdf_dgram_prefix_inject(struct fid_ep *fep, const void *buf, - size_t len, fi_addr_t dest_addr) -{ - return usdf_dgram_inject(fep, ((uint8_t *)buf) + USDF_HDR_BUF_ENTRY, - len - USDF_HDR_BUF_ENTRY, dest_addr); -} - -ssize_t usdf_dgram_rx_size_left(struct fid_ep *fep) -{ - struct usdf_ep *ep; - - USDF_DBG_SYS(EP_DATA, "\n"); - - if (fep == NULL) - return -FI_EINVAL; - - ep = ep_ftou(fep); - - if (!(ep->flags & USDF_EP_ENABLED)) - return -FI_EOPBADSTATE; - - return usd_get_recv_credits(ep->e.dg.ep_qp) / - (ep->e.dg.rx_iov_limit + 1); -} - -ssize_t usdf_dgram_tx_size_left(struct fid_ep *fep) -{ - struct usdf_ep *ep; - - USDF_DBG_SYS(EP_DATA, "\n"); - - if (fep == NULL) - return -FI_EINVAL; - - ep = ep_ftou(fep); - - if (!(ep->flags & USDF_EP_ENABLED)) - return -FI_EOPBADSTATE; - - return usd_get_send_credits(ep->e.dg.ep_qp) / - (ep->e.dg.tx_iov_limit + 1); -} - -/* - * Versions that rely on user to reserve space for header at start of buffer - */ -ssize_t -usdf_dgram_prefix_recv(struct fid_ep *fep, void *buf, size_t len, - void *desc, fi_addr_t src_addr, void *context) -{ - struct usdf_ep *ep; - struct usd_qp_impl *qp; - struct usd_recv_desc rxd; - uint32_t index; - - ep = ep_ftou(fep); - qp = to_qpi(ep->e.dg.ep_qp); 
- - index = qp->uq_rq.urq_post_index; - rxd.urd_context = context; - rxd.urd_iov[0].iov_base = (uint8_t *)buf + - USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr); - rxd.urd_iov[0].iov_len = len - - (USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr)); - rxd.urd_iov_cnt = 1; - rxd.urd_next = NULL; - - ep->e.dg.ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base; - - return usd_post_recv(ep->e.dg.ep_qp, &rxd); -} - -ssize_t -usdf_dgram_prefix_recvv(struct fid_ep *fep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t src_addr, void *context) -{ - struct usdf_ep *ep; - struct usd_recv_desc rxd; - struct usd_qp_impl *qp; - uint32_t index; - size_t i; - - ep = ep_ftou(fep); - qp = to_qpi(ep->e.dg.ep_qp); - - rxd.urd_context = context; - memcpy(&rxd.urd_iov[0], iov, sizeof(*iov) * count); - rxd.urd_iov[0].iov_base = (uint8_t *)rxd.urd_iov[0].iov_base + - USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr); - rxd.urd_iov[0].iov_len -= (USDF_HDR_BUF_ENTRY - - sizeof(struct usd_udp_hdr)); - - rxd.urd_iov_cnt = count; - rxd.urd_next = NULL; - - index = qp->uq_rq.urq_post_index; - for (i = 0; i < count; ++i) { - ep->e.dg.ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base; - index = (index + 1) & qp->uq_rq.urq_post_index_mask; - } - - return usd_post_recv(ep->e.dg.ep_qp, &rxd); -} - -ssize_t -usdf_dgram_prefix_recvmsg(struct fid_ep *fep, const struct fi_msg *msg, uint64_t flags) -{ - struct usdf_ep *ep; - struct usd_qp_impl *qp; - struct usd_rq *rq; - struct vnic_rq *vrq; - struct rq_enet_desc *desc; - uint8_t *hdr_ptr; - const struct iovec *iovp; - uint32_t index; - unsigned i; - - ep = ep_ftou(fep); - qp = to_qpi(ep->e.dg.ep_qp); - rq = &qp->uq_rq; - vrq = &rq->urq_vnic_rq; - desc = rq->urq_next_desc; - index = rq->urq_post_index; - - iovp = msg->msg_iov; - rq->urq_context[index] = msg->context; - hdr_ptr = ((uint8_t *)iovp[0].iov_base) + - (USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr)); - rq_enet_desc_enc(desc, (dma_addr_t) hdr_ptr, - RQ_ENET_TYPE_ONLY_SOP, - iovp[0].iov_len - - 
(USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr))); - ep->e.dg.ep_hdr_ptr[index] = (struct usd_udp_hdr *) hdr_ptr; - - index = (index+1) & rq->urq_post_index_mask; - desc = (struct rq_enet_desc *) ((uintptr_t)rq->urq_desc_ring - + (index<<4)); - - for (i = 1; i < msg->iov_count; ++i) { - rq->urq_context[index] = msg->context; - rq_enet_desc_enc(desc, (dma_addr_t) iovp[i].iov_base, - RQ_ENET_TYPE_NOT_SOP, iovp[i].iov_len); - ep->e.dg.ep_hdr_ptr[index] = (struct usd_udp_hdr *) hdr_ptr; - - index = (index+1) & rq->urq_post_index_mask; - desc = (struct rq_enet_desc *) ((uintptr_t)rq->urq_desc_ring - + (index<<4)); - } - - if ((flags & FI_MORE) == 0) { - wmb(); - iowrite32(index, &vrq->ctrl->posted_index); - } - - rq->urq_next_desc = desc; - rq->urq_post_index = index; - rq->urq_recv_credits -= msg->iov_count; - - return 0; -} - -ssize_t -usdf_dgram_prefix_send(struct fid_ep *fep, const void *buf, size_t len, - void *desc, fi_addr_t dest_addr, void *context) -{ - struct usd_udp_hdr *hdr; - struct usd_qp_impl *qp; - struct usdf_dest *dest; - struct usdf_ep *ep; - struct usd_wq *wq; - uint32_t last_post; - uint32_t flags; - size_t padding; - - ep = ep_ftou(fep); - dest = (struct usdf_dest *)(uintptr_t) dest_addr; - padding = USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr); - flags = (ep->ep_tx_completion) ? 
USD_SF_SIGNAL : 0; - - assert(len <= ep->max_msg_size); - - if (ep->e.dg.tx_op_flags & FI_INJECT) { - if ((len - padding) > USD_SEND_MAX_COPY) { - USDF_DBG_SYS(EP_DATA, - "given inject length (%zu) exceeds max inject length (%d)\n", - len, USD_SEND_MAX_COPY); - return -FI_ENOSPC; - } - - return usd_post_send_one_copy(ep->e.dg.ep_qp, &dest->ds_dest, - ((uint8_t *)buf) + USDF_HDR_BUF_ENTRY, len - - USDF_HDR_BUF_ENTRY, flags, - context); - } - - qp = to_qpi(ep->e.dg.ep_qp); - wq = &qp->uq_wq; - - hdr = (struct usd_udp_hdr *) ((char *) buf + padding); - memcpy(hdr, &dest->ds_dest.ds_dest.ds_udp.u_hdr, sizeof(*hdr)); - - _usdf_adjust_prefix_hdr(hdr, qp, len, padding); - - last_post = _usd_post_send_one(wq, hdr, len - padding, - ep->ep_tx_completion); - - _usdf_adjust_post_info(wq, last_post, context, len - USDF_HDR_BUF_ENTRY); - - return FI_SUCCESS; -} - -static ssize_t -_usdf_dgram_send_iov_prefix(struct usdf_ep *ep, - struct usd_dest *dest, const struct iovec *iov, - size_t count, void *context, uint8_t cq_entry) -{ - struct iovec send_iov[USDF_DGRAM_MAX_SGE]; - struct usd_udp_hdr *hdr; - struct usd_qp_impl *qp; - uint32_t last_post; - struct usd_wq *wq; - size_t padding; - size_t len; - - qp = to_qpi(ep->e.dg.ep_qp); - wq = &qp->uq_wq; - - len = _usdf_iov_len(iov, count); - padding = USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr); - - assert(len <= ep->max_msg_size); - - hdr = (struct usd_udp_hdr *) ((char *) iov[0].iov_base + - padding); - memcpy(hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*hdr)); - - _usdf_adjust_prefix_hdr(hdr, qp, len, padding); - - memcpy(send_iov, iov, sizeof(struct iovec) * count); - send_iov[0].iov_base = hdr; - send_iov[0].iov_len -= padding; - - last_post = _usd_post_send_iov(wq, send_iov, count, cq_entry); - _usdf_adjust_post_info(wq, last_post, context, len - USDF_HDR_BUF_ENTRY); - - return FI_SUCCESS; -} - -ssize_t -usdf_dgram_prefix_sendv(struct fid_ep *fep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t dest_addr, void 
*context) -{ - struct iovec send_iov[USDF_DGRAM_MAX_SGE]; - struct usd_dest *dest; - struct usdf_ep *ep; - size_t len; - size_t padding; - - ep = ep_ftou(fep); - dest = (struct usd_dest *)(uintptr_t) dest_addr; - len = _usdf_iov_len(iov, count); - padding = USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr); - - assert(len <= ep->max_msg_size); - - if (count > ep->e.dg.tx_iov_limit) { - USDF_DBG_SYS(EP_DATA, "max iov count exceeded: %zu\n", count); - return -FI_ENOSPC; - } - - if ((len - padding) <= USD_SEND_MAX_COPY) { - /* _usdf_dgram_send_iov_copy isn't prefix aware and allocates - * its own prefix. reorganize iov[0] base to point to data and - * len to reflect data length. - */ - memcpy(send_iov, iov, sizeof(struct iovec) * count); - send_iov[0].iov_base = ((char *) send_iov[0].iov_base + - USDF_HDR_BUF_ENTRY); - send_iov[0].iov_len -= USDF_HDR_BUF_ENTRY; - - return _usdf_dgram_send_iov_copy(ep, dest, send_iov, count, - context, ep->ep_tx_completion); - } else if (ep->e.dg.tx_op_flags & FI_INJECT) { - USDF_DBG_SYS(EP_DATA, - "given inject length (%zu) exceeds max inject length (%d)\n", - len, USD_SEND_MAX_COPY); - return -FI_ENOSPC; - } - - return _usdf_dgram_send_iov_prefix(ep, dest, iov, count, context, - ep->ep_tx_completion); -} - -ssize_t -usdf_dgram_prefix_sendmsg(struct fid_ep *fep, const struct fi_msg *msg, - uint64_t flags) -{ - struct iovec send_iov[USDF_DGRAM_MAX_SGE]; - struct usd_dest *dest; - struct usdf_ep *ep; - uint8_t completion; - size_t len; - size_t padding; - - ep = ep_ftou(fep); - dest = (struct usd_dest *)(uintptr_t) msg->addr; - len = _usdf_iov_len(msg->msg_iov, msg->iov_count); - completion = ep->ep_tx_dflt_signal_comp || (flags & FI_COMPLETION); - padding = USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr); - - assert(len <= ep->max_msg_size); - - if (msg->iov_count > ep->e.dg.tx_iov_limit) { - USDF_DBG_SYS(EP_DATA, "max iov count exceeded: %zu\n", - msg->iov_count); - return -FI_ENOSPC; - } - - if ((len - padding) <= USD_SEND_MAX_COPY) { 
- /* _usdf_dgram_send_iov_copy isn't prefix aware and allocates - * its own prefix. reorganize iov[0] base to point to data and - * len to reflect data length. - */ - memcpy(send_iov, msg->msg_iov, - sizeof(struct iovec) * msg->iov_count); - send_iov[0].iov_base = ((char *) send_iov[0].iov_base + - USDF_HDR_BUF_ENTRY); - send_iov[0].iov_len -= USDF_HDR_BUF_ENTRY; - - return _usdf_dgram_send_iov_copy(ep, dest, send_iov, - msg->iov_count, msg->context, completion); - } else if (flags & FI_INJECT) { - USDF_DBG_SYS(EP_DATA, - "given inject length (%zu) exceeds max inject length (%d)\n", - len, USD_SEND_MAX_COPY); - return -FI_ENOSPC; - } - - return _usdf_dgram_send_iov_prefix(ep, dest, msg->msg_iov, - msg->iov_count, msg->context, completion); -} - -ssize_t usdf_dgram_prefix_rx_size_left(struct fid_ep *fep) -{ - struct usdf_ep *ep; - - USDF_DBG_SYS(EP_DATA, "\n"); - - if (fep == NULL) - return -FI_EINVAL; - - ep = ep_ftou(fep); - - if (!(ep->flags & USDF_EP_ENABLED)) - return -FI_EOPBADSTATE; - - /* prefix_recvv can post up to iov_limit descriptors - */ - return (usd_get_recv_credits(ep->e.dg.ep_qp) / ep->e.dg.rx_iov_limit); -} - -ssize_t usdf_dgram_prefix_tx_size_left(struct fid_ep *fep) -{ - struct usdf_ep *ep; - - USDF_DBG_SYS(EP_DATA, "\n"); - - if (fep == NULL) - return -FI_EINVAL; - - ep = ep_ftou(fep); - - if (!(ep->flags & USDF_EP_ENABLED)) - return -FI_EOPBADSTATE; - - /* prefix_sendvcan post up to iov_limit descriptors - */ - return (usd_get_send_credits(ep->e.dg.ep_qp) / ep->e.dg.tx_iov_limit); -} diff --git a/prov/usnic/src/usdf_dgram.h b/prov/usnic/src/usdf_dgram.h deleted file mode 100644 index 8c3b6a54182..00000000000 --- a/prov/usnic/src/usdf_dgram.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 2014-2017, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ -#ifndef _USDF_DGRAM_H_ -#define _USDF_DGRAM_H_ - -#define USDF_DGRAM_MAX_SGE 8 -#define USDF_DGRAM_DFLT_SGE 4 - -#define USDF_DGRAM_CAPS (FI_MSG | FI_SOURCE | FI_SEND | FI_RECV) - -#define USDF_DGRAM_SUPP_MODE (FI_LOCAL_MR | FI_MSG_PREFIX) - -#define USDF_DGRAM_MSG_ORDER (FI_ORDER_NONE) -#define USDF_DGRAM_COMP_ORDER (FI_ORDER_NONE) -#define USDF_DGRAM_INJECT_SIZE \ - (USD_SEND_MAX_COPY - sizeof(struct usd_udp_hdr)) -#define USDF_DGRAM_SUPP_SENDMSG_FLAGS \ - (FI_INJECT | FI_COMPLETION | FI_INJECT_COMPLETE | FI_TRANSMIT_COMPLETE) -#define USDF_DGRAM_SUPP_RECVMSG_FLAGS (FI_COMPLETION) -#define USDF_DGRAM_IOV_LIMIT (USDF_DGRAM_DFLT_SGE) -#define USDF_DGRAM_RMA_IOV_LIMIT 0 -#define USDF_DGRAM_CNTR_CNT 0 -#define USDF_DGRAM_MR_IOV_LIMIT (USDF_MR_IOV_LIMIT) -#define USDF_DGRAM_MR_CNT (USDF_MR_CNT) - - -int usdf_dgram_fill_rx_attr(uint32_t version, const struct fi_info *hints, - struct fi_info *fi, struct usd_device_attrs *dap); -int usdf_dgram_fill_tx_attr(uint32_t version, const struct fi_info *hints, - struct fi_info *fi, struct usd_device_attrs *dap); -int usdf_dgram_fill_dom_attr(uint32_t version, const struct fi_info *hints, - struct fi_info *fi, struct usd_device_attrs *dap); -int usdf_dgram_fill_ep_attr(uint32_t version, const struct fi_info *hints, - struct fi_info *fi, struct usd_device_attrs *dap); - -/* fi_ops_msg for DGRAM */ -ssize_t usdf_dgram_recv(struct fid_ep *ep, void *buf, size_t len, void *desc, - fi_addr_t src_addr, void *context); -ssize_t usdf_dgram_recvv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t src_addr, void *context); -ssize_t usdf_dgram_recvmsg(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags); -ssize_t usdf_dgram_send(struct fid_ep *ep, const void *buf, size_t len, - void *desc, fi_addr_t dest_addr, void *context); -ssize_t usdf_dgram_sendv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t dest_addr, void *context); -ssize_t usdf_dgram_sendmsg(struct 
fid_ep *ep, const struct fi_msg *msg, - uint64_t flags); -ssize_t usdf_dgram_inject(struct fid_ep *ep, const void *buf, size_t len, - fi_addr_t dest_addr); -ssize_t usdf_dgram_rx_size_left(struct fid_ep *ep); -ssize_t usdf_dgram_tx_size_left(struct fid_ep *ep); - -ssize_t usdf_dgram_prefix_recv(struct fid_ep *ep, void *buf, size_t len, - void *desc, fi_addr_t src_addr, void *context); -ssize_t usdf_dgram_prefix_recvv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t src_addr, void *context); -ssize_t usdf_dgram_prefix_recvmsg(struct fid_ep *fep, const struct fi_msg *msg, - uint64_t flags); -ssize_t usdf_dgram_prefix_send(struct fid_ep *ep, const void *buf, size_t len, - void *desc, fi_addr_t dest_addr, void *context); -ssize_t usdf_dgram_prefix_sendv(struct fid_ep *fep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t dest_addr, void *context); -ssize_t usdf_dgram_prefix_sendmsg(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags); -ssize_t usdf_dgram_prefix_inject(struct fid_ep *ep, const void *buf, size_t len, - fi_addr_t dest_addr); -ssize_t usdf_dgram_prefix_rx_size_left(struct fid_ep *ep); -ssize_t usdf_dgram_prefix_tx_size_left(struct fid_ep *ep); - -#endif /* _USDF_DGRAM_H_ */ diff --git a/prov/usnic/src/usdf_domain.c b/prov/usnic/src/usdf_domain.c deleted file mode 100644 index fb4aa4caf54..00000000000 --- a/prov/usnic/src/usdf_domain.c +++ /dev/null @@ -1,427 +0,0 @@ -/* - * Copyright (c) 2014-2019, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" - -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include "ofi.h" -#include "ofi_enosys.h" -#include "ofi_util.h" - -#include "usnic_direct.h" -#include "usdf.h" -#include "usdf_timer.h" -#include "usdf_poll.h" -#include "usdf_cm.h" - -static int -usdf_domain_bind(struct fid *fid, struct fid *bfid, uint64_t flags) -{ - struct usdf_domain *udp; - - USDF_TRACE_SYS(DOMAIN, "\n"); - - if (flags & FI_REG_MR) { - USDF_WARN_SYS(DOMAIN, - "FI_REG_MR for EQs is not supported by the usnic provider"); - return -FI_EOPNOTSUPP; - } - - udp = dom_fidtou(fid); - - switch (bfid->fclass) { - case FI_CLASS_EQ: - if (udp->dom_eq != NULL) { - return -FI_EINVAL; - } - udp->dom_eq = eq_fidtou(bfid); - ofi_atomic_inc32(&udp->dom_eq->eq_refcnt); - break; - default: - return -FI_EINVAL; - } - - return 0; -} - -static int -usdf_domain_close(fid_t fid) -{ - struct usdf_domain *udp; - int ret; - - USDF_TRACE_SYS(DOMAIN, "\n"); - - udp = container_of(fid, struct usdf_domain, dom_fid.fid); - if (ofi_atomic_get32(&udp->dom_refcnt) > 0) { - return -FI_EBUSY; - } - - if (udp->dom_dev != NULL) { - ret = usd_close(udp->dom_dev); - if (ret != 0) { - return ret; - } - } - - if (udp->dom_eq != NULL) { - ofi_atomic_dec32(&udp->dom_eq->eq_refcnt); - } - ofi_atomic_dec32(&udp->dom_fabric->fab_refcnt); - LIST_REMOVE(udp, dom_link); - fi_freeinfo(udp->dom_info); - free(udp); - - return 0; -} - -static struct fi_ops usdf_fid_ops = { - .size = sizeof(struct fi_ops), - .close = usdf_domain_close, - .bind = usdf_domain_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open, -}; - -static struct fi_ops_mr usdf_domain_mr_ops = { - .size = sizeof(struct fi_ops_mr), - .reg = usdf_reg_mr, - .regv = usdf_regv_mr, - .regattr = usdf_regattr, -}; - -static struct fi_ops_domain usdf_domain_ops = { - .size = sizeof(struct fi_ops_domain), - .av_open = usdf_av_open, - .cq_open = usdf_cq_open, - .endpoint = 
usdf_endpoint_open, - .scalable_ep = fi_no_scalable_ep, - .cntr_open = fi_no_cntr_open, - .poll_open = usdf_poll_open, - .stx_ctx = fi_no_stx_context, - .srx_ctx = fi_no_srx_context, - .query_atomic = usdf_query_atomic, - .query_collective = fi_no_query_collective, -}; - -int -usdf_domain_open(struct fid_fabric *fabric, struct fi_info *info, - struct fid_domain **domain, void *context) -{ - struct usdf_fabric *fp; - struct usdf_domain *udp; - struct sockaddr_in *sin; - size_t addrlen; - int ret; -#if ENABLE_DEBUG - char requested[INET_ADDRSTRLEN], actual[INET_ADDRSTRLEN]; -#endif - - USDF_TRACE_SYS(DOMAIN, "\n"); - sin = NULL; - - fp = fab_fidtou(fabric); - - if (info->domain_attr != NULL) { - /* No versioning information available here. */ - if (!usdf_domain_checkname(0, fp->fab_dev_attrs, - info->domain_attr->name)) { - USDF_WARN_SYS(DOMAIN, "domain name mismatch\n"); - return -FI_ENODATA; - } - - if (ofi_check_mr_mode( - &usdf_ops, fabric->api_version, - FI_MR_BASIC | FI_MR_ALLOCATED | FI_MR_LOCAL, info)) { - /* the caller ignored our fi_getinfo results */ - USDF_WARN_SYS(DOMAIN, "MR mode (%d) not supported\n", - info->domain_attr->mr_mode); - return -FI_ENODATA; - } - } - - udp = calloc(1, sizeof *udp); - if (udp == NULL) { - USDF_DBG("unable to alloc mem for domain\n"); - ret = -FI_ENOMEM; - goto fail; - } - - USDF_DBG("uda_devname=%s\n", fp->fab_dev_attrs->uda_devname); - - /* - * Make sure address format is good and matches this fabric - */ - switch (info->addr_format) { - case FI_SOCKADDR: - addrlen = sizeof(struct sockaddr); - sin = info->src_addr; - break; - case FI_SOCKADDR_IN: - addrlen = sizeof(struct sockaddr_in); - sin = info->src_addr; - break; - case FI_ADDR_STR: - sin = usdf_format_to_sin(info, info->src_addr); - if (NULL == sin) { - ret = -FI_ENOMEM; - goto fail; - } - goto skip_size_check; - default: - ret = -FI_EINVAL; - goto fail; - } - - if (info->src_addrlen != addrlen) { - ret = -FI_EINVAL; - goto fail; - } - -skip_size_check: - if 
(sin->sin_family != AF_INET || - sin->sin_addr.s_addr != fp->fab_dev_attrs->uda_ipaddr_be) { - USDF_DBG_SYS(DOMAIN, "requested src_addr (%s) != fabric addr (%s)\n", - inet_ntop(AF_INET, &sin->sin_addr.s_addr, - requested, sizeof(requested)), - inet_ntop(AF_INET, &fp->fab_dev_attrs->uda_ipaddr_be, - actual, sizeof(actual))); - - ret = -FI_EINVAL; - usdf_free_sin_if_needed(info, sin); - goto fail; - } - usdf_free_sin_if_needed(info, sin); - - ret = usd_open(fp->fab_dev_attrs->uda_devname, &udp->dom_dev); - if (ret != 0) { - goto fail; - } - - udp->dom_fid.fid.fclass = FI_CLASS_DOMAIN; - udp->dom_fid.fid.context = context; - udp->dom_fid.fid.ops = &usdf_fid_ops; - udp->dom_fid.ops = &usdf_domain_ops; - udp->dom_fid.mr = &usdf_domain_mr_ops; - - ret = pthread_spin_init(&udp->dom_progress_lock, - PTHREAD_PROCESS_PRIVATE); - if (ret != 0) { - ret = -ret; - goto fail; - } - TAILQ_INIT(&udp->dom_tx_ready); - TAILQ_INIT(&udp->dom_hcq_list); - - udp->dom_info = fi_dupinfo(info); - if (udp->dom_info == NULL) { - ret = -FI_ENOMEM; - goto fail; - } - if (udp->dom_info->dest_addr != NULL) { - free(udp->dom_info->dest_addr); - udp->dom_info->dest_addr = NULL; - } - - udp->dom_fabric = fp; - LIST_INSERT_HEAD(&fp->fab_domain_list, udp, dom_link); - ofi_atomic_initialize32(&udp->dom_refcnt, 0); - ofi_atomic_inc32(&fp->fab_refcnt); - - *domain = &udp->dom_fid; - return 0; - -fail: - if (udp != NULL) { - if (udp->dom_info != NULL) { - fi_freeinfo(udp->dom_info); - } - if (udp->dom_dev != NULL) { - usd_close(udp->dom_dev); - } - free(udp); - } - return ret; -} - -/* In pre-1.4, the domain name was NULL. - * - * There used to be elaborate schemes to try to preserve this pre-1.4 - * behavior. In Nov 2019 discussions, however, it was determined that - * we could rationalize classifying this as buggy behavior. - * Specifically: we should just now always return a domain name -- - * even if the requested version is <1.4. 
- * - * This greatly simplifies the logic here, and also greatly simplifies - * layering with the rxd provider. - */ -int usdf_domain_getname(uint32_t version, struct usd_device_attrs *dap, - char **name) -{ - int ret = FI_SUCCESS; - char *buf = NULL; - - buf = strdup(dap->uda_devname); - if (NULL == buf) { - ret = -errno; - USDF_DBG("strdup failed while creating domain name\n"); - } else { - *name = buf; - } - - return ret; -} - -/* Check to see if the name supplied in a hint matches the name of our - * current domain. - * - * In pre-1.4, the domain name was NULL. - * - * There used to be elaborate schemes to try to preserve this pre-1.4 - * behavior. In Nov 2019 discussions, however, it was determined that - * we could rationalize classifying this as buggy behavior. - * Specifically: we should just now always return a domain name -- - * even if the requested version is <1.4. - * - * This greatly simplifies the logic here, and also greatly simplifies - * layering with the rxd provider. - * - * Hence, if a hint was provided, check the domain name (that we now - * always have) against the hint. - */ -bool usdf_domain_checkname(uint32_t version, struct usd_device_attrs *dap, - const char *hint) -{ - char *reference = NULL; - bool valid; - int ret; - - /* If no hint was provided, then by definition, we agree with - * the hint. */ - if (NULL == hint) { - return true; - } - - USDF_DBG("checking domain name: domain name='%s'\n", hint); - - ret = usdf_domain_getname(version, dap, &reference); - if (ret < 0) { - return false; - } - - valid = (strcmp(reference, hint) == 0); - if (!valid) { - USDF_DBG("given hint %s does not match %s -- invalid\n", - hint, reference); - } - - free(reference); - return valid; -} - -/* Query domain's atomic capability. - * We dont support atomic operations, just return EOPNOTSUPP. 
- */ -int usdf_query_atomic(struct fid_domain *domain, enum fi_datatype datatype, - enum fi_op op, struct fi_atomic_attr *attr, uint64_t flags) -{ - return -FI_EOPNOTSUPP; -} - -/* Catch the version changes for domain_attr. */ -int usdf_catch_dom_attr(uint32_t version, const struct fi_info *hints, - struct fi_domain_attr *dom_attr) -{ - /* version 1.5 introduced new bits. If the user asked for older - * version, we can't return these new bits. - */ - if (FI_VERSION_LT(version, FI_VERSION(1, 5))) { - /* We checked mr_mode compatibility before calling - * this function. This means it is safe to return - * 1.4 default mr_mode. - */ - dom_attr->mr_mode = FI_MR_BASIC; - - /* FI_REMOTE_COMM is introduced in 1.5. So don't return it. */ - dom_attr->caps &= ~FI_REMOTE_COMM; - - /* If FI_REMOTE_COMM is given for version < 1.5, fail. */ - if (hints && hints->domain_attr) { - if (hints->domain_attr->caps == FI_REMOTE_COMM) - return -FI_EBADFLAGS; - } - } else { - dom_attr->mr_mode &= ~(FI_MR_BASIC | FI_MR_SCALABLE); - } - - return FI_SUCCESS; -} - -/* Catch the version changes for tx_attr. */ -int usdf_catch_tx_attr(uint32_t version, const struct fi_tx_attr *tx_attr) -{ - /* In version < 1.5, FI_LOCAL_MR is required. */ - if (FI_VERSION_LT(version, FI_VERSION(1, 5))) { - if ((tx_attr->mode & FI_LOCAL_MR) == 0) - return -FI_ENODATA; - } - - return FI_SUCCESS; -} - -/* Catch the version changes for rx_attr. */ -int usdf_catch_rx_attr(uint32_t version, const struct fi_rx_attr *rx_attr) -{ - /* In version < 1.5, FI_LOCAL_MR is required. */ - if (FI_VERSION_LT(version, FI_VERSION(1, 5))) { - if ((rx_attr->mode & FI_LOCAL_MR) == 0) - return -FI_ENODATA; - } - - return FI_SUCCESS; -} diff --git a/prov/usnic/src/usdf_endpoint.c b/prov/usnic/src/usdf_endpoint.c deleted file mode 100644 index 1fa33ce1728..00000000000 --- a/prov/usnic/src/usdf_endpoint.c +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2014-2019, Cisco Systems, Inc. All rights reserved. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include "ofi.h" -#include "ofi_enosys.h" - -#include "usdf.h" -#include "usdf_endpoint.h" -#include "usdf_cm.h" - -int -usdf_endpoint_open(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **ep_o, void *context) -{ - USDF_TRACE_SYS(EP_CTRL, "\n"); - - switch (info->ep_attr->type) { - case FI_EP_DGRAM: - return usdf_ep_dgram_open(domain, info, ep_o, context); - default: - return -FI_ENODEV; - } -} - -int usdf_ep_getopt_connected(fid_t fid, int level, int optname, void *optval, - size_t *optlen) -{ - size_t *cm_size; - size_t dest_size; - - USDF_TRACE_SYS(EP_CTRL, "\n"); - - if (!optval || !optlen) - return -FI_EINVAL; - - if (level != FI_OPT_ENDPOINT) - return -FI_ENOPROTOOPT; - - switch (optname) { - case FI_OPT_CM_DATA_SIZE: - dest_size = *optlen; - *optlen = sizeof(*cm_size); - - if (dest_size < sizeof(*cm_size)) - return -FI_ETOOSMALL; - - cm_size = optval; - *cm_size = USDF_MAX_CONN_DATA; - break; - default: - return -FI_ENOPROTOOPT; - } - - return FI_SUCCESS; -} - -int usdf_ep_getopt_unconnected(fid_t fid, int level, int optname, void *optval, - size_t *optlen) -{ - USDF_TRACE_SYS(EP_CTRL, "\n"); - - return -FI_ENOPROTOOPT; -} - - -int usdf_ep_setopt(fid_t fid, int level, int optname, const void *optval, - size_t optlen) -{ - USDF_TRACE_SYS(EP_CTRL, "\n"); - - return -FI_ENOPROTOOPT; -} diff --git a/prov/usnic/src/usdf_endpoint.h b/prov/usnic/src/usdf_endpoint.h deleted file mode 100644 index 1bbad52869e..00000000000 --- a/prov/usnic/src/usdf_endpoint.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2014-2019, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ -#ifndef _USDF_ENDPOINT_H_ -#define _USDF_ENDPOINT_H_ - -int usdf_ep_port_bind(struct usdf_ep *ep, struct fi_info *info); -int usdf_ep_dgram_open(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **ep, void *context); -int usdf_msg_upd_lcl_addr(struct usdf_ep *ep); - -int usdf_ep_getopt_connected(fid_t fid, int level, int optname, void *optval, - size_t *optlen); -int usdf_ep_getopt_unconnected(fid_t fid, int level, int optname, void *optval, - size_t *optlen); -int usdf_ep_setopt(fid_t fid, int level, int optname, const void *optval, - size_t optlen); - -extern struct fi_ops usdf_ep_ops; - -#endif /* _USDF_ENDPOINT_H_ */ diff --git a/prov/usnic/src/usdf_ep_dgram.c b/prov/usnic/src/usdf_ep_dgram.c deleted file mode 100644 index d260308dbb2..00000000000 --- a/prov/usnic/src/usdf_ep_dgram.c +++ /dev/null @@ -1,943 +0,0 @@ -/* - * Copyright (c) 2014-2019, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include "ofi.h" -#include "ofi_enosys.h" -#include "ofi_util.h" - -#include "usnic_direct.h" -#include "usd.h" -#include "usdf.h" -#include "usdf_endpoint.h" -#include "usdf_dgram.h" -#include "usdf_av.h" -#include "usdf_cq.h" -#include "usdf_cm.h" - -static int -usdf_ep_dgram_enable(struct fid_ep *fep) -{ - struct usdf_ep *ep; - struct usd_filter filt; - struct usd_qp_impl *uqp; - int ret; - - USDF_TRACE_SYS(EP_CTRL, "\n"); - - ep = ep_ftou(fep); - - if (ep->e.dg.ep_wcq == NULL) { - ret = -FI_EOPBADSTATE; - goto fail; - } - if (ep->e.dg.ep_rcq == NULL) { - ret = -FI_EOPBADSTATE; - goto fail; - } - - filt.uf_type = USD_FTY_UDP_SOCK; - filt.uf_filter.uf_udp_sock.u_sock = ep->e.dg.ep_sock; - - if (ep->ep_caps & USDF_EP_CAP_PIO) { - ret = usd_create_qp(ep->ep_domain->dom_dev, - USD_QTR_UDP, - USD_QTY_UD_PIO, - ep->e.dg.ep_wcq->c.hard.cq_cq, - ep->e.dg.ep_rcq->c.hard.cq_cq, - 127, // XXX - 127, // XXX - &filt, - &ep->e.dg.ep_qp); - } else { - ret = -FI_EAGAIN; - } - - if (ret != 0) { - 
ret = usd_create_qp(ep->ep_domain->dom_dev, - USD_QTR_UDP, - USD_QTY_UD, - ep->e.dg.ep_wcq->c.hard.cq_cq, - ep->e.dg.ep_rcq->c.hard.cq_cq, - ep->ep_wqe, - ep->ep_rqe, - &filt, - &ep->e.dg.ep_qp); - } - if (ret != 0) { - goto fail; - } - ep->e.dg.ep_qp->uq_context = ep; - - /* - * Allocate a memory region big enough to hold a header for each - * RQ entry - */ - uqp = to_qpi(ep->e.dg.ep_qp); - ep->e.dg.ep_hdr_ptr = calloc(uqp->uq_rq.urq_num_entries, - sizeof(ep->e.dg.ep_hdr_ptr[0])); - if (ep->e.dg.ep_hdr_ptr == NULL) { - ret = -FI_ENOMEM; - goto fail; - } - - ret = usd_alloc_mr(ep->ep_domain->dom_dev, - usd_get_recv_credits(ep->e.dg.ep_qp) * USDF_HDR_BUF_ENTRY, - &ep->e.dg.ep_hdr_buf); - if (ret != 0) { - goto fail; - } - - ep->flags |= USDF_EP_ENABLED; - - return 0; - -fail: - free(ep->e.dg.ep_hdr_ptr); - ep->e.dg.ep_hdr_ptr = NULL; - - if (ep->e.dg.ep_qp != NULL) { - usd_destroy_qp(ep->e.dg.ep_qp); - ep->e.dg.ep_qp = NULL; - } - return ret; -} - -static int -usdf_ep_dgram_bind(struct fid *fid, struct fid *bfid, uint64_t flags) -{ - int ret; - struct usdf_ep *ep; - struct usdf_cq *cq; - struct usdf_av *av; - - USDF_TRACE_SYS(EP_CTRL, "\n"); - - /* Backward compatibility case for Open MPI. We haven't been validating the flags until now. - * Older version of Open MPI gives FI_RECV as AV bind flag (bug). */ - if (bfid->fclass == FI_CLASS_AV) { - av = av_fidtou(bfid); - if (av->av_domain->dom_info->fabric_attr->api_version <= FI_VERSION(1, 4) && (flags & FI_RECV)) - flags = flags & ~FI_RECV; - } - - /* Check if the binding flags are valid. 
*/ - ret = ofi_ep_bind_valid(&usdf_ops, bfid, flags); - if (ret) - return ret; - - ep = ep_fidtou(fid); - - switch (bfid->fclass) { - - case FI_CLASS_AV: - if (ep->e.dg.ep_av != NULL) { - return -FI_EINVAL; - } - - av = av_fidtou(bfid); - ep->e.dg.ep_av = av; - ofi_atomic_inc32(&av->av_refcnt); - break; - - case FI_CLASS_CQ: - cq = cq_fidtou(bfid); - - /* actually, could look through CQ list for a hard - * CQ with function usd_poll_cq() and use that... XXX - */ - if (cq->cq_is_soft) { - return -FI_EINVAL; - } - if (cq->c.hard.cq_cq == NULL) { - ret = usdf_cq_create_cq(cq, &cq->c.hard.cq_cq, true); - if (ret != 0) { - return ret; - } - } - - if (flags & FI_SEND) { - if (ep->e.dg.ep_wcq != NULL) { - return -FI_EINVAL; - } - - ep->ep_tx_dflt_signal_comp = - (flags & FI_SELECTIVE_COMPLETION) ? 0 : 1; - - ep->ep_tx_completion = (ep->ep_tx_dflt_signal_comp || - (ep->e.dg.tx_op_flags & FI_COMPLETION)); - - ep->e.dg.ep_wcq = cq; - ofi_atomic_inc32(&cq->cq_refcnt); - } - - if (flags & FI_RECV) { - if (ep->e.dg.ep_rcq != NULL) { - return -FI_EINVAL; - } - - if (flags & FI_SELECTIVE_COMPLETION) - return -FI_EOPNOTSUPP; - - ep->ep_rx_dflt_signal_comp = - (flags & FI_SELECTIVE_COMPLETION) ? 
0 : 1; - - ep->ep_rx_completion = (ep->ep_rx_dflt_signal_comp || - (ep->e.dg.rx_op_flags & FI_COMPLETION)); - - ep->e.dg.ep_rcq = cq; - ofi_atomic_inc32(&cq->cq_refcnt); - } - break; - - case FI_CLASS_EQ: - if (ep->ep_eq != NULL) { - return -FI_EINVAL; - } - ep->ep_eq = eq_fidtou(bfid); - ofi_atomic_inc32(&ep->ep_eq->eq_refcnt); - break; - default: - return -FI_EINVAL; - } - - return 0; -} - -static void -usdf_ep_dgram_deref_cq(struct usdf_cq *cq) -{ - struct usdf_cq_hard *hcq; - void (*rtn)(struct usdf_cq_hard *hcq); - - if (cq == NULL) { - return; - } - ofi_atomic_dec32(&cq->cq_refcnt); - - rtn = usdf_progress_hard_cq; - - if (cq->cq_is_soft) { - TAILQ_FOREACH(hcq, &cq->c.soft.cq_list, cqh_link) { - if (hcq->cqh_progress == rtn) { - ofi_atomic_dec32(&hcq->cqh_refcnt); - return; - } - } - } -} - -static int -usdf_ep_dgram_close(fid_t fid) -{ - struct usdf_ep *ep; - - USDF_TRACE_SYS(EP_CTRL, "\n"); - - ep = ep_fidtou(fid); - - if (ofi_atomic_get32(&ep->ep_refcnt) > 0) { - return -FI_EBUSY; - } - - free(ep->e.dg.ep_hdr_ptr); - - if (ep->e.dg.ep_qp != NULL) { - usd_destroy_qp(ep->e.dg.ep_qp); - } - ofi_atomic_dec32(&ep->ep_domain->dom_refcnt); - if (ep->ep_eq != NULL) { - ofi_atomic_dec32(&ep->ep_eq->eq_refcnt); - } - - if (ep->e.dg.ep_av) - ofi_atomic_dec32(&ep->e.dg.ep_av->av_refcnt); - - usdf_ep_dgram_deref_cq(ep->e.dg.ep_wcq); - usdf_ep_dgram_deref_cq(ep->e.dg.ep_rcq); - - if (ep->e.dg.ep_sock != -1) { - close(ep->e.dg.ep_sock); - } - - free(ep); - return 0; -} - -static struct fi_ops_ep usdf_base_dgram_ops = { - .size = sizeof(struct fi_ops_ep), - .cancel = fi_no_cancel, - .getopt = fi_no_getopt, - .setopt = fi_no_setopt, - .tx_ctx = fi_no_tx_ctx, - .rx_ctx = fi_no_rx_ctx, - .rx_size_left = usdf_dgram_rx_size_left, - .tx_size_left = usdf_dgram_tx_size_left, -}; - -static struct fi_ops_ep usdf_base_dgram_prefix_ops = { - .size = sizeof(struct fi_ops_ep), - .cancel = fi_no_cancel, - .getopt = fi_no_getopt, - .setopt = fi_no_setopt, - .tx_ctx = fi_no_tx_ctx, - 
.rx_ctx = fi_no_rx_ctx, - .rx_size_left = usdf_dgram_prefix_rx_size_left, - .tx_size_left = usdf_dgram_prefix_tx_size_left, -}; - -static struct fi_ops_msg usdf_dgram_ops = { - .size = sizeof(struct fi_ops_msg), - .recv = usdf_dgram_recv, - .recvv = usdf_dgram_recvv, - .recvmsg = usdf_dgram_recvmsg, - .send = usdf_dgram_send, - .sendv = usdf_dgram_sendv, - .sendmsg = usdf_dgram_sendmsg, - .inject = usdf_dgram_inject, - .senddata = fi_no_msg_senddata, - .injectdata = fi_no_msg_injectdata, -}; - -static struct fi_ops_msg usdf_dgram_prefix_ops = { - .size = sizeof(struct fi_ops_msg), - .recv = usdf_dgram_prefix_recv, - .recvv = usdf_dgram_prefix_recvv, - .recvmsg = usdf_dgram_prefix_recvmsg, - .send = usdf_dgram_prefix_send, - .sendv = usdf_dgram_prefix_sendv, - .sendmsg = usdf_dgram_prefix_sendmsg, - .inject = usdf_dgram_prefix_inject, - .senddata = fi_no_msg_senddata, - .injectdata = fi_no_msg_injectdata, -}; - -static struct fi_ops_cm usdf_cm_dgram_ops = { - .size = sizeof(struct fi_ops_cm), - .setname = fi_no_setname, - .getname = usdf_cm_dgram_getname, - .getpeer = fi_no_getpeer, - .connect = fi_no_connect, - .listen = fi_no_listen, - .accept = fi_no_accept, - .reject = fi_no_reject, - .shutdown = fi_no_shutdown, - .join = fi_no_join, -}; - -static struct fi_ops_atomic usdf_dgram_atomic_ops = { - .size = sizeof(struct fi_ops_atomic), - .write = fi_no_atomic_write, - .writev = fi_no_atomic_writev, - .writemsg = fi_no_atomic_writemsg, - .inject = fi_no_atomic_inject, - .readwrite = fi_no_atomic_readwrite, - .readwritev = fi_no_atomic_readwritev, - .readwritemsg = fi_no_atomic_readwritemsg, - .compwrite = fi_no_atomic_compwrite, - .compwritev = fi_no_atomic_compwritev, - .compwritemsg = fi_no_atomic_compwritemsg, - .writevalid = fi_no_atomic_writevalid, - .readwritevalid = fi_no_atomic_readwritevalid, - .compwritevalid = fi_no_atomic_compwritevalid, -}; - -/******************************************************************************* - * Default values for dgram 
attributes - ******************************************************************************/ -static const struct fi_tx_attr dgram_dflt_tx_attr = { - .caps = USDF_DGRAM_CAPS, - .mode = USDF_DGRAM_SUPP_MODE, - .op_flags = 0, - .msg_order = USDF_DGRAM_MSG_ORDER, - .comp_order = USDF_DGRAM_COMP_ORDER, - .inject_size = USDF_DGRAM_INJECT_SIZE, - .iov_limit = USDF_DGRAM_IOV_LIMIT, - .rma_iov_limit = USDF_DGRAM_RMA_IOV_LIMIT -}; - -static const struct fi_rx_attr dgram_dflt_rx_attr = { - .caps = USDF_DGRAM_CAPS, - .mode = USDF_DGRAM_SUPP_MODE, - .op_flags = 0, - .msg_order = USDF_DGRAM_MSG_ORDER, - .comp_order = USDF_DGRAM_COMP_ORDER, - .total_buffered_recv = 0, - .iov_limit = USDF_DGRAM_IOV_LIMIT -}; - -static const struct fi_ep_attr dgram_dflt_ep_attr = { - .type = FI_EP_DGRAM, - .protocol = FI_PROTO_UDP, - .msg_prefix_size = 0, - .max_order_raw_size = 0, - .max_order_war_size = 0, - .max_order_waw_size = 0, - .mem_tag_format = 0, - .tx_ctx_cnt = 1, - .rx_ctx_cnt = 1 -}; - -static const struct fi_domain_attr dgram_dflt_domain_attr = { - .caps = USDF_DOM_CAPS, - .threading = FI_THREAD_ENDPOINT, - .control_progress = FI_PROGRESS_AUTO, - .data_progress = FI_PROGRESS_MANUAL, - .resource_mgmt = FI_RM_DISABLED, - .mr_mode = FI_MR_ALLOCATED | FI_MR_LOCAL | FI_MR_BASIC, - .cntr_cnt = USDF_DGRAM_CNTR_CNT, - .mr_iov_limit = USDF_DGRAM_MR_IOV_LIMIT, - .mr_cnt = USDF_DGRAM_MR_CNT, -}; - -/******************************************************************************* - * Fill functions for attributes - ******************************************************************************/ -int usdf_dgram_fill_ep_attr(uint32_t version, const struct fi_info *hints, struct - fi_info *fi, struct usd_device_attrs *dap) -{ - struct fi_ep_attr defaults; - - defaults = dgram_dflt_ep_attr; - - /* The ethernet header does not count against the MTU. 
*/ - defaults.max_msg_size = dap->uda_mtu - sizeof(struct usd_udp_hdr); - - if (FI_VERSION_GE(version, FI_VERSION(1, 3))) - defaults.max_msg_size += sizeof(struct ether_header); - - if (!hints || !hints->ep_attr) - goto out; - - /* In prefix mode the max message size is the same as in non-prefix mode - * with the advertised header size added on top. - */ - - if (hints->mode & FI_MSG_PREFIX) { - defaults.msg_prefix_size = USDF_HDR_BUF_ENTRY; - - if (FI_VERSION_GE(version, FI_VERSION(1, 3))) - defaults.max_msg_size += defaults.msg_prefix_size; - } - - if (hints->ep_attr->max_msg_size > defaults.max_msg_size) - return -FI_ENODATA; - - switch (hints->ep_attr->protocol) { - case FI_PROTO_UNSPEC: - case FI_PROTO_UDP: - break; - default: - return -FI_ENODATA; - } - - if (hints->ep_attr->tx_ctx_cnt > defaults.tx_ctx_cnt) - return -FI_ENODATA; - if (hints->ep_attr->rx_ctx_cnt > defaults.rx_ctx_cnt) - return -FI_ENODATA; - - if (hints->ep_attr->max_order_raw_size > defaults.max_order_raw_size) - return -FI_ENODATA; - if (hints->ep_attr->max_order_war_size > defaults.max_order_war_size) - return -FI_ENODATA; - if (hints->ep_attr->max_order_waw_size > defaults.max_order_waw_size) - return -FI_ENODATA; - -out: - *fi->ep_attr = defaults; - - return FI_SUCCESS; -} - -int usdf_dgram_fill_dom_attr(uint32_t version, const struct fi_info *hints, - struct fi_info *fi, struct usd_device_attrs *dap) -{ - int ret; - struct fi_domain_attr defaults; - - defaults = dgram_dflt_domain_attr; - ret = usdf_domain_getname(version, dap, &defaults.name); - if (ret < 0) - return -FI_ENODATA; - - if (!hints || !hints->domain_attr) - goto catch; - - switch (hints->domain_attr->threading) { - case FI_THREAD_UNSPEC: - case FI_THREAD_ENDPOINT: - break; - case FI_THREAD_FID: - case FI_THREAD_COMPLETION: - case FI_THREAD_DOMAIN: - defaults.threading = hints->domain_attr->threading; - break; - default: - return -FI_ENODATA; - } - - switch (hints->domain_attr->control_progress) { - case FI_PROGRESS_UNSPEC: - 
case FI_PROGRESS_AUTO: - break; - case FI_PROGRESS_MANUAL: - defaults.control_progress = - hints->domain_attr->control_progress; - break; - default: - return -FI_ENODATA; - } - - switch (hints->domain_attr->data_progress) { - case FI_PROGRESS_UNSPEC: - case FI_PROGRESS_MANUAL: - break; - default: - return -FI_ENODATA; - } - - switch (hints->domain_attr->resource_mgmt) { - case FI_RM_UNSPEC: - case FI_RM_DISABLED: - break; - default: - return -FI_ENODATA; - } - - switch (hints->domain_attr->caps) { - case 0: - case FI_REMOTE_COMM: - break; - default: - USDF_WARN_SYS(DOMAIN, - "invalid domain capabilities\n"); - return -FI_ENODATA; - } - - switch (hints->domain_attr->av_type) { - case FI_AV_UNSPEC: - case FI_AV_MAP: - break; - default: - return -FI_ENODATA; - } - - if (ofi_check_mr_mode(&usdf_ops, version, defaults.mr_mode, hints)) - return -FI_ENODATA; - - if (hints->domain_attr->mr_cnt) { - if (hints->domain_attr->mr_cnt <= USDF_DGRAM_MR_CNT) { - defaults.mr_cnt = hints->domain_attr->mr_cnt; - } else { - USDF_DBG_SYS(DOMAIN, - "mr_count exceeded provider limit\n"); - return -FI_ENODATA; - } - } - -catch: - /* catch the version change here. 
*/ - ret = usdf_catch_dom_attr(version, hints, &defaults); - if (ret) - return ret; - - *fi->domain_attr = defaults; - return FI_SUCCESS; -} - -int usdf_dgram_fill_tx_attr(uint32_t version, const struct fi_info *hints, - struct fi_info *fi, - struct usd_device_attrs *dap) -{ - int ret; - struct fi_tx_attr defaults; - size_t entries; - - defaults = dgram_dflt_tx_attr; - - defaults.size = dap->uda_max_send_credits / defaults.iov_limit; - - if (!hints || !hints->tx_attr) - goto out; - - /* make sure we can support the capabilities that are requested */ - if (hints->tx_attr->caps & ~USDF_DGRAM_CAPS) - return -FI_ENODATA; - - /* clear the mode bits the app doesn't support */ - if (hints->mode || hints->tx_attr->mode) - defaults.mode &= (hints->mode | hints->tx_attr->mode); - - defaults.op_flags |= hints->tx_attr->op_flags; - - if ((hints->tx_attr->msg_order | USDF_DGRAM_MSG_ORDER) != - USDF_DGRAM_MSG_ORDER) - return -FI_ENODATA; - if ((hints->tx_attr->comp_order | USDF_DGRAM_COMP_ORDER) != - USDF_DGRAM_COMP_ORDER) - return -FI_ENODATA; - - if (hints->tx_attr->inject_size > defaults.inject_size) - return -FI_ENODATA; - - if (hints->tx_attr->iov_limit > USDF_DGRAM_MAX_SGE) - return -FI_ENODATA; - - /* make sure the values for iov_limit and size are within appropriate - * bounds. 
if only one of the two was given, then set the other based - * on: - * max_credits = size * iov_limit; - */ - if (hints->tx_attr->iov_limit && hints->tx_attr->size) { - defaults.size = hints->tx_attr->size; - defaults.iov_limit = hints->tx_attr->iov_limit; - } else if (hints->tx_attr->iov_limit) { - defaults.iov_limit = hints->tx_attr->iov_limit; - defaults.size = - dap->uda_max_send_credits / defaults.iov_limit; - } else if (hints->tx_attr->size) { - defaults.size = hints->tx_attr->size; - defaults.iov_limit = - dap->uda_max_send_credits / defaults.size; - } - - entries = defaults.size * defaults.iov_limit; - if (entries > dap->uda_max_send_credits) - return -FI_ENODATA; - - if (hints->tx_attr->rma_iov_limit > defaults.rma_iov_limit) - return -FI_ENODATA; - -out: - /* Non-prefix mode requires extra descriptor for header. - */ - if (!hints || (hints && !(hints->mode & FI_MSG_PREFIX))) - defaults.iov_limit -= 1; - - /* catch version changes here. */ - ret = usdf_catch_tx_attr(version, &defaults); - if (ret) - return ret; - - *fi->tx_attr = defaults; - - return FI_SUCCESS; -} - -int usdf_dgram_fill_rx_attr(uint32_t version, const struct fi_info *hints, - struct fi_info *fi, struct usd_device_attrs *dap) -{ - int ret; - struct fi_rx_attr defaults; - size_t entries; - - defaults = dgram_dflt_rx_attr; - - defaults.size = dap->uda_max_recv_credits / defaults.iov_limit; - - if (!hints || !hints->rx_attr) - goto out; - - /* make sure we can support the capabilities that are requested */ - if (hints->rx_attr->caps & ~USDF_DGRAM_CAPS) - return -FI_ENODATA; - - /* clear the mode bits the app doesn't support */ - if (hints->mode || hints->tx_attr->mode) - defaults.mode &= (hints->mode | hints->rx_attr->mode); - - defaults.op_flags |= hints->rx_attr->op_flags; - - if ((hints->rx_attr->msg_order | USDF_DGRAM_MSG_ORDER) != - USDF_DGRAM_MSG_ORDER) - return -FI_ENODATA; - if ((hints->rx_attr->comp_order | USDF_DGRAM_COMP_ORDER) != - USDF_DGRAM_COMP_ORDER) - return -FI_ENODATA; - - 
if (hints->rx_attr->total_buffered_recv > - defaults.total_buffered_recv) - return -FI_ENODATA; - - if (hints->rx_attr->iov_limit > USDF_DGRAM_MAX_SGE) - return -FI_ENODATA; - - /* make sure the values for iov_limit and size are within appropriate - * bounds. if only one of the two was given, then set the other based - * on: - * max_credits = size * iov_limit; - */ - if (hints->rx_attr->iov_limit && hints->rx_attr->size) { - defaults.size = hints->rx_attr->size; - defaults.iov_limit = hints->rx_attr->iov_limit; - } else if (hints->rx_attr->iov_limit) { - defaults.iov_limit = hints->rx_attr->iov_limit; - defaults.size = - dap->uda_max_recv_credits / defaults.iov_limit; - } else if (hints->rx_attr->size) { - defaults.size = hints->rx_attr->size; - defaults.iov_limit = - dap->uda_max_recv_credits / defaults.size; - } - - entries = defaults.size * defaults.iov_limit; - if (entries > dap->uda_max_recv_credits) - return -FI_ENODATA; - -out: - /* Non-prefix mode requires extra descriptor for header. - */ - if (!hints || (hints && !(hints->mode & FI_MSG_PREFIX))) - defaults.iov_limit -= 1; - - /* catch version changes here. 
*/ - ret = usdf_catch_rx_attr(version, &defaults); - if (ret) - return ret; - - *fi->rx_attr = defaults; - - return FI_SUCCESS; -} - -static int usdf_ep_dgram_control(struct fid *fid, int command, void *arg) -{ - struct fid_ep *ep; - int ret; - - USDF_TRACE_SYS(EP_CTRL, "\n"); - - switch (fid->fclass) { - case FI_CLASS_EP: - ep = container_of(fid, struct fid_ep, fid); - switch (command) { - case FI_ENABLE: - ret = usdf_ep_dgram_enable(ep); - break; - default: - ret = -FI_ENOSYS; - } - break; - default: - ret = -FI_ENOSYS; - } - - return ret; -} - -static struct fi_ops usdf_ep_dgram_ops = { - .size = sizeof(struct fi_ops), - .close = usdf_ep_dgram_close, - .bind = usdf_ep_dgram_bind, - .control = usdf_ep_dgram_control, - .ops_open = fi_no_ops_open -}; - -int -usdf_ep_dgram_open(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **ep_o, void *context) -{ - struct usdf_domain *udp; - struct usdf_ep *ep; - int ret; - struct usdf_pep *parent_pep; - void *src_addr; - int is_bound; - size_t tx_size; - size_t rx_size; - - USDF_TRACE_SYS(EP_CTRL, "\n"); - - parent_pep = NULL; - src_addr = NULL; - - if ((info->caps & ~USDF_DGRAM_CAPS) != 0) { - return -FI_EBADF; - } - - if (info->handle != NULL) { - if (info->handle->fclass != FI_CLASS_PEP) { - USDF_WARN_SYS(EP_CTRL, - "\"handle\" should be a PEP (or NULL)\n"); - return -FI_EINVAL; - } - parent_pep = pep_fidtou(info->handle); - } - - udp = dom_ftou(domain); - - ep = calloc(1, sizeof(*ep)); - if (ep == NULL) { - return -FI_ENOMEM; - } - - is_bound = 0; - if (parent_pep != NULL) { - ret = usdf_pep_steal_socket(parent_pep, &is_bound, &ep->e.dg.ep_sock); - if (ret) { - goto fail; - } - } else { - ep->e.dg.ep_sock = socket(AF_INET, SOCK_DGRAM, 0); - if (ep->e.dg.ep_sock == -1) { - ret = -errno; - goto fail; - } - } - - if (!is_bound) { - if (info->src_addr != NULL) - src_addr = usdf_format_to_sin(info, info->src_addr); - - if (src_addr != NULL) { - ret = bind(ep->e.dg.ep_sock, src_addr, - sizeof(struct 
sockaddr_in)); - if (ret == -1) { - ret = -errno; - goto fail; - } - } - - usdf_free_sin_if_needed(info, src_addr); - } - - ep->ep_fid.fid.fclass = FI_CLASS_EP; - ep->ep_fid.fid.context = context; - ep->ep_fid.fid.ops = &usdf_ep_dgram_ops; - ep->ep_fid.cm = &usdf_cm_dgram_ops; - ep->ep_fid.atomic = &usdf_dgram_atomic_ops; - ep->ep_domain = udp; - ep->ep_caps = info->caps; - ep->ep_mode = info->mode; - - ep->e.dg.tx_iov_limit = USDF_DGRAM_IOV_LIMIT; - tx_size = udp->dom_fabric->fab_dev_attrs->uda_max_send_credits / - ep->e.dg.tx_iov_limit; - - ep->e.dg.rx_iov_limit = USDF_DGRAM_IOV_LIMIT; - rx_size = udp->dom_fabric->fab_dev_attrs->uda_max_recv_credits / - ep->e.dg.rx_iov_limit; - - /* - * TODO: Add better management of tx_attr/rx_attr to getinfo and dgram - * open. - */ - if (info->tx_attr) { - ep->e.dg.tx_op_flags = info->tx_attr->op_flags; - if (info->tx_attr->iov_limit) - ep->e.dg.tx_iov_limit = info->tx_attr->iov_limit; - if (info->tx_attr->size) - tx_size = info->tx_attr->size; - } - - if (info->rx_attr) { - ep->e.dg.rx_op_flags = info->rx_attr->op_flags; - if (info->rx_attr->iov_limit) - ep->e.dg.rx_iov_limit = info->rx_attr->iov_limit; - if (info->rx_attr->size) - rx_size = info->rx_attr->size; - } - - if (info->ep_attr) - ep->max_msg_size = info->ep_attr->max_msg_size; - - if (ep->ep_mode & FI_MSG_PREFIX) { - ep->ep_wqe = tx_size * ep->e.dg.tx_iov_limit; - ep->ep_rqe = rx_size * ep->e.dg.rx_iov_limit; - } else { - ep->ep_wqe = tx_size * (ep->e.dg.tx_iov_limit + 1); - ep->ep_rqe = rx_size * (ep->e.dg.rx_iov_limit + 1); - } - - /* Check that the requested credit size is less than the max credit - * counts. If the fi_info struct was acquired from fi_getinfo then this - * will always be the case. 
- */ - if (ep->ep_wqe > udp->dom_fabric->fab_dev_attrs->uda_max_send_credits) { - ret = -FI_EINVAL; - goto fail; - } - if (ep->ep_rqe > udp->dom_fabric->fab_dev_attrs->uda_max_recv_credits) { - ret = -FI_EINVAL; - goto fail; - } - - if (ep->ep_mode & FI_MSG_PREFIX) { - if (info->ep_attr == NULL) { - ret = -FI_EBADF; - goto fail; - } - - ep->ep_fid.ops = &usdf_base_dgram_prefix_ops; - info->ep_attr->msg_prefix_size = USDF_HDR_BUF_ENTRY; - ep->ep_fid.msg = &usdf_dgram_prefix_ops; - } else { - ep->ep_fid.ops = &usdf_base_dgram_ops; - ep->ep_fid.msg = &usdf_dgram_ops; - } - ofi_atomic_initialize32(&ep->ep_refcnt, 0); - ofi_atomic_inc32(&udp->dom_refcnt); - - *ep_o = ep_utof(ep); - return 0; - -fail: - if (ep != NULL) { - if (ep->e.dg.ep_sock != -1) { - close(ep->e.dg.ep_sock); - } - free(ep); - } - return ret; -} diff --git a/prov/usnic/src/usdf_eq.c b/prov/usnic/src/usdf_eq.c deleted file mode 100644 index 5030b73d552..00000000000 --- a/prov/usnic/src/usdf_eq.c +++ /dev/null @@ -1,660 +0,0 @@ -/* - * Copyright (c) 2014-2017, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include "ofi.h" -#include "ofi_enosys.h" - -#include "usnic_direct.h" -#include "usd.h" -#include "usdf.h" -#include "usdf_wait.h" -#include "ofi_util.h" - -static inline int -usdf_eq_empty(struct usdf_eq *eq) -{ - return (ofi_atomic_get32(&eq->eq_num_events) == 0); -} - -static inline int -usdf_eq_error(struct usdf_eq *eq) -{ - return ((eq->eq_ev_tail->ue_flags & USDF_EVENT_FLAG_ERROR) != 0); -} - -/* - * read an event from the ring. 
Caller must hold eq lock, and caller - * needs to have checked for empty and error - */ -static inline ssize_t usdf_eq_read_event(struct usdf_eq *eq, uint32_t *event, - void *buf, size_t len, uint64_t flags) -{ - struct usdf_event *ev; - size_t copylen; - ssize_t nbytes; - uint64_t val; - - ev = eq->eq_ev_tail; - - copylen = MIN(ev->ue_len, len); - - if (copylen < ev->ue_len) { - USDF_WARN_SYS(EQ, - "buffer too small, got: %zu needed %zu\n", - copylen, ev->ue_len); - return -FI_ETOOSMALL; - } - - /* copy out the event */ - if (event) - *event = ev->ue_event; - - memcpy(buf, ev->ue_buf, copylen); - - if (!(flags & FI_PEEK)) { - /* update count */ - ofi_atomic_dec32(&eq->eq_num_events); - - /* Free the event buf if needed */ - if (ev->ue_flags & USDF_EVENT_FLAG_FREE_BUF) - free(ev->ue_buf); - - /* new tail */ - eq->eq_ev_tail++; - if (eq->eq_ev_tail >= eq->eq_ev_end) - eq->eq_ev_tail = eq->eq_ev_ring; - - /* consume the event in eventfd */ - if (eq->eq_attr.wait_obj == FI_WAIT_FD) { - nbytes = read(eq->eq_fd, &val, sizeof(val)); - if (nbytes != sizeof(val)) - return -errno; - } - } - - return copylen; -} - -/* - * unconditionally write an event to the EQ. Caller is responsible for - * ensuring there is room. EQ must be locked. 
- */ -static inline ssize_t -usdf_eq_write_event(struct usdf_eq *eq, uint32_t event, - const void *buf, size_t len, uint64_t flags) -{ - struct usdf_event *ev; - void *ev_buf; - - ev = eq->eq_ev_head; - ev->ue_event = event; - ev->ue_len = len; - ev->ue_flags = flags; - - /* save the event data if we can, else malloc() */ - if (len <= sizeof(struct fi_eq_entry)) { - ev_buf = eq->eq_ev_buf + (ev - eq->eq_ev_ring); - } else { - ev_buf = malloc(len); - if (ev_buf == NULL) { - return -errno; - } - ev->ue_flags |= USDF_EVENT_FLAG_FREE_BUF; - } - memcpy(ev_buf, buf, len); - ev->ue_buf = ev_buf; - - /* new head */ - eq->eq_ev_head++; - if (eq->eq_ev_head >= eq->eq_ev_end) { - eq->eq_ev_head = eq->eq_ev_ring; - } - - /* increment queued event count */ - ofi_atomic_inc32(&eq->eq_num_events); - - return len; -} - -static void usdf_eq_clean_err(struct usdf_eq *eq, uint8_t destroy) -{ - struct usdf_err_data_entry *err_data_entry; - struct slist_entry *entry; - - while (!slist_empty(&eq->eq_err_data)) { - entry = slist_remove_head(&eq->eq_err_data); - err_data_entry = container_of(entry, struct usdf_err_data_entry, - entry); - if (err_data_entry->seen || destroy) { - free(err_data_entry); - } else { - /* Oops, the rest hasn't been seen yet. Put this back - * and exit. 
- */ - slist_insert_head(entry, &eq->eq_err_data); - break; - } - } -} - -static ssize_t usdf_eq_readerr(struct fid_eq *feq, - struct fi_eq_err_entry *given_buffer, uint64_t flags) -{ - struct usdf_err_data_entry *err_data_entry; - struct fi_eq_err_entry entry; - struct usdf_eq *eq; - ssize_t ret, err_data_size; - uint32_t api_version; - void *err_data = NULL; - - USDF_TRACE_SYS(EQ, "\n"); - - if (!feq) { - USDF_DBG_SYS(EQ, "invalid input\n"); - return -FI_EINVAL; - } - - eq = eq_ftou(feq); - - pthread_spin_lock(&eq->eq_lock); - - /* make sure there is an error on top */ - if (usdf_eq_empty(eq) || !usdf_eq_error(eq)) { - pthread_spin_unlock(&eq->eq_lock); - ret = -FI_EAGAIN; - goto done; - } - - ret = usdf_eq_read_event(eq, NULL, &entry, sizeof(entry), flags); - - pthread_spin_unlock(&eq->eq_lock); - - /* read the user's setting for err_data. */ - err_data = given_buffer->err_data; - err_data_size = given_buffer->err_data_size; - - /* Copy the entry. */ - *given_buffer = entry; - - /* Mark as seen so it can be cleaned on the next iteration of read. */ - if (entry.err_data_size) { - err_data_entry = container_of(entry.err_data, - struct usdf_err_data_entry, err_data); - err_data_entry->seen = 1; - } - - - /* For release > 1.5, we will copy the err_data directly - * to the user's buffer. 
- */ - api_version = eq->eq_fabric->fab_attr.fabric->api_version; - if (FI_VERSION_GE(api_version, FI_VERSION(1, 5))) { - given_buffer->err_data = err_data; - given_buffer->err_data_size = - MIN(err_data_size, entry.err_data_size); - memcpy(given_buffer->err_data, entry.err_data, - given_buffer->err_data_size); - - if (err_data_size < entry.err_data_size) { - USDF_DBG_SYS(EQ, "err_data truncated by %zd bytes.\n", - entry.err_data_size - err_data_size); - } - - usdf_eq_clean_err(eq, 0); - } - -done: - return ret; -} - -static ssize_t _usdf_eq_read(struct usdf_eq *eq, uint32_t *event, void *buf, - size_t len, uint64_t flags) -{ - ssize_t ret; - - pthread_spin_lock(&eq->eq_lock); - - if (usdf_eq_empty(eq)) { - ret = -FI_EAGAIN; - goto done; - } - - if (usdf_eq_error(eq)) { - ret = -FI_EAVAIL; - goto done; - } - - if (!slist_empty(&eq->eq_err_data)) - usdf_eq_clean_err(eq, 0); - - ret = usdf_eq_read_event(eq, event, buf, len, flags); - -done: - pthread_spin_unlock(&eq->eq_lock); - return ret; -} - -static ssize_t usdf_eq_read(struct fid_eq *feq, uint32_t *event, void *buf, - size_t len, uint64_t flags) -{ - struct usdf_eq *eq; - - USDF_DBG_SYS(EQ, "\n"); - - eq = eq_ftou(feq); - - /* Don't bother acquiring the lock if there is nothing to read. */ - if (usdf_eq_empty(eq)) - return -FI_EAGAIN; - - return _usdf_eq_read(eq, event, buf, len, flags); -} - -/* TODO: The timeout handling seems off on this one. */ -static ssize_t usdf_eq_sread_fd(struct fid_eq *feq, uint32_t *event, void *buf, - size_t len, int timeout, uint64_t flags) -{ - struct usdf_eq *eq; - struct pollfd pfd; - int ret; - - USDF_DBG_SYS(EQ, "\n"); - - eq = eq_ftou(feq); - - /* Setup poll context to block until the FD becomes readable. 
*/ - pfd.fd = eq->eq_fd; - pfd.events = POLLIN; - -retry: - ret = poll(&pfd, 1, timeout); - if (ret < 0) - return -errno; - else if (ret == 0) - return -FI_EAGAIN; - - ret = _usdf_eq_read(eq, event, buf, len, flags); - if (ret == -FI_EAGAIN) - goto retry; - - return ret; -} - -ssize_t usdf_eq_write_internal(struct usdf_eq *eq, uint32_t event, - const void *buf, size_t len, uint64_t flags) -{ - uint64_t val = 1; - int ret; - int n; - - USDF_DBG_SYS(EQ, "event=%#" PRIx32 " flags=%#" PRIx64 "\n", event, - flags); - - pthread_spin_lock(&eq->eq_lock); - - /* Return -FI_EAGAIN if the EQ is full. - * TODO: Disable the EQ. - */ - if (ofi_atomic_get32(&eq->eq_num_events) == eq->eq_ev_ring_size) { - ret = -FI_EAGAIN; - goto done; - } - - ret = usdf_eq_write_event(eq, event, buf, len, flags); - - /* If successful, post to eventfd */ - if (ret >= 0 && eq->eq_attr.wait_obj == FI_WAIT_FD) { - n = write(eq->eq_fd, &val, sizeof(val)); - - /* TODO: If the write call fails, then roll back the EQ entry. - */ - if (n != sizeof(val)) - ret = -FI_EIO; - } - -done: - pthread_spin_unlock(&eq->eq_lock); - return ret; -} - -static ssize_t usdf_eq_write(struct fid_eq *feq, uint32_t event, - const void *buf, size_t len, uint64_t flags) -{ - struct usdf_eq *eq; - - USDF_DBG_SYS(EQ, "\n"); - - if (!feq) { - USDF_DBG_SYS(EQ, "invalid input\n"); - return -FI_EINVAL; - } - - eq = eq_ftou(feq); - - return usdf_eq_write_internal(eq, event, buf, len, flags); -} - -static const char * -usdf_eq_strerror(struct fid_eq *feq, int prov_errno, const void *err_data, - char *buf, size_t len) -{ - return NULL; -} - -static int usdf_eq_get_wait(struct usdf_eq *eq, void *arg) -{ - USDF_TRACE_SYS(EQ, "\n"); - - switch (eq->eq_attr.wait_obj) { - case FI_WAIT_FD: - *(int *) arg = eq->eq_fd; - break; - default: - USDF_WARN_SYS(EQ, "unsupported wait type\n"); - return -FI_EINVAL; - } - - return FI_SUCCESS; -} - -static int -usdf_eq_control(fid_t fid, int command, void *arg) -{ - struct usdf_eq *eq; - - 
USDF_TRACE_SYS(EQ, "\n"); - - eq = eq_fidtou(fid); - - switch (command) { - case FI_GETWAIT: - break; - default: - return -FI_EINVAL; - } - - return usdf_eq_get_wait(eq, arg); -} - -static int usdf_eq_bind_wait(struct usdf_eq *eq) -{ - int ret; - struct usdf_wait *wait_priv; - - if (!eq->eq_attr.wait_set) { - USDF_DBG_SYS(EQ, "can't bind to non-existent wait set\n"); - return -FI_EINVAL; - } - - wait_priv = wait_ftou(eq->eq_attr.wait_set); - - ret = fid_list_insert(&wait_priv->list, &wait_priv->lock, - &eq->eq_fid.fid); - if (ret) { - USDF_WARN_SYS(EQ, - "failed to associate eq with wait fid list\n"); - return ret; - } - - ret = ofi_epoll_add(wait_priv->object.epfd, eq->eq_fd, OFI_EPOLL_IN, eq); - if (ret) { - USDF_WARN_SYS(EQ, "failed to associate FD with wait set\n"); - goto err; - } - - USDF_DBG_SYS(EQ, "associated EQ FD %d with epoll FD %d using fid %p\n", - eq->eq_fd, wait_priv->object.epfd, &eq->eq_fid.fid); - - return ret; - -err: - fid_list_remove(&wait_priv->list, &wait_priv->lock, &eq->eq_fid.fid); - return ret; -} - -static int usdf_eq_unbind_wait(struct usdf_eq *eq) -{ - int ret; - struct usdf_wait *wait_priv; - - if (!eq->eq_attr.wait_set) { - USDF_DBG_SYS(EQ, "can't unbind from non-existent wait set\n"); - return -FI_EINVAL; - } - - wait_priv = wait_ftou(eq->eq_attr.wait_set); - - ret = ofi_epoll_del(wait_priv->object.epfd, eq->eq_fd); - if (ret) { - USDF_WARN_SYS(EQ, - "failed to remove FD from wait set\n"); - return ret; - } - - fid_list_remove(&wait_priv->list, &wait_priv->lock, &eq->eq_fid.fid); - - ofi_atomic_dec32(&wait_priv->wait_refcnt); - - USDF_DBG_SYS(EQ, - "dissasociated EQ FD %d from epoll FD %d using FID: %p\n", - eq->eq_fd, wait_priv->object.epfd, &eq->eq_fid.fid); - - return FI_SUCCESS; -} - -static int -usdf_eq_close(fid_t fid) -{ - struct usdf_eq *eq; - int ret = FI_SUCCESS; - - USDF_TRACE_SYS(EQ, "\n"); - - eq = eq_fidtou(fid); - - if (ofi_atomic_get32(&eq->eq_refcnt) > 0) { - return -FI_EBUSY; - } - 
ofi_atomic_dec32(&eq->eq_fabric->fab_refcnt); - - /* release wait obj */ - switch (eq->eq_attr.wait_obj) { - case FI_WAIT_SET: - ret = usdf_eq_unbind_wait(eq); - /* FALLTHROUGH */ - /* Need to close the FD used for wait set. */ - case FI_WAIT_FD: - close(eq->eq_fd); - break; - default: - break; - } - - /* Set destroy flag to clear everything out */ - usdf_eq_clean_err(eq, 1); - - free(eq->eq_ev_ring); - free(eq->eq_ev_buf); - free(eq); - - return ret; -} - -static struct fi_ops_eq usdf_eq_ops = { - .size = sizeof(struct fi_ops_eq), - .read = usdf_eq_read, - .readerr = usdf_eq_readerr, - .write = usdf_eq_write, - .sread = fi_no_eq_sread, - .strerror = usdf_eq_strerror, -}; - -static struct fi_ops usdf_eq_fi_ops = { - .size = sizeof(struct fi_ops), - .close = usdf_eq_close, - .bind = fi_no_bind, - .control = usdf_eq_control, - .ops_open = fi_no_ops_open, -}; - -int -usdf_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr, - struct fid_eq **feq, void *context) -{ - struct usdf_eq *eq; - struct usdf_fabric *fab; - int ret; - - USDF_TRACE_SYS(EQ, "\n"); - - fab = fab_ftou(fabric); - - eq = calloc(1, sizeof(*eq)); - if (eq == NULL) { - ret = -errno; - goto fail; - } - - /* fill in the EQ struct */ - eq->eq_fid.fid.fclass = FI_CLASS_EQ; - eq->eq_fid.fid.context = context; - eq->eq_fid.fid.ops = &usdf_eq_fi_ops; - eq->eq_fid.ops = &eq->eq_ops_data; - - eq->eq_fabric = fab; - ofi_atomic_initialize32(&eq->eq_refcnt, 0); - ret = pthread_spin_init(&eq->eq_lock, PTHREAD_PROCESS_PRIVATE); - if (ret != 0) { - ret = -ret; - goto fail; - } - - slist_init(&eq->eq_err_data); - - /* get baseline routines */ - eq->eq_ops_data = usdf_eq_ops; - - /* fill in sread based on wait type */ - switch (attr->wait_obj) { - case FI_WAIT_NONE: - break; - case FI_WAIT_UNSPEC: - /* default to FD */ - attr->wait_obj = FI_WAIT_FD; - /* FALLTHROUGH */ - case FI_WAIT_FD: - eq->eq_ops_data.sread = usdf_eq_sread_fd; - /* FALLTHROUGH */ - /* Don't set sread for wait set. 
*/ - case FI_WAIT_SET: - eq->eq_fd = eventfd(0, EFD_NONBLOCK | EFD_SEMAPHORE); - if (eq->eq_fd == -1) { - ret = -errno; - goto fail; - } - - if (attr->wait_obj == FI_WAIT_SET) { - ret = usdf_eq_bind_wait(eq); - if (ret) - goto fail; - } - break; - default: - ret = -FI_ENOSYS; - goto fail; - } - - /* - * Dis-allow write if requested - */ - if ((attr->flags & FI_WRITE) == 0) { - eq->eq_ops_data.write = fi_no_eq_write; - } - - /* - * Allocate and initialize event ring - */ - if (attr->size == 0) { - attr->size = 1024; // XXX - } - eq->eq_ev_ring = calloc(attr->size, sizeof(*eq->eq_ev_ring)); - eq->eq_ev_buf = calloc(attr->size, sizeof(*eq->eq_ev_buf)); - if (eq->eq_ev_ring == NULL || eq->eq_ev_buf == NULL) { - ret = -errno; - goto fail; - } - eq->eq_ev_head = eq->eq_ev_ring; - eq->eq_ev_tail = eq->eq_ev_ring; - eq->eq_ev_ring_size = attr->size; - eq->eq_ev_end = eq->eq_ev_ring + eq->eq_ev_ring_size; - ofi_atomic_initialize32(&eq->eq_num_events, 0); - - ofi_atomic_inc32(&eq->eq_fabric->fab_refcnt); - - eq->eq_attr = *attr; - *feq = eq_utof(eq); - - return 0; - -fail: - if (eq != NULL) { - free(eq->eq_ev_ring); - free(eq->eq_ev_buf); - free(eq); - } - return ret; -} diff --git a/prov/usnic/src/usdf_ext.c b/prov/usnic/src/usdf_ext.c deleted file mode 100644 index eefdec67908..00000000000 --- a/prov/usnic/src/usdf_ext.c +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "ofi.h" - -#include "usdf.h" -#include "usnic_direct.h" -#include "fi_ext_usnic.h" -#include "usdf_av.h" - -/******************************************************************************* - * Fabric extensions - ******************************************************************************/ -static int -usdf_usnic_getinfo_v1(uint32_t version, struct fid_fabric *fabric, - struct fi_usnic_info *uip) -{ - struct usdf_fabric *fp; - struct usd_device_attrs *dap; - - USDF_TRACE("\n"); - - fp = fab_ftou(fabric); - dap = fp->fab_dev_attrs; - - /* this assignment was missing in libfabric v1.1.1 and earlier */ - uip->ui_version = 1; - - uip->ui.v1.ui_link_speed = dap->uda_bandwidth; - uip->ui.v1.ui_netmask_be = dap->uda_netmask_be; - snprintf(uip->ui.v1.ui_ifname, sizeof(uip->ui.v1.ui_ifname), "%s", - dap->uda_ifname); - uip->ui.v1.ui_num_vf = dap->uda_num_vf; - uip->ui.v1.ui_qp_per_vf = dap->uda_qp_per_vf; - uip->ui.v1.ui_cq_per_vf = dap->uda_cq_per_vf; - - return 0; -} - -static int usdf_usnic_getinfo_v2(uint32_t version, struct fid_fabric *ffabric, - struct fi_usnic_info *uip) -{ - struct usd_open_params params; - struct usd_device_attrs *dap; - struct usdf_fabric *fabric; - struct usd_device *dev; - struct fi_usnic_cap **cap; - size_t len; - int ret; - int i; - - USDF_TRACE("\n"); - - fabric = fab_ftou(ffabric); - dap = fabric->fab_dev_attrs; - - memset(¶ms, 0, sizeof(params)); - params.flags = UOPF_SKIP_LINK_CHECK | UOPF_SKIP_PD_ALLOC; - params.cmd_fd = -1; - params.context = NULL; - - ret = usd_open_with_params(dap->uda_devname, ¶ms, &dev); - if (ret) - return -ret; - - uip->ui_version = FI_EXT_USNIC_INFO_VERSION; - - len = ARRAY_SIZE(uip->ui.v2.ui_devname); - strncpy(uip->ui.v2.ui_devname, dap->uda_devname, len - 1); - uip->ui.v2.ui_devname[len - 1] = '\0'; - - len = ARRAY_SIZE(uip->ui.v2.ui_ifname); - strncpy(uip->ui.v2.ui_ifname, dap->uda_ifname, len - 1); - uip->ui.v2.ui_ifname[len - 1] = '\0'; - - memcpy(uip->ui.v2.ui_mac_addr, dap->uda_mac_addr, 
- MIN(sizeof(dap->uda_mac_addr), - sizeof(uip->ui.v2.ui_mac_addr))); - - uip->ui.v2.ui_ipaddr_be = dap->uda_ipaddr_be; - uip->ui.v2.ui_netmask_be = dap->uda_netmask_be; - uip->ui.v2.ui_prefixlen = dap->uda_prefixlen; - uip->ui.v2.ui_mtu = dap->uda_mtu; - uip->ui.v2.ui_link_up = dap->uda_link_state; - - uip->ui.v2.ui_vendor_id = dap->uda_vendor_id; - uip->ui.v2.ui_vendor_part_id = dap->uda_vendor_part_id; - uip->ui.v2.ui_device_id = dap->uda_device_id; - - len = ARRAY_SIZE(uip->ui.v2.ui_firmware); - strncpy(uip->ui.v2.ui_firmware, dap->uda_firmware, len - 1); - uip->ui.v2.ui_firmware[len - 1] = '\0'; - - uip->ui.v2.ui_num_vf = dap->uda_num_vf; - uip->ui.v2.ui_cq_per_vf = dap->uda_cq_per_vf; - uip->ui.v2.ui_qp_per_vf = dap->uda_qp_per_vf; - uip->ui.v2.ui_intr_per_vf = dap->uda_intr_per_vf; - uip->ui.v2.ui_max_cq = dap->uda_max_cq; - uip->ui.v2.ui_max_qp = dap->uda_max_qp; - - uip->ui.v2.ui_link_speed = dap->uda_bandwidth; - uip->ui.v2.ui_max_cqe = dap->uda_max_cqe; - uip->ui.v2.ui_max_send_credits = dap->uda_max_send_credits; - uip->ui.v2.ui_max_recv_credits = dap->uda_max_recv_credits; - - uip->ui.v2.ui_caps = calloc(USD_CAP_MAX + 1, - sizeof(*uip->ui.v2.ui_caps)); - if (!uip->ui.v2.ui_caps) - return -FI_ENOMEM; - - uip->ui.v2.ui_nicname = usd_devid_to_nicname(uip->ui.v2.ui_vendor_id, - uip->ui.v2.ui_device_id); - uip->ui.v2.ui_pid = usd_devid_to_pid(uip->ui.v2.ui_vendor_id, - uip->ui.v2.ui_device_id); - - for (i = 0; i < USD_CAP_MAX; i++) { - uip->ui.v2.ui_caps[i] = calloc(1, - sizeof(*(uip->ui.v2.ui_caps[i]))); - - if (!uip->ui.v2.ui_caps[i]) { - ret = -FI_ENOMEM; - goto fail; - } - - uip->ui.v2.ui_caps[i]->uc_capability = usd_capability(i); - uip->ui.v2.ui_caps[i]->uc_present = usd_get_cap(dev, i); - } - - usd_close(dev); - - return FI_SUCCESS; - -fail: - for (cap = uip->ui.v2.ui_caps; *cap; cap++) - free(*cap); - - free(uip->ui.v2.ui_caps); - - usd_close(dev); - - return ret; -} - -static int usdf_usnic_getinfo(uint32_t version, struct fid_fabric *fabric, - 
struct fi_usnic_info *uip) -{ - assert(FI_EXT_USNIC_INFO_VERSION == 2); - - switch (version) { - case 1: - return usdf_usnic_getinfo_v1(version, fabric, uip); - case 2: - return usdf_usnic_getinfo_v2(version, fabric, uip); - default: - USDF_DBG_SYS(FABRIC, "invalid version\n"); - return -FI_EINVAL; - } -} - -static struct fi_usnic_ops_fabric usdf_usnic_ops_fabric = { - .size = sizeof(struct fi_usnic_ops_fabric), - .getinfo = usdf_usnic_getinfo -}; - -int -usdf_fabric_ops_open(struct fid *fid, const char *ops_name, uint64_t flags, - void **ops, void *context) -{ - USDF_TRACE("\n"); - - if (strcmp(ops_name, FI_USNIC_FABRIC_OPS_1) == 0) { - *ops = &usdf_usnic_ops_fabric; - } else { - return -FI_EINVAL; - } - - return 0; -} - -/******************************************************************************* - * Address vector extensions - ******************************************************************************/ -static int -usdf_am_get_distance(struct fid_av *fav, void *addr, int *metric_o) -{ - struct usdf_av *av; - struct usdf_domain *udp; - struct sockaddr_in *sin; - int ret; - - USDF_TRACE_SYS(DOMAIN, "\n"); - - av = av_ftou(fav); - udp = av->av_domain; - sin = addr; - - ret = usd_get_dest_distance(udp->dom_dev, - sin->sin_addr.s_addr, metric_o); - return ret; -} - -static struct fi_usnic_ops_av usdf_usnic_ops_av = { - .size = sizeof(struct fi_usnic_ops_av), - .get_distance = usdf_am_get_distance, -}; - -int usdf_av_ops_open(struct fid *fid, const char *ops_name, uint64_t flags, - void **ops, void *context) -{ - USDF_TRACE_SYS(AV, "\n"); - - if (strcmp(ops_name, FI_USNIC_AV_OPS_1) == 0) { - *ops = &usdf_usnic_ops_av; - } else { - return -FI_EINVAL; - } - - return 0; -} diff --git a/prov/usnic/src/usdf_fabric.c b/prov/usnic/src/usdf_fabric.c deleted file mode 100644 index 65b2e5daaa2..00000000000 --- a/prov/usnic/src/usdf_fabric.c +++ /dev/null @@ -1,1057 +0,0 @@ -/* - * Copyright (c) 2014-2019, Cisco Systems, Inc. All rights reserved. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include "ofi.h" -#include "ofi_enosys.h" -#include "ofi_prov.h" - -#include "usnic_direct.h" -#include "libnl_utils.h" - -#include "usdf.h" -#include "usdf_wait.h" -#include "fi_ext_usnic.h" -#include "usdf_progress.h" -#include "usdf_timer.h" -#include "usdf_dgram.h" -#include "usdf_cm.h" - -struct usdf_usnic_info *__usdf_devinfo; - -static int usdf_fabric_getname(uint32_t version, struct usd_device_attrs *dap, - char **name) -{ - int ret = FI_SUCCESS; - char *bufp = NULL; - struct in_addr in; - char *addrnetw; - - if (FI_VERSION_GE(version, FI_VERSION(1, 4))) { - in.s_addr = dap->uda_ipaddr_be & dap->uda_netmask_be; - addrnetw = inet_ntoa(in); - ret = asprintf(&bufp, "%s/%d", addrnetw, dap->uda_prefixlen); - if (ret < 0) { - USDF_DBG( - "asprintf failed while creating fabric name\n"); - ret = -ENOMEM; - } - } else { - bufp = strdup(dap->uda_devname); - if (!bufp) { - USDF_DBG("strdup failed while creating fabric name\n"); - ret = -errno; - } - } - - *name = bufp; - - return ret; -} - -static bool usdf_fabric_checkname(uint32_t version, - struct usd_device_attrs *dap, const char *hint) -{ - int ret; - bool valid = false; - char *reference; - - USDF_DBG("checking devname: version=%d, devname='%s'\n", version, hint); - - if (version) { - ret = usdf_fabric_getname(version, dap, &reference); - if (ret < 0) - return false; - - if (strcmp(reference, hint) == 0) { - valid = true; - } else { - USDF_DBG("hint %s failed to match %s\n", hint, - reference); - } - - free(reference); - return valid; - } - - /* The hint string itself is kind of a version check, in pre-1.4 the - * name was just the device name. In 1.4 and beyond, then name is - * actually CIDR - * notation. 
- */ - if (strstr(hint, "/")) - return usdf_fabric_checkname(FI_VERSION(1, 4), dap, hint); - - return usdf_fabric_checkname(FI_VERSION(1, 3), dap, hint); -} - -static int usdf_validate_hints(uint32_t version, const struct fi_info *hints) -{ - struct fi_fabric_attr *fattrp; - size_t size; - - switch (hints->addr_format) { - case FI_FORMAT_UNSPEC: - case FI_SOCKADDR_IN: - size = sizeof(struct sockaddr_in); - break; - case FI_SOCKADDR: - size = sizeof(struct sockaddr); - break; - case FI_ADDR_STR: - if (hints->src_addr != NULL && - strlen((char *)hints->src_addr) > USDF_ADDR_STR_LEN) - return -FI_ENODATA; - - if (hints->dest_addr != NULL && - strlen((char *)hints->dest_addr) > USDF_ADDR_STR_LEN) - return -FI_ENODATA; - - goto skip_sockaddr_size_check; - default: - return -FI_ENODATA; - } - - if (hints->src_addr != NULL && hints->src_addrlen < size) { - return -FI_ENODATA; - } - if (hints->dest_addr != NULL && hints->dest_addrlen < size) { - return -FI_ENODATA; - } - -skip_sockaddr_size_check: - if (hints->ep_attr != NULL) { - switch (hints->ep_attr->protocol) { - case FI_PROTO_UNSPEC: - case FI_PROTO_UDP: - case FI_PROTO_RUDP: - break; - default: - return -FI_ENODATA; - } - - if (hints->ep_attr->auth_key || hints->ep_attr->auth_key_size) { - USDF_WARN_SYS(EP_CTRL, - "\"authorization key\" is not supported in this provider.\n"); - return -FI_ENODATA; - } - } - - fattrp = hints->fabric_attr; - if (fattrp != NULL) { - if (fattrp->prov_version != 0 && - fattrp->prov_version != USDF_PROV_VERSION) { - return -FI_ENODATA; - } - } - return FI_SUCCESS; -} - -static int -usdf_fill_sockaddr_info(struct fi_info *fi, - struct sockaddr_in *src, struct sockaddr_in *dest, - struct usd_device_attrs *dap) -{ - int ret; - struct sockaddr_in *sin; - - sin = calloc(1, sizeof(*sin)); - fi->src_addr = sin; - if (sin == NULL) { - ret = -FI_ENOMEM; - return ret; - } - fi->src_addrlen = sizeof(struct sockaddr_in); - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = dap->uda_ipaddr_be; - if 
(src != NULL) - sin->sin_port = src->sin_port; - - /* copy in dest if specified */ - if (dest != NULL) { - sin = calloc(1, sizeof(*sin)); - if (NULL == sin) { - free(fi->src_addr); - return -FI_ENOMEM; - } - *sin = *dest; - fi->dest_addr = sin; - fi->dest_addrlen = sizeof(*sin); - } - return FI_SUCCESS; -} - -static int -usdf_fill_straddr_info(struct fi_info *fi, - char *src, char *dest, struct usd_device_attrs *dap) -{ - char *address_string; - struct sockaddr_in *sin; - - /* If NULL, we have to create the sockaddr_in - * and convert it to string format. - */ - if (src == NULL) { - sin = calloc(1, sizeof(*sin)); - if (NULL == sin) - return -FI_ENOMEM; - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = dap->uda_ipaddr_be; - - address_string = calloc(1, USDF_ADDR_STR_LEN); - fi->src_addr = address_string; - fi->src_addrlen = USDF_ADDR_STR_LEN; - - usdf_addr_tostr(sin, fi->src_addr, &fi->src_addrlen); - free(sin); - } else { - /* Otherwise, it is already in string format. - * Just copy it. - */ - address_string = strdup(src); - if (NULL == address_string) - return -FI_ENOMEM; - fi->src_addr = address_string; - fi->src_addrlen = strlen(address_string); - } - - /* Same goes for dest. 
*/ - if (dest != NULL) { - address_string = strdup(dest); - fi->dest_addr = address_string; - fi->dest_addrlen = strlen(address_string); - } - - return FI_SUCCESS; -} -static int -usdf_fill_addr_info(struct fi_info *fi, uint32_t addr_format, - void *src, void *dest, struct usd_device_attrs *dap) -{ - int ret; - - if (addr_format != FI_FORMAT_UNSPEC) { - fi->addr_format = addr_format; - } else { - fi->addr_format = FI_SOCKADDR_IN; - } - - switch (fi->addr_format) { - case FI_SOCKADDR: - case FI_SOCKADDR_IN: - ret = usdf_fill_sockaddr_info(fi, src, dest, dap); - if (ret != FI_SUCCESS) - goto fail; - break; - case FI_ADDR_STR: - ret = usdf_fill_straddr_info(fi, src, dest, dap); - if (ret != FI_SUCCESS) - goto fail; - break; - default: - ret = -FI_ENODATA; - goto fail; - } - - return 0; - -fail: - return ret; // fi_freeinfo() in caller frees all -} - -static int validate_modebits(uint32_t version, const struct fi_info *hints, - uint64_t supported, uint64_t *mode_out) -{ - uint64_t mode; - - /* If there is no hints, return everything we supported. */ - if (!hints) { - *mode_out = supported; - return FI_SUCCESS; - } - - mode = hints->mode & supported; - - /* Before version 1.5, FI_LOCAL_MR is a requirement. 
*/ - if (FI_VERSION_LT(version, FI_VERSION(1, 5))) { - if ((mode & FI_LOCAL_MR) == 0) - return -FI_ENODATA; - } - - *mode_out = mode; - - return FI_SUCCESS; -} - -static int usdf_alloc_fid_nic(struct fi_info *fi, - struct usd_device_attrs *dap) -{ - int ret; - struct fid_nic *nic = NULL; - struct fi_device_attr *da = NULL; - struct fi_link_attr *la = NULL; - - nic = ofi_nic_dup(NULL); - if (!nic) - goto nomem; - - da = nic->device_attr; - da->name = strdup(dap->uda_devname); - if (!da->name) - goto nomem; - ret = asprintf(&da->device_id, "%s (%s)", - usd_devid_to_pid(dap->uda_vendor_id, - dap->uda_device_id), - usd_devid_to_nicname(dap->uda_vendor_id, - dap->uda_device_id)); - if (ret < 0) - goto nomem; - ret = asprintf(&da->device_version, "0x%x", dap->uda_vendor_part_id); - if (ret < 0) - goto nomem; - ret = asprintf(&da->vendor_id, "0x%x", dap->uda_vendor_id); - if (ret < 0) - goto nomem; - da->driver = strdup("usnic_verbs"); - if (!da->driver) - goto nomem; - da->firmware = strdup(dap->uda_firmware); - if (!da->firmware) - goto nomem; - - // usnic does not currently expose PCI bus information, so we - // set the bus type to unknown. 
- nic->bus_attr->bus_type = FI_BUS_UNKNOWN; - - la = nic->link_attr; - - socklen_t size = INET_ADDRSTRLEN; - la->address = calloc(1, size); - if (!la->address) - goto nomem; - inet_ntop(AF_INET, &dap->uda_ipaddr_be, la->address, size); - la->mtu = dap->uda_mtu; - la->speed = dap->uda_bandwidth; - switch (dap->uda_link_state) { - case USD_LINK_UP: - la->state = FI_LINK_UP; - break; - case USD_LINK_DOWN: - la->state = FI_LINK_DOWN; - break; - default: - la->state = FI_LINK_UNKNOWN; - break; - } - la->network_type = strdup("Ethernet"); - if (!la->network_type) - goto nomem; - - fi->nic = nic; - - return FI_SUCCESS; - -nomem: - if (nic) - fi_close(&nic->fid); - return -FI_ENOMEM; -} - -static int usdf_fill_info_dgram( - uint32_t version, - const struct fi_info *hints, - void *src, - void *dest, - struct usd_device_attrs *dap, - struct fi_info **fi_first, - struct fi_info **fi_last) -{ - struct fi_info *fi; - struct fi_fabric_attr *fattrp; - uint32_t addr_format; - int ret; - - fi = fi_allocinfo(); - if (fi == NULL) { - ret = -FI_ENOMEM; - goto fail; - } - - fi->caps = USDF_DGRAM_CAPS; - - ret = validate_modebits(version, hints, - USDF_DGRAM_SUPP_MODE, &fi->mode); - if (ret) - goto fail; - - if (hints != NULL) { - addr_format = hints->addr_format; - - /* check that we are capable of what's requested */ - if ((hints->caps & ~USDF_DGRAM_CAPS) != 0) { - ret = -FI_ENODATA; - goto fail; - } - - fi->handle = hints->handle; - } else { - addr_format = FI_FORMAT_UNSPEC; - } - fi->ep_attr->type = FI_EP_DGRAM; - - ret = usdf_fill_addr_info(fi, addr_format, src, dest, dap); - if (ret != 0) { - goto fail; - } - - /* fabric attrs */ - fattrp = fi->fabric_attr; - ret = usdf_fabric_getname(version, dap, &fattrp->name); - if (ret < 0 || fattrp->name == NULL) { - ret = -FI_ENOMEM; - goto fail; - } - - if (fi->mode & FI_MSG_PREFIX) { - if (FI_VERSION_GE(version, FI_VERSION(1, 1))) - fi->ep_attr->msg_prefix_size = USDF_HDR_BUF_ENTRY; - else - fi->mode &= ~FI_MSG_PREFIX; - } - - ret = 
usdf_dgram_fill_ep_attr(version, hints, fi, dap); - if (ret) - goto fail; - - ret = usdf_dgram_fill_dom_attr(version, hints, fi, dap); - if (ret) - goto fail; - - ret = usdf_dgram_fill_tx_attr(version, hints, fi, dap); - if (ret) - goto fail; - - ret = usdf_dgram_fill_rx_attr(version, hints, fi, dap); - if (ret) - goto fail; - - ret = usdf_alloc_fid_nic(fi, dap); - if (ret) - goto fail; - - /* add to tail of list */ - if (*fi_first == NULL) { - *fi_first = fi; - } else { - (*fi_last)->next = fi; - } - *fi_last = fi; - - return 0; - -fail: - if (fi != NULL) { - fi_freeinfo(fi); - } - return ret; -} - -static int -usdf_get_devinfo(void) -{ - struct usdf_usnic_info *dp; - struct usdf_dev_entry *dep; - struct usd_open_params params; - int ret; - int d; - - assert(__usdf_devinfo == NULL); - - dp = calloc(1, sizeof(*dp)); - if (dp == NULL) { - ret = -FI_ENOMEM; - goto fail; - } - __usdf_devinfo = dp; - - dp->uu_num_devs = USD_MAX_DEVICES; - ret = usd_get_device_list(dp->uu_devs, &dp->uu_num_devs); - if (ret != 0) { - dp->uu_num_devs = 0; - goto fail; - } - - for (d = 0; d < dp->uu_num_devs; ++d) { - dep = &dp->uu_info[d]; - - memset(¶ms, 0, sizeof(params)); - params.flags = UOPF_SKIP_PD_ALLOC; - params.cmd_fd = -1; - params.context = NULL; - ret = usd_open_with_params(dp->uu_devs[d].ude_devname, - ¶ms, &dep->ue_dev); - if (ret != 0) { - continue; - } - - ret = usd_get_device_attrs(dep->ue_dev, &dep->ue_dattr); - if (ret != 0) { - continue; - } - - dep->ue_dev_ok = 1; /* this device is OK */ - - usd_close(dep->ue_dev); - dep->ue_dev = NULL; - } - return 0; - -fail: - return ret; -} - -static int -usdf_get_distance( - struct usd_device_attrs *dap, - uint32_t daddr_be, - int *metric_o) -{ - uint32_t nh_ip_addr; - int ret; - - USDF_TRACE("\n"); - - ret = usnic_nl_rt_lookup(dap->uda_ipaddr_be, daddr_be, - dap->uda_ifindex, &nh_ip_addr); - if (ret != 0) { - *metric_o = -1; - ret = 0; - } else if (nh_ip_addr == 0) { - *metric_o = 0; - } else { - *metric_o = 1; - } - - return 
ret; -} - -/* Check all things related to a device. Make sure it's okay, the source address - * matches the requested address, the destination is reachable from the device, - * the device fabric name matches the requested fabric name, and the device - * domain name matches the requested domain name. - * - * @param version Libfabric API version used to verify the domain / fabric name. - * @param hints Hints passed to fi_getinfo. - * @param src Source address being requested. - * @param dest Destination address to communicate with. - * @param dep usNIC device entry being checked. - * - * @return true on success, false on failure. For debug logging can be enabled - * to see why a device was disqualified. - */ -static bool usdf_check_device(uint32_t version, const struct fi_info *hints, - void *src, void *dest, - struct usdf_dev_entry *dep) -{ - char dest_str[INET_ADDRSTRLEN]; - char src_str[INET_ADDRSTRLEN]; - char dev_str[INET_ADDRSTRLEN]; - struct usd_device_attrs *dap; - struct sockaddr_in *sin; - int reachable; - int ret; - - reachable = -1; - dap = &dep->ue_dattr; - - /* Skip the device if it has problems. */ - if (!dep->ue_dev_ok) { - USDF_WARN_SYS(FABRIC, "skipping %s/%s device not ok\n", - dap->uda_devname, dap->uda_ifname); - return false; - } - - /* If the given source address is not INADDR_ANY, compare against the - * device. - */ - if (src) { - sin = usdf_format_to_sin(hints, src); - if (sin->sin_addr.s_addr != INADDR_ANY) { - if (sin->sin_addr.s_addr != dap->uda_ipaddr_be) { - inet_ntop(AF_INET, &sin->sin_addr.s_addr, - src_str, sizeof(src_str)); - inet_ntop(AF_INET, &dap->uda_ipaddr_be, - dev_str, sizeof(dev_str)); - USDF_WARN_SYS(FABRIC, - "src addr<%s> != dev addr<%s>\n", - src_str, dev_str); - goto fail; - } - } - - usdf_free_sin_if_needed(hints, sin); - } - - /* Check that the given destination address is reachable from the - * interface. 
- */ - if (dest) { - sin = usdf_format_to_sin(hints, dest); - if (sin->sin_addr.s_addr != INADDR_ANY) { - ret = usdf_get_distance(dap, sin->sin_addr.s_addr, - &reachable); - if (ret) { - inet_ntop(AF_INET, - &sin->sin_addr.s_addr, dest_str, - sizeof(dest_str)); - USDF_WARN_SYS(FABRIC, - "get_distance failed @ %s\n", - dest_str); - goto fail; - } - } - - if (reachable == -1) { - inet_ntop(AF_INET, &sin->sin_addr.s_addr, dest_str, - sizeof(dest_str)); - USDF_WARN_SYS(FABRIC, - "dest %s unreachable from %s/%s, skipping\n", - dest_str, dap->uda_devname, - dap->uda_ifname); - goto fail; - } - - usdf_free_sin_if_needed(hints, sin); - } - - /* Checks that the fabric name is correct for the given interface. The - * fabric name contains the CIDR notation for the interface. - */ - if (hints && hints->fabric_attr && hints->fabric_attr->name) { - if (!usdf_fabric_checkname(version, dap, - hints->fabric_attr->name)) - return false; - } - - /* Check that the domain name is correct for the given interface. The - * domain name is the device name. - */ - if (hints && hints->domain_attr && hints->domain_attr->name) { - if (!usdf_domain_checkname(version, dap, - hints->domain_attr->name)) - return false; - } - - return true; - -fail: - usdf_free_sin_if_needed(hints, sin); - - return false; -} - -static int -usdf_handle_node_and_service(const char *node, const char *service, - uint64_t flags, void **src, void **dest, - const struct fi_info *hints, struct addrinfo **ai) -{ - int ret; - struct sockaddr_in *sin; - - if (node != NULL || service != NULL) { - if (hints && hints->addr_format == FI_ADDR_STR) { - /* FI_ADDR_STR can't have service param. */ - if (service) - return -FI_EINVAL; - - sin = usdf_format_to_sin(hints, node); - - if (!sin) - /* This could be invalid or no memory. 
*/ - return -FI_EINVAL; - } else { - ret = getaddrinfo(node, service, NULL, ai); - if (ret != 0) { - USDF_DBG("getaddrinfo failed: %d: <%s>\n", ret, - gai_strerror(ret)); - return ret; - } - sin = (struct sockaddr_in *)(*ai)->ai_addr; - } - - if (flags & FI_SOURCE) - *src = usdf_sin_to_format(hints, sin, NULL); - else - *dest = usdf_sin_to_format(hints, sin, NULL); - } - - return FI_SUCCESS; -} - -static int -usdf_getinfo(uint32_t version, const char *node, const char *service, - uint64_t flags, const struct fi_info *hints, struct fi_info **info) -{ - struct usdf_usnic_info *dp; - struct usdf_dev_entry *dep; - struct usd_device_attrs *dap; - struct fi_info *fi_first; - struct fi_info *fi_last; - struct addrinfo *ai; - void *src; - void *dest; - enum fi_ep_type ep_type; - int d; - int ret; - - USDF_TRACE("\n"); - - fi_first = NULL; - fi_last = NULL; - ai = NULL; - src = NULL; - dest = NULL; - - /* - * Get and cache usNIC device info - */ - if (__usdf_devinfo == NULL) { - ret = usdf_get_devinfo(); - if (ret != 0) { - USDF_WARN("failed to usdf_get_devinfo, ret=%d (%s)\n", - ret, fi_strerror(-ret)); - if (ret == -FI_ENODEV) - ret = -FI_ENODATA; - goto fail; - } - } - dp = __usdf_devinfo; - - /* Check the hints up front and fail if they're invalid. */ - if (hints) { - ret = usdf_validate_hints(version, hints); - if (ret) { - USDF_WARN_SYS(FABRIC, "hints failed to validate\n"); - goto fail; - } - } - - /* Get the src and dest if user specified. 
*/ - ret = usdf_handle_node_and_service(node, service, flags, - &src, &dest, hints, &ai); - if (ret) { - USDF_WARN_SYS(FABRIC, "failed to handle node and service.\n"); - goto fail; - } - - if (hints != NULL) { - if (dest == NULL && hints->dest_addr != NULL) - dest = hints->dest_addr; - if (src == NULL && hints->src_addr != NULL) - src = hints->src_addr; - } - - for (d = 0; d < dp->uu_num_devs; ++d) { - dep = &dp->uu_info[d]; - dap = &dep->ue_dattr; - - /* If the device has an issue or the hints don't match the - * device information, then skip. - */ - if (!usdf_check_device(version, hints, src, dest, dep)) - continue; - - if (hints && hints->ep_attr) - ep_type = hints->ep_attr->type; - else - ep_type = FI_EP_UNSPEC; - - if (ep_type == FI_EP_DGRAM || ep_type == FI_EP_UNSPEC) { - ret = usdf_fill_info_dgram(version, hints, src, dest, - dap, &fi_first, &fi_last); - if (ret != 0 && ret != -FI_ENODATA) { - goto fail; - } - } - } - - if (fi_first != NULL) { - *info = fi_first; - ret = 0; - } else { - ret = -FI_ENODATA; - } - - -fail: - if (ai) - freeaddrinfo(ai); - - if (ret != 0) { - fi_freeinfo(fi_first); - USDF_INFO("returning %d (%s)\n", ret, fi_strerror(-ret)); - } - - return ret; -} - -static int -usdf_fabric_close(fid_t fid) -{ - struct usdf_fabric *fp; - int ret; - void *rv; - - USDF_TRACE("\n"); - - fp = fab_fidtou(fid); - if (ofi_atomic_get32(&fp->fab_refcnt) > 0) { - return -FI_EBUSY; - } - /* Tell progression thread to exit */ - fp->fab_exit = 1; - - free(fp->fab_attr.name); - free(fp->fab_attr.prov_name); - - if (fp->fab_thread) { - ret = usdf_fabric_wake_thread(fp); - if (ret != 0) { - return ret; - } - pthread_join(fp->fab_thread, &rv); - } - usdf_timer_deinit(fp); - if (fp->fab_epollfd != OFI_EPOLL_INVALID) { - ofi_epoll_close(fp->fab_epollfd); - } - if (fp->fab_eventfd != -1) { - close(fp->fab_eventfd); - } - if (fp->fab_arp_sockfd != -1) { - close(fp->fab_arp_sockfd); - } - - free(fp); - return 0; -} - -static struct fi_ops usdf_fi_ops = { - .size = 
sizeof(struct fi_ops), - .close = usdf_fabric_close, - .bind = fi_no_bind, - .control = fi_no_control, - .ops_open = usdf_fabric_ops_open, -}; - -static struct fi_ops_fabric usdf_ops_fabric = { - .size = sizeof(struct fi_ops_fabric), - .domain = usdf_domain_open, - .passive_ep = usdf_pep_open, - .eq_open = usdf_eq_open, - .wait_open = usdf_wait_open, - .trywait = usdf_trywait -}; - -static int -usdf_fabric_open(struct fi_fabric_attr *fattrp, struct fid_fabric **fabric, - void *context) -{ - struct fid_fabric *ff; - struct usdf_fabric *fp; - struct usdf_usnic_info *dp; - struct usdf_dev_entry *dep; - struct sockaddr_in sin; - int ret; - int d; - - USDF_TRACE("\n"); - - /* Make sure this fabric exists */ - dp = __usdf_devinfo; - for (d = 0; d < dp->uu_num_devs; ++d) { - dep = &dp->uu_info[d]; - if (dep->ue_dev_ok && - usdf_fabric_checkname(0, &(dep->ue_dattr), fattrp->name)) { - break; - } - } - if (d >= dp->uu_num_devs) { - USDF_INFO("device \"%s\" does not exit, returning -FI_ENODEV\n", - fattrp->name); - return -FI_ENODEV; - } - - fp = calloc(1, sizeof(*fp)); - if (fp == NULL) { - USDF_INFO("unable to allocate memory for fabric\n"); - return -FI_ENOMEM; - } - fp->fab_epollfd = OFI_EPOLL_INVALID; - fp->fab_arp_sockfd = -1; - LIST_INIT(&fp->fab_domain_list); - - fp->fab_attr.fabric = fab_utof(fp); - fp->fab_attr.name = strdup(fattrp->name); - fp->fab_attr.prov_name = strdup(USDF_PROV_NAME); - fp->fab_attr.prov_version = USDF_PROV_VERSION; - if (fp->fab_attr.name == NULL || - fp->fab_attr.prov_name == NULL) { - ret = -FI_ENOMEM; - goto fail; - } - - fp->fab_fid.fid.fclass = FI_CLASS_FABRIC; - fp->fab_fid.fid.context = context; - fp->fab_fid.fid.ops = &usdf_fi_ops; - fp->fab_fid.ops = &usdf_ops_fabric; - - fp->fab_dev_attrs = &dep->ue_dattr; - - ret = ofi_epoll_create(&fp->fab_epollfd); - if (ret) { - USDF_INFO("unable to allocate epoll fd\n"); - goto fail; - } - - fp->fab_eventfd = eventfd(0, EFD_NONBLOCK | EFD_SEMAPHORE); - if (fp->fab_eventfd == -1) { - ret = 
-errno; - USDF_INFO("unable to allocate event fd\n"); - goto fail; - } - fp->fab_poll_item.pi_rtn = usdf_fabric_progression_cb; - fp->fab_poll_item.pi_context = fp; - ret = ofi_epoll_add(fp->fab_epollfd, fp->fab_eventfd, OFI_EPOLL_IN, - &fp->fab_poll_item); - if (ret) { - USDF_INFO("unable to EPOLL_CTL_ADD\n"); - goto fail; - } - - /* initialize timer subsystem */ - ret = usdf_timer_init(fp); - if (ret != 0) { - USDF_INFO("unable to initialize timer\n"); - goto fail; - } - - /* create and bind socket for ARP resolution */ - memset(&sin, 0, sizeof(sin)); - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = fp->fab_dev_attrs->uda_ipaddr_be; - fp->fab_arp_sockfd = socket(AF_INET, SOCK_DGRAM, 0); - if (fp->fab_arp_sockfd == -1) { - USDF_INFO("unable to create socket\n"); - goto fail; - } - ret = bind(fp->fab_arp_sockfd, (struct sockaddr *) &sin, sizeof(sin)); - if (ret == -1) { - ret = -errno; - goto fail; - } - - ofi_atomic_initialize32(&fp->fab_refcnt, 0); - ofi_atomic_initialize32(&fp->num_blocked_waiting, 0); - - ret = pthread_create(&fp->fab_thread, NULL, - usdf_fabric_progression_thread, fp); - if (ret != 0) { - ret = -ret; - USDF_INFO("unable to create progress thread\n"); - goto fail; - } - - fattrp->fabric = fab_utof(fp); - fattrp->prov_version = USDF_PROV_VERSION; - *fabric = fab_utof(fp); - USDF_INFO("successfully opened %s/%s\n", fattrp->name, - fp->fab_dev_attrs->uda_ifname); - return 0; - -fail: - free(fp->fab_attr.name); - free(fp->fab_attr.prov_name); - ff = fab_utof(fp); - usdf_fabric_close(&ff->fid); - USDF_DBG("returning %d (%s)\n", ret, fi_strerror(-ret)); - return ret; -} - -static void usdf_fini(void) -{ - USDF_TRACE("\n"); -} - -struct fi_provider usdf_ops = { - .name = USDF_PROV_NAME, - .version = USDF_PROV_VERSION, - .fi_version = OFI_VERSION_LATEST, - .getinfo = usdf_getinfo, - .fabric = usdf_fabric_open, - .cleanup = usdf_fini -}; - -USNIC_INI -{ -#if USNIC_BUILD_FAKE_VERBS_DRIVER - usdf_setup_fake_ibv_provider(); -#endif - return (&usdf_ops); 
-} diff --git a/prov/usnic/src/usdf_fake_ibv.c b/prov/usnic/src/usdf_fake_ibv.c deleted file mode 100644 index 5b51d0b363d..00000000000 --- a/prov/usnic/src/usdf_fake_ibv.c +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2015, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -/* - * The code in this file prevents spurious libibverbs warnings on - * stderr about devices that it doesn't recognize. - * - * Specifically, Cisco usNIC devices are exposed through the Linux - * InfiniBand kernel interface (i.e., they show up in - * /sys/class/infiniband). However, the userspace side of these - * drivers is not exposed through libibverbs (i.e., there is no - * libibverbs provider/plugin for usNIC). Therefore, when - * ibv_get_device_list() is invoked, libibverbs cannot find a plugin - * for usnic devices. This causes libibverbs to emit a spurious - * warning message on stderr. - * - * Since libfabric can have a verbs provider, libibverbs is invoked, - * triggering the sequence described above, resulting in warning - * messages about usnic devices. To avoid these extra stderr - * warnings, we insert a fake usnic verbs libibverbs provider that - * safely squelches these warnings. - * - * More specifically: the userspace side of usNIC is exposed through - * libfabric; we don't need libibverbs warnings about not being able - * to find a usnic driver. - */ - -#include "config.h" - -#include - -#include -#include - -/***********************************************************************/ - -#ifndef PCI_VENDOR_ID_CISCO -#define PCI_VENDOR_ID_CISCO 0x1137 -#endif - -static struct ibv_context *fake_alloc_context(struct ibv_device *ibdev, - int cmd_fd) -{ - /* Nothing to do here */ - return NULL; -} - -static void fake_free_context(struct ibv_context *ibctx) -{ - /* Nothing to do here */ -} - -/* Put just enough in here to convince libibverbs that this is a valid - device, and a little extra just in case someone looks at this - struct in a debugger. 
*/ -static struct ibv_device fake_dev = { - .ops = { - .alloc_context = fake_alloc_context, - .free_context = fake_free_context - }, - .name = "fake ibv_device inserted by libfabric:usNIC" -}; - -static struct ibv_device *fake_driver_init(const char *uverbs_sys_path, - int abi_version) -{ - char value[8]; - int vendor; - - /* This function should only be invoked for - /sys/class/infiniband/usnic_X devices, but double check just to - be absolutely sure: read the vendor ID and ensure that it is - Cisco. */ - if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor", - value, sizeof(value)) < 0) { - return NULL; - } - sscanf(value, "%i", &vendor); - - if (vendor == PCI_VENDOR_ID_CISCO) { - return &fake_dev; - } - - /* We didn't find a device that we want to support */ - return NULL; -} - - -void usdf_setup_fake_ibv_provider(void) -{ - /* Register a fake driver for "usnic_verbs" devices */ - ibv_register_driver("usnic_verbs", fake_driver_init); -} diff --git a/prov/usnic/src/usdf_mem.c b/prov/usnic/src/usdf_mem.c deleted file mode 100644 index 10fd43744ed..00000000000 --- a/prov/usnic/src/usdf_mem.c +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (c) 2014-2017, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include "ofi.h" -#include "ofi_enosys.h" - -#include "usnic_direct.h" -#include "usdf.h" - -static -int usdf_dereg_mr(fid_t fid) -{ - struct usdf_mr *mr; - int ret; - - mr = container_of(fid, struct usdf_mr, mr_fid.fid); - ret = usd_dereg_mr(mr->mr_mr); - if (ret == 0) { - free(mr); - } - return ret; -} - -static struct fi_ops usdf_mr_ops = { - .size = sizeof(struct fi_ops), - .close = usdf_dereg_mr, - .bind = fi_no_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open, -}; - -int -usdf_reg_mr(struct fid *fid, const void *buf, size_t len, - uint64_t access, uint64_t offset, uint64_t requested_key, - uint64_t flags, struct fid_mr **mr_o, void *context) -{ - struct usdf_mr *mr; - struct usdf_domain *udp; - int ret; - struct fid_domain *domain; - - if (flags != 0) { - return -FI_EBADFLAGS; - } - - if (fid->fclass != FI_CLASS_DOMAIN) { - USDF_DBG("memory registration only supported " - "for struct fid_domain\n"); - return -FI_EINVAL; - } - domain = container_of(fid, struct fid_domain, fid); - - mr = calloc(1, sizeof *mr); - if (mr == NULL) { - return -FI_ENOMEM; - } - - mr->mr_fid.fid.fclass = FI_CLASS_MR; - mr->mr_fid.fid.context = context; - mr->mr_fid.fid.ops = &usdf_mr_ops; - - udp = container_of(domain, struct usdf_domain, dom_fid.fid); - ret = usd_reg_mr(udp->dom_dev, (void *) buf, len, &mr->mr_mr); - if (ret != 0) { - goto fail; - } - - *mr_o = &mr->mr_fid; - return 0; - -fail: - free(mr); - return ret; -} - -/* We dont have proper support for regv and regattr. This is just - * a simple mapping to usdf_reg_mr. We can do this because we forced - * mr_iov_limit = 1 (made this mapping possible) by default. 
- */ -int usdf_regv_mr(struct fid *fid, const struct iovec *iov, - size_t count, uint64_t access, - uint64_t offset, uint64_t requested_key, - uint64_t flags, struct fid_mr **mr, void *context) -{ - if (count > USDF_MR_IOV_LIMIT) { - USDF_DBG_SYS(DOMAIN, "usnic provider only support 1 iov.\n"); - return -FI_EINVAL; - } - - return usdf_reg_mr(fid, iov[0].iov_base, iov[0].iov_len, access, - offset, requested_key, flags, mr, context); -} - -int usdf_regattr(struct fid *fid, const struct fi_mr_attr *attr, - uint64_t flags, struct fid_mr **mr) -{ - if (attr->iov_count > USDF_MR_IOV_LIMIT) { - USDF_DBG_SYS(DOMAIN, "usnic provider only support 1 iov.\n"); - return -FI_EINVAL; - } - - return usdf_reg_mr(fid, attr->mr_iov[0].iov_base, - attr->mr_iov[0].iov_len, - attr->access, - attr->offset, - attr->requested_key, - flags, mr, attr->context); -} diff --git a/prov/usnic/src/usdf_pep.c b/prov/usnic/src/usdf_pep.c deleted file mode 100644 index 0d4811e279d..00000000000 --- a/prov/usnic/src/usdf_pep.c +++ /dev/null @@ -1,838 +0,0 @@ -/* - * Copyright (c) 2014-2019, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include "ofi.h" -#include "ofi_enosys.h" -#include "ofi_file.h" - -#include "fi_ext_usnic.h" -#include "usnic_direct.h" -#include "usd.h" -#include "usdf.h" -#include "usdf_endpoint.h" -#include "usdf_cm.h" - -static int -usdf_pep_bind(fid_t fid, fid_t bfid, uint64_t flags) -{ - struct usdf_pep *pep; - - USDF_TRACE_SYS(EP_CTRL, "\n"); - - pep = pep_fidtou(fid); - - switch (bfid->fclass) { - - case FI_CLASS_EQ: - if (pep->pep_eq != NULL) { - return -FI_EINVAL; - } - pep->pep_eq = eq_fidtou(bfid); - ofi_atomic_inc32(&pep->pep_eq->eq_refcnt); - break; - - default: - return -FI_EINVAL; - } - - return 0; -} - -static struct fi_info * -usdf_pep_conn_info(struct usdf_connreq *crp) -{ - struct fi_info *ip; - struct usdf_pep *pep; - struct sockaddr_in *sin; - struct usdf_connreq_msg *reqp; - - pep = crp->cr_pep; - reqp = (struct usdf_connreq_msg *)crp->cr_data; - - ip = fi_dupinfo(pep->pep_info); - if (!ip) { - USDF_WARN_SYS(EP_CTRL, "failed to duplicate pep info\n"); - 
return NULL; - } - - /* fill in dest addr */ - ip->dest_addrlen = ip->src_addrlen; - sin = calloc(1, ip->dest_addrlen); - if (sin == NULL) { - goto fail; - } - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = reqp->creq_ipaddr; - sin->sin_port = reqp->creq_port; - - ip->dest_addr = usdf_sin_to_format(pep->pep_info, sin, - &ip->dest_addrlen); - - ip->handle = (fid_t) crp; - return ip; -fail: - fi_freeinfo(ip); - return NULL; -} - -/* - * Remove connection request from epoll list if not done already. - * crp->cr_pollitem.pi_rtn is non-NULL when epoll() is active - */ -static int -usdf_pep_creq_epoll_del(struct usdf_connreq *crp) -{ - int ret; - struct usdf_pep *pep; - - pep = crp->cr_pep; - - if (crp->cr_pollitem.pi_rtn != NULL) { - ret = ofi_epoll_del(pep->pep_fabric->fab_epollfd, - crp->cr_sockfd); - crp->cr_pollitem.pi_rtn = NULL; - if (ret != 0) { - ret = -errno; - } - } else { - ret = 0; - } - return ret; -} - -static int -usdf_pep_read_connreq(void *v) -{ - struct usdf_connreq *crp; - struct usdf_pep *pep; - struct usdf_connreq_msg *reqp; - struct fi_eq_cm_entry *entry; - size_t entry_len; - int ret; - int n; - - crp = v; - pep = crp->cr_pep; - - n = read(crp->cr_sockfd, crp->cr_ptr, crp->cr_resid); - if (n == -1) { - ret = -errno; - goto report_failure_skip_data; - } - - crp->cr_ptr += n; - crp->cr_resid -= n; - - reqp = (struct usdf_connreq_msg *)crp->cr_data; - - if (crp->cr_resid == 0 && crp->cr_ptr == crp->cr_data + sizeof(*reqp)) { - reqp->creq_datalen = ntohl(reqp->creq_datalen); - crp->cr_resid = reqp->creq_datalen; - } - - /* if resid is 0 now, completely done */ - if (crp->cr_resid == 0) { - ret = usdf_pep_creq_epoll_del(crp); - if (ret != 0) - goto report_failure_skip_data; - - /* create CONNREQ EQ entry */ - entry_len = sizeof(*entry) + reqp->creq_datalen; - entry = malloc(entry_len); - if (entry == NULL) { - ret = -errno; - goto report_failure_skip_data; - } - - entry->fid = &pep->pep_fid.fid; - entry->info = usdf_pep_conn_info(crp); - if 
(entry->info == NULL) { - ret = -FI_ENOMEM; - goto free_entry_and_report_failure; - } - - memcpy(entry->data, reqp->creq_data, reqp->creq_datalen); - ret = usdf_eq_write_internal(pep->pep_eq, FI_CONNREQ, entry, - entry_len, 0); - - if (ret != (int)entry_len) - goto free_entry_and_report_failure; - - free(entry); - } - - return 0; - -free_entry_and_report_failure: - free(entry); -report_failure_skip_data: - usdf_cm_report_failure(crp, ret, false); - return 0; -} - -static int -usdf_pep_listen_cb(void *v) -{ - struct usdf_pep *pep; - struct sockaddr_in sin; - struct usdf_connreq *crp; - socklen_t socklen; - int ret; - int s; - - pep = v; - - socklen = sizeof(sin); - s = accept(pep->pep_sock, &sin, &socklen); - if (s == -1) { - /* ignore early failure */ - return 0; - } - crp = NULL; - pthread_spin_lock(&pep->pep_cr_lock); - if (!TAILQ_EMPTY(&pep->pep_cr_free)) { - crp = TAILQ_FIRST(&pep->pep_cr_free); - TAILQ_REMOVE_MARK(&pep->pep_cr_free, crp, cr_link); - TAILQ_NEXT(crp, cr_link) = NULL; - } - pthread_spin_unlock(&pep->pep_cr_lock); - - /* no room for request, just drop it */ - if (crp == NULL) { - /* XXX send response? 
*/ - close(s); - return 0; - } - - crp->cr_sockfd = s; - crp->cr_pep = pep; - crp->cr_ptr = crp->cr_data; - crp->cr_resid = sizeof(struct usdf_connreq_msg); - - crp->cr_pollitem.pi_rtn = usdf_pep_read_connreq; - crp->cr_pollitem.pi_context = crp; - - ret = ofi_epoll_add(pep->pep_fabric->fab_epollfd, crp->cr_sockfd, - OFI_EPOLL_IN, &crp->cr_pollitem); - if (ret) { - usdf_cm_report_failure(crp, ret, false); - return 0; - } - - TAILQ_INSERT_TAIL(&pep->pep_cr_pending, crp, cr_link); - - return 0; -} - -static int -usdf_pep_listen(struct fid_pep *fpep) -{ - struct usdf_pep *pep; - struct usdf_fabric *fp; - struct sockaddr_in *sin; - socklen_t socklen; - int ret; - bool addr_format_str; - - USDF_TRACE_SYS(EP_CTRL, "\n"); - - pep = pep_ftou(fpep); - fp = pep->pep_fabric; - addr_format_str = (pep->pep_info->addr_format == FI_ADDR_STR); - sin = NULL; - - switch (pep->pep_state) { - case USDF_PEP_UNBOUND: - case USDF_PEP_BOUND: - break; - case USDF_PEP_LISTENING: - USDF_WARN_SYS(EP_CTRL, "PEP already LISTENING!\n"); - return -FI_EOPBADSTATE; - case USDF_PEP_ROBBED: - USDF_WARN_SYS(EP_CTRL, - "PEP already consumed, you may only fi_close() now\n"); - return -FI_EOPBADSTATE; - default: - USDF_WARN_SYS(EP_CTRL, "unhandled case! (%d)\n", - pep->pep_state); - abort(); - } - - /* we could already be bound if the user called fi_setname() or if we - * already did the bind in a previous call to usdf_pep_listen() and the - * listen(2) call failed */ - if (pep->pep_state == USDF_PEP_UNBOUND) { - sin = usdf_format_to_sin(pep->pep_info, &pep->pep_src_addr); - if (sin == NULL) - goto fail; - - ret = bind(pep->pep_sock, sin, sizeof(struct sockaddr_in)); - if (ret == -1) { - goto fail; - } - - /* Get the actual port (since we may have requested - * port 0) - */ - socklen = sizeof(*sin); - ret = getsockname(pep->pep_sock, sin, - &socklen); - if (ret == -1) - goto fail; - - /* If it's FI_ADDR_STR, we have to update the string - * with this method. 
(FI_SOCKADDR_IN got taken care of, above) - */ - if (addr_format_str) { - pep->pep_info->src_addrlen = USDF_ADDR_STR_LEN; - usdf_addr_tostr(sin, pep->pep_src_addr.addr_str, - &pep->pep_info->src_addrlen); - } - - /* Update the state to bound. */ - pep->pep_state = USDF_PEP_BOUND; - } - - ret = listen(pep->pep_sock, pep->pep_backlog); - if (ret != 0) { - goto fail; - } - pep->pep_state = USDF_PEP_LISTENING; - - pep->pep_pollitem.pi_rtn = usdf_pep_listen_cb; - pep->pep_pollitem.pi_context = pep; - ret = ofi_epoll_add(fp->fab_epollfd, pep->pep_sock, OFI_EPOLL_IN, - &pep->pep_pollitem); - if (ret) { - errno = -ret; - goto fail; - } - - return 0; - -fail: - usdf_free_sin_if_needed(pep->pep_info, sin); - - return -errno; -} - -/* Register as a callback triggered by the socket becoming writeable. Write as - * much data as can be written in a single write, and keep track of how much - * data is left. If the data is not fully written, it will finish getting - * written in another iteration of the progression. 
- */ -static int usdf_pep_reject_async(void *vreq) -{ - struct usdf_connreq *crp; - int ret; - - crp = vreq; - - do { - ret = write(crp->cr_sockfd, crp->cr_ptr, crp->cr_resid); - } while ((ret < 0) && (errno == EINTR)); - - if ((ret <= 0) && (errno != EAGAIN)) { - USDF_DBG_SYS(EP_CTRL, "write failed: %s\n", - strerror(errno)); - usdf_cm_report_failure(crp, -errno, false); - return -errno; - } - - crp->cr_resid -= ret; - crp->cr_ptr += ret; - - return FI_SUCCESS; -} - -static int usdf_pep_reject(struct fid_pep *fpep, fid_t handle, const void *param, - size_t paramlen) -{ - struct usdf_pep *pep; - struct usdf_connreq *crp; - struct usdf_connreq_msg *reqp; - int ret; - - if (paramlen > USDF_MAX_CONN_DATA) { - USDF_WARN_SYS(EP_CTRL, - "reject payload size %zu exceeds max %u\n", - paramlen, USDF_MAX_CONN_DATA); - return -FI_EINVAL; - } - - if (!fpep || !handle) { - USDF_WARN_SYS(EP_CTRL, - "handle and passive ep needed for reject\n"); - return -FI_EINVAL; - } - - if (!param && paramlen > 0) { - USDF_WARN_SYS(EP_CTRL, - "NULL data pointer with non-zero data length\n"); - return -FI_EINVAL; - } - - /* usdf_pep_conn_info stashed the pep pointer into the handle field of - * the info struct previously returned - */ - crp = (struct usdf_connreq *) handle; - pep = pep_ftou(fpep); - - crp->cr_ptr = crp->cr_data; - crp->cr_resid = sizeof(*reqp) + paramlen; - - reqp = (struct usdf_connreq_msg *) crp->cr_data; - - /* The result field is used on the remote end to detect whether the - * connection succeeded or failed. 
- */ - reqp->creq_result = htonl(-FI_ECONNREFUSED); - reqp->creq_datalen = htonl(paramlen); - memcpy(reqp->creq_data, param, paramlen); - - crp->cr_pollitem.pi_rtn = usdf_pep_reject_async; - crp->cr_pollitem.pi_context = crp; - - ret = ofi_epoll_add(pep->pep_fabric->fab_epollfd, crp->cr_sockfd, - OFI_EPOLL_OUT, &crp->cr_pollitem); - return ret; -} - -static void -usdf_pep_free_cr_lists(struct usdf_pep *pep) -{ - struct usdf_connreq *crp; - - while (!TAILQ_EMPTY(&pep->pep_cr_free)) { - crp = TAILQ_FIRST(&pep->pep_cr_free); - TAILQ_REMOVE(&pep->pep_cr_free, crp, cr_link); - free(crp); - } - - while (!TAILQ_EMPTY(&pep->pep_cr_pending)) { - crp = TAILQ_FIRST(&pep->pep_cr_pending); - TAILQ_REMOVE(&pep->pep_cr_pending, crp, cr_link); - free(crp); - } -} - -static int -usdf_pep_grow_backlog(struct usdf_pep *pep) -{ - struct usdf_connreq *crp; - size_t extra; - - extra = sizeof(struct usdf_connreq_msg) + pep->pep_cr_max_data; - - while (pep->pep_cr_alloced < pep->pep_backlog) { - crp = calloc(1, sizeof(*crp) + extra); - if (crp == NULL) { - return -FI_ENOMEM; - } - crp->handle.fclass = FI_CLASS_CONNREQ; - pthread_spin_lock(&pep->pep_cr_lock); - TAILQ_INSERT_TAIL(&pep->pep_cr_free, crp, cr_link); - ++pep->pep_cr_alloced; - pthread_spin_unlock(&pep->pep_cr_lock); - } - return 0; -} - -static int -usdf_pep_close(fid_t fid) -{ - struct usdf_pep *pep; - - USDF_TRACE_SYS(EP_CTRL, "\n"); - - pep = pep_fidtou(fid); - if (ofi_atomic_get32(&pep->pep_refcnt) > 0) { - return -FI_EBUSY; - } - - usdf_pep_free_cr_lists(pep); - close(pep->pep_sock); - pep->pep_sock = -1; - if (pep->pep_eq != NULL) { - ofi_atomic_dec32(&pep->pep_eq->eq_refcnt); - } - ofi_atomic_dec32(&pep->pep_fabric->fab_refcnt); - fi_freeinfo(pep->pep_info); - free(pep); - - return 0; -} - -static int usdf_pep_getname(fid_t fid, void *addr, size_t *addrlen) -{ - int ret; - struct usdf_pep *pep; - struct fi_info *info; - size_t copylen; - - USDF_TRACE_SYS(EP_CTRL, "\n"); - - ret = FI_SUCCESS; - pep = pep_fidtou(fid); - 
info = pep->pep_info; - - copylen = info->src_addrlen; - memcpy(addr, &pep->pep_src_addr, MIN(copylen, *addrlen)); - - if (*addrlen < copylen) { - USDF_WARN_SYS(EP_CTRL, "*addrlen is too short\n"); - ret = -FI_ETOOSMALL; - } - - *addrlen = copylen; - return ret; -} - -static int usdf_pep_setname(fid_t fid, void *addr, size_t addrlen) -{ - int ret; - struct usdf_pep *pep; - struct fi_info *info; - struct sockaddr_in *sin; - uint32_t req_addr_be; - socklen_t socklen; - char namebuf[INET_ADDRSTRLEN]; - char servbuf[INET_ADDRSTRLEN]; - bool addr_format_str; - - USDF_TRACE_SYS(EP_CTRL, "\n"); - - pep = pep_fidtou(fid); - info = pep->pep_info; - addr_format_str = (info->addr_format == FI_ADDR_STR); - sin = NULL; - - if (pep->pep_state != USDF_PEP_UNBOUND) { - USDF_WARN_SYS(EP_CTRL, "PEP cannot be bound\n"); - return -FI_EOPBADSTATE; - } - - switch (info->addr_format) { - case FI_SOCKADDR: - case FI_SOCKADDR_IN: - /* It is possible for passive endpoint to not have src_addr. */ - if (info->src_addr) { - ret = usdf_cm_addr_is_valid_sin(info->src_addr, - info->src_addrlen, - info->addr_format); - if (!ret) - return -FI_EINVAL; - } - break; - case FI_ADDR_STR: - break; - default: - return -FI_EINVAL; - } - - sin = usdf_format_to_sin(info, addr); - req_addr_be = sin->sin_addr.s_addr; - - namebuf[0] = '\0'; - servbuf[0] = '\0'; - ret = getnameinfo((struct sockaddr *)sin, sizeof(struct sockaddr_in), - namebuf, sizeof(namebuf), - servbuf, sizeof(servbuf), - NI_NUMERICHOST|NI_NUMERICSERV); - if (ret != 0) - USDF_WARN_SYS(EP_CTRL, "unable to getnameinfo(0x%x)\n", - req_addr_be); - - if (req_addr_be != pep->pep_fabric->fab_dev_attrs->uda_ipaddr_be) { - USDF_WARN_SYS(EP_CTRL, "requested addr (%s:%s) does not match fabric addr\n", - namebuf, servbuf); - return -FI_EADDRNOTAVAIL; - } - - ret = bind(pep->pep_sock, sin, sizeof(*sin)); - if (ret == -1) { - return -errno; - } - pep->pep_state = USDF_PEP_BOUND; - - /* store the resulting port so that can implement getname() properly */ - 
socklen = sizeof(*sin); - ret = getsockname(pep->pep_sock, sin, &socklen); - if (ret == -1) { - ret = -errno; - USDF_WARN_SYS(EP_CTRL, "getsockname failed %d (%s), PEP may be in bad state\n", - ret, strerror(-ret)); - return ret; - } - - if (addr_format_str) { - /* We have to reset src_addrlen here and - * the conversion will update it to the correct len. - */ - info->src_addrlen = USDF_ADDR_STR_LEN; - usdf_addr_tostr(sin, pep->pep_src_addr.addr_str, - &info->src_addrlen); - free(sin); - } else { - memcpy(&pep->pep_src_addr, sin, sizeof(*sin)); - } - - return 0; -} - -struct fi_ops usdf_pep_ops = { - .size = sizeof(struct fi_ops), - .close = usdf_pep_close, - .bind = usdf_pep_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open -}; - -static struct fi_ops_ep usdf_pep_base_ops = { - .size = sizeof(struct fi_ops_ep), - .cancel = fi_no_cancel, - .getopt = usdf_ep_getopt_connected, - .setopt = usdf_ep_setopt, - .tx_ctx = fi_no_tx_ctx, - .rx_ctx = fi_no_rx_ctx, - .rx_size_left = fi_no_rx_size_left, - .tx_size_left = fi_no_tx_size_left, -}; - -static struct fi_ops_cm usdf_pep_cm_ops = { - .size = sizeof(struct fi_ops_cm), - .setname = usdf_pep_setname, - .getname = usdf_pep_getname, - .getpeer = fi_no_getpeer, - .connect = fi_no_connect, - .listen = usdf_pep_listen, - .accept = fi_no_accept, - .reject = usdf_pep_reject, - .shutdown = fi_no_shutdown, - .join = fi_no_join, -}; - -int -usdf_pep_open(struct fid_fabric *fabric, struct fi_info *info, - struct fid_pep **pep_o, void *context) -{ - struct usdf_pep *pep; - struct usdf_fabric *fp; - struct sockaddr_in *sin; - int ret; - int optval; - - USDF_TRACE_SYS(EP_CTRL, "\n"); - - if (!info) { - USDF_DBG_SYS(EP_CTRL, "null fi_info struct is invalid\n"); - return -FI_EINVAL; - } - - if (info->ep_attr->type != FI_EP_MSG) { - return -FI_ENODEV; - } - - switch (info->addr_format) { - case FI_SOCKADDR: - case FI_SOCKADDR_IN: - /* It is possible for passive endpoint to not have src_addr. 
*/ - if (info->src_addr) { - ret = usdf_cm_addr_is_valid_sin(info->src_addr, - info->src_addrlen, - info->addr_format); - if (!ret) - return -FI_EINVAL; - } - break; - case FI_ADDR_STR: - break; - default: - USDF_WARN_SYS(EP_CTRL, "unknown/unsupported addr_format\n"); - return -FI_EINVAL; - } - - fp = fab_ftou(fabric); - - pep = calloc(1, sizeof(*pep)); - if (pep == NULL) { - return -FI_ENOMEM; - } - - pep->pep_fid.fid.fclass = FI_CLASS_PEP; - pep->pep_fid.fid.context = context; - pep->pep_fid.fid.ops = &usdf_pep_ops; - pep->pep_fid.ops = &usdf_pep_base_ops; - pep->pep_fid.cm = &usdf_pep_cm_ops; - pep->pep_fabric = fp; - - pep->pep_state = USDF_PEP_UNBOUND; - pep->pep_sock = socket(AF_INET, SOCK_STREAM, 0); - if (pep->pep_sock == -1) { - ret = -errno; - goto fail; - } - ret = fi_fd_nonblock(pep->pep_sock); - if (ret) { - ret = -errno; - goto fail; - } - - /* set SO_REUSEADDR to prevent annoying "Address already in use" errors - * on successive runs of programs listening on a well known port */ - optval = 1; - ret = setsockopt(pep->pep_sock, SOL_SOCKET, SO_REUSEADDR, &optval, - sizeof(optval)); - if (ret == -1) { - ret = -errno; - goto fail; - } - - pep->pep_info = fi_dupinfo(info); - if (!pep->pep_info) { - ret = -FI_ENOMEM; - goto fail; - } - - if (info->src_addrlen == 0) { - /* Copy the source address information from the device - * attributes. 
- */ - pep->pep_info->src_addrlen = sizeof(struct sockaddr_in); - sin = calloc(1, pep->pep_info->src_addrlen); - if (!sin) { - USDF_WARN_SYS(EP_CTRL, - "calloc for src address failed\n"); - goto fail; - } - - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = fp->fab_dev_attrs->uda_ipaddr_be; - - pep->pep_info->src_addr = - usdf_sin_to_format(pep->pep_info, - sin, &pep->pep_info->src_addrlen); - } - - memcpy(&pep->pep_src_addr, pep->pep_info->src_addr, - pep->pep_info->src_addrlen); - - /* initialize connreq freelist */ - ret = pthread_spin_init(&pep->pep_cr_lock, PTHREAD_PROCESS_PRIVATE); - if (ret != 0) { - ret = -ret; - goto fail; - } - TAILQ_INIT(&pep->pep_cr_free); - TAILQ_INIT(&pep->pep_cr_pending); - pep->pep_backlog = 10; - pep->pep_cr_max_data = USDF_MAX_CONN_DATA; - - ret = usdf_pep_grow_backlog(pep); - if (ret != 0) { - goto fail; - } - - ofi_atomic_initialize32(&pep->pep_refcnt, 0); - ofi_atomic_inc32(&fp->fab_refcnt); - - *pep_o = pep_utof(pep); - return 0; - -fail: - if (pep != NULL) { - usdf_pep_free_cr_lists(pep); - if (pep->pep_sock != -1) { - close(pep->pep_sock); - } - fi_freeinfo(pep->pep_info); - free(pep); - } - return ret; -} - -/* Steals the socket underpinning the PEP for use by an active endpoint. After - * this call, the only valid action a user may take on this PEP is to close it. - * Sets "*is_bound=1" if the socket was already bound to an address, - * "*is_bound=0" if not bound, or "*is_bound" will be undefined if this function - * returns a non-zero error code. 
*/ -int usdf_pep_steal_socket(struct usdf_pep *pep, int *is_bound, int *sock_o) -{ - switch (pep->pep_state) { - case USDF_PEP_UNBOUND: - if (is_bound != NULL) - *is_bound = 0; - break; - case USDF_PEP_BOUND: - if (is_bound != NULL) - *is_bound = 1; - break; - case USDF_PEP_LISTENING: - USDF_WARN_SYS(EP_CTRL, - "PEP already listening, cannot use as \"handle\" in fi_endpoint()\n"); - return -FI_EOPBADSTATE; - case USDF_PEP_ROBBED: - USDF_WARN_SYS(EP_CTRL, - "PEP already consumed, you may only fi_close() now\n"); - return -FI_EOPBADSTATE; - } - - *sock_o = pep->pep_sock; - pep->pep_sock = -1; - pep->pep_state = USDF_PEP_ROBBED; - return 0; -} diff --git a/prov/usnic/src/usdf_poll.c b/prov/usnic/src/usdf_poll.c deleted file mode 100644 index 9c8081c34f9..00000000000 --- a/prov/usnic/src/usdf_poll.c +++ /dev/null @@ -1,293 +0,0 @@ -/* - * Copyright (c) 2016, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include - -#include "usdf.h" -#include "usdf_cq.h" -#include "usdf_poll.h" - -static int usdf_poll_poll(struct fid_poll *fps, void **context, int count) -{ - struct usdf_cq *cq; - struct usdf_poll *ps; - struct dlist_entry *item; - struct fid_list_entry *entry; - int progressed = 0; - int copied = 0; - int pending; - - if (!fps || !context) { - USDF_WARN_SYS(DOMAIN, "pollset and context can't be NULL.\n"); - return -FI_EINVAL; - } - - ps = poll_ftou(fps); - - ofi_mutex_lock(&ps->lock); - - dlist_foreach(&ps->list, item) { - entry = container_of(item, struct fid_list_entry, entry); - assert(entry->fid->fclass == FI_CLASS_CQ); - - cq = cq_fidtou(entry->fid); - - if (cq->cq_is_soft) { - if (!progressed) { - usdf_domain_progress(ps->poll_domain); - progressed = 1; - } - - pending = !usdf_check_empty_soft_cq(cq); - } else { - pending = !usdf_check_empty_hard_cq(cq); - } - - if (pending) { - context[copied++] = entry->fid->context; - - if (copied >= count) - break; - } - } - - ofi_mutex_unlock(&ps->lock); - - return copied; -} - -static int usdf_poll_add(struct fid_poll *fps, struct fid *event_fid, - uint64_t flags) -{ - struct usdf_poll *ps; - struct usdf_cq *cq; - int ret; - - USDF_TRACE_SYS(DOMAIN, "\n"); - - if (!fps || !event_fid) { - USDF_WARN_SYS(DOMAIN, "pollset and event_fid can't be NULL.\n"); - return -FI_EINVAL; - } - - ps = poll_ftou(fps); - - switch (event_fid->fclass) { - case FI_CLASS_CQ: - 
break; - default: - USDF_WARN_SYS(DOMAIN, "invalid fid class.\n"); - return -FI_EINVAL; - } - - ret = fid_list_insert(&ps->list, &ps->lock, event_fid); - if (ret) - return ret; - - cq = cq_fidtou(event_fid); - ret = ofi_atomic_inc32(&cq->cq_refcnt); - assert(ret > 0); - USDF_DBG_SYS(DOMAIN, "associated with CQ: [%p] with new refcnt: [%d]\n", - cq, ret); - - return FI_SUCCESS; -} - -static int usdf_poll_del(struct fid_poll *fps, struct fid *event_fid, - uint64_t flags) -{ - struct usdf_poll *ps; - struct usdf_cq *cq; - int ret; - - if (!fps || !event_fid) { - USDF_WARN_SYS(DOMAIN, "pollset and event_fid can't be NULL.\n"); - return -FI_EINVAL; - } - - USDF_TRACE_SYS(DOMAIN, "\n"); - - ps = poll_ftou(fps); - - switch (event_fid->fclass) { - case FI_CLASS_CQ: - break; - default: - USDF_WARN_SYS(DOMAIN, "invalid fid class.\n"); - return -FI_EINVAL; - } - - fid_list_remove(&ps->list, &ps->lock, event_fid); - - cq = cq_fidtou(event_fid); - ret = ofi_atomic_dec32(&cq->cq_refcnt); - - USDF_DBG_SYS(DOMAIN, - "disassociating from CQ: [%p] with new refcnt: [%d]\n", - cq, ret); - assert(ret >= 0); - - if (ret >= 0) - ret = FI_SUCCESS; - else - ret = -FI_EINVAL; - return ret; -} - -static int usdf_poll_close(struct fid *fps) -{ - struct usdf_poll *ps; - struct dlist_entry *item; - struct dlist_entry *head; - struct fid_list_entry *entry; - struct usdf_cq *cq; - int val, ret = FI_SUCCESS; - - USDF_TRACE_SYS(DOMAIN, "\n"); - - if (!fps) { - USDF_WARN_SYS(DOMAIN, "pollset can't be NULL.\n"); - return -FI_EINVAL; - } - - ps = poll_ftou(fps); - - if (ofi_atomic_get32(&ps->poll_refcnt) > 0) { - USDF_WARN_SYS(DOMAIN, - "failed to close pollset with non-zero refcnt"); - return -FI_EBUSY; - } - - head = &ps->list; - while (!dlist_empty(head)) { - item = head->next; - entry = container_of(item, struct fid_list_entry, entry); - - switch (entry->fid->fclass) { - case FI_CLASS_CQ: - cq = cq_fidtou(entry->fid); - val = ofi_atomic_dec32(&cq->cq_refcnt); - - USDF_DBG_SYS(DOMAIN, - 
"disassociating from CQ: [%p] with new refcnt: [%d]\n", - cq, val); - assert(val >= 0); - if (val < 0) - ret = -FI_EINVAL; - break; - default: - USDF_WARN_SYS(DOMAIN, "invalid object\n"); - break; - } - - dlist_remove(item); - free(entry); - } - - ofi_atomic_dec32(&ps->poll_domain->dom_refcnt); - ofi_mutex_destroy(&ps->lock); - free(ps); - - return ret; -} - -struct fi_ops_poll usdf_poll_ops = { - .size = sizeof(struct fi_ops_poll), - .poll = usdf_poll_poll, - .poll_add = usdf_poll_add, - .poll_del = usdf_poll_del -}; - -struct fi_ops usdf_poll_fi_ops = { - .size = sizeof(struct fi_ops), - .close = usdf_poll_close, - .bind = fi_no_bind, - .ops_open = fi_no_ops_open -}; - -int usdf_poll_open(struct fid_domain *fdom, struct fi_poll_attr *attr, - struct fid_poll **fps) -{ - struct usdf_poll *ps; - struct usdf_domain *dom; - int ret; - - USDF_TRACE_SYS(DOMAIN, "\n"); - - if (attr && attr->flags != 0) { - USDF_WARN_SYS(DOMAIN, "flags field of poll attr must be 0.\n"); - ret = -FI_EINVAL; - goto error; - } - - dom = dom_ftou(fdom); - - ps = calloc(1, sizeof(*ps)); - if (!ps) { - USDF_WARN_SYS(DOMAIN, - "unable to allocate memory for poll obj"); - ret = -FI_ENOMEM; - goto error; - } - - dlist_init(&ps->list); - ofi_atomic_initialize32(&ps->poll_refcnt, 0); - ofi_mutex_init(&ps->lock); - - ps->poll_fid.fid.ops = &usdf_poll_fi_ops; - ps->poll_fid.fid.fclass = FI_CLASS_POLL; - ps->poll_fid.fid.context = 0; - - ps->poll_fid.ops = &usdf_poll_ops; - - ps->poll_domain = dom; - - ret = ofi_atomic_inc32(&ps->poll_domain->dom_refcnt); - - USDF_DBG_SYS(DOMAIN, - "created pollset from domain: [%p] with new refcnt: [%d]\n", - ps->poll_domain, ret); - - *fps = &ps->poll_fid; - - return FI_SUCCESS; - -error: - *fps = NULL; - return ret; -} diff --git a/prov/usnic/src/usdf_poll.h b/prov/usnic/src/usdf_poll.h deleted file mode 100644 index 1bb27c9e5b8..00000000000 --- a/prov/usnic/src/usdf_poll.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2016, Cisco Systems, Inc. 
All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef _USDF_POLL_H_ -#define _USDF_POLL_H_ - -#include "ofi_list.h" - -struct usdf_poll { - struct fid_poll poll_fid; - struct usdf_domain *poll_domain; - - ofi_atomic32_t poll_refcnt; - ofi_mutex_t lock; - struct dlist_entry list; -}; - -#define poll_ftou(fpl) container_of((fpl), struct usdf_poll, poll_fid) - -int usdf_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr, - struct fid_poll **pollset); - -#endif diff --git a/prov/usnic/src/usdf_progress.c b/prov/usnic/src/usdf_progress.c deleted file mode 100644 index b5db07e14f7..00000000000 --- a/prov/usnic/src/usdf_progress.c +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2014-2016, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include "ofi.h" - -#include "usnic_direct.h" -#include "usdf.h" -#include "usdf_progress.h" -#include "usdf_timer.h" - -int -usdf_fabric_wake_thread(struct usdf_fabric *fp) -{ - uint64_t val; - int n; - - val = 1; - n = write(fp->fab_eventfd, &val, sizeof(val)); - if (n != sizeof(val)) { - return -FI_EIO; - } - return 0; -} - -int -usdf_fabric_progression_cb(void *v) -{ - struct usdf_fabric *fp; - uint64_t val; - int n; - - fp = v; - n = read(fp->fab_eventfd, &val, sizeof(val)); - if (n != sizeof(val)) { - return -FI_EIO; - } - return 0; -} - -void * -usdf_fabric_progression_thread(void *v) -{ - struct usdf_fabric *fp; - struct usdf_poll_item *pip; - struct usdf_domain *dom; - int num_blocked_waiting; - int sleep_time; - ofi_epoll_t epfd; - struct ofi_epollfds_event event; - int ret; - int n; - - fp = v; - epfd = fp->fab_epollfd; - - while (1) { - num_blocked_waiting = ofi_atomic_get32(&fp->num_blocked_waiting); - - /* sleep inifinitely if nothing to do */ - if ((fp->fab_active_timer_count > 0) || - (num_blocked_waiting > 0)) { - sleep_time = 1; - } else { - sleep_time = -1; - } - - n = ofi_epoll_wait(epfd, &event, 1, sleep_time); - if (fp->fab_exit || (n < 0 && n != EINTR)) { - pthread_exit(NULL); - } - - /* consume event if there was one */ - if (n == 1) { - pip = event.data.ptr; - ret 
= pip->pi_rtn(pip->pi_context); - if (ret != 0) { - pthread_exit(NULL); - } - } - - /* call timer progress each wakeup */ - usdf_timer_progress(fp); - - LIST_FOREACH(dom, &fp->fab_domain_list, dom_link) { - usdf_domain_progress(dom); - } - } -} - -/* - * Progress operations in this domain - */ -void -usdf_domain_progress(struct usdf_domain *udp) -{ - struct usdf_tx *tx; - struct usdf_cq_hard *hcq; - - /* one big hammer lock... */ - pthread_spin_lock(&udp->dom_progress_lock); - - TAILQ_FOREACH(hcq, &udp->dom_hcq_list, cqh_dom_link) { - hcq->cqh_progress(hcq); - } - - while (!TAILQ_EMPTY(&udp->dom_tx_ready)) { - tx = TAILQ_FIRST(&udp->dom_tx_ready); - TAILQ_REMOVE_MARK(&udp->dom_tx_ready, tx, tx_link); - - tx->tx_progress(tx); - } - - pthread_spin_unlock(&udp->dom_progress_lock); -} diff --git a/prov/usnic/src/usdf_progress.h b/prov/usnic/src/usdf_progress.h deleted file mode 100644 index 5ac184fa00b..00000000000 --- a/prov/usnic/src/usdf_progress.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ -#ifndef _USDF_PROGRESS_H_ -#define _USDF_PROGRESS_H_ - -struct usdf_poll_item { - int (*pi_rtn)(void *context); - void *pi_context; -}; - -struct usdf_fabric; -struct usdf_domain; - -void *usdf_fabric_progression_thread(void *v); -int usdf_fabric_wake_thread(struct usdf_fabric *fp); -int usdf_fabric_progression_cb(void *v); -void usdf_domain_progress(struct usdf_domain *udp); - -#endif /* _USDF_PROGRESS_H_ */ diff --git a/prov/usnic/src/usdf_rudp.h b/prov/usnic/src/usdf_rudp.h deleted file mode 100644 index e284408dfd9..00000000000 --- a/prov/usnic/src/usdf_rudp.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ -#ifndef _USDF_RUDP_H_ -#define _USDF_RUDP_H_ - -#include "usnic_direct.h" - -#define USDF_RUDP_SEQ_CREDITS 256 -#define USDF_RUDP_ACK_TIMEOUT 5 /* ms */ - -#define RUDP_SEQ_DIFF(A, B) ((int16_t)((u_int16_t)(A) - (u_int16_t)(B))) -#define RUDP_SEQ_LT(A, B) (RUDP_SEQ_DIFF((A), (B)) < 0) -#define RUDP_SEQ_LE(A, B) (RUDP_SEQ_DIFF((A), (B)) <= 0) -#define RUDP_SEQ_GT(A, B) (RUDP_SEQ_DIFF((A), (B)) > 0) -#define RUDP_SEQ_GE(A, B) (RUDP_SEQ_DIFF((A), (B)) >= 0) - -#define RUDP_MSGID_DIFF(A, B) ((int32_t)((u_int32_t)(A) - (u_int32_t)(B))) -#define RUDP_MSGID_LT(A, B) (RUDP_MSGID_DIFF((A), (B)) < 0) -#define RUDP_MSGID_LE(A, B) (RUDP_MSGID_DIFF((A), (B)) <= 0) -#define RUDP_MSGID_GT(A, B) (RUDP_MSGID_DIFF((A), (B)) > 0) -#define RUDP_MSGID_GE(A, B) (RUDP_MSGID_DIFF((A), (B)) >= 0) - -enum { - /* data messages (a bitmask of FIRST and LAST) */ - RUDP_OP_MID = 0x00, - RUDP_OP_FIRST = 0x01, - RUDP_OP_LAST = 0x02, - RUDP_OP_ONLY = 0x03, - - /* control messages */ - RUDP_OP_CONNECT_REQ = 0x81, - RUDP_OP_CONNECT_RESP = 0x82, - RUDP_OP_NAK = 0x83, - RUDP_OP_ACK = 0x84, -}; - -#define RUDP_OP_DATA_MASK (RUDP_OP_FIRST | RUDP_OP_LAST) - -struct rudp_rc_data_msg { - u_int32_t offset; /* 4 */ - u_int16_t rkey; /* 8 */ - u_int16_t length; /* 10 */ - u_int16_t seqno; /* 12 */ - u_int16_t rdma_id; /* 14 */ -} __attribute__ ((__packed__)); - -struct rudp_msg { - u_int16_t opcode; - u_int16_t src_peer_id; - u_int32_t msg_id; - union { - struct rudp_rc_data_msg rc_data; - struct { - u_int16_t dst_peer_id; - } connect_req; - struct { - u_int16_t dst_peer_id; - } connect_resp; - struct { - u_int16_t ack_seq; - } ack; - struct { - u_int16_t nak_seq; - u_int32_t seq_mask; - } nak; - } __attribute__ ((__packed__)) m; -} __attribute__ ((__packed__)); - - -struct rudp_pkt { - struct usd_udp_hdr hdr; - struct rudp_msg msg; -} __attribute__ ((__packed__)); - - -#endif /* _USDF_RUDP_H_ */ diff --git a/prov/usnic/src/usdf_socket.c b/prov/usnic/src/usdf_socket.c deleted file mode 100644 index 
2d1f7cfe438..00000000000 --- a/prov/usnic/src/usdf_socket.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" - -#include -#include -#include -#include - -#include - -#include "usdf_socket.h" - -int -usdf_check_sock_error(int sock, uint32_t events) -{ - socklen_t len; - int status; - int ret; - - if (events & EPOLLBAD) { - return -FI_ECONNRESET; - } - - len = sizeof(status); - ret = getsockopt(sock, SOL_SOCKET, SO_ERROR, &status, &len); - if (ret == -1) { - return -errno; - } - if (status != 0) { - return -status; - } - return 0; -} diff --git a/prov/usnic/src/usdf_socket.h b/prov/usnic/src/usdf_socket.h deleted file mode 100644 index 790a0fe59e0..00000000000 --- a/prov/usnic/src/usdf_socket.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ -#ifndef _USDF_SOCKET_H_ -#define _USDF_SOCKET_H_ - -#define EPOLLBAD (EPOLLHUP | EPOLLRDHUP | EPOLLERR) - -int usdf_check_sock_error(int sock, uint32_t events); - -#endif /* _USDF_SOCKET_H_ */ diff --git a/prov/usnic/src/usdf_timer.c b/prov/usnic/src/usdf_timer.c deleted file mode 100644 index 2ee31697d8b..00000000000 --- a/prov/usnic/src/usdf_timer.c +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -#include -#include -#include - -#include "rdma/fabric.h" -#include "rdma/fi_errno.h" -#include "ofi.h" - -#include "usnic_direct.h" - -#include "usdf.h" -#include "usdf_timer.h" - -enum { - USDF_TF_QUEUED = (1 << 0), - USDF_TF_ACTIVE = (1 << 1), - USDF_TF_FREED = (1 << 2) -}; - -#define USDF_NUM_TIMER_BUCKETS (16 * 1024) /* roughly 16 seconds max delay */ - -struct usdf_timer_entry { - struct usdf_fabric *te_fabric; - - usdf_timer_callback_t te_callback; - void *te_context; - - uint32_t te_flags; - LIST_ENTRY(usdf_timer_entry) te_link; -}; - -/* - * Create a timer entry, registering a callback and argument. 
- */ -int -usdf_timer_alloc(usdf_timer_callback_t cb, void *context, - struct usdf_timer_entry **entry_o) -{ - struct usdf_timer_entry *entry; - - entry = calloc(1, sizeof(*entry)); - if (entry == NULL) { - return -FI_ENOMEM; - } - - entry->te_callback = cb; - entry->te_context = context; - entry->te_flags = 0; - - *entry_o = entry; - return 0; -} - -void -usdf_timer_free(struct usdf_fabric *fp, struct usdf_timer_entry *entry) -{ - pthread_spin_lock(&fp->fab_timer_lock); - - if (entry->te_flags & USDF_TF_ACTIVE) { - entry->te_flags |= USDF_TF_FREED; - } else { - if (entry->te_flags & USDF_TF_QUEUED) { - LIST_REMOVE(entry, te_link); - } - free(entry); - } - - pthread_spin_unlock(&fp->fab_timer_lock); -} - -void -usdf_timer_cancel(struct usdf_fabric *fp, struct usdf_timer_entry *entry) -{ - pthread_spin_lock(&fp->fab_timer_lock); - - if (entry->te_flags & USDF_TF_QUEUED) { - LIST_REMOVE(entry, te_link); - entry->te_flags &= ~USDF_TF_QUEUED; - --fp->fab_active_timer_count; - } - - pthread_spin_unlock(&fp->fab_timer_lock); -} - -/* - * Set this timer to fire "ms" milliseconds from now. If the timer is already - * queued, previous timeout will be discarded. - * - * When timer expires, the registered timer callback will be called and - * the timer entry removed from the queued list. The timer routine will not - * be called again until usdf_timer_set() is called again to re-set it. - * usdf_timer_set() is safe to call from timer service routine. 
- */ -static inline int -_usdf_timer_do_set(struct usdf_fabric *fp, struct usdf_timer_entry *entry, - uint32_t ms) -{ - int ret; - unsigned bucket; - - /* If no timers active, cur_bucket_ms may need catchup */ - ++fp->fab_active_timer_count; - if (fp->fab_active_timer_count == 1) { - fp->fab_cur_bucket_ms = usdf_get_ms(); - ret = usdf_fabric_wake_thread(fp); - if (ret != 0) { - --fp->fab_active_timer_count; - return ret; - } - } - - if (entry->te_flags & USDF_TF_QUEUED) { - LIST_REMOVE(entry, te_link); - --fp->fab_active_timer_count; - } - - // we could make "overflow" bucket... - if (ms >= USDF_NUM_TIMER_BUCKETS) { - --fp->fab_active_timer_count; - return -FI_EINVAL; - } - bucket = (fp->fab_cur_bucket + ms) & (USDF_NUM_TIMER_BUCKETS - 1); - - LIST_INSERT_HEAD(&fp->fab_timer_buckets[bucket], entry, te_link); - entry->te_flags |= USDF_TF_QUEUED; - return 0; -} - -int -usdf_timer_set(struct usdf_fabric *fp, struct usdf_timer_entry *entry, - uint32_t ms) -{ - int ret; - - pthread_spin_lock(&fp->fab_timer_lock); - if (entry->te_flags & USDF_TF_QUEUED) { - ret = 0; - } else { - ret = _usdf_timer_do_set(fp, entry, ms); - } - pthread_spin_unlock(&fp->fab_timer_lock); - - return ret; -} - -int -usdf_timer_reset(struct usdf_fabric *fp, struct usdf_timer_entry *entry, - uint32_t ms) -{ - int ret; - - pthread_spin_lock(&fp->fab_timer_lock); - ret = _usdf_timer_do_set(fp, entry, ms); - pthread_spin_unlock(&fp->fab_timer_lock); - - return ret; -} - - -static inline void -usdf_run_bucket(struct usdf_fabric *fp, struct usdf_timer_bucket *bp) -{ - struct usdf_timer_entry *entry; - - while (!LIST_EMPTY(bp)) { - entry = LIST_FIRST(bp); - LIST_REMOVE(entry, te_link); - entry->te_flags |= USDF_TF_ACTIVE; - entry->te_flags &= ~USDF_TF_QUEUED; - --fp->fab_active_timer_count; - - /* call timer service routine without lock */ - pthread_spin_unlock(&fp->fab_timer_lock); - entry->te_callback(entry->te_context); - pthread_spin_lock(&fp->fab_timer_lock); - } -} - -/* - * Called only from 
fabric progression thread - */ -void -usdf_timer_progress(struct usdf_fabric *fp) -{ - pthread_spin_lock(&fp->fab_timer_lock); - - while (fp->fab_cur_bucket_ms < usdf_get_ms()) { - usdf_run_bucket(fp, - &fp->fab_timer_buckets[fp->fab_cur_bucket]); - - ++fp->fab_cur_bucket_ms; - fp->fab_cur_bucket = (fp->fab_cur_bucket + 1) & - (USDF_NUM_TIMER_BUCKETS - 1); - } - - pthread_spin_unlock(&fp->fab_timer_lock); -} - -/* - * Initialize timer data - */ -int -usdf_timer_init(struct usdf_fabric *fp) -{ - int i; - - pthread_spin_init(&fp->fab_timer_lock, PTHREAD_PROCESS_PRIVATE); - - fp->fab_timer_buckets = calloc(USDF_NUM_TIMER_BUCKETS, - sizeof(struct usdf_timer_bucket)); - if (fp->fab_timer_buckets == NULL) { - return -FI_ENOMEM; - } - - for (i = 0; i < USDF_NUM_TIMER_BUCKETS; ++i) { - LIST_INIT(&fp->fab_timer_buckets[i]); - } - - fp->fab_cur_bucket = 0; - fp->fab_cur_bucket_ms = usdf_get_ms(); - return 0; -} - -void -usdf_timer_deinit(struct usdf_fabric *fp) -{ - free(fp->fab_timer_buckets); -} diff --git a/prov/usnic/src/usdf_timer.h b/prov/usnic/src/usdf_timer.h deleted file mode 100644 index aecebc8257d..00000000000 --- a/prov/usnic/src/usdf_timer.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ -#ifndef _USDF_TIMER_H_ -#define _USDF_TIMER_H_ - -#include - -struct usdf_timer_entry; - -static inline uint64_t -usdf_get_ms(void) -{ - struct timespec now; - uint64_t ms; - - clock_gettime(CLOCK_MONOTONIC, &now); - ms = now.tv_sec * 1000 + now.tv_nsec / 1000000; - - return ms; -} - -typedef void (*usdf_timer_callback_t)(void *); - -int usdf_timer_alloc(usdf_timer_callback_t cb, void *arg, - struct usdf_timer_entry **entry); - -void usdf_timer_free(struct usdf_fabric *fp, struct usdf_timer_entry *entry); - -int usdf_timer_set(struct usdf_fabric *fp, struct usdf_timer_entry *entry, - uint32_t timeout); -int usdf_timer_reset(struct usdf_fabric *fp, struct usdf_timer_entry *entry, - uint32_t timeout); - -void usdf_timer_cancel(struct usdf_fabric *fp, struct usdf_timer_entry *entry); - -void usdf_timer_progress(struct usdf_fabric *fp); - -int usdf_timer_init(struct usdf_fabric *fp); -void usdf_timer_deinit(struct usdf_fabric *fp); - -#endif /* _USDF_TIMER_H_ */ diff --git a/prov/usnic/src/usdf_wait.c b/prov/usnic/src/usdf_wait.c deleted file mode 100644 index d2125e9c1ee..00000000000 --- a/prov/usnic/src/usdf_wait.c +++ /dev/null @@ -1,346 +0,0 @@ -/* - * Copyright (c) 2016, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/* This needs to be included for usdf.h */ -#include "ofi.h" -#include "ofi_enosys.h" -#include "ofi_util.h" - -#include "usdf.h" -#include "usdf_cq.h" -#include "usdf_wait.h" - - -/* Necessary to support top-of-file struct declarations. 
*/ -static int usdf_wait_wait(struct fid_wait *wait_fid, int timeout); -static int usdf_wait_close(struct fid *waitset); -static int usdf_wait_control(struct fid *fid, int command, void *arg); - -static struct fi_ops_wait usdf_wait_ops = { - .size = sizeof(struct fi_ops_wait), - .wait = usdf_wait_wait, -}; - -static struct fi_ops usdf_wait_fi_ops = { - .size = sizeof(struct fi_ops), - .close = usdf_wait_close, - .bind = fi_no_bind, - .control = usdf_wait_control, - .ops_open = fi_no_ops_open -}; - -static int usdf_wait_trywait(struct fid *fwait) -{ - struct usdf_wait *wait; - struct dlist_entry *item; - struct fid_list_entry *entry; - int ret = FI_SUCCESS; - - wait = wait_fidtou(fwait); - - dlist_foreach(&wait->list, item) { - entry = container_of(item, struct fid_list_entry, entry); - - switch (entry->fid->fclass) { - case FI_CLASS_EQ: - continue; - case FI_CLASS_CQ: - ret = usdf_cq_trywait(entry->fid); - if (ret) - return ret; - break; - default: - USDF_DBG_SYS(FABRIC, "invalid fid %p\n", entry->fid); - return -FI_EINVAL; - } - } - - return ret; -} - -int usdf_trywait(struct fid_fabric *fabric, struct fid **fids, int count) -{ - size_t i; - int ret; - - for (i = 0; i < count; i++) { - assert(fids[i]); - - switch (fids[i]->fclass) { - case FI_CLASS_EQ: - continue; - case FI_CLASS_CQ: - ret = usdf_cq_trywait(fids[i]); - break; - case FI_CLASS_WAIT: - ret = usdf_wait_trywait(fids[i]); - break; - default: - USDF_DBG_SYS(FABRIC, "invalid fid\n"); - return -FI_EINVAL; - } - - if (ret) - return ret; - } - - return FI_SUCCESS; -} - -/* Since a domain hasn't been opened at the time of wait object creation, open a - * device temporarily to check for the group interrupt capability. 
- */ -static int usdf_wait_check_support(struct usdf_fabric *fabric_priv) -{ - struct usd_open_params params = { - .flags = UOPF_SKIP_PD_ALLOC, - .cmd_fd = -1, - .context = NULL - }; - struct usd_device *dev; - int ret; - - ret = usd_open_with_params(fabric_priv->fab_dev_attrs->uda_devname, - ¶ms, &dev); - if (ret) { - USDF_DBG_SYS(FABRIC, - "opening device to check fd support failed.\n"); - return ret; - } - - if (!usd_get_cap(dev, USD_CAP_GRP_INTR)) { - USDF_WARN_SYS(FABRIC, "FD request invalid.\n"); - USDF_WARN_SYS(FABRIC, "group interrupts not supported.\n"); - ret = usd_close(dev); - if (ret) - USDF_WARN_SYS(FABRIC, "closing usd device failed: %s\n", - strerror(ret)); - - return -FI_EOPNOTSUPP; - } - - return usd_close(dev); -} - -/* Non-static because this is exported due to being returned as a callback for - * fabric ops. - * - * Supporting wait objects in the usNIC provider is done using an epoll - * context. When fi_wait_open is called an epoll context is created using - * epoll_create1. This simplifies multi-CQ support and also allows us to get - * around a limitation of the usNIC provider. IB completion channels are opened - * on the domain because we have a context associated with the domain. At - * fi_wait_open time, we only have access to the fabric. It isn't guaranteed - * that a domain has been opened yet. The epoll context approach allows us to - * defer creating the completion channel for the CQ until CQ open time. 
- */ -int usdf_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr, - struct fid_wait **waitset) -{ - struct usdf_wait *wait_priv; - struct usdf_fabric *fabric_priv; - ofi_epoll_t epfd; - int ret; - - USDF_TRACE_SYS(FABRIC, "\n"); - - switch (attr->wait_obj) { - case FI_WAIT_UNSPEC: - case FI_WAIT_FD: - break; - default: - USDF_WARN_SYS(FABRIC, "unsupported wait object type\n"); - ret = -FI_EINVAL; - goto error; - } - - fabric_priv = fab_fidtou(fabric); - ret = usdf_wait_check_support(fabric_priv); - if (ret) - goto error; - - ret = ofi_epoll_create(&epfd); - if (ret) { - USDF_WARN_SYS(FABRIC, "failed to create epoll fd[%d]\n", errno); - goto error; - } - - USDF_DBG_SYS(FABRIC, "successfully created epoll fd: %d\n", epfd); - - wait_priv = calloc(1, sizeof(*wait_priv)); - if (!wait_priv) { - USDF_WARN_SYS(FABRIC, - "unable to allocate memory for usdf_wait obj"); - ret = -FI_ENOMEM; - goto calloc_fail; - } - - wait_priv->wait_fid.fid.fclass = FI_CLASS_WAIT; - wait_priv->wait_fid.fid.ops = &usdf_wait_fi_ops; - wait_priv->wait_fid.ops = &usdf_wait_ops; - wait_priv->wait_fid.fid.context = 0; - wait_priv->wait_fabric = fabric_priv; - wait_priv->wait_obj = attr->wait_obj; - wait_priv->object.epfd = epfd; - - ofi_atomic_initialize32(&wait_priv->wait_refcnt, 0); - ofi_mutex_init(&wait_priv->lock); - dlist_init(&wait_priv->list); - - ofi_atomic_inc32(&wait_priv->wait_fabric->fab_refcnt); - - *waitset = &wait_priv->wait_fid; - - return FI_SUCCESS; - -calloc_fail: - ofi_epoll_close(epfd); -error: - *waitset = NULL; - return ret; -} - -/* Close a wait object. Make sure all resources associated with the wait object - * have been closed. 
- */ -static int usdf_wait_close(struct fid *waitset) -{ - struct usdf_wait *wait_priv; - - USDF_TRACE_SYS(FABRIC, "\n"); - if (!waitset) { - USDF_WARN_SYS(FABRIC, "invalid input.\n"); - return -FI_EINVAL; - } - - wait_priv = wait_ftou(waitset); - - if (ofi_atomic_get32(&wait_priv->wait_refcnt) > 0) { - USDF_DBG_SYS(FABRIC, - "failed to close waitset with non-zero refcnt"); - return -FI_EBUSY; - } - - switch (wait_priv->wait_obj) { - case FI_WAIT_UNSPEC: - case FI_WAIT_FD: - ofi_epoll_close(wait_priv->object.epfd); - break; - default: - USDF_WARN_SYS(FABRIC, - "unsupported wait object type\n"); - return -FI_EINVAL; - } - - ofi_atomic_dec32(&wait_priv->wait_fabric->fab_refcnt); - free(wait_priv); - - return FI_SUCCESS; -} - -static int usdf_wait_wait(struct fid_wait *fwait, int timeout) -{ - struct usdf_wait *wait; - struct ofi_epollfds_event event; - int ret = FI_SUCCESS; - int nevents; - - USDF_TRACE_SYS(FABRIC, "\n"); - wait = wait_ftou(fwait); - - ret = usdf_wait_trywait(&fwait->fid); - if (ret) { - if (ret == -FI_EAGAIN) - return FI_SUCCESS; - - return ret; - } - - nevents = ofi_epoll_wait(wait->object.epfd, &event, 1, timeout); - if (nevents == 0) { - ret = -FI_ETIMEDOUT; - } else if (nevents < 0) { - USDF_DBG_SYS(FABRIC, "epoll wait failed\n"); - ret = nevents; - } - - return ret; -} - -static int usdf_wait_get_wait(struct usdf_wait *wait_priv, void *arg) -{ - USDF_TRACE_SYS(FABRIC, "\n"); - - if (!arg || !wait_priv) { - USDF_WARN_SYS(FABRIC, "invalid input\n"); - return -FI_EINVAL; - } - - switch (wait_priv->wait_obj) { - case FI_WAIT_UNSPEC: - case FI_WAIT_FD: -#ifdef HAVE_EPOLL - *(int *) arg = wait_priv->object.epfd; -#else - return -FI_ENOSYS; -#endif - break; - default: - USDF_DBG_SYS(FABRIC, "unsupported wait type\n"); - return -FI_EINVAL; - } - - return FI_SUCCESS; -} - -static int usdf_wait_control(struct fid *fid, int command, void *arg) -{ - struct usdf_wait *wait_priv; - - USDF_TRACE_SYS(FABRIC, "\n"); - - wait_priv = container_of(fid, struct 
usdf_wait, wait_fid.fid); - - switch (command) { - case FI_GETWAIT: - break; - default: - USDF_DBG_SYS(FABRIC, "unsupported control command\n"); - return -FI_EINVAL; - } - - return usdf_wait_get_wait(wait_priv, arg); -} diff --git a/prov/usnic/src/usdf_wait.h b/prov/usnic/src/usdf_wait.h deleted file mode 100644 index 64219f15afd..00000000000 --- a/prov/usnic/src/usdf_wait.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2016, Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _USDF_WAIT_H_ -#define _USDF_WAIT_H_ - -#include "ofi_list.h" - -struct usdf_wait { - struct fid_wait wait_fid; - struct usdf_fabric *wait_fabric; - - enum fi_wait_obj wait_obj; - union { - ofi_epoll_t epfd; - struct fi_mutex_cond mutex_cond; - } object; - - ofi_atomic32_t wait_refcnt; - - ofi_mutex_t lock; - struct dlist_entry list; -}; - -#define wait_ftou(FWT) container_of(FWT, struct usdf_wait, wait_fid) -#define wait_fidtou(FWT) container_of(FWT, struct usdf_wait, wait_fid.fid) - -int usdf_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr, - struct fid_wait **waitset); -int usdf_trywait(struct fid_fabric *fabric, struct fid **fids, int count); - -#endif diff --git a/prov/usnic/src/usnic_direct/cq_desc.h b/prov/usnic/src/usnic_direct/cq_desc.h deleted file mode 100644 index f110c02c79c..00000000000 --- a/prov/usnic/src/usnic_direct/cq_desc.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright 2008-2010 Cisco Systems, Inc. All rights reserved. - * Copyright 2007 Nuova Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#ifndef _CQ_DESC_H_ -#define _CQ_DESC_H_ - -/* - * Completion queue descriptor types - */ -enum cq_desc_types { - CQ_DESC_TYPE_WQ_ENET = 0, - CQ_DESC_TYPE_DESC_COPY = 1, - CQ_DESC_TYPE_WQ_EXCH = 2, - CQ_DESC_TYPE_RQ_ENET = 3, - CQ_DESC_TYPE_RQ_FCP = 4, - CQ_DESC_TYPE_IOMMU_MISS = 5, - CQ_DESC_TYPE_SGL = 6, - CQ_DESC_TYPE_CLASSIFIER = 7, - CQ_DESC_TYPE_TEST = 127, -}; - -/* Completion queue descriptor: 16B - * - * All completion queues have this basic layout. The - * type_specfic area is unique for each completion - * queue type. - */ -struct cq_desc { - __le16 completed_index; - __le16 q_number; - u8 type_specfic[11]; - u8 type_color; -}; - -#define CQ_DESC_TYPE_BITS 4 -#define CQ_DESC_TYPE_MASK ((1 << CQ_DESC_TYPE_BITS) - 1) -#define CQ_DESC_COLOR_MASK 1 -#define CQ_DESC_COLOR_SHIFT 7 -#define CQ_DESC_Q_NUM_BITS 10 -#define CQ_DESC_Q_NUM_MASK ((1 << CQ_DESC_Q_NUM_BITS) - 1) -#define CQ_DESC_COMP_NDX_BITS 12 -#define CQ_DESC_COMP_NDX_MASK ((1 << CQ_DESC_COMP_NDX_BITS) - 1) - -static inline void cq_color_enc(struct cq_desc *desc, const u8 color) -{ - if (color) - desc->type_color |= (1 << CQ_DESC_COLOR_SHIFT); - else - desc->type_color &= ~(1 << CQ_DESC_COLOR_SHIFT); -} - -static inline void cq_desc_enc(struct cq_desc *desc, - const u8 type, const u8 color, const u16 q_number, - const u16 completed_index) -{ - desc->type_color = (type & CQ_DESC_TYPE_MASK) | - ((color & CQ_DESC_COLOR_MASK) << CQ_DESC_COLOR_SHIFT); - desc->q_number = cpu_to_le16(q_number & CQ_DESC_Q_NUM_MASK); - desc->completed_index = cpu_to_le16(completed_index & - CQ_DESC_COMP_NDX_MASK); -} - -static inline void cq_desc_dec(const struct cq_desc *desc_arg, - u8 *type, u8 *color, u16 *q_number, u16 *completed_index) -{ - const struct cq_desc *desc = desc_arg; - const u8 type_color = desc->type_color; - - *color = (type_color >> CQ_DESC_COLOR_SHIFT) & CQ_DESC_COLOR_MASK; - -#if !defined(__LIBUSNIC__) - /* - * Make sure color bit is read from desc *before* other 
fields - * are read from desc. Hardware guarantees color bit is last - * bit (byte) written. Adding the rmb() prevents the compiler - * and/or CPU from reordering the reads which would potentially - * result in reading stale values. - */ - - rmb(); -#endif - - *type = type_color & CQ_DESC_TYPE_MASK; - *q_number = le16_to_cpu(desc->q_number) & CQ_DESC_Q_NUM_MASK; - *completed_index = le16_to_cpu(desc->completed_index) & - CQ_DESC_COMP_NDX_MASK; -} - -static inline void cq_color_dec(const struct cq_desc *desc_arg, u8 *color) -{ - volatile const struct cq_desc *desc = desc_arg; - - *color = (desc->type_color >> CQ_DESC_COLOR_SHIFT) & CQ_DESC_COLOR_MASK; -} - -#endif /* _CQ_DESC_H_ */ diff --git a/prov/usnic/src/usnic_direct/cq_enet_desc.h b/prov/usnic/src/usnic_direct/cq_enet_desc.h deleted file mode 100644 index c616095d4d8..00000000000 --- a/prov/usnic/src/usnic_direct/cq_enet_desc.h +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Copyright 2008-2010 Cisco Systems, Inc. All rights reserved. - * Copyright 2007 Nuova Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#ifndef _CQ_ENET_DESC_H_ -#define _CQ_ENET_DESC_H_ - -#include "cq_desc.h" - -/* Ethernet completion queue descriptor: 16B */ -struct cq_enet_wq_desc { - __le16 completed_index; - __le16 q_number; - u8 reserved[11]; - u8 type_color; -}; - -static inline void cq_enet_wq_desc_enc(struct cq_enet_wq_desc *desc, - u8 type, u8 color, u16 q_number, u16 completed_index) -{ - cq_desc_enc((struct cq_desc *)desc, type, - color, q_number, completed_index); -} - -static inline void cq_enet_wq_desc_dec(struct cq_enet_wq_desc *desc, - u8 *type, u8 *color, u16 *q_number, u16 *completed_index) -{ - cq_desc_dec((struct cq_desc *)desc, type, - color, q_number, completed_index); -} - -/* Completion queue descriptor: Ethernet receive queue, 16B */ -struct cq_enet_rq_desc { - __le16 completed_index_flags; - __le16 q_number_rss_type_flags; - __le32 rss_hash; - __le16 bytes_written_flags; - __le16 vlan; - __le16 checksum_fcoe; - u8 flags; - u8 type_color; -}; - -#define CQ_ENET_RQ_DESC_FLAGS_INGRESS_PORT (0x1 << 12) -#define CQ_ENET_RQ_DESC_FLAGS_FCOE (0x1 << 13) -#define CQ_ENET_RQ_DESC_FLAGS_EOP (0x1 << 14) -#define CQ_ENET_RQ_DESC_FLAGS_SOP (0x1 << 15) - 
-#define CQ_ENET_RQ_DESC_RSS_TYPE_BITS 4 -#define CQ_ENET_RQ_DESC_RSS_TYPE_MASK \ - ((1 << CQ_ENET_RQ_DESC_RSS_TYPE_BITS) - 1) -#define CQ_ENET_RQ_DESC_RSS_TYPE_NONE 0 -#define CQ_ENET_RQ_DESC_RSS_TYPE_IPv4 1 -#define CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv4 2 -#define CQ_ENET_RQ_DESC_RSS_TYPE_IPv6 3 -#define CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6 4 -#define CQ_ENET_RQ_DESC_RSS_TYPE_IPv6_EX 5 -#define CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6_EX 6 - -#define CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC (0x1 << 14) - -#define CQ_ENET_RQ_DESC_BYTES_WRITTEN_BITS 14 -#define CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK \ - ((1 << CQ_ENET_RQ_DESC_BYTES_WRITTEN_BITS) - 1) -#define CQ_ENET_RQ_DESC_FLAGS_TRUNCATED (0x1 << 14) -#define CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED (0x1 << 15) - -#define CQ_ENET_RQ_DESC_VLAN_TCI_VLAN_BITS 12 -#define CQ_ENET_RQ_DESC_VLAN_TCI_VLAN_MASK \ - ((1 << CQ_ENET_RQ_DESC_VLAN_TCI_VLAN_BITS) - 1) -#define CQ_ENET_RQ_DESC_VLAN_TCI_CFI_MASK (0x1 << 12) -#define CQ_ENET_RQ_DESC_VLAN_TCI_USER_PRIO_BITS 3 -#define CQ_ENET_RQ_DESC_VLAN_TCI_USER_PRIO_MASK \ - ((1 << CQ_ENET_RQ_DESC_VLAN_TCI_USER_PRIO_BITS) - 1) -#define CQ_ENET_RQ_DESC_VLAN_TCI_USER_PRIO_SHIFT 13 - -#define CQ_ENET_RQ_DESC_FCOE_SOF_BITS 8 -#define CQ_ENET_RQ_DESC_FCOE_SOF_MASK \ - ((1 << CQ_ENET_RQ_DESC_FCOE_SOF_BITS) - 1) -#define CQ_ENET_RQ_DESC_FCOE_EOF_BITS 8 -#define CQ_ENET_RQ_DESC_FCOE_EOF_MASK \ - ((1 << CQ_ENET_RQ_DESC_FCOE_EOF_BITS) - 1) -#define CQ_ENET_RQ_DESC_FCOE_EOF_SHIFT 8 - -#define CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK (0x1 << 0) -#define CQ_ENET_RQ_DESC_FCOE_FC_CRC_OK (0x1 << 0) -#define CQ_ENET_RQ_DESC_FLAGS_UDP (0x1 << 1) -#define CQ_ENET_RQ_DESC_FCOE_ENC_ERROR (0x1 << 1) -#define CQ_ENET_RQ_DESC_FLAGS_TCP (0x1 << 2) -#define CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK (0x1 << 3) -#define CQ_ENET_RQ_DESC_FLAGS_IPV6 (0x1 << 4) -#define CQ_ENET_RQ_DESC_FLAGS_IPV4 (0x1 << 5) -#define CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT (0x1 << 6) -#define CQ_ENET_RQ_DESC_FLAGS_FCS_OK (0x1 << 7) - -static inline void 
cq_enet_rq_desc_enc(struct cq_enet_rq_desc *desc, - u8 type, u8 color, u16 q_number, u16 completed_index, - u8 ingress_port, u8 fcoe, u8 eop, u8 sop, u8 rss_type, u8 csum_not_calc, - u32 rss_hash, u16 bytes_written, u8 packet_error, u8 vlan_stripped, - u16 vlan, u16 checksum, u8 fcoe_sof, u8 fcoe_fc_crc_ok, - u8 fcoe_enc_error, u8 fcoe_eof, u8 tcp_udp_csum_ok, u8 udp, u8 tcp, - u8 ipv4_csum_ok, u8 ipv6, u8 ipv4, u8 ipv4_fragment, u8 fcs_ok) -{ - cq_desc_enc((struct cq_desc *)desc, type, - color, q_number, completed_index); - - desc->completed_index_flags |= cpu_to_le16( - (ingress_port ? CQ_ENET_RQ_DESC_FLAGS_INGRESS_PORT : 0) | - (fcoe ? CQ_ENET_RQ_DESC_FLAGS_FCOE : 0) | - (eop ? CQ_ENET_RQ_DESC_FLAGS_EOP : 0) | - (sop ? CQ_ENET_RQ_DESC_FLAGS_SOP : 0)); - - desc->q_number_rss_type_flags |= cpu_to_le16( - ((rss_type & CQ_ENET_RQ_DESC_RSS_TYPE_MASK) << - CQ_DESC_Q_NUM_BITS) | - (csum_not_calc ? CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC : 0)); - - desc->rss_hash = cpu_to_le32(rss_hash); - - desc->bytes_written_flags = cpu_to_le16( - (bytes_written & CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK) | - (packet_error ? CQ_ENET_RQ_DESC_FLAGS_TRUNCATED : 0) | - (vlan_stripped ? CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED : 0)); - - desc->vlan = cpu_to_le16(vlan); - - if (fcoe) { - desc->checksum_fcoe = cpu_to_le16( - (fcoe_sof & CQ_ENET_RQ_DESC_FCOE_SOF_MASK) | - ((fcoe_eof & CQ_ENET_RQ_DESC_FCOE_EOF_MASK) << - CQ_ENET_RQ_DESC_FCOE_EOF_SHIFT)); - } else { - desc->checksum_fcoe = cpu_to_le16(checksum); - } - - desc->flags = - (tcp_udp_csum_ok ? CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK : 0) | - (udp ? CQ_ENET_RQ_DESC_FLAGS_UDP : 0) | - (tcp ? CQ_ENET_RQ_DESC_FLAGS_TCP : 0) | - (ipv4_csum_ok ? CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK : 0) | - (ipv6 ? CQ_ENET_RQ_DESC_FLAGS_IPV6 : 0) | - (ipv4 ? CQ_ENET_RQ_DESC_FLAGS_IPV4 : 0) | - (ipv4_fragment ? CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT : 0) | - (fcs_ok ? CQ_ENET_RQ_DESC_FLAGS_FCS_OK : 0) | - (fcoe_fc_crc_ok ? 
CQ_ENET_RQ_DESC_FCOE_FC_CRC_OK : 0) | - (fcoe_enc_error ? CQ_ENET_RQ_DESC_FCOE_ENC_ERROR : 0); -} - -static inline void cq_enet_rq_desc_dec(struct cq_enet_rq_desc *desc, - u8 *type, u8 *color, u16 *q_number, u16 *completed_index, - u8 *ingress_port, u8 *fcoe, u8 *eop, u8 *sop, u8 *rss_type, - u8 *csum_not_calc, u32 *rss_hash, u16 *bytes_written, u8 *packet_error, - u8 *vlan_stripped, u16 *vlan_tci, u16 *checksum, u8 *fcoe_sof, - u8 *fcoe_fc_crc_ok, u8 *fcoe_enc_error, u8 *fcoe_eof, - u8 *tcp_udp_csum_ok, u8 *udp, u8 *tcp, u8 *ipv4_csum_ok, - u8 *ipv6, u8 *ipv4, u8 *ipv4_fragment, u8 *fcs_ok) -{ - u16 completed_index_flags; - u16 q_number_rss_type_flags; - u16 bytes_written_flags; - - cq_desc_dec((struct cq_desc *)desc, type, - color, q_number, completed_index); - - completed_index_flags = le16_to_cpu(desc->completed_index_flags); - q_number_rss_type_flags = - le16_to_cpu(desc->q_number_rss_type_flags); - bytes_written_flags = le16_to_cpu(desc->bytes_written_flags); - - *ingress_port = (completed_index_flags & - CQ_ENET_RQ_DESC_FLAGS_INGRESS_PORT) ? 1 : 0; - *fcoe = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_FCOE) ? - 1 : 0; - *eop = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_EOP) ? - 1 : 0; - *sop = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_SOP) ? - 1 : 0; - - *rss_type = (u8)((q_number_rss_type_flags >> CQ_DESC_Q_NUM_BITS) & - CQ_ENET_RQ_DESC_RSS_TYPE_MASK); - *csum_not_calc = (q_number_rss_type_flags & - CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC) ? 1 : 0; - - *rss_hash = le32_to_cpu(desc->rss_hash); - - *bytes_written = bytes_written_flags & - CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK; - *packet_error = (bytes_written_flags & - CQ_ENET_RQ_DESC_FLAGS_TRUNCATED) ? 1 : 0; - *vlan_stripped = (bytes_written_flags & - CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) ? 
1 : 0; - - /* - * Tag Control Information(16) = user_priority(3) + cfi(1) + vlan(12) - */ - *vlan_tci = le16_to_cpu(desc->vlan); - - if (*fcoe) { - *fcoe_sof = (u8)(le16_to_cpu(desc->checksum_fcoe) & - CQ_ENET_RQ_DESC_FCOE_SOF_MASK); - *fcoe_fc_crc_ok = (desc->flags & - CQ_ENET_RQ_DESC_FCOE_FC_CRC_OK) ? 1 : 0; - *fcoe_enc_error = (desc->flags & - CQ_ENET_RQ_DESC_FCOE_ENC_ERROR) ? 1 : 0; - *fcoe_eof = (u8)((le16_to_cpu(desc->checksum_fcoe) >> - CQ_ENET_RQ_DESC_FCOE_EOF_SHIFT) & - CQ_ENET_RQ_DESC_FCOE_EOF_MASK); - *checksum = 0; - } else { - *fcoe_sof = 0; - *fcoe_fc_crc_ok = 0; - *fcoe_enc_error = 0; - *fcoe_eof = 0; - *checksum = le16_to_cpu(desc->checksum_fcoe); - } - - *tcp_udp_csum_ok = - (desc->flags & CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK) ? 1 : 0; - *udp = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_UDP) ? 1 : 0; - *tcp = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_TCP) ? 1 : 0; - *ipv4_csum_ok = - (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK) ? 1 : 0; - *ipv6 = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV6) ? 1 : 0; - *ipv4 = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4) ? 1 : 0; - *ipv4_fragment = - (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT) ? 1 : 0; - *fcs_ok = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_FCS_OK) ? 1 : 0; -} - -#endif /* _CQ_ENET_DESC_H_ */ diff --git a/prov/usnic/src/usnic_direct/kcompat.h b/prov/usnic/src/usnic_direct/kcompat.h deleted file mode 100644 index e650e01c021..00000000000 --- a/prov/usnic/src/usnic_direct/kcompat.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright 2008-2018 Cisco Systems, Inc. All rights reserved. - * Copyright 2007 Nuova Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#ifndef _KCOMPAT_H_ -#define _KCOMPAT_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define EXPORT_SYMBOL(x) -#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) -#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) -#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) -#define ETH_ALEN 6 -#define BUG() assert(0) -#define BUG_ON(x) assert(!x) -#define kzalloc(x, flags) calloc(1, x) -#define kfree(x) free(x) - -#define __iomem -#define udelay usleep -#define readl ioread32 -#define writel iowrite32 - -typedef int gfp_t; - -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif - -#ifndef offsetof -#define offsetof(t, m) ((size_t) &((t *)0)->m) -#endif - -static inline uint32_t ioread32(const volatile void *addr) -{ - return *(volatile uint32_t *)addr; -} - -static inline uint16_t ioread16(const volatile void *addr) -{ - return *(volatile uint16_t *)addr; -} - -static inline uint8_t ioread8(const volatile void *addr) -{ - return *(volatile uint8_t *)addr; -} - -static inline void iowrite64(uint64_t val, const volatile void *addr) -{ - *(volatile uint64_t *)addr = val; -} - -static inline void iowrite32(uint32_t val, const volatile void *addr) -{ - *(volatile uint32_t *)addr = val; -} - -#endif /* _KCOMPAT_H_ */ diff --git a/prov/usnic/src/usnic_direct/kcompat_priv.h b/prov/usnic/src/usnic_direct/kcompat_priv.h deleted file mode 100644 index 01a7b2eca3c..00000000000 --- a/prov/usnic/src/usnic_direct/kcompat_priv.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#ifndef _KCOMPAT_PRIV_H_ -#define _KCOMPAT_PRIV_H_ - -#include -#include - -struct pci_dev; -typedef uint64_t dma_addr_t; -struct usd_device; - -int usd_alloc_mr(struct usd_device *dev, size_t size, void **vaddr_o); -int usd_free_mr(void *); -char *pci_name(struct pci_dev *pdev); - -static inline void *pci_alloc_consistent(struct pci_dev *hwdev, - size_t size, - dma_addr_t * dma_handle) -{ - int ret; - void *va; - - ret = usd_alloc_mr((struct usd_device *) hwdev, size, &va); - if (ret == 0) { - *dma_handle = (dma_addr_t) va; - return va; - } else { - return NULL; - } -} - -static inline void pci_free_consistent( __attribute__ ((unused)) - struct pci_dev *pdev, - __attribute__ ((unused)) size_t - size, void *vaddr, - __attribute__ ((unused)) dma_addr_t - dma) -{ - (void) usd_free_mr(vaddr); -} - -#define usd_err(args...) fprintf(stderr, args) -#define pr_err usd_err -#define pr_warning(args...) - -#ifndef wmb -#define wmb() asm volatile("" ::: "memory") -#endif - -#ifndef rmb -#define rmb() asm volatile("" ::: "memory") -#endif - -#endif /* _KCOMPAT_PRIV_H_ */ diff --git a/prov/usnic/src/usnic_direct/libnl1_utils.h b/prov/usnic/src/usnic_direct/libnl1_utils.h deleted file mode 100644 index a559373bf3a..00000000000 --- a/prov/usnic/src/usnic_direct/libnl1_utils.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#ifndef LIBNL1_UTILS_H -#define LIBNL1_UTILS_H - -#include -#include -#include -#include -#include - -typedef struct nl_handle NL_HANDLE; - -#define NLMSG_SIZE(size) nlmsg_msg_size(size) -#define NL_GETERROR(err) nl_geterror() -#define NL_HANDLE_ALLOC nl_handle_alloc -#define NL_HANDLE_FREE nl_handle_destroy -#define NL_DISABLE_SEQ_CHECK nl_disable_sequence_check -#define INC_CB_MSGCNT(arg) \ - do { \ - arg->msg_cnt++; \ - } while (0) - -/* - * the return value of nl_recvmsgs_default does not tell - * whether it returns because of successful read or socket - * timeout. This is a limitation in libnl1. So we compare - * message count before and after the call to decide if there - * is no new message arriving. In this case, this function - * needs to terminate to prevent the caller from - * blocking forever. - * NL_CB_MSG_IN traps every received message, so - * there should be no premature exit - */ -#define NL_RECVMSGS(nlh, cb_arg, rc, err, out) \ - do { \ - int msg_cnt = cb_arg.msg_cnt; \ - err = nl_recvmsgs_default(nlh); \ - if (err < 0) { \ - usnic_err("Failed to receive netlink reply message, error %s\n", \ - NL_GETERROR(err)); \ - goto out; \ - } \ - if (msg_cnt == cb_arg.msg_cnt) {\ - err = rc; \ - goto out; \ - } \ - } while (0) - -struct usnic_rt_cb_arg { - uint32_t nh_addr; - int oif; - int found; - int msg_cnt; - int retry; - struct usnic_nl_sk *unlsk; -}; - -/* libnl1 and libnl3 return kernel resource exhaustion in different - * ways. Use this macro to abstract the differences away. - * - * In libnl1, nl_send() will return -ECONNREFUSED. 
*/ -#define USD_NL_SEND(nlh, msg, ret, retry) \ - do { \ - retry = 0; \ - ret = nl_send((nlh), (msg)); \ - if (ret == -ECONNREFUSED) { \ - retry = 1; \ - } \ - } while(0); - -#endif /* LIBNL1_UTILS_H */ diff --git a/prov/usnic/src/usnic_direct/libnl3_utils.h b/prov/usnic/src/usnic_direct/libnl3_utils.h deleted file mode 100644 index efc57f8da62..00000000000 --- a/prov/usnic/src/usnic_direct/libnl3_utils.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#ifndef LIBNL3_UTILS_H -#define LIBNL3_UTILS_H - -#include -#include -#include -#include -#include - -typedef struct nl_sock NL_HANDLE; - -#define NLMSG_SIZE(size) nlmsg_size(size) -#define NL_GETERROR(err) nl_geterror(err) -#define NL_HANDLE_ALLOC nl_socket_alloc -#define NL_HANDLE_FREE nl_socket_free -#define NL_DISABLE_SEQ_CHECK nl_socket_disable_seq_check -#define INC_CB_MSGCNT(arg) - -/* err will be returned as -NLE_AGAIN */ -/* if the socket times out */ -#define NL_RECVMSGS(nlh, cb_arg, rc, err, out) \ - do { \ - err = nl_recvmsgs_default(nlh); \ - if (err < 0) { \ - usnic_err("Failed to receive netlink reply message, error %s\n", \ - NL_GETERROR(err)); \ - if (err == -NLE_AGAIN) \ - err = rc; \ - goto out; \ - } \ - } while (0) - -struct usnic_rt_cb_arg { - uint32_t nh_addr; - int oif; - int found; - int retry; - struct usnic_nl_sk *unlsk; -}; - -/* libnl1 and libnl3 return kernel resource exhaustion in different - * ways. Use this macro to abstract the differences away. - * - * In libnl3, nl_send() will return -NLE_FAILURE and - * errno==ECONNREFUSED. 
*/ -#define USD_NL_SEND(nlh, msg, ret, retry) \ - do { \ - retry = 0; \ - ret = nl_send((nlh), (msg)); \ - if (ret == -NLE_FAILURE && errno == ECONNREFUSED) { \ - retry = 1; \ - } \ - } while(0); - -#endif /* LIBNL3_UTILS_H */ diff --git a/prov/usnic/src/usnic_direct/libnl_utils.h b/prov/usnic/src/usnic_direct/libnl_utils.h deleted file mode 100644 index 77a965d88c8..00000000000 --- a/prov/usnic/src/usnic_direct/libnl_utils.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2014-2015, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#ifndef LIBNL_UTILS_H -#define LIBNL_UTILS_H - -#if !defined (HAVE_LIBNL3) -#error You must define HAVE_LIBNL3 to 0 or 1 before including libnl_utils.h -#elif HAVE_LIBNL3 -#include "libnl3_utils.h" -#else -#include "libnl1_utils.h" -#endif - -struct usnic_nl_sk { - NL_HANDLE *nlh; - uint32_t seq; -}; - -int usnic_nl_rt_lookup(uint32_t src_addr, uint32_t dst_addr, int oif, - uint32_t *nh_addr); -int usnic_nl_neigh_lookup(uint32_t dst_ip, int ifindex, uint8_t *n_lladdr, - uint16_t *n_state); - -#endif /* LIBNL_UTILS_H */ diff --git a/prov/usnic/src/usnic_direct/libnl_utils_common.c b/prov/usnic/src/usnic_direct/libnl_utils_common.c deleted file mode 100644 index 7dd45b6afb0..00000000000 --- a/prov/usnic/src/usnic_direct/libnl_utils_common.c +++ /dev/null @@ -1,465 +0,0 @@ -/* - * Copyright (c) 2014-2016, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ -#include -#include -#include -#include - -#include "libnl_utils.h" -#include "usnic_user_utils.h" - -#if USNIC_LOG_LVL >= USNIC_LOG_LVL_ERR -#define usnic_nlmsg_dump(msg) nl_msg_dump(msg, stderr) -#else -#define usnic_nlmsg_dump(msg) -#endif - -/* - * Querying the routing tables via netlink is expensive, especially - * when many processes are doing so at the same time on a single - * server (e.g., in an MPI job). 
As such, we cache netlink responses - * to alleviate pressure on the netlink kernel interface. - */ - struct usd_nl_cache_entry { - time_t timestamp; - - uint32_t src_ipaddr_be; - uint32_t dest_ipaddr_be; - int ifindex; - uint32_t nh_addr; - int reachable; - - /* For now, this cache is a simple linked list. Eventually, - * this cache should be a better data structure, such as a - * hash table. */ - struct usd_nl_cache_entry *prev; - struct usd_nl_cache_entry *next; -}; - -/* Semi-arbitrarily set cache TTL to 2 minutes */ -static time_t usd_nl_cache_timeout = 120; - -static struct usd_nl_cache_entry *cache = NULL; - - -static struct nla_policy route_policy[RTA_MAX+1] = { - [RTA_IIF] = { .type = NLA_STRING, - .maxlen = IFNAMSIZ, }, - [RTA_OIF] = { .type = NLA_U32 }, - [RTA_PRIORITY] = { .type = NLA_U32 }, - [RTA_FLOW] = { .type = NLA_U32 }, - [RTA_MP_ALGO] = { .type = NLA_U32 }, - [RTA_CACHEINFO] = { .minlen = sizeof(struct rta_cacheinfo) }, - [RTA_METRICS] = { .type = NLA_NESTED }, - [RTA_MULTIPATH] = { .type = NLA_NESTED }, -}; - -static int usnic_is_nlreply_expected(struct usnic_nl_sk *unlsk, - struct nlmsghdr *nlm_hdr) -{ - if (nlm_hdr->nlmsg_pid != nl_socket_get_local_port(unlsk->nlh) - || nlm_hdr->nlmsg_seq != unlsk->seq) { - usnic_err("Not an expected reply msg pid: %u local pid: %u msg seq: %u expected seq: %u\n", - nlm_hdr->nlmsg_pid, - nl_socket_get_local_port(unlsk->nlh), - nlm_hdr->nlmsg_seq, unlsk->seq); - return 0; - } - - return 1; -} - -static int usnic_is_nlreply_err(struct nlmsghdr *nlm_hdr, - struct usnic_rt_cb_arg *arg) -{ - if (nlm_hdr->nlmsg_type == NLMSG_ERROR) { - struct nlmsgerr *e = (struct nlmsgerr *)nlmsg_data(nlm_hdr); - if (nlm_hdr->nlmsg_len >= (__u32)NLMSG_SIZE(sizeof(*e))) { - usnic_strerror(e->error, - "Received a netlink error message"); - /* Sometimes nl_send() succeeds, but the - * request fails because the kernel is - * temporarily out of resources. In these - * cases, we should tell the caller that they - * should try again. 
*/ - if (e->error == -ECONNREFUSED) { - arg->retry = 1; - } - } else - usnic_err( - "Received a truncated netlink error message\n"); - return 1; - } - - return 0; -} - -static int usnic_nl_send_query(struct usnic_nl_sk *unlsk, struct nl_msg *msg, - int protocol, int flag) -{ - int ret, retry; - struct nlmsghdr *nlhdr; - - nlhdr = nlmsg_hdr(msg); - while (1) { - nlhdr->nlmsg_pid = nl_socket_get_local_port(unlsk->nlh); - nlhdr->nlmsg_seq = ++unlsk->seq; - nlmsg_set_proto(msg, protocol); - nlhdr->nlmsg_flags = flag; - - /* Sometimes nl_send() can fail simply because the - * kernel is temporarily out of resources, and we - * should just try again. libnl1 and libnl3 handle - * this case a little differently, so use the - * USD_NL_SEND() macro to hide the differences. If - * retry comes back as true, then sleep a little and - * try again. */ - USD_NL_SEND(unlsk->nlh, msg, ret, retry); - if (retry) { - usleep(5); - continue; - } - break; - } - - return ret; -} - -static int usnic_nl_set_rcvsk_timer(NL_HANDLE *nlh) -{ - int err = 0; - struct timeval timeout; - - timeout.tv_sec = 1; - timeout.tv_usec = 0; - - err = setsockopt(nl_socket_get_fd(nlh), SOL_SOCKET, SO_RCVTIMEO, - (char *)&timeout, sizeof(timeout)); - if (err < 0) - usnic_perr("Failed to set SO_RCVTIMEO for nl socket"); - - return err; -} - -static int usnic_nl_sk_alloc(struct usnic_nl_sk **p_sk, int protocol) -{ - struct usnic_nl_sk *unlsk; - NL_HANDLE *nlh; - int err; - - unlsk = calloc(1, sizeof(*unlsk)); - if (!unlsk) { - usnic_err("Failed to allocate usnic_nl_sk struct\n"); - return ENOMEM; - } - - nlh = NL_HANDLE_ALLOC(); - if (!nlh) { - usnic_err("Failed to allocate nl handle\n"); - err = ENOMEM; - goto err_free_unlsk; - } - - err = nl_connect(nlh, protocol); - if (err < 0) { - usnic_err("Failed to connnect netlink route socket error: %s\n", - NL_GETERROR(err)); - err = EINVAL; - goto err_free_nlh; - } - - NL_DISABLE_SEQ_CHECK(nlh); - err = usnic_nl_set_rcvsk_timer(nlh); - if (err < 0) - goto 
err_close_nlh; - - unlsk->nlh = nlh; - unlsk->seq = time(NULL); - *p_sk = unlsk; - return 0; - -err_close_nlh: - nl_close(nlh); -err_free_nlh: - NL_HANDLE_FREE(nlh); -err_free_unlsk: - free(unlsk); - return err; -} - -static void usnic_nl_sk_free(struct usnic_nl_sk *unlsk) -{ - nl_close(unlsk->nlh); - NL_HANDLE_FREE(unlsk->nlh); - free(unlsk); -} - -static int usnic_rt_raw_parse_cb(struct nl_msg *msg, void *arg) -{ - struct usnic_rt_cb_arg *lookup_arg = (struct usnic_rt_cb_arg *)arg; - struct usnic_nl_sk *unlsk = lookup_arg->unlsk; - struct nlmsghdr *nlm_hdr = nlmsg_hdr(msg); - struct rtmsg *rtm; - struct nlattr *tb[RTA_MAX + 1]; - int found = 0; - int err; - - INC_CB_MSGCNT(lookup_arg); - - if (!usnic_is_nlreply_expected(unlsk, nlm_hdr)) { - usnic_nlmsg_dump(msg); - return NL_SKIP; - } - - if (usnic_is_nlreply_err(nlm_hdr, lookup_arg)) { - usnic_nlmsg_dump(msg); - return NL_SKIP; - } - - if (nlm_hdr->nlmsg_type != RTM_NEWROUTE) { - char buf[128]; - nl_nlmsgtype2str(nlm_hdr->nlmsg_type, buf, sizeof(buf)); - usnic_err("Received an invalid route request reply message type: %s\n", - buf); - usnic_nlmsg_dump(msg); - return NL_SKIP; - } - - rtm = nlmsg_data(nlm_hdr); - if (rtm->rtm_family != AF_INET) { - usnic_err("RTM message contains invalid AF family: %u\n", - rtm->rtm_family); - usnic_nlmsg_dump(msg); - return NL_SKIP; - } - - err = nlmsg_parse(nlm_hdr, sizeof(struct rtmsg), tb, RTA_MAX, - route_policy); - if (err < 0) { - usnic_err("nlmsg parse error %s\n", NL_GETERROR(err)); - usnic_nlmsg_dump(msg); - return NL_SKIP; - } - - if (tb[RTA_OIF]) { - if (nla_get_u32(tb[RTA_OIF]) == (uint32_t)lookup_arg->oif) - found = 1; - else - usnic_err("Retrieved route has a different outgoing interface %d (expected %d)\n", - nla_get_u32(tb[RTA_OIF]), - lookup_arg->oif); - } - - if (found && tb[RTA_GATEWAY]) - lookup_arg->nh_addr = nla_get_u32(tb[RTA_GATEWAY]); - - lookup_arg->found = found; - return NL_STOP; -} - - -static struct usd_nl_cache_entry * -usd_nl_cache_lookup(uint32_t 
src_ipaddr_be, uint32_t dest_ipaddr_be, int ifindex) -{ - time_t now; - struct usd_nl_cache_entry *nlce; - struct usd_nl_cache_entry *stale; - - now = time(NULL); - for (nlce = cache; NULL != nlce; ) { - /* While we're traversing the cache, we might as well - * remove stale entries */ - if (now > nlce->timestamp + usd_nl_cache_timeout) { - stale = nlce; - nlce = nlce->next; - - if (stale->prev) { - stale->prev->next = stale->next; - } - if (stale->next) { - stale->next->prev = stale->prev; - } - if (cache == stale) { - cache = nlce; - } - free(stale); - - continue; - } - - if (nlce->src_ipaddr_be == src_ipaddr_be && - nlce->dest_ipaddr_be == dest_ipaddr_be && - nlce->ifindex == ifindex) { - return nlce; - } - - nlce = nlce->next; - } - - return NULL; -} - -static void -usd_nl_cache_save(int32_t src_ipaddr_be, uint32_t dest_ipaddr_be, int ifindex, - uint32_t nh_addr, int reachable) -{ - struct usd_nl_cache_entry *nlce; - - nlce = calloc(1, sizeof(*nlce)); - if (NULL == nlce) { - return; - } - - nlce->timestamp = time(NULL); - nlce->src_ipaddr_be = src_ipaddr_be; - nlce->dest_ipaddr_be = dest_ipaddr_be; - nlce->ifindex = ifindex; - nlce->nh_addr = nh_addr; - nlce->reachable = reachable; - - nlce->next = cache; - if (cache) { - cache->prev = nlce; - } - cache = nlce; -} - - -int usnic_nl_rt_lookup(uint32_t src_addr, uint32_t dst_addr, int oif, - uint32_t *nh_addr) -{ - struct usnic_nl_sk *unlsk; - struct nl_msg *nlm; - struct rtmsg rmsg; - struct usnic_rt_cb_arg arg; - int err; - - /* See if we have this NL result cached */ - struct usd_nl_cache_entry *nlce; - nlce = usd_nl_cache_lookup(src_addr, dst_addr, oif); - if (nlce) { - if (nlce->reachable) { - *nh_addr = nlce->nh_addr; - return 0; - } else { - return EHOSTUNREACH; - } - } - -retry: - unlsk = NULL; - err = usnic_nl_sk_alloc(&unlsk, NETLINK_ROUTE); - if (err) - return err; - - memset(&rmsg, 0, sizeof(rmsg)); - rmsg.rtm_family = AF_INET; - rmsg.rtm_dst_len = sizeof(dst_addr) * CHAR_BIT; - rmsg.rtm_src_len = 
sizeof(src_addr) * CHAR_BIT; - - nlm = nlmsg_alloc_simple(RTM_GETROUTE, 0); - if (!nlm) { - usnic_err("Failed to alloc nl message, %s\n", - NL_GETERROR(err)); - err = ENOMEM; - goto out; - } - nlmsg_append(nlm, &rmsg, sizeof(rmsg), NLMSG_ALIGNTO); - nla_put_u32(nlm, RTA_DST, dst_addr); - nla_put_u32(nlm, RTA_SRC, src_addr); - - err = usnic_nl_send_query(unlsk, nlm, NETLINK_ROUTE, NLM_F_REQUEST); - nlmsg_free(nlm); - if (err < 0) { - usnic_err("Failed to send RTM_GETROUTE query message, error %s\n", - NL_GETERROR(err)); - err = EINVAL; - goto out; - } - - memset(&arg, 0, sizeof(arg)); - arg.oif = oif; - arg.unlsk = unlsk; - err = nl_socket_modify_cb(unlsk->nlh, NL_CB_MSG_IN, NL_CB_CUSTOM, - usnic_rt_raw_parse_cb, &arg); - if (err != 0) { - usnic_err("Failed to setup callback function, error %s\n", - NL_GETERROR(err)); - err = EINVAL; - goto out; - } - - /* Sometimes the recvmsg can fail because something is - * temporarily out of resources. In this case, delay a little - * and try again. */ - do { - err = 0; - NL_RECVMSGS(unlsk->nlh, arg, EAGAIN, err, out); - if (err == EAGAIN) { - usleep(5); - } - } while (err == EAGAIN); - - /* If we got a reply back that indicated that the kernel was - * too busy to handle this request, delay a little and try - * again. */ - if (arg.retry) { - usleep(5); - goto retry; - } - - if (arg.found) { - *nh_addr = arg.nh_addr; - err = 0; - } else { - err = EHOSTUNREACH; - } - - /* Save this result in the cache */ - usd_nl_cache_save(src_addr, dst_addr, oif, - arg.nh_addr, arg.found); - -out: - usnic_nl_sk_free(unlsk); - return err; -} - diff --git a/prov/usnic/src/usnic_direct/linux/delay.h b/prov/usnic/src/usnic_direct/linux/delay.h deleted file mode 100644 index 37760d6176c..00000000000 --- a/prov/usnic/src/usnic_direct/linux/delay.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#ifndef _DELAY_H_ -#define _DELAY_H_ - -/* Fake header file so that we can compile kernel code in userspace. */ - -#endif /* _DELAY_H_ */ diff --git a/prov/usnic/src/usnic_direct/linux/slab.h b/prov/usnic/src/usnic_direct/linux/slab.h deleted file mode 100644 index c90c608281f..00000000000 --- a/prov/usnic/src/usnic_direct/linux/slab.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2013, Cisco Systems, Inc. 
All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#ifndef _SLAB_H_ -#define _SLAB_H_ - -/* Fake header file so that we can compile kernel code in userspace. 
*/ - -#endif /* _SLAB_H_ */ diff --git a/prov/usnic/src/usnic_direct/linux_types.h b/prov/usnic/src/usnic_direct/linux_types.h deleted file mode 100644 index fc0cfb5fdd0..00000000000 --- a/prov/usnic/src/usnic_direct/linux_types.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#ifndef __LINUX_TYPES_H__ -#define __LINUX_TYPES_H__ - -typedef u_int8_t u8; -typedef u_int16_t u16; -typedef u_int32_t u32; -typedef u_int64_t u64; - -typedef u_int16_t __le16; -typedef u_int32_t __le32; -#define __le64 ___le64 -typedef u_int64_t __le64; - -#define le16_to_cpu -#define le32_to_cpu -#define le64_to_cpu -#define cpu_to_le16 -#define cpu_to_le32 -#define cpu_to_le64 - -#if !defined(__LIBUSNIC__) -#define rmb() asm volatile("" ::: "memory") -#endif - -#endif // __LINUX_TYPES_H__ diff --git a/prov/usnic/src/usnic_direct/rq_enet_desc.h b/prov/usnic/src/usnic_direct/rq_enet_desc.h deleted file mode 100644 index 3eed402b359..00000000000 --- a/prov/usnic/src/usnic_direct/rq_enet_desc.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright 2008-2010 Cisco Systems, Inc. All rights reserved. - * Copyright 2007 Nuova Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#ifndef _RQ_ENET_DESC_H_ -#define _RQ_ENET_DESC_H_ - -/* Ethernet receive queue descriptor: 16B */ -struct rq_enet_desc { - __le64 address; - __le16 length_type; - u8 reserved[6]; -}; - -enum rq_enet_type_types { - RQ_ENET_TYPE_ONLY_SOP = 0, - RQ_ENET_TYPE_NOT_SOP = 1, - RQ_ENET_TYPE_RESV2 = 2, - RQ_ENET_TYPE_RESV3 = 3, -}; - -#define RQ_ENET_ADDR_BITS 64 -#define RQ_ENET_LEN_BITS 14 -#define RQ_ENET_LEN_MASK ((1 << RQ_ENET_LEN_BITS) - 1) -#define RQ_ENET_TYPE_BITS 2 -#define RQ_ENET_TYPE_MASK ((1 << RQ_ENET_TYPE_BITS) - 1) - -static inline void rq_enet_desc_enc(struct rq_enet_desc *desc, - u64 address, u8 type, u16 length) -{ - desc->address = cpu_to_le64(address); - desc->length_type = cpu_to_le16((length & RQ_ENET_LEN_MASK) | - ((type & RQ_ENET_TYPE_MASK) << RQ_ENET_LEN_BITS)); -} - -static inline void rq_enet_desc_dec(struct rq_enet_desc *desc, - u64 *address, u8 *type, u16 *length) -{ - *address = le64_to_cpu(desc->address); - *length = le16_to_cpu(desc->length_type) & RQ_ENET_LEN_MASK; - *type = (u8)((le16_to_cpu(desc->length_type) >> RQ_ENET_LEN_BITS) & - RQ_ENET_TYPE_MASK); -} - -#endif /* _RQ_ENET_DESC_H_ */ diff --git 
a/prov/usnic/src/usnic_direct/usd.h b/prov/usnic/src/usnic_direct/usd.h deleted file mode 100644 index b7f5c2adead..00000000000 --- a/prov/usnic/src/usnic_direct/usd.h +++ /dev/null @@ -1,324 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#ifndef _USD_H_ -#define _USD_H_ - -#include - -#include "kcompat.h" -#include "vnic_rq.h" -#include "vnic_wq.h" -#include "vnic_cq.h" -#include "wq_enet_desc.h" -#include "rq_enet_desc.h" -#include "vnic_intr.h" - -#include "usnic_abi.h" -#include "usnic_direct.h" -#include "usd_ib_sysfs.h" - -#define USD_INVALID_HANDLE (~0) -#define USD_SF_ISSET(flags, flagname) \ - ((flags >> USD_SFS_##flagname) & 1) - -#define USD_SEND_MAX_COPY 992 -#define USD_MAX_PRESEND 4 - -#define USD_CTXF_CLOSE_CMD_FD (1u << 0) -#define USD_DEVF_CLOSE_CTX (1u << 0) - -#ifndef USD_DEBUG -#define USD_DEBUG 0 -#endif - -/* - * Group interrupt vector userspace map info - */ -struct usd_grp_vect_map { - void *va; - size_t len; - uint32_t vfid; -}; - -/* - * structure storing interrupt resource and its mapping to FD - */ -struct usd_cq_comp_intr { - struct vnic_intr uci_vintr; - int uci_offset; - int uci_refcnt; - LIST_ENTRY(usd_cq_comp_intr) uci_ctx_link; -}; - -/* - * Instance of a usd context, corresponding to an - * opened libibverbs context - */ -struct usd_context { - struct usd_ib_dev *ucx_ib_dev; /* parent IB dev */ - int ucx_ib_dev_fd; /* file desc for IB dev */ - int ucmd_ib_dev_fd; /* Another open file descriptor for IB dev - * used for encapusulating user commands - * through GET_CONTEXT IB command */ - - uint32_t ucx_flags; - int ucx_caps[USD_CAP_MAX]; /* device capablities */ - - pthread_mutex_t ucx_mutex; /* protect intr_list */ - LIST_HEAD(intr_head, usd_cq_comp_intr) ucx_intr_list; - - /* Remove these after moving ud_attrs here */ - int event_fd; - unsigned num_comp_vectors; -}; - -/* - * Instance of a device opened by user - */ -struct usd_device { - struct usd_context *ud_ctx; - - uint32_t ud_flags; - struct usd_device_attrs ud_attrs; /* TODO move this to usd_ctx */ - - /* VFs we have associated with this device */ - struct usd_vf *ud_vf_list; - - /* PD for this device */ - uint32_t ud_pd_handle; - - /* destination related */ - int 
ud_arp_sockfd; /* for ARP */ - TAILQ_HEAD(, usd_dest_req) ud_pending_reqs; - TAILQ_HEAD(, usd_dest_req) ud_completed_reqs; - - TAILQ_ENTRY(usd_device) ud_link; - - struct usd_grp_vect_map grp_vect_map; -}; - -/* - * Registered memory region - */ -struct usd_mr { - struct usd_device *umr_dev; - void *umr_vaddr; - uint32_t umr_handle; - uint32_t umr_lkey; - uint32_t umr_rkey; - size_t umr_length; -}; - -/* - * Information about the PCI virtual function - */ -struct usd_vf { - uint32_t vf_id; - int vf_refcnt; - struct vnic_dev_bar vf_bar0; - size_t vf_bar_map_len; - struct vnic_dev *vf_vdev; - struct vnic_dev_iomap_info iomaps[RES_TYPE_MAX]; - - /* Will also protect the devcmd region */ - pthread_mutex_t vf_lock; - struct usd_vf *vf_next; - struct usd_vf *vf_prev; -}; - -/* - * Holding place for information about a VF - */ -struct usd_vf_info { - uint32_t vi_vfid; - dma_addr_t vi_bar_bus_addr; - uint32_t vi_bar_len; - size_t vi_barhead_len; - struct usnic_vnic_barres_info barres[RES_TYPE_MAX]; -}; - -/* - * Internal representation of a filter - */ -struct usd_qp_filter { - enum usd_filter_type qf_type; - union { - struct { - int u_sockfd; - } qf_udp; - } qf_filter; -}; - -/* - * Definitions and structures about queues - */ - -/* - * this is used to keep track of what has been allocated and/or - * initialized to assist with teardown of partially completed queues - */ -enum usd_qstate { - USD_QS_FILTER_ALLOC = (1 << 0), - USD_QS_VERBS_CREATED = (1 << 1), - USD_QS_VF_MAPPED = (1 << 2), - USD_QS_VNIC_ALLOCATED = (1 << 3), - USD_QS_VNIC_INITIALIZED = (1 << 4), - USD_QS_READY = (1 << 5) -}; - -struct usd_cq_impl { - struct usd_cq ucq_cq; - struct usd_device *ucq_dev; - struct usd_vf *ucq_vf; - - uint32_t ucq_state; - - struct vnic_cq ucq_vnic_cq; - - void *ucq_desc_ring; - uint32_t ucq_next_desc; - uint32_t ucq_last_color; - - uint32_t ucq_index; - uint32_t ucq_num_entries; - uint32_t ucq_cqe_mask; - uint32_t ucq_color_shift; - uint32_t ucq_handle; - - int comp_fd; - int 
comp_vec; - int comp_req_notify; - int intr_offset; - struct usd_cq_comp_intr *ucq_intr; - - struct usd_rq **ucq_rq_map; - struct usd_wq **ucq_wq_map; -}; -#define to_cqi(CQ) ((struct usd_cq_impl *)(CQ)) -#define to_usdcq(CQ) (&(CQ)->ucq_cq) - -struct usd_rq { - struct usd_cq_impl *urq_cq; - uint32_t urq_state; - - uint32_t urq_index; - uint32_t urq_num_entries; - struct vnic_rq urq_vnic_rq; - - void **urq_context; - - char *urq_rxbuf; - char **urq_post_addr; - uint32_t urq_recv_credits; /* number of available descriptors */ - struct rq_enet_desc *urq_desc_ring; - struct rq_enet_desc *urq_next_desc; - uint32_t urq_post_index; /* next rxbuf to post */ - uint32_t urq_post_index_mask; - uint32_t urq_last_comp; - uint32_t urq_accum_bytes; - - uint32_t urq_num_rxbuf; - uint32_t urq_rxbuf_size; -}; - -struct usd_wq_post_info { - void *wp_context; - uint32_t wp_len; -}; - -struct usd_wq { - struct usd_cq_impl *uwq_cq; - uint32_t uwq_state; - struct vnic_wq uwq_vnic_wq; - - uint32_t uwq_index; - uint32_t uwq_num_entries; - uint32_t uwq_send_credits; - struct wq_enet_desc *uwq_desc_ring; - struct wq_enet_desc *uwq_next_desc; - uint32_t uwq_post_index; - uint32_t uwq_post_index_mask; - uint32_t uwq_last_comp; - - uint8_t *uwq_copybuf; - struct usd_wq_post_info *uwq_post_info; - - /* used only for PIO QPs */ - void *pio_v_wq_addr; - uint64_t pio_p_wq_addr; - char *pio_v_pkt_buf; - uint64_t pio_p_pkt_buf; -}; - -struct usd_qp_impl { - struct usd_qp uq_qp; /* user's view of QP */ - - struct usd_device *uq_dev; - struct usd_vf *uq_vf; - - struct usd_qp_attrs uq_attrs; - - uint32_t uq_qp_handle; /* IB QP handle */ - uint32_t uq_qp_num; - - /* primary filter for this QP */ - struct usd_qp_filter uq_filter; - - struct usd_wq uq_wq; - struct usd_rq uq_rq; -}; -#define to_qpi(Q) ((struct usd_qp_impl *)(Q)) -#define to_usdqp(Q) (&(Q)->uq_qp) - -struct usd_dest { - union { - struct { - struct usd_udp_hdr u_hdr; - } ds_udp; - } ds_dest; -}; - -extern struct usd_qp_ops usd_qp_ops_ud_udp; 
-extern struct usd_qp_ops usd_qp_ops_ud_pio_udp; -extern struct usd_qp_ops usd_qp_ops_ud_raw; -#endif /* _USD_H_ */ diff --git a/prov/usnic/src/usnic_direct/usd_caps.c b/prov/usnic/src/usnic_direct/usd_caps.c deleted file mode 100644 index c1c7d4068a6..00000000000 --- a/prov/usnic/src/usnic_direct/usd_caps.c +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "usnic_direct.h" -#include "usd.h" - -int -usd_get_cap( - struct usd_device *dev, - enum usd_capability cap) -{ - if (cap >= USD_CAP_MAX) { - return 0; - } - - return dev->ud_ctx->ucx_caps[cap]; -} - diff --git a/prov/usnic/src/usnic_direct/usd_caps.h b/prov/usnic/src/usnic_direct/usd_caps.h deleted file mode 100644 index a24128934bd..00000000000 --- a/prov/usnic/src/usnic_direct/usd_caps.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#ifndef _USD_CAPS_H_ -#define _USD_CAPS_H_ - -int usd_read_caps(struct usd_device *dev); - -#endif /* _USD_CAPS_H_ */ diff --git a/prov/usnic/src/usnic_direct/usd_dest.c b/prov/usnic/src/usnic_direct/usd_dest.c deleted file mode 100644 index a1c42a9e355..00000000000 --- a/prov/usnic/src/usnic_direct/usd_dest.c +++ /dev/null @@ -1,595 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "usnic_ip_utils.h" -#include "libnl_utils.h" - -#include "usnic_direct.h" -#include "usd.h" -#include "usd_queue.h" -#include "usd_time.h" -#include "usd_dest.h" -#include "usd_socket.h" - -extern TAILQ_HEAD(, usd_device) usd_device_list; - -static struct usd_dest_params usd_dest_params = { - .dp_arp_timeout = 1000, - .dp_max_arps = 10 -}; - -int -usd_get_dest_distance( - struct usd_device *dev, - uint32_t daddr_be, - int *metric_o) -{ - uint32_t nh_ip_addr; - int ret; - - ret = usnic_nl_rt_lookup(dev->ud_attrs.uda_ipaddr_be, daddr_be, - dev->ud_attrs.uda_ifindex, &nh_ip_addr); - if (ret != 0) { - *metric_o = -1; - ret = 0; - } else if (nh_ip_addr == 0) { - *metric_o = 0; - } else { - *metric_o = 1; - } - - return ret; -} - -static void -usd_dest_set_complete( - struct usd_device *dev, - struct usd_dest_req *req) -{ - req->udr_complete = 1; - if (req->udr_status != 0 && req->udr_dest != NULL) { - free(req->udr_dest); - req->udr_dest = NULL; - } - TAILQ_REMOVE(&dev->ud_pending_reqs, req, udr_link); - TAILQ_INSERT_TAIL(&dev->ud_completed_reqs, req, udr_link); -} - -static int -usd_dest_trigger_arp( - struct usd_device *dev, - struct usd_dest_req *req) -{ - int ret; - - usd_get_time(&req->udr_last_arp); - req->udr_arps_sent++; - - ret = - usnic_arp_request(req->udr_daddr_be, dev->ud_arp_sockfd); - return ret; -} - -static int -usd_check_dest_resolved( - struct usd_device *dev, - struct usd_dest_req *req) -{ - struct ether_header *eth; - int ret; - - eth = &req->udr_dest->ds_dest.ds_udp.u_hdr.uh_eth; - ret = usnic_arp_lookup(dev->ud_attrs.uda_ifname, - req->udr_daddr_be, dev->ud_arp_sockfd, - &eth->ether_dhost[0]); - - if (ret == EAGAIN) - return -EAGAIN; - - /* for better or worse, resolution is complete */ - req->udr_status = -ret; - return 0; -} - -/* - * Loop through the ordered pending create_dest request queue.
- * If an entry is complete, move it to the completed queue. - * If the retry timeout for an entry has arrived, re-trigger the ARP - */ -static void -usd_dest_progress_dev( - struct usd_device *dev) -{ - struct usd_dest_req *req; - struct usd_dest_req *tmpreq; - usd_time_t now; - int delta; - int ret; - - usd_get_time(&now); - - TAILQ_FOREACH_SAFE(req, tmpreq, &dev->ud_pending_reqs, udr_link) { - - /* resolution complete? */ - ret = usd_check_dest_resolved(dev, req); - if (ret == 0) { - usd_dest_set_complete(dev, req); - continue; - } - - - /* time for next ARP trigger? */ - delta = usd_time_diff(req->udr_last_arp, now); - if (delta > (int) usd_dest_params.dp_arp_timeout) { - if (req->udr_arps_sent >= usd_dest_params.dp_max_arps) { - req->udr_status = -EHOSTUNREACH; - usd_dest_set_complete(dev, req); - continue; - } - - ret = usd_dest_trigger_arp(dev, req); - if (ret != 0) { - req->udr_status = ret; - usd_dest_set_complete(dev, req); - } - } - } -} - -static void -usd_dest_progress(void) -{ - struct usd_device *dev; - - TAILQ_FOREACH(dev, &usd_device_list, ud_link) { - usd_dest_progress_dev(dev); - } -} - -/* - * Fill in all of a header except the dest MAC and the UDP ports - * specified remote host - */ -void -usd_fill_udp_dest( - struct usd_dest *dest, - struct usd_device_attrs *dap, - uint32_t daddr_be, - uint16_t dport_be) -{ - struct ether_header eth = { - .ether_type = htons(0x0800) - }; - - struct udphdr udp = { - .dest = dport_be - }; - - struct iphdr ip = { - .saddr = dap->uda_ipaddr_be, - .daddr = daddr_be, - .protocol = IPPROTO_UDP, - .version = 4, - .frag_off = 0, - .ihl = 5, /* no options */ - .tos = 0, - .ttl = 8 - }; - - /* Workaround taking a pointer to an element of a packed structure due to - * warnings in Clang 4.0.1 and beyond. 
- */ - memcpy(eth.ether_shost, dap->uda_mac_addr, ETH_ALEN); - dest->ds_dest.ds_udp.u_hdr.uh_eth = eth; - dest->ds_dest.ds_udp.u_hdr.uh_udp = udp; - dest->ds_dest.ds_udp.u_hdr.uh_ip = ip; -} - -static int -usd_create_udp_dest_start( - struct usd_device *dev, - uint32_t daddr_be, - uint16_t dport_be, - struct usd_dest_req **req_o) -{ - struct usd_dest_req *req; - struct usd_dest *dest; - uint32_t first_hop_daddr_be; - int ret; - - /* catch a mistake that will almost always lead to hung programs */ - if (daddr_be == 0 || dport_be == 0) { - return -EINVAL; - } - - req = calloc(sizeof(*req), 1); - dest = calloc(sizeof(*dest), 1); - if (req == NULL || dest == NULL) { - ret = -errno; - goto fail; - } - - ret = usnic_nl_rt_lookup(dev->ud_attrs.uda_ipaddr_be, - daddr_be, dev->ud_attrs.uda_ifindex, - &first_hop_daddr_be); - if (ret != 0) { - /* EHOSTUNREACH is non-fatal, but we are done with resolution */ - if (ret == EHOSTUNREACH) { - req->udr_status = -EHOSTUNREACH; - free(dest); - goto complete; - } else { - ret = -ret; - } - goto fail; - } - if (first_hop_daddr_be == 0) - first_hop_daddr_be = daddr_be; - - /* Fill in dest as much as we can */ - usd_fill_udp_dest(dest, &dev->ud_attrs, daddr_be, dport_be); - - /* initiate request and add to tail of pending list */ - req->udr_daddr_be = first_hop_daddr_be; - req->udr_dest = dest; - - ret = usd_dest_trigger_arp(dev, req); - if (ret != 0) - goto fail; - -complete: - TAILQ_INSERT_TAIL(&dev->ud_pending_reqs, req, udr_link); - if (req->udr_status != 0) { - usd_dest_set_complete(dev, req); - } - *req_o = req; - - return 0; - - fail: - if (req != NULL) - free(req); - if (dest != NULL) - free(dest); - return ret; -} - - -/* - * synchronously create a UDP destination by initiating the - * resolution, then waiting for it to complete - */ -static int -usd_create_udp_dest( - struct usd_device *dev, - uint32_t daddr_be, - uint16_t dport_be, - struct usd_dest **dest_o) -{ - struct usd_dest_req *req; - int ret; - - ret = 
usd_create_udp_dest_start(dev, daddr_be, dport_be, &req); - if (ret != 0) - return ret; - - /* loop until request completes or times out */ - while (req->udr_complete == 0) { - usd_dest_progress(); - } - - ret = req->udr_status; - if (ret == 0) - *dest_o = req->udr_dest; - - TAILQ_REMOVE(&dev->ud_completed_reqs, req, udr_link); - free(req); - return ret; -} - -/* - * Build and save a IP header appropriate for sending to the - * specified remote host - */ -int -usd_create_ip_dest( - struct usd_device *dev, - uint32_t daddr_be, - struct usd_dest **dest_o) -{ - int ret; - - ret = usd_create_udp_dest(dev, daddr_be, 0, dest_o); - return ret; -} - -void -usd_dest_set_udp_ports( - struct usd_dest *dest, - struct usd_qp *src_uqp, - uint16_t dest_port_be) -{ - struct usd_qp_impl *qp = to_qpi(src_uqp); - struct udphdr udp = { - .source = qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port, - .dest = dest_port_be - }; - - /* Workaround taking a pointer to an element of a packed structure due to - * warnings in Clang 4.0.1 and beyond. - */ - dest->ds_dest.ds_udp.u_hdr.uh_udp = udp; -} - -/* - * Synchronously creates a destination - */ -int -usd_create_dest( - struct usd_device *dev, - uint32_t daddr_be, - uint16_t dport_be, - struct usd_dest **dest_o) -{ - int ret; - - ret = usd_create_udp_dest(dev, daddr_be, dport_be, dest_o); - - return ret; -} - -int -usd_destroy_dest( - struct usd_dest *dest) -{ - if (dest != NULL) { - free(dest); - } - return 0; -} - -/* - * Get address resolution settings - */ -int -usd_get_dest_params( - struct usd_dest_params *params) -{ - if (params == NULL) - return -EINVAL; - - *params = usd_dest_params; - return 0; -} - -/* - * Set address resolution settings - * Settings may not be changed while any resolution requests are in progress. 
- */ -int -usd_set_dest_params( - struct usd_dest_params *params) -{ - if (params == NULL) - return -EINVAL; - - /* blindly set parameters, allowing user to shoot self if desired */ - usd_dest_params.dp_arp_timeout = params->dp_arp_timeout; - usd_dest_params.dp_max_arps = params->dp_max_arps; - - return 0; -} - -/* - * Start destination creation - * Resolution progress is performed in usd_create_dest_query() and - * usd_create_dest_poll() - */ -int -usd_create_dest_start( - struct usd_device *dev, - uint32_t daddr_be, - uint16_t dport_be, - void *context) -{ - struct usd_dest_req *req; - int ret; - - req = NULL; - ret = usd_create_udp_dest_start(dev, daddr_be, dport_be, &req); - - if (ret == 0) { - req->udr_context = context; - } - - return ret; -} - -/* - * Return first completed destinatin request - */ -int -usd_create_dest_poll( - struct usd_device *dev, - void **context_o, - int *status, - struct usd_dest **dest_o) -{ - struct usd_dest_req *req; - - usd_dest_progress(); - - if (!TAILQ_EMPTY(&dev->ud_completed_reqs)) { - req = TAILQ_FIRST(&dev->ud_completed_reqs); - TAILQ_REMOVE(&dev->ud_completed_reqs, req, udr_link); - *context_o = req->udr_context; - *status = req->udr_status; - if (*status == 0) - *dest_o = req->udr_dest; - free(req); - return 0; - - } else { - return -EAGAIN; - } -} - -/* - * Check completion of a particular request - */ -int -usd_create_dest_query( - struct usd_device *dev, - void *context, - int *status, - struct usd_dest **dest_o) -{ - struct usd_dest_req *req; - - usd_dest_progress(); - - TAILQ_FOREACH(req, &dev->ud_completed_reqs, udr_link) { - if (req->udr_context == context) { - TAILQ_REMOVE(&dev->ud_completed_reqs, req, udr_link); - *status = req->udr_status; - if (*status == 0) - *dest_o = req->udr_dest; - free(req); - return 0; - } - } - - return -EAGAIN; -} - -/* - * Cancel a destination creation in progress - * Look through both the pending and completed queues, simply - * squash the record if we find it. 
- */ -int -usd_create_dest_cancel( - struct usd_device *dev, - void *context) -{ - struct usd_dest_req *req; - - TAILQ_FOREACH(req, &dev->ud_pending_reqs, udr_link) { - if (req->udr_context == context) { - TAILQ_REMOVE(&dev->ud_pending_reqs, req, udr_link); - goto found; - } - } - - TAILQ_FOREACH(req, &dev->ud_completed_reqs, udr_link) { - if (req->udr_context == context) { - TAILQ_REMOVE(&dev->ud_completed_reqs, req, udr_link); - goto found; - } - } - - return -EINVAL; - - found: - free(req->udr_dest); - free(req); - return 0; -} - -/* - * Create a destination given a MAC address - */ -int -usd_create_dest_with_mac( - struct usd_device *dev, - uint32_t daddr_be, - uint16_t dport_be, - uint8_t * dmac, - struct usd_dest **dest_o) -{ - struct ether_header *eth; - struct usd_dest *dest; - - dest = calloc(sizeof(*dest), 1); - if (dest == NULL) - return -errno; - - /* Fill in dest as much as we can */ - usd_fill_udp_dest(dest, &dev->ud_attrs, daddr_be, dport_be); - - /* copy in MAC from caller */ - eth = &dest->ds_dest.ds_udp.u_hdr.uh_eth; - memcpy(&eth->ether_dhost[0], dmac, ETH_ALEN); - - *dest_o = dest; - return 0; -} - -/* - * Expand a destination - */ -int -usd_expand_dest( - struct usd_dest *dest, - uint32_t *ip_be_o, - uint16_t *port_be_o) -{ - if (ip_be_o != NULL) { - *ip_be_o = dest->ds_dest.ds_udp.u_hdr.uh_ip.daddr; - } - if (port_be_o != NULL) { - *port_be_o = dest->ds_dest.ds_udp.u_hdr.uh_udp.dest; - } - - return 0; -} diff --git a/prov/usnic/src/usnic_direct/usd_dest.h b/prov/usnic/src/usnic_direct/usd_dest.h deleted file mode 100644 index 8322e66b08a..00000000000 --- a/prov/usnic/src/usnic_direct/usd_dest.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses.
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - * definitions for address resolution - */ - -#ifndef _USD_DEST_H_ -#define _USD_DEST_H_ - -#include "usd_queue.h" -#include "usd_time.h" - -/* - * Record describing an address resolution in progress - */ -typedef struct usd_dest_req udr_t; -struct usd_dest_req { - struct usd_dest *udr_dest; - - uint32_t udr_daddr_be; - - unsigned udr_arps_sent; - usd_time_t udr_last_arp; /* time of last */ - - int udr_complete; - int udr_status; - - void *udr_context; - - TAILQ_ENTRY(usd_dest_req) udr_link; -}; - -void usd_fill_udp_dest(struct usd_dest *dest, struct usd_device_attrs *dap, - uint32_t daddr_be, uint16_t dport_be); - -#endif /* _USD_DEST_H_ */ diff --git a/prov/usnic/src/usnic_direct/usd_device.c b/prov/usnic/src/usnic_direct/usd_device.c deleted file mode 100644 index 3674a80c671..00000000000 --- a/prov/usnic/src/usnic_direct/usd_device.c +++ /dev/null @@ -1,689 +0,0 @@ -/* - * Copyright (c) 2014-2016, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "usnic_direct.h" -#include "usd.h" -#include "usd_ib_sysfs.h" -#include "usd_ib_cmd.h" -#include "usd_socket.h" -#include "usd_device.h" - -static pthread_once_t usd_init_once = PTHREAD_ONCE_INIT; - -static struct usd_ib_dev *usd_ib_dev_list; -static int usd_init_error; - -TAILQ_HEAD(,usd_device) usd_device_list = - TAILQ_HEAD_INITIALIZER(usd_device_list); - -/* - * Perform one-time initialization - */ -static void -do_usd_init(void) -{ - usd_init_error = usd_ib_get_devlist(&usd_ib_dev_list); -} - -/* - * Unmap group vector when releasing usd_dev - */ -static void -usd_unmap_grp_vect(struct usd_device *dev) -{ - if (dev->grp_vect_map.va != NULL) { - munmap(dev->grp_vect_map.va, dev->grp_vect_map.len); - dev->grp_vect_map.va = NULL; - } -} - -/* - * Init routine - */ -static int -usd_init(void) -{ - /* Do initialization one time */ - pthread_once(&usd_init_once, do_usd_init); - return usd_init_error; -} - -/* - * Return list of currently available devices - */ -int -usd_get_device_list( - struct usd_device_entry *entries, - int 
*num_entries) -{ - int n; - struct usd_ib_dev *idp; - int ret; - - n = 0; - - ret = usd_init(); - if (ret != 0) { - goto out; - } - - idp = usd_ib_dev_list; - while (idp != NULL && n < *num_entries) { - strncpy(entries[n].ude_devname, idp->id_usnic_name, - sizeof(entries[n].ude_devname) - 1); - ++n; - idp = idp->id_next; - } - -out: - *num_entries = n; - return ret; -} - -/* - * Allocate a context from the driver - */ -static int -usd_open_ibctx(struct usd_context *uctx) -{ - int ret; - - ret = usd_ib_cmd_get_context(uctx); - return ret; -} - -const char * -usd_devid_to_pid(uint32_t vendor_id, uint32_t device_id) -{ - const char *pid; - - if (vendor_id != 0x1137) - return "Unknown"; - - switch (device_id) { - case 0x4f: - // Vasona - pid = "UCSC-VIC-M82-8P"; - break; - case 0x84: - // Cotati - pid = "UCSB-MLOM-40G-01"; - break; - case 0x85: - // Lexington - pid = "UCSC-PCIE-CSC-02"; - break; - case 0xcd: - // Icehouse - pid = "UCSC-PCIE-C40Q-02"; - break; - case 0xce: - // Kirkwood Lake - pid = "UCSC-PCIE-C10T-02"; - break; - case 0x12e: - // Susanville MLOM - pid = "UCSC-MLOM-CSC-02"; - break; - case 0x139: - // Torrance MLOM - pid = "UCSC-MLOM-C10T-02"; - break; - - case 0x12c: - // Calistoga MLOM - pid = "UCSB-MLOM-40G-03"; - break; - case 0x137: - // Mountain View (Cruz mezz) - pid = "UCSB-VIC-M83-8P"; - break; - case 0x138: - // Walnut Creek - pid = "UCSB-B3116S-LOM"; - break; - case 0x14b: - // Mount Tian - pid = "UCSC-C3260-SIOC"; - break; - case 0x14d: - // Clearlake - pid = "UCSC-PCIE-C40Q-03"; - break; - case 0x157: - // Mount Tian2 - pid = "UCSC-C3260-SIOC"; - break; - case 0x15d: - // Claremont MLOM - pid = "UCSC-MLOM-C40Q-03"; - break; - - case 0x0218: - // Bradbury - pid = "UCSC-MLOM-C25Q-04"; - break; - case 0x0217: - // Brentwood - pid = "UCSC-PCIE-C25Q-04"; - break; - case 0x021a: - // Burlingame - pid = "UCSC-MLOM-C40Q-04"; - break; - case 0x0219: - // Bayside - pid = "UCSC-PCIE-C40Q-04"; - break; - case 0x0215: - // Bakersfield - pid = 
"UCSB-MLOM-40G-04"; - break; - case 0x0216: - // Boonville - pid = "UCSB-VIC-M84-4P"; - break; - case 0x024a: - // Benicia - pid = "UCSC-PCIE-C100-04"; - break; - case 0x024b: - // Beaumont - pid = "UCSC-MLOM-C100-04"; - break; - - default: - pid = "Unknown Cisco Device"; - break; - } - - return pid; -} - -const char * -usd_devid_to_nicname(uint32_t vendor_id, uint32_t device_id) -{ - const char *nicname; - - if (vendor_id != 0x1137) - return "Unknown"; - - switch (device_id) { - case 0x4f: - // Vasona - nicname = "VIC 1280"; - break; - case 0x84: - // Cotati - nicname = "VIC 1240"; - break; - case 0x85: - // Lexington - nicname = "VIC 1225"; - break; - case 0xcd: - // Icehouse - nicname = "VIC 1285"; - break; - case 0xce: - // Kirkwood Lake - nicname = "VIC 1225T"; - break; - case 0x12e: - // Susanville MLOM - nicname = "VIC 1227"; - break; - case 0x139: - // Torrance MLOM - nicname = "VIC 1227T"; - break; - - case 0x12c: - // Calistoga MLOM - nicname = "VIC 1340"; - break; - case 0x137: - // Mountain View (Cruz mezz) - nicname = "VIC 1380"; - break; - case 0x138: - // Walnut Creek - nicname = "UCSB-B3116S"; - break; - case 0x14b: - // Mount Tian - nicname = ""; - break; - case 0x14d: - // Clearlake - nicname = "VIC 1385"; - break; - case 0x157: - // Mount Tian2 - nicname = ""; - break; - case 0x15d: - // Claremont MLOM - nicname = "VIC 1387"; - break; - - case 0x0218: - // Bradbury - nicname = "VIC 1457"; - break; - case 0x0217: - // Brentwood - nicname = "VIC 1455"; - break; - case 0x021a: - // Burlingame - nicname = "VIC 1487"; - break; - case 0x0219: - // Bayside - nicname = "VIC 1485"; - break; - case 0x0215: - // Bakersfield - nicname = "VIC 1440"; - break; - case 0x0216: - // Boonville - nicname = "VIC 1480"; - break; - case 0x024a: - // Benicia - nicname = "VIC 1495"; - break; - case 0x024b: - // Beaumont - nicname = "VIC 1497"; - break; - - default: - nicname = "Unknown Cisco Device"; - break; - } - - return nicname; -} - -/* - * Rummage around and 
collect all the info about this device we can find - */ -static int -usd_discover_device_attrs( - struct usd_device *dev, - const char *dev_name) -{ - struct usd_device_attrs *dap; - int ret; - - /* find interface name */ - ret = usd_get_iface(dev); - if (ret != 0) - return ret; - - ret = usd_get_mac(dev, dev->ud_attrs.uda_mac_addr); - if (ret != 0) - return ret; - - ret = usd_get_usnic_config(dev); - if (ret != 0) - return ret; - - ret = usd_get_firmware(dev); - if (ret != 0) - return ret; - - /* ipaddr, netmask, mtu */ - ret = usd_get_dev_if_info(dev); - if (ret != 0) - return ret; - - /* get what attributes we can from querying IB */ - ret = usd_ib_query_dev(dev); - if (ret != 0) - return ret; - - /* constants that should come from driver */ - dap = &dev->ud_attrs; - dap->uda_max_cqe = (1 << 16) - 1;; - dap->uda_max_send_credits = (1 << 12) - 1; - dap->uda_max_recv_credits = (1 << 12) - 1; - strncpy(dap->uda_devname, dev_name, sizeof(dap->uda_devname) - 1); - - return 0; -} - -static void -usd_dev_free(struct usd_device *dev) -{ - if (dev->ud_arp_sockfd != -1) - close(dev->ud_arp_sockfd); - - if (dev->ud_ctx != NULL && - (dev->ud_flags & USD_DEVF_CLOSE_CTX)) { - usd_close_context(dev->ud_ctx); - } - free(dev); -} - -/* - * Allocate a usd_device without allocating a PD - */ -static int -usd_dev_alloc_init(const char *dev_name, struct usd_open_params *uop_param, - struct usd_device **dev_o) -{ - struct usd_device *dev = NULL; - int ret; - - dev = calloc(sizeof(*dev), 1); - if (dev == NULL) { - ret = -errno; - goto out; - } - - dev->ud_flags = 0; - if (uop_param->context == NULL) { - ret = usd_open_context(dev_name, uop_param->cmd_fd, - &dev->ud_ctx); - if (ret != 0) { - goto out; - } - dev->ud_flags |= USD_DEVF_CLOSE_CTX; - } else { - dev->ud_ctx = uop_param->context; - } - - dev->ud_arp_sockfd = -1; - - TAILQ_INIT(&dev->ud_pending_reqs); - TAILQ_INIT(&dev->ud_completed_reqs); - - if (uop_param->context == NULL) - ret = usd_discover_device_attrs(dev, dev_name); - 
else - ret = usd_discover_device_attrs(dev, - uop_param->context->ucx_ib_dev->id_usnic_name); - if (ret != 0) - goto out; - - dev->ud_attrs.uda_event_fd = dev->ud_ctx->event_fd; - dev->ud_attrs.uda_num_comp_vectors = dev->ud_ctx->num_comp_vectors; - - if (!(uop_param->flags & UOPF_SKIP_LINK_CHECK)) { - ret = usd_device_ready(dev); - if (ret != 0) { - goto out; - } - } - - *dev_o = dev; - return 0; - -out: - if (dev != NULL) - usd_dev_free(dev); - return ret; -} - -int -usd_close_context(struct usd_context *ctx) -{ - pthread_mutex_destroy(&ctx->ucx_mutex); - - /* XXX - verify all other resources closed out */ - if (ctx->ucx_flags & USD_CTXF_CLOSE_CMD_FD) - close(ctx->ucx_ib_dev_fd); - if (ctx->ucmd_ib_dev_fd != -1) - close(ctx->ucmd_ib_dev_fd); - - free(ctx); - - return 0; -} - -int -usd_open_context(const char *dev_name, int cmd_fd, - struct usd_context **ctx_o) -{ - struct usd_context *ctx = NULL; - struct usd_ib_dev *idp; - int ret; - - if (dev_name == NULL) - return -EINVAL; - - ret = usd_init(); - if (ret != 0) { - return ret; - } - - /* Look for matching device */ - idp = usd_ib_dev_list; - while (idp != NULL) { - if (dev_name == NULL || strcmp(idp->id_usnic_name, dev_name) == 0) { - break; - } - idp = idp->id_next; - } - - /* not found, leave now */ - if (idp == NULL) { - ret = -ENXIO; - goto out; - } - - /* - * Found matching device, open an instance - */ - ctx = calloc(sizeof(*ctx), 1); - if (ctx == NULL) { - ret = -errno; - goto out; - } - ctx->ucx_ib_dev_fd = -1; - ctx->ucmd_ib_dev_fd = -1; - ctx->ucx_flags = 0; - - /* Save pointer to IB device */ - ctx->ucx_ib_dev = idp; - - /* Open the fd we will be using for IB commands */ - if (cmd_fd == -1) { - ctx->ucx_ib_dev_fd = open(idp->id_dev_path, O_RDWR); - if (ctx->ucx_ib_dev_fd == -1) { - ret = -ENODEV; - goto out; - } - ctx->ucx_flags |= USD_CTXF_CLOSE_CMD_FD; - } else { - ctx->ucx_ib_dev_fd = cmd_fd; - } - - /* - * Open another fd to send encapsulated user commands through - * CMD_GET_CONTEXT call. 
The reason to open an additional fd is - * that ib core does not allow multiple get_context call on one - * file descriptor. - */ - ctx->ucmd_ib_dev_fd = open(idp->id_dev_path, O_RDWR | O_CLOEXEC); - if (ctx->ucmd_ib_dev_fd == -1) { - ret = -ENODEV; - goto out; - } - - /* allocate a context from driver */ - ret = usd_open_ibctx(ctx); - if (ret != 0) { - goto out; - } - - LIST_INIT(&ctx->ucx_intr_list); - if (pthread_mutex_init(&ctx->ucx_mutex, NULL) != 0) - goto out; - - *ctx_o = ctx; - return 0; - -out: - if (ctx != NULL) - usd_close_context(ctx); - return ret; -} - -/* - * Close a raw USNIC device - */ -int -usd_close( - struct usd_device *dev) -{ - usd_unmap_grp_vect(dev); - - TAILQ_REMOVE(&usd_device_list, dev, ud_link); - usd_dev_free(dev); - - return 0; -} - -/* - * Open a raw USNIC device - */ -int -usd_open( - const char *dev_name, - struct usd_device **dev_o) -{ - struct usd_open_params params; - - memset(¶ms, 0, sizeof(params)); - params.cmd_fd = -1; - params.context = NULL; - return usd_open_with_params(dev_name, ¶ms, dev_o); -} - -/* - * Most generic usd device open function - */ -int usd_open_with_params(const char *dev_name, - struct usd_open_params* uop_param, - struct usd_device **dev_o) -{ - struct usd_device *dev = NULL; - int ret; - - ret = usd_dev_alloc_init(dev_name, uop_param, &dev); - if (ret != 0) { - goto out; - } - - if (!(uop_param->flags & UOPF_SKIP_PD_ALLOC)) { - ret = usd_ib_cmd_alloc_pd(dev, &dev->ud_pd_handle); - if (ret != 0) { - goto out; - } - } - - TAILQ_INSERT_TAIL(&usd_device_list, dev, ud_link); - *dev_o = dev; - return 0; - -out: - if (dev != NULL) - usd_dev_free(dev); - return ret; -} - -/* - * Return attributes of a device - */ -int -usd_get_device_attrs( - struct usd_device *dev, - struct usd_device_attrs *dattrs) -{ - int ret; - - /* ipaddr, netmask, mtu */ - ret = usd_get_dev_if_info(dev); - if (ret != 0) - return ret; - - /* get what attributes we can from querying IB */ - ret = usd_ib_query_dev(dev); - if (ret != 0) - 
return ret; - - *dattrs = dev->ud_attrs; - return 0; -} - -/* - * Check that device is ready to have queues created - */ -int -usd_device_ready( - struct usd_device *dev) -{ - if (dev->ud_attrs.uda_ipaddr_be == 0) { - return -EADDRNOTAVAIL; - } - if (dev->ud_attrs.uda_link_state != USD_LINK_UP) { - return -ENETDOWN; - } - - return 0; -} diff --git a/prov/usnic/src/usnic_direct/usd_device.h b/prov/usnic/src/usnic_direct/usd_device.h deleted file mode 100644 index 93bbde17030..00000000000 --- a/prov/usnic/src/usnic_direct/usd_device.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - * definitions for device management - */ - -#ifndef _USD_DEVICE_H_ -#define _USD_DEVICE_H_ - -int usd_device_ready(struct usd_device *dev); - -#endif /* _USD_DEVICE_H_ */ diff --git a/prov/usnic/src/usnic_direct/usd_enum.c b/prov/usnic/src/usnic_direct/usd_enum.c deleted file mode 100644 index 29cc91a7f99..00000000000 --- a/prov/usnic/src/usnic_direct/usd_enum.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#include "usnic_direct.h" - -const char *usd_link_state_str(enum usd_link_state state) -{ - switch (state) { - case USD_LINK_DOWN: return "USD_LINK_DOWN"; - case USD_LINK_UP: return "USD_LINK_UP"; - default: return "UNKNOWN"; - } -} - -const char *usd_completion_status_str(enum usd_completion_status cstatus) -{ - switch (cstatus) { - case USD_COMPSTAT_SUCCESS: return "USD_COMPSTAT_SUCCESS"; - case USD_COMPSTAT_ERROR_CRC: return "USD_COMPSTAT_ERROR_CRC"; - case USD_COMPSTAT_ERROR_TRUNC: return "USD_COMPSTAT_ERROR_TRUNC"; - case USD_COMPSTAT_ERROR_TIMEOUT: return "USD_COMPSTAT_ERROR_TIMEOUT"; - case USD_COMPSTAT_ERROR_INTERNAL: return "USD_COMPSTAT_ERROR_INTERNAL"; - default: return "UNKNOWN"; - } -} - -const char *usd_completion_type_str(enum usd_completion_type ctype) -{ - switch (ctype) { - case USD_COMPTYPE_SEND: return "USD_COMPTYPE_SEND"; - case USD_COMPTYPE_RECV: return "USD_COMPTYPE_RECV"; - default: return "UNKNOWN"; - } -} - -const char *usd_filter_type_str(enum usd_filter_type ftype) -{ - switch (ftype) { - case USD_FTY_UDP: return "USD_FTY_UDP"; - case USD_FTY_UDP_SOCK: return "USD_FTY_UDP_SOCK"; - case USD_FTY_TCP: return 
"USD_FTY_TCP"; - case USD_FTY_MCAST: return "USD_FTY_MCAST"; - case USD_FTY_8915: return "USD_FTY_8915"; - default: return "UNKNOWN"; - } -} - -const char *usd_qp_transport_str(enum usd_qp_transport qpt) -{ - switch (qpt) { - case USD_QTR_RAW: return "USD_QTR_RAW"; - case USD_QTR_UDP: return "USD_QTR_UDP"; - default: return "UNKNOWN"; - } -} - -const char *usd_qp_type_str(enum usd_qp_type qpt) -{ - switch (qpt) { - case USD_QTY_UD: return "USD_QTY_UD"; - case USD_QTY_UD_PIO: return "USD_QTY_UD_PIO"; - default: return "UNKNOWN"; - } -} - -const char *usd_qp_event_event_type_str(enum usd_device_event_type det) -{ - switch (det) { - case USD_EVENT_LINK_UP: return "USD_EVENT_LINK_UP"; - case USD_EVENT_LINK_DOWN: return "USD_EVENT_LINK_DOWN"; - default: return "UNKNOWN"; - } -} - -const char *usd_send_flag_sift_str(enum usd_send_flag_shift sfs) -{ - switch (sfs) { - case USD_SFS_SIGNAL: return "USD_SFS_SIGNAL"; - default: return "UNKNOWN"; - } -} - -const char *usd_capability(enum usd_capability cap) -{ - switch (cap) { - case USD_CAP_CQ_SHARING: return "USD_CAP_CQ_SHARING"; - case USD_CAP_MAP_PER_RES: return "USD_CAP_MAP_PER_RES"; - case USD_CAP_PIO: return "USD_CAP_PIO"; - case USD_CAP_CQ_INTR: return "USD_CAP_CQ_INTR"; - case USD_CAP_GRP_INTR: return "USD_CAP_GRP_INTR"; - case USD_CAP_MAX: return "USD_CAP_MAX"; - default: return "UNKNOWN"; - } -} diff --git a/prov/usnic/src/usnic_direct/usd_event.c b/prov/usnic/src/usnic_direct/usd_event.c deleted file mode 100644 index 9eb24326544..00000000000 --- a/prov/usnic/src/usnic_direct/usd_event.c +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2014-2017, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#include -#include -#include - -#include - -#include - -#include "usnic_direct.h" -#include "usd.h" - -/* - * Read an event from IB event fd - */ -int -usd_get_device_event(struct usd_device *dev, - struct usd_device_event *devent) -{ - struct ib_uverbs_async_event_desc ib_event; - int n; - - n = read(dev->ud_attrs.uda_event_fd, &ib_event, sizeof(ib_event)); - if (n == 0) - return -EAGAIN; - else if (n < 0) - return -errno; - - switch (ib_event.event_type) { - case IBV_EVENT_PORT_ACTIVE: - devent->ude_type = USD_EVENT_LINK_UP; - break; - case IBV_EVENT_PORT_ERR: - devent->ude_type = USD_EVENT_LINK_DOWN; - break; - default: - printf("Unexpected event type: %d\n", ib_event.event_type); - return -EAGAIN; - break; - } - - return 0; -} diff --git a/prov/usnic/src/usnic_direct/usd_ib_cmd.c b/prov/usnic/src/usnic_direct/usd_ib_cmd.c deleted file mode 100644 index a0b7903c5a0..00000000000 --- a/prov/usnic/src/usnic_direct/usd_ib_cmd.c +++ /dev/null @@ -1,1032 +0,0 @@ -/* - * Copyright (c) 2014-2017, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "kcompat.h" -#include "usnic_ib_abi.h" - -#include "usnic_direct.h" -#include "usd.h" -#include "usd_ib_cmd.h" - -int -usd_ib_cmd_get_context(struct usd_context *uctx) -{ - struct usnic_get_context cmd; - struct usnic_get_context_resp resp; - struct ib_uverbs_cmd_hdr *ich; - struct ib_uverbs_get_context *icp; - struct ib_uverbs_get_context_resp *irp; - struct usnic_ib_get_context_cmd *ucp; - struct usnic_ib_get_context_resp *urp; - int n; - - /* clear cmd and response */ - memset(&cmd, 0, sizeof(cmd)); - memset(&resp, 0, sizeof(resp)); - - /* fill in the command struct */ - ich = &cmd.ibv_cmd_hdr; - ich->command = IB_USER_VERBS_CMD_GET_CONTEXT; - ich->in_words = sizeof(cmd) / 4; - ich->out_words = sizeof(resp) / 4; - - icp = &cmd.ibv_cmd; - icp->response = (uintptr_t) & resp; - - ucp = &cmd.usnic_cmd; - -/* - * Because usnic_verbs kernel module with USNIC_CTX_RESP_VERSION as 1 - * silently returns success even it receives resp_version larger than 1, - * without filling in capbility information, here we 
still fill in - * command with resp_version as 1 in order to retrive cababiltiy information. - * Later when we decide to drop support for this version of kernel - * module, we should replace the next two lines of code with commented-out - * code below. - ucp->resp_version = USNIC_CTX_RESP_VERSION; - ucp->v2.encap_subcmd = 0; - ucp->v2.num_caps = USNIC_CAP_CNT; -*/ - ucp->resp_version = 1; - ucp->v1.num_caps = USNIC_CAP_CNT; - - n = write(uctx->ucx_ib_dev_fd, &cmd, sizeof(cmd)); - if (n != sizeof(cmd)) { - return -errno; - } - - irp = &resp.ibv_resp; - uctx->event_fd = irp->async_fd; - uctx->num_comp_vectors = irp->num_comp_vectors; - - urp = &resp.usnic_resp; - -/* - * Replace the code below with the commented-out line if dropping - * support for kernel module with resp_version support as 1 - if (urp->resp_version == USNIC_CTX_RESP_VERSION) { - */ - if (urp->resp_version == 1) { - if (urp->num_caps > USNIC_CAP_CQ_SHARING && - urp->cap_info[USNIC_CAP_CQ_SHARING] > 0) { - uctx->ucx_caps[USD_CAP_CQ_SHARING] = 1; - } - if (urp->num_caps > USNIC_CAP_MAP_PER_RES && - urp->cap_info[USNIC_CAP_MAP_PER_RES] > 0) { - uctx->ucx_caps[USD_CAP_MAP_PER_RES] = 1; - } - if (urp->num_caps > USNIC_CAP_PIO && - urp->cap_info[USNIC_CAP_PIO] > 0) { - uctx->ucx_caps[USD_CAP_PIO] = 1; - } - if (urp->num_caps > USNIC_CAP_CQ_INTR && - urp->cap_info[USNIC_CAP_CQ_INTR] > 0) { - uctx->ucx_caps[USD_CAP_CQ_INTR] = 1; - } - if (urp->num_caps > USNIC_CAP_GRP_INTR && - urp->cap_info[USNIC_CAP_GRP_INTR] > 0) { - uctx->ucx_caps[USD_CAP_GRP_INTR] = 1; - } - } - - return 0; -} - -int -usd_ib_cmd_devcmd( - struct usd_device *dev, - enum vnic_devcmd_cmd devcmd, - u64 *a0, u64 *a1, int wait) -{ - struct usnic_get_context cmd; - struct usnic_get_context_resp resp; - struct ib_uverbs_cmd_hdr *ich; - struct ib_uverbs_get_context *icp; - struct usnic_ib_get_context_cmd *ucp; - struct usnic_ib_get_context_resp *urp; - struct usnic_udevcmd_cmd udevcmd; - struct usnic_udevcmd_resp udevcmd_resp; - int n; - - if 
(dev->ud_ctx->ucmd_ib_dev_fd < 0) - return -ENOENT; - - /* clear cmd and response */ - memset(&cmd, 0, sizeof(cmd)); - memset(&resp, 0, sizeof(resp)); - memset(&udevcmd, 0, sizeof(udevcmd)); - memset(&udevcmd_resp, 0, sizeof(udevcmd_resp)); - - /* fill in the command struct */ - ich = &cmd.ibv_cmd_hdr; - ich->command = IB_USER_VERBS_CMD_GET_CONTEXT; - ich->in_words = sizeof(cmd) / 4; - ich->out_words = sizeof(resp) / 4; - - icp = &cmd.ibv_cmd; - icp->response = (uintptr_t) & resp; - - /* fill in usnic devcmd struct */ - udevcmd.vnic_idx = dev->ud_vf_list->vf_id; - udevcmd.devcmd = devcmd; - udevcmd.wait = wait; - udevcmd.num_args = 2; - udevcmd.args[0] = *a0; - udevcmd.args[1] = *a1; - - ucp = &cmd.usnic_cmd; - ucp->resp_version = USNIC_CTX_RESP_VERSION; - ucp->v2.encap_subcmd = 1; - ucp->v2.usnic_ucmd.ucmd = USNIC_USER_CMD_DEVCMD; - ucp->v2.usnic_ucmd.inbuf = (uintptr_t) &udevcmd; - ucp->v2.usnic_ucmd.inlen = (u32)sizeof(udevcmd); - ucp->v2.usnic_ucmd.outbuf = (uintptr_t) &udevcmd_resp; - ucp->v2.usnic_ucmd.outlen = (u32)sizeof(udevcmd_resp); - - n = write(dev->ud_ctx->ucmd_ib_dev_fd, &cmd, sizeof(cmd)); - urp = &resp.usnic_resp; - /* - * If returns success, it's an old kernel who does not understand - * version 2 command, then we need to close the command FD to - * release the created ucontext object - */ - if (n == sizeof(cmd)) { - usd_err( - "The running usnic_verbs kernel module does not support " - "encapsulating devcmd through IB GET_CONTEXT command\n"); - close(dev->ud_ctx->ucmd_ib_dev_fd); - dev->ud_ctx->ucmd_ib_dev_fd = -1; - return -ENOTSUP; - } else if (errno != ECHILD) { - return -errno; - } else if (urp->resp_version != USNIC_CTX_RESP_VERSION) { - /* Kernel needs to make sure it returns response with a format - * understandable by the library. 
*/ - usd_err( - "The returned resp version does not match with requested\n"); - return -ENOTSUP; - } - - *a0 = udevcmd_resp.args[0]; - *a1 = udevcmd_resp.args[1]; - - return 0; -} - -/* - * Issue IB DEALLOC_PD command to alloc a PD in kernel - */ -static int -_usd_ib_cmd_dealloc_pd( - struct usd_device *dev, - uint32_t pd_handle) -{ - struct usnic_dealloc_pd cmd; - struct ib_uverbs_cmd_hdr *ich; - struct ib_uverbs_dealloc_pd *icp; - int n; - - memset(&cmd, 0, sizeof(cmd)); - - ich = &cmd.ibv_cmd_hdr; - ich->command = IB_USER_VERBS_CMD_DEALLOC_PD; - ich->in_words = sizeof(cmd) / 4; - ich->out_words = 0; - - icp = &cmd.ibv_cmd; - icp->pd_handle = pd_handle; - - n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd)); - if (n != sizeof(cmd)) { - return -errno; - } - - return 0; -} - -/* - * Issue IB ALLOC_PD command to alloc a PD in kernel - */ -static int -_usd_ib_cmd_alloc_pd( - struct usd_device *dev, - uint32_t *handle_o, - uint32_t *vfid, - uint32_t *grp_vect_buf_len) -{ - struct usnic_alloc_pd cmd; - struct usnic_alloc_pd_resp resp; - struct ib_uverbs_cmd_hdr *ich; - struct ib_uverbs_alloc_pd *icp; - struct usnic_ib_alloc_pd_cmd *ucp; - struct ib_uverbs_alloc_pd_resp *irp; - struct usnic_ib_alloc_pd_resp *urp; - int n; - - memset(&cmd, 0, sizeof(cmd)); - memset(&resp, 0, sizeof(resp)); - - /* fill in command */ - ich = &cmd.ibv_cmd_hdr; - ich->command = IB_USER_VERBS_CMD_ALLOC_PD; - ich->in_words = sizeof(cmd) / 4; - ich->out_words = sizeof(resp) / 4; - - icp = &cmd.ibv_cmd; - icp->response = (uintptr_t) & resp; - - /* - * Only need to get group vector size and vf information - * if group interrupt is enabled - */ - if (dev->ud_ctx->ucx_caps[USD_CAP_GRP_INTR] > 0) { - ucp = &cmd.usnic_cmd; - ucp->resp_version = USNIC_IB_ALLOC_PD_VERSION; - } - - n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd)); - if (n != sizeof(cmd)) { - return -errno; - } - - /* process response */ - irp = &resp.ibv_resp; - *handle_o = irp->pd_handle; - urp = &resp.usnic_resp; - if 
(urp->resp_version >= 1) { - *vfid = urp->cur.vfid; - *grp_vect_buf_len = urp->cur.grp_vect_buf_len; - } - - return 0; -} - -/* - * Create a protection domain - */ -int -usd_ib_cmd_alloc_pd( - struct usd_device *dev, - uint32_t *handle_o) -{ - uint32_t vfid = 0; - uint32_t grp_vect_buf_len = 0; - int err; - - /* Issue IB alloc_pd command, get assigned VF id and group vector size */ - err = _usd_ib_cmd_alloc_pd(dev, handle_o, &vfid, &grp_vect_buf_len); - if (err) { - return err; - } - - /* MAP group vector address to userspace - * Kernel module then maps group vector user address to IOMMU and - * program VIC HW register - */ - if (dev->ud_ctx->ucx_caps[USD_CAP_GRP_INTR] > 0) { - void *va; - off64_t offset; - - offset = USNIC_ENCODE_PGOFF(vfid, USNIC_MMAP_GRPVECT, 0); - va = mmap64(NULL, grp_vect_buf_len, PROT_READ + PROT_WRITE, - MAP_SHARED, dev->ud_ctx->ucx_ib_dev_fd, offset); - - if (va == MAP_FAILED) { - usd_err("Failed to map group vector for vf %u, grp_vect_size %u, " - "error %d\n", - vfid, grp_vect_buf_len, errno); - _usd_ib_cmd_dealloc_pd(dev, *handle_o); - return -errno; - } - - dev->grp_vect_map.va = va; - dev->grp_vect_map.len = grp_vect_buf_len; - dev->grp_vect_map.vfid = vfid; - } - - return 0; -} - -int -usd_ib_cmd_reg_mr( - struct usd_device *dev, - void *vaddr, - size_t length, - struct usd_mr *mr) -{ - struct usnic_reg_mr cmd; - struct usnic_reg_mr_resp resp; - struct ib_uverbs_cmd_hdr *ich; - struct ib_uverbs_reg_mr *icp; - struct ib_uverbs_reg_mr_resp *irp; - int n; - - memset(&cmd, 0, sizeof(cmd)); - memset(&resp, 0, sizeof(resp)); - - ich = &cmd.ibv_cmd_hdr; - ich->command = IB_USER_VERBS_CMD_REG_MR; - ich->in_words = sizeof(cmd) / 4; - ich->out_words = sizeof(resp) / 4; - - icp = &cmd.ibv_cmd; - icp->response = (uintptr_t) & resp; - icp->start = (uintptr_t) vaddr; - icp->length = length; - icp->hca_va = (uintptr_t) vaddr; - icp->pd_handle = dev->ud_pd_handle; - icp->access_flags = IBV_ACCESS_LOCAL_WRITE; - - /* Issue command to IB driver */ - n 
= write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd)); - if (n != sizeof(cmd)) { - return errno; - } - - /* process response */ - irp = &resp.ibv_resp; - mr->umr_handle = irp->mr_handle; - mr->umr_lkey = irp->lkey; - mr->umr_rkey = irp->rkey; - - return 0; -} - -int -usd_ib_cmd_dereg_mr( - struct usd_device *dev, - struct usd_mr *mr) -{ - struct usnic_dereg_mr cmd; - struct ib_uverbs_cmd_hdr *ich; - struct ib_uverbs_dereg_mr *icp; - int n; - - memset(&cmd, 0, sizeof(cmd)); - - ich = &cmd.ibv_cmd_hdr; - ich->command = IB_USER_VERBS_CMD_DEREG_MR; - ich->in_words = sizeof(cmd) / 4; - ich->out_words = 0; - - icp = &cmd.ibv_cmd; - icp->mr_handle = mr->umr_handle; - - /* Issue command to IB driver */ - n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd)); - if (n != sizeof(cmd)) { - return -errno; - } - - return 0; -} - -/* - * Make the verbs call to create a CQ - */ -int -usd_ib_cmd_create_cq( - struct usd_device *dev, - struct usd_cq_impl *cq, - void *ibv_cq, - int comp_channel, - int comp_vector) -{ - struct usnic_create_cq cmd; - struct usnic_create_cq_resp resp; - struct ib_uverbs_cmd_hdr *ich; - struct ib_uverbs_create_cq *icp; - struct ib_uverbs_create_cq_resp *irp; - cpu_set_t *affinity_mask = NULL; - int flags = 0; - int n; - - memset(&cmd, 0, sizeof(cmd)); - memset(&resp, 0, sizeof(resp)); - - ich = &cmd.ibv_cmd_hdr; - ich->command = IB_USER_VERBS_CMD_CREATE_CQ; - ich->in_words = sizeof(cmd) / 4; - ich->out_words = sizeof(resp) / 4; - - icp = &cmd.ibv_cmd; - icp->response = (uintptr_t) & resp; - - if (ibv_cq == NULL) { - icp->user_handle = (uintptr_t) cq; - } else { - icp->user_handle = (uintptr_t) ibv_cq; /* Pass real verbs cq pointer to kernel - * to make ibv_get_cq_event happy */ - flags |= USNIC_CQ_COMP_SIGNAL_VERBS; - } - icp->cqe = cq->ucq_num_entries; - icp->comp_channel = comp_channel; - icp->comp_vector = comp_vector; - - if (comp_channel != -1) { - if (dev->ud_ctx->ucx_caps[USD_CAP_GRP_INTR] != 1) { - usd_err("usd_create_cq failed. 
No interrupt support\n"); - return -ENOTSUP; - } - cmd.usnic_cmd.resp_version = USNIC_IB_CREATE_CQ_VERSION; - cmd.usnic_cmd.cur.flags = flags; - cmd.usnic_cmd.cur.comp_event_fd = comp_channel; - if ((affinity_mask = CPU_ALLOC(sysconf(_SC_NPROCESSORS_ONLN))) - != NULL && - sched_getaffinity(getpid(), - CPU_ALLOC_SIZE(sysconf(_SC_NPROCESSORS_ONLN)), - affinity_mask) == 0) { - cmd.usnic_cmd.cur.affinity_mask_ptr = (u64)affinity_mask; - cmd.usnic_cmd.cur.affinity_mask_len = - CPU_ALLOC_SIZE(sysconf(_SC_NPROCESSORS_ONLN)); - } else { - cmd.usnic_cmd.cur.affinity_mask_ptr = (u64)NULL; - cmd.usnic_cmd.cur.affinity_mask_len = 0; - } - } else { - /* - * If appliation does not request cq completion event support, - * send command with version 0 to allow compatibility with - * old kernel library - */ - cmd.usnic_cmd.resp_version = 0; - } - - /* Issue command to IB driver */ - n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd)); - if (n != sizeof(cmd)) { - return -errno; - } - - /* process response */ - irp = &resp.ibv_resp; - cq->ucq_handle = irp->cq_handle; - - if (affinity_mask != NULL) - CPU_FREE(affinity_mask); - - return 0; -} - -/* - * Make the verbs call to destroy a CQ - */ -int -usd_ib_cmd_destroy_cq( - struct usd_device *dev, - struct usd_cq_impl *cq) -{ - struct usnic_destroy_cq cmd; - struct ib_uverbs_cmd_hdr *ich; - struct ib_uverbs_destroy_cq *icp; - int n; - - memset(&cmd, 0, sizeof(cmd)); - - ich = &cmd.ibv_cmd_hdr; - ich->command = IB_USER_VERBS_CMD_DESTROY_CQ; - ich->in_words = sizeof(cmd) / 4; - ich->out_words = 0; - - icp = &cmd.ibv_cmd; - icp->cq_handle = cq->ucq_handle; - - /* Issue command to IB driver */ - n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd)); - if (n != sizeof(cmd)) { - return -errno; - } - - return 0; -} - -/* - * Create a verbs QP without attaching any real resources to it yet - */ -int -usd_ib_cmd_create_qp( - struct usd_device *dev, - struct usd_qp_impl *qp, - struct usd_vf_info *vfip) -{ - struct usnic_create_qp cmd; - 
struct usnic_create_qp_resp *resp; - struct ib_uverbs_cmd_hdr *ich; - struct ib_uverbs_create_qp *icp; - struct ib_uverbs_create_qp_resp *irp = NULL; - struct usnic_ib_create_qp_cmd *ucp; - struct usnic_ib_create_qp_resp *urp; - struct usd_qp_filter *qfilt; - int ret; - int n; - uint32_t i; - struct usnic_vnic_barres_info *resources; - - ucp = NULL; - resources = NULL; - irp = NULL; - memset(&cmd, 0, sizeof(cmd)); - - resp = calloc(1, sizeof(*resp)); - if (resp == NULL) { - usd_err("Failed to allocate memory for create_qp_resp\n"); - return -ENOMEM; - } - - ich = &cmd.ibv_cmd_hdr; - ich->command = IB_USER_VERBS_CMD_CREATE_QP; - ich->in_words = sizeof(cmd) / 4; - ich->out_words = sizeof(*resp) / 4; - - icp = &cmd.ibv_cmd; - icp->response = (uintptr_t) resp; - icp->user_handle = (uintptr_t) qp; - icp->pd_handle = dev->ud_pd_handle; - icp->send_cq_handle = qp->uq_wq.uwq_cq->ucq_handle; - icp->recv_cq_handle = qp->uq_rq.urq_cq->ucq_handle; - icp->srq_handle = 0; - icp->max_send_wr = qp->uq_wq.uwq_num_entries; - icp->max_recv_wr = qp->uq_rq.urq_num_entries; - icp->max_send_sge = 1; - icp->max_recv_sge = 1; - icp->max_inline_data = 1024; - icp->sq_sig_all = 0; - icp->qp_type = IBV_QPT_UD; - icp->is_srq = 0; - icp->reserved = 0; - - ucp = &cmd.usnic_cmd; - - if (dev->ud_ctx->ucx_caps[USD_CAP_GRP_INTR]) { - ucp->cmd_version = 2; - } else { - /* - * Allow compatibility with old kernel module when - * application does not require cq completion notification - */ - ucp->cmd_version = 1; - } - - qfilt = &qp->uq_filter; - if (qfilt->qf_type == USD_FTY_UDP || - qfilt->qf_type == USD_FTY_UDP_SOCK) { - /* - * Command versions 0,1,2 need to fill in the spec_v2 struct. - * Newer versions need to fill in the spec struct. 
- */ - if (ucp->cmd_version <= 2) { - ucp->spec_v2.trans_type = USNIC_TRANSPORT_IPV4_UDP; - ucp->spec_v2.ip.sock_fd = qfilt->qf_filter.qf_udp.u_sockfd; - } else { - ucp->spec.trans_type = USNIC_TRANSPORT_IPV4_UDP; - ucp->spec.ip.sock_fd = qfilt->qf_filter.qf_udp.u_sockfd; - } - } else { - ret = -EINVAL; - goto out; - } - - ucp->u.cur.resources_len = RES_TYPE_MAX * sizeof(*resources); - resources = calloc(RES_TYPE_MAX, sizeof(*resources)); - if (resources == NULL) { - usd_err("unable to allocate resources array\n"); - ret = -ENOMEM; - goto out; - } - ucp->u.cur.resources = (u64)(uintptr_t)resources; - - /* Issue command to IB driver */ - n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd)); - if (n != sizeof(cmd)) { - ret = -errno; - goto out; - } - - /* process IB part of response */ - irp = &resp->ibv_resp; - qp->uq_qp_handle = irp->qp_handle; - qp->uq_qp_num = irp->qpn; - - /* process usnic part response */ - urp = &resp->usnic_resp; - - qp->uq_rq.urq_index = urp->rq_idx[0]; - qp->uq_wq.uwq_index = urp->wq_idx[0]; - - qp->uq_rq.urq_cq->ucq_index = urp->cq_idx[0]; - if (qp->uq_rq.urq_cq != qp->uq_wq.uwq_cq) { - qp->uq_wq.uwq_cq->ucq_index = urp->cq_idx[1]; - } - - /* Pull VF info */ - vfip->vi_vfid = urp->vfid; - vfip->vi_bar_bus_addr = urp->bar_bus_addr; - vfip->vi_bar_len = urp->bar_len; - - if (urp->cmd_version == ucp->cmd_version) { - /* got expected version */ - if (dev->ud_ctx->ucx_caps[USD_CAP_MAP_PER_RES] > 0) { - for (i = 0; i < MIN(RES_TYPE_MAX, urp->u.cur.num_barres); i++) { - enum vnic_res_type type = resources[i].type; - if (type < RES_TYPE_MAX) { - vfip->barres[type].type = type; - vfip->barres[type].bus_addr = resources[i].bus_addr; - vfip->barres[type].len = resources[i].len; - } - } - if (vfip->barres[RES_TYPE_WQ].bus_addr == 0) { - usd_err("Failed to retrieve WQ res info\n"); - ret = -ENXIO; - goto out; - } - if (vfip->barres[RES_TYPE_RQ].bus_addr == 0) { - usd_err("Failed to retrieve RQ res info\n"); - ret = -ENXIO; - goto out; - } - if 
(vfip->barres[RES_TYPE_CQ].bus_addr == 0) { - usd_err("Failed to retrieve CQ res info\n"); - ret = -ENXIO; - goto out; - } - if (vfip->barres[RES_TYPE_INTR_CTRL].bus_addr == 0) { - usd_err("Failed to retrieve INTR res info\n"); - ret = -ENXIO; - goto out; - } - if (vfip->barres[RES_TYPE_DEVCMD].bus_addr == 0) { - usd_err("Failed to retrieve DEVCMD res info\n"); - ret = -ENXIO; - goto out; - } - } - } else if (urp->cmd_version == 0) { - /* special case, old kernel that won't tell us about individual barres - * info but should otherwise work fine */ - - if (dev->ud_ctx->ucx_caps[USD_CAP_MAP_PER_RES] != 0) { - /* should not happen, only the presence of never-released kernel - * code should cause this case */ - usd_err("USD_CAP_MAP_PER_RES claimed but qp_create cmd_version == 0\n"); - ret = -ENXIO; - goto out; - } - } else { - usd_err("unexpected cmd_version (%u)\n", urp->cmd_version); - ret = -ENXIO; - goto out; - } - - /* version 2 and beyond has interrupt support */ - if (urp->cmd_version > 1) { - qp->uq_rq.urq_cq->intr_offset = urp->u.cur.rcq_intr_offset; - if (qp->uq_rq.urq_cq != qp->uq_wq.uwq_cq) { - qp->uq_wq.uwq_cq->intr_offset = urp->u.cur.wcq_intr_offset; - } - vfip->vi_barhead_len = urp->u.cur.barhead_len; - } - - free(resources); - free(resp); - return 0; - - out: - if (irp != NULL) /* indicates successful IB create QP */ - usd_ib_cmd_destroy_qp(dev, qp); - free(resources); - free(resp); - return ret; -} - -int -usd_ib_cmd_modify_qp( - struct usd_device *dev, - struct usd_qp_impl *qp, - int state) -{ - struct usnic_modify_qp cmd; - struct ib_uverbs_cmd_hdr *ich; - struct ib_uverbs_modify_qp *icp; - int n; - - memset(&cmd, 0, sizeof(cmd)); - - ich = &cmd.ibv_cmd_hdr; - ich->command = IB_USER_VERBS_CMD_MODIFY_QP; - ich->in_words = sizeof(cmd) / 4; - ich->out_words = 0; - - icp = &cmd.ibv_cmd; - icp->qp_handle = qp->uq_qp_handle; - icp->attr_mask = IBV_QP_STATE; - icp->qp_state = state; - - /* Issue command to IB driver */ - n = 
write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd)); - if (n != sizeof(cmd)) { - return -errno; - } - - return 0; -} - -int -usd_ib_cmd_destroy_qp( - struct usd_device *dev, - struct usd_qp_impl *qp) -{ - struct usnic_destroy_qp cmd; - struct ib_uverbs_destroy_qp_resp resp; - struct ib_uverbs_cmd_hdr *ich; - struct ib_uverbs_destroy_qp *icp; - int n; - - memset(&cmd, 0, sizeof(cmd)); - - ich = &cmd.ibv_cmd_hdr; - ich->command = IB_USER_VERBS_CMD_DESTROY_QP; - ich->in_words = sizeof(cmd) / 4; - ich->out_words = sizeof(resp) / 4; - - icp = &cmd.ibv_cmd; - icp->response = (uintptr_t) & resp; - icp->qp_handle = qp->uq_qp_handle; - - /* Issue command to IB driver */ - n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd)); - if (n != sizeof(cmd)) { - return -errno; - } - - return 0; -} - -static int -usd_ib_cmd_query_device( - struct usd_device *dev, - struct ib_uverbs_query_device_resp *irp) -{ - struct usnic_query_device cmd; - struct ib_uverbs_cmd_hdr *ich; - struct ib_uverbs_query_device *icp; - int n; - - memset(&cmd, 0, sizeof(cmd)); - - ich = &cmd.ibv_cmd_hdr; - ich->command = IB_USER_VERBS_CMD_QUERY_DEVICE; - ich->in_words = sizeof(cmd) / 4; - ich->out_words = sizeof(*irp) / 4; - - icp = &cmd.ibv_cmd; - icp->response = (uintptr_t) irp; - - /* keep Valgrind happy */ - memset(irp, 0x00, sizeof(*irp)); - - /* Issue command to IB driver */ - n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd)); - if (n != sizeof(cmd)) { - return -errno; - } - - return 0; -} - -static int -usd_ib_cmd_query_port( - struct usd_device *dev, - struct ib_uverbs_query_port_resp *irp) -{ - struct usnic_query_port cmd; - struct ib_uverbs_cmd_hdr *ich; - struct ib_uverbs_query_port *icp; - int n; - - memset(&cmd, 0, sizeof(cmd)); - - ich = &cmd.ibv_cmd_hdr; - ich->command = IB_USER_VERBS_CMD_QUERY_PORT; - ich->in_words = sizeof(cmd) / 4; - ich->out_words = sizeof(*irp) / 4; - - icp = &cmd.ibv_cmd; - icp->response = (uintptr_t) irp; - icp->port_num = 1; - - /* keep Valgrind happy */ - 
memset(irp, 0x00, sizeof(*irp)); - - /* Issue command to IB driver */ - n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd)); - if (n != sizeof(cmd)) { - return -errno; - } - - return 0; -} - -/* - * For code readability, copy these two enums from kernel - * /usr/include/rdma/ib_verbs.h (otherwise, we'd would have to - * hard-code the integer values below). - */ -enum ib_port_width { - IB_WIDTH_1X = 1, - IB_WIDTH_4X = 2, - IB_WIDTH_8X = 4, - IB_WIDTH_12X = 8 -}; - -enum ib_port_speed { - IB_SPEED_SDR = 1, // 2.5 Gbps - IB_SPEED_DDR = 2, // 5 Gbps - IB_SPEED_QDR = 4, // 10 Gbps - IB_SPEED_FDR10 = 8, // 10.3125 Gbps - IB_SPEED_FDR = 16, // 14.0625 Gbps - IB_SPEED_EDR = 32, // 25.78125 Gbps - IB_SPEED_HDR = 64 // 50 Gbps -}; - - -/* - * Issue query commands for device and port and interpret the resaults - */ -int -usd_ib_query_dev( - struct usd_device *dev) -{ - struct ib_uverbs_query_device_resp dresp; - struct ib_uverbs_query_port_resp presp; - struct usd_device_attrs *dap; - unsigned speed; - int ret; - - ret = usd_ib_cmd_query_device(dev, &dresp); - if (ret != 0) - return ret; - - ret = usd_ib_cmd_query_port(dev, &presp); - if (ret != 0) - return ret; - - /* copy out the attributes we care about */ - dap = &dev->ud_attrs; - - dap->uda_link_state = - (presp.state == 4) ? USD_LINK_UP : USD_LINK_DOWN; - - /* - * If link is up, derive bandwidth from speed and width. - * If link is down, driver reports bad speed, try to deduce from the - * NIC device ID. 
- */ - if (dap->uda_link_state == USD_LINK_UP) { -#define MKSW(S,W) (((S)<<8)|(W)) - speed = MKSW(presp.active_speed, presp.active_width); - switch (speed) { - case MKSW(IB_SPEED_FDR10, IB_WIDTH_1X): - case MKSW(IB_SPEED_DDR, IB_WIDTH_4X): - dap->uda_bandwidth = 10000; - break; - case MKSW(IB_SPEED_QDR, IB_WIDTH_4X): - dap->uda_bandwidth = 25000; - break; - case MKSW(IB_SPEED_FDR10, IB_WIDTH_4X): - dap->uda_bandwidth = 40000; - break; - case MKSW(IB_SPEED_HDR, IB_WIDTH_1X): - dap->uda_bandwidth = 50000; - break; - case MKSW(IB_SPEED_EDR, IB_WIDTH_4X): - dap->uda_bandwidth = 100000; - break; - case MKSW(IB_SPEED_HDR, IB_WIDTH_4X): - dap->uda_bandwidth = 200000; - break; - case MKSW(IB_SPEED_HDR, IB_WIDTH_8X): - dap->uda_bandwidth = 400000; - break; - default: - printf("Warning: unrecognized speed/width %d/%d, defaulting to 10G\n", - presp.active_speed, presp.active_width); - dap->uda_bandwidth = 10000; - break; - } - } else { - /* from pci_ids.h */ - switch (dap->uda_device_id) { - case 0x4f: /* Vasona */ - case 0x84: /* Cotati */ - case 0x85: /* Lexington */ - case 0x12c: /* Calistoga */ - case 0x137: /* Mountain View */ - case 0x138: /* Walnut Creek */ - dap->uda_bandwidth = 10000; - break; - case 0xcd: /* icehouse */ - case 0x14d: /* clearlake */ - dap->uda_bandwidth = 40000; - break; - default: - dap->uda_bandwidth = 0; - } - } - - dap->uda_vendor_id = dresp.vendor_id; - dap->uda_vendor_part_id = dresp.vendor_part_id; - dap->uda_device_id = dresp.hw_ver; - - dap->uda_max_qp = dresp.max_qp; - dap->uda_max_cq = dresp.max_cq; - - return 0; -} - - -int -usd_ib_cmd_create_comp_channel( - struct usd_device *dev, - int *comp_fd_o) -{ - int n; - struct usnic_create_comp_channel cmd; - struct ib_uverbs_create_comp_channel_resp resp; - struct ib_uverbs_cmd_hdr *ich; - struct ib_uverbs_create_comp_channel *icp; - struct ib_uverbs_create_comp_channel_resp *irp; - - memset(&cmd, 0, sizeof(cmd)); - - ich = &cmd.ibv_cmd_hdr; - ich->command = 
IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL; - ich->in_words = sizeof(cmd) / 4; - ich->out_words = sizeof(resp) / 4; - - icp = &cmd.ibv_cmd; - icp->response = (uintptr_t) & resp; - - /* Issue command to IB driver */ - n = write(dev->ud_ctx->ucx_ib_dev_fd, &cmd, sizeof(cmd)); - if (n != sizeof(cmd)) { - return -errno; - } - - irp = &resp; - *comp_fd_o = irp->fd; - - return 0; -} diff --git a/prov/usnic/src/usnic_direct/usd_ib_cmd.h b/prov/usnic/src/usnic_direct/usd_ib_cmd.h deleted file mode 100644 index b7d7dc33269..00000000000 --- a/prov/usnic/src/usnic_direct/usd_ib_cmd.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#ifndef _USD_IB_CMD_ -#define _USD_IB_CMD_ - -#include "usd.h" - -int usd_ib_cmd_get_context(struct usd_context *uctx); -int usd_ib_cmd_alloc_pd(struct usd_device *dev, uint32_t * pd_handle_o); -int usd_ib_cmd_reg_mr(struct usd_device *dev, void *vaddr, size_t length, - struct usd_mr *mr); -int usd_ib_cmd_dereg_mr(struct usd_device *dev, struct usd_mr *mr); -int usd_ib_cmd_create_cq(struct usd_device *dev, struct usd_cq_impl *cq, - void *ibv_cq, int comp_channel, int comp_vector); -int usd_ib_cmd_destroy_cq(struct usd_device *dev, struct usd_cq_impl *cq); -int usd_ib_cmd_create_qp(struct usd_device *dev, struct usd_qp_impl *qp, - struct usd_vf_info *vfip); -int usd_ib_cmd_modify_qp(struct usd_device *dev, struct usd_qp_impl *qp, - int state); -int usd_ib_cmd_destroy_qp(struct usd_device *dev, struct usd_qp_impl *qp); - -int usd_ib_query_dev(struct usd_device *dev); -int usd_ib_cmd_devcmd(struct usd_device *dev, enum vnic_devcmd_cmd devcmd, - u64 *a0, u64 *a1, int wait); - -int usd_ib_cmd_create_comp_channel(struct usd_device *dev, int *comp_fd_o); -int usd_ib_cmd_destroy_comp_channel(struct usd_device *dev, int comp_fd); - -#endif /* _USD_IB_CMD_ */ diff --git a/prov/usnic/src/usnic_direct/usd_ib_sysfs.c b/prov/usnic/src/usnic_direct/usd_ib_sysfs.c deleted file mode 100644 index b9ddddea49c..00000000000 --- a/prov/usnic/src/usnic_direct/usd_ib_sysfs.c +++ /dev/null @@ -1,383 +0,0 @@ -/* - * 
Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "usd.h" -#include "usd_ib_sysfs.h" -#include "usd_util.h" - -/* - * usnic_direct routines that depend on Infiniband /sysfs directory structure - */ - -/* - * Perform one-time initialization - */ -int -usd_ib_get_devlist( - struct usd_ib_dev **dev_list) -{ - char *class_path = "/sys/class/infiniband_verbs"; - DIR *class_dir; - struct dirent *dent; - struct stat sbuf; - char *dev_path = NULL; - char *ibdev_path = NULL; - char ibdev_buf[32]; - struct usd_ib_dev *idp; - struct usd_ib_dev *last_idp; - int fd; - int rc; - int n; - - /* - * For now, we are glomming onto Infiniband driver for setup - */ - class_dir = opendir(class_path); - if (class_dir == NULL) { - return -ENODEV; - } - - /* Check dir entries for USNIC devices */ - last_idp = NULL; - fd = -1; - while ((dent = readdir(class_dir)) != NULL) { - /* skip "." and ".." */ - if (dent->d_name[0] == '.') - continue; - - /* build path to entry */ - if (asprintf(&dev_path, "%s/%s", class_path, - dent->d_name) <= 0) { - rc = -errno; - usd_perror("failed to asprintf"); - goto out; - } - - /* see if it's a dir */ - rc = stat(dev_path, &sbuf); - if (rc != 0) { - usd_perror(dev_path); - rc = -errno; - goto out; - } - - /* Must be a directory */ - if (!S_ISDIR(sbuf.st_mode)) - continue; - - /* read the ibdev */ - if (asprintf(&ibdev_path, "%s/ibdev", dev_path) <= 0) { - rc = -errno; - usd_perror(ibdev_path); - goto out; - } - fd = open(ibdev_path, O_RDONLY); - if (fd == -1) { - usd_perror(ibdev_path); - rc = -errno; - goto out; - } - memset(ibdev_buf, 0, sizeof(ibdev_buf)); - n = read(fd, ibdev_buf, sizeof(ibdev_buf) - 1); - if (n == -1) { - usd_perror("reading ibdev"); - rc = -errno; - goto out; - } - close(fd); - fd = -1; - if (n > 0 && ibdev_buf[n - 1] == '\n') { - ibdev_buf[n - 1] = '\0'; /* newline -> EOF */ - } - - /* If USNIC device, remember this one */ - if (strncmp(ibdev_buf, 
"usnic", 5) == 0) { - idp = calloc(sizeof(*idp), 1); - if (idp == NULL) { - usd_perror("calloc IB device"); - rc = -errno; - goto out; - } - strncpy(idp->id_name, dent->d_name, sizeof(idp->id_name) - 1); - strncpy(idp->id_usnic_name, ibdev_buf, - sizeof(idp->id_usnic_name) - 1); - snprintf(idp->id_dev_path, sizeof(idp->id_dev_path) - 1, - "/dev/infiniband/%s", idp->id_name); - snprintf(idp->id_class_path, sizeof(idp->id_class_path) - 1, - "%s/device/infiniband/%s", dev_path, ibdev_buf); - - if (last_idp == NULL) { - *dev_list = idp; - } else { - last_idp->id_next = idp; - } - idp->id_next = NULL; - last_idp = idp; - } - free(dev_path); - dev_path = NULL; - free(ibdev_path); - ibdev_path = NULL; - } - rc = 0; - -out: - /* clean up */ - free(dev_path); - free(ibdev_path); - if (class_dir != NULL) { - closedir(class_dir); - } - if (fd != -1) { - close(fd); - } - - return rc; -} - -/* - * Find MAC for a device - * (we assume port 0) - */ -int -usd_get_mac( - struct usd_device *dev, - uint8_t * mac) -{ - char name[PATH_MAX + 128]; - char gid[80]; - char *p; - uint16_t v; - struct usd_ib_dev *idp; - int fd; - int n; - - idp = dev->ud_ctx->ucx_ib_dev; - snprintf(name, sizeof(name), "%s/ports/1/gids/0", idp->id_class_path); - - fd = open(name, O_RDONLY); - if (fd == -1) { - usd_perror(name); - return -errno; - } - - n = read(fd, gid, sizeof(gid) - 1); - close(fd); - if (n < 0) { - usd_perror("reading GID"); - return -errno; - } - gid[n] = '\0'; - - p = gid + 20; - sscanf(p, "%hx", &v); - *mac++ = (v >> 8) ^ 2; - *mac++ = v & 0xFF; - p += 5; - sscanf(p, "%hx", &v); - *mac++ = v >> 8; - p += 5; - sscanf(p, "%hx", &v); - *mac++ = v & 0xFF; - p += 5; - sscanf(p, "%hx", &v); - *mac++ = v >> 8; - *mac++ = v & 0xFF; - - return 0; -} - -/* - * Find interface for a device - */ -int -usd_get_iface( - struct usd_device *dev) -{ - char name[PATH_MAX + 128]; - struct usd_ib_dev *idp; - int fd; - int n; - - idp = dev->ud_ctx->ucx_ib_dev; - snprintf(name, sizeof(name), "%s/iface", 
idp->id_class_path); - - fd = open(name, O_RDONLY); - if (fd == -1) { - usd_perror(name); - dev->ud_attrs.uda_ifname[0] = '\0'; - return -errno; - } - - n = read(fd, dev->ud_attrs.uda_ifname, - sizeof(dev->ud_attrs.uda_ifname)); - close(fd); - if (n < 0) { - usd_perror("reading iface"); - return -errno; - } - - dev->ud_attrs.uda_ifname[n - 1] = '\0'; - - return 0; -} - -/* - * Read an integer from a sysfs entry - */ -static int -usd_ib_sysfs_get_int( - struct usd_device *dev, - char *entry, - int *result) -{ - char name[PATH_MAX + 128]; - char buf[32]; - struct usd_ib_dev *idp; - int fd; - int n; - - idp = dev->ud_ctx->ucx_ib_dev; - snprintf(name, sizeof(name), "%s/%s", idp->id_class_path, entry); - - fd = open(name, O_RDONLY); - if (fd == -1) { - usd_perror(name); - return -errno; - } - - n = read(fd, buf, sizeof(buf)); - close(fd); - if (n < 0) { - fprintf(stderr, "Error %d reading %s\n", errno, entry); - return -errno; - } - - *result = atoi(buf); - return 0; -} - -/* - * Get usNIC configuration - */ -int -usd_get_usnic_config( - struct usd_device *dev) -{ - int v; - int ret; - - ret = usd_ib_sysfs_get_int(dev, "max_vf", &v); - if (ret != 0) - return ret; - dev->ud_attrs.uda_num_vf = v; - - ret = usd_ib_sysfs_get_int(dev, "qp_per_vf", &v); - if (ret != 0) - return ret; - dev->ud_attrs.uda_qp_per_vf = v; - - ret = usd_ib_sysfs_get_int(dev, "cq_per_vf", &v); - if (ret != 0) - return ret; - dev->ud_attrs.uda_cq_per_vf = v; - - ret = usd_ib_sysfs_get_int(dev, "intr_per_vf", &v); - if (ret != 0) { - /* older kernels did not export this sysfs node */ - if (ret == -ENOENT) { - dev->ud_attrs.uda_intr_per_vf = 0; - ret = 0; - } - else { - return ret; - } - } else { - dev->ud_attrs.uda_intr_per_vf = v; - } - - return ret; -} - -/* - * Find firmware version - */ -int -usd_get_firmware( - struct usd_device *dev) -{ - char name[PATH_MAX + 128]; - struct usd_ib_dev *idp; - char *fw; - int fd; - int n; - - idp = dev->ud_ctx->ucx_ib_dev; - snprintf(name, sizeof(name), 
"%s/fw_ver", idp->id_class_path); - - fd = open(name, O_RDONLY); - if (fd == -1) { - usd_perror(name); - return -errno; - } - - fw = &dev->ud_attrs.uda_firmware[0]; - n = read(fd, fw, sizeof(dev->ud_attrs.uda_firmware)); - close(fd); - if (n < 0) { - usd_perror("reading fw_ver"); - return -errno; - } - fw[n - 1] = '\0'; - - return 0; -} diff --git a/prov/usnic/src/usnic_direct/usd_ib_sysfs.h b/prov/usnic/src/usnic_direct/usd_ib_sysfs.h deleted file mode 100644 index 3014d0a2d2f..00000000000 --- a/prov/usnic/src/usnic_direct/usd_ib_sysfs.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#ifndef _USD_IB_SYSFS_ -#define _USD_IB_SYSFS_ - -#include - -/* - * Forward structure defs - */ -struct usd_device; - -/* - * Definition of a usnic IB entry - */ -struct usd_ib_dev { - char id_name[80]; - char id_usnic_name[USD_MAX_DEVNAME]; - char id_dev_path[PATH_MAX]; /* path to IB dev */ - char id_class_path[PATH_MAX]; /* path to IB class info */ - - struct usd_ib_dev *id_next; -}; - -int usd_ib_get_devlist(struct usd_ib_dev **dev_list); -int usd_get_mac(struct usd_device *dev, uint8_t * mac); -int usd_get_iface(struct usd_device *dev); -int usd_get_usnic_config(struct usd_device *dev); -int usd_get_firmware(struct usd_device *dev); -int usd_read_cap_ver(struct usd_device *dev, char *cap_name, int *vers_o); -#endif /* _USD_IB_SYSFS_ */ diff --git a/prov/usnic/src/usnic_direct/usd_mem.c b/prov/usnic/src/usnic_direct/usd_mem.c deleted file mode 100644 index 486e8f85622..00000000000 --- a/prov/usnic/src/usnic_direct/usd_mem.c +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "usnic_direct.h" -#include "usd.h" -#include "usd_ib_cmd.h" - -/* - * Issue driver command to register memory region - */ -int -usd_reg_mr( - struct usd_device *dev, - void *vaddr, - size_t length, - struct usd_mr **mr_o) -{ - struct usd_mr *mr; - int ret; - - mr = calloc(sizeof(*mr), 1); - if (mr == NULL) { - return -errno; - } - - ret = usd_ib_cmd_reg_mr(dev, vaddr, length, mr); - - if (ret == 0) { - mr->umr_dev = dev; - mr->umr_vaddr = vaddr; - mr->umr_length = length; - *mr_o = mr; - } else { - free(mr); - } - - return ret; -} - -/* - * Issue driver command to de-register memory region - */ -int -usd_dereg_mr( - struct usd_mr *mr) -{ - int ret; - - ret = usd_ib_cmd_dereg_mr(mr->umr_dev, mr); - if (ret == 0) - free(mr); - - return ret; -} - -/* - * Used to allocate memory and an mr to go with it all in one go. Used - * to provide memory to the vnic_* functions that call pci_alloc_consistant - * We want to return a nicely aligned chunk of memory preceded by struct usd_mr. 
- * We don't know the alignment of the memory we get back, so allocate a big - * enough chunk to hold the following: - * struct usd_mr - * N pad bytes - * true length and pointer to usd_mr - * page aligned buffer for user - */ -int -usd_alloc_mr( - struct usd_device *dev, - size_t size, - void **vaddr_o) -{ - void *vaddr; - void *base_addr; - struct usd_mr *mr; - size_t true_size; - size_t metadata_size; - size_t madv_size; - int ret; - - metadata_size = sizeof(struct usd_mr) + 3 * sizeof(uintptr_t); - madv_size = ALIGN(size, sysconf(_SC_PAGESIZE)); - true_size = madv_size + metadata_size + sysconf(_SC_PAGESIZE) - 1; - base_addr = mmap(NULL, true_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (base_addr == NULL || base_addr == MAP_FAILED) { - usd_err("Failed to mmap region of size %lu\n", true_size); - return -errno; - } - mr = base_addr; - vaddr = - (void *) ALIGN((uintptr_t) base_addr + metadata_size, - sysconf(_SC_PAGESIZE)); - ((uintptr_t *) vaddr)[-1] = (uintptr_t) mr; - ((uintptr_t *) vaddr)[-2] = true_size; - ((uintptr_t *) vaddr)[-3] = madv_size; - - /* - * Disable copy-on-write for memories internally used by USD. - * For application buffers, disabling copy-on-write should be provided by - * usd wrapper such as libfabric or verbs plugin if fork is supported. - * The memory to be registered starts from page-aligned address, and ends - * at page boundary, so it's impossible for a page to be updated - * with multiple madvise calls when each call reference different VAs on - * the same page. This allows to avoid the need to reference count - * the pages that get updated with mutiple madvise calls. For details, - * see libibverbs ibv_dont_forkrange implementations. 
- */ - ret = madvise(vaddr, madv_size, MADV_DONTFORK); - if (ret != 0) { - usd_err("Failed to disable child's access to memory %p size %lu\n", - vaddr, size); - ret = errno; - goto err_unmap; - } - - ret = usd_ib_cmd_reg_mr(dev, vaddr, size, mr); - if (ret != 0) { - usd_err("Failed to register memory region %p, size %lu\n", - vaddr, size); - goto err_madvise; - } - mr->umr_dev = dev; - - *vaddr_o = vaddr; - return 0; - -err_madvise: - madvise(vaddr, ALIGN(size, sysconf(_SC_PAGESIZE)), MADV_DOFORK); -err_unmap: - munmap(base_addr, true_size); - return ret; -} - -/* - * See usd_alloc_mr() for explanation of: - * mr = (struct usd_mr *)((uintptr_t *)vaddr)[-1]; - */ -int -usd_free_mr( - void *vaddr) -{ - struct usd_mr *mr; - size_t true_size; - size_t madv_size; - int ret; - - mr = (struct usd_mr *) ((uintptr_t *) vaddr)[-1]; - true_size = ((uintptr_t *) vaddr)[-2]; - madv_size = ((uintptr_t *) vaddr)[-3]; - - ret = usd_ib_cmd_dereg_mr(mr->umr_dev, mr); - if (ret == 0) { - madvise(vaddr, madv_size, MADV_DOFORK); - munmap(mr, true_size); - } - - return ret; -} - -/* - * Utility function for vnic_* routines - */ -char * -pci_name( - struct pci_dev *pdev) -{ - struct usd_device *dev; - - dev = (struct usd_device *) pdev; - - return dev->ud_ctx->ucx_ib_dev->id_usnic_name; -} diff --git a/prov/usnic/src/usnic_direct/usd_poll.c b/prov/usnic/src/usnic_direct/usd_poll.c deleted file mode 100644 index 0ce9008083d..00000000000 --- a/prov/usnic/src/usnic_direct/usd_poll.c +++ /dev/null @@ -1,293 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#include - - -#include "usd.h" -#include "usd_util.h" -#include "cq_enet_desc.h" - -static inline void -find_rx_lengths( - struct usd_rq *rq, - uint16_t q_index, - size_t *posted_len_o, - size_t *len_in_pkt_o) -{ - dma_addr_t bus_addr; - u16 len; - u8 type; - size_t rcvbuf_len; - uint16_t i; - - i = q_index; - rcvbuf_len = 0; - do { - rq_enet_desc_dec( (struct rq_enet_desc *) - ((uintptr_t)rq->urq_desc_ring + (i<<4)), - &bus_addr, &type, &len); - rcvbuf_len += len; - i = (i - 1) & rq->urq_post_index_mask; - } while (type == RQ_ENET_TYPE_NOT_SOP); - - *posted_len_o = rcvbuf_len; - *len_in_pkt_o = ntohs(((struct usd_udp_hdr *)bus_addr)->uh_ip.tot_len) + - sizeof(struct ether_header); -} - -static inline int -usd_desc_to_rq_comp( - struct usd_cq_impl *cq, - struct cq_desc *desc, - uint16_t qid, - uint16_t q_index, - struct usd_completion *comp) -{ - struct usd_rq *rq; - struct usd_qp_impl *qp; - struct cq_enet_rq_desc *edesc; - uint16_t bytes_written_flags; - uint32_t bytes_written; - uint32_t ci_flags; - uint32_t ipudpok; - unsigned credits; - size_t len_in_pkt; - size_t rcvbuf_len; - - edesc = (struct cq_enet_rq_desc *)desc; - rq = cq->ucq_rq_map[qid]; - qp = usd_container_of(rq, struct usd_qp_impl, uq_rq); - - bytes_written_flags = le16_to_cpu(edesc->bytes_written_flags); - bytes_written = bytes_written_flags & CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK; - ci_flags = le16_to_cpu(edesc->completed_index_flags); - - if (ci_flags & CQ_ENET_RQ_DESC_FLAGS_EOP) { - comp->uc_bytes = bytes_written + rq->urq_accum_bytes; - rq->urq_accum_bytes = 0; - } else { - rq->urq_accum_bytes += bytes_written; - return -1; - } - - comp->uc_context = rq->urq_context[q_index]; - comp->uc_qp = &qp->uq_qp; - - ipudpok = CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK | - CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK; - - if (bytes_written_flags & CQ_ENET_RQ_DESC_FLAGS_TRUNCATED || - (edesc->flags & ipudpok) != ipudpok) { - if (((edesc->flags & CQ_ENET_RQ_DESC_FLAGS_FCS_OK) == 0) && - 
bytes_written == 0) { - find_rx_lengths(rq, q_index, &rcvbuf_len, &len_in_pkt); - - /* - * If only the paddings to meet 64-byte minimum eth frame - * requirement are truncated, do not mark packet as - * error due to truncation. - * The usnic hdr should not be split into multiple receive buffer - * - * If we could afford the extra cycles, we would also compute the - * UDP checksum here and compare it to the UDP header. - */ - if (rcvbuf_len >= 60 || len_in_pkt > rcvbuf_len) { - comp->uc_status = USD_COMPSTAT_ERROR_TRUNC; - } - else { - comp->uc_status = USD_COMPSTAT_SUCCESS; - /* TRUNC means bytes_written==0, so fix this too */ - comp->uc_bytes = len_in_pkt; - } - } else { - comp->uc_status = USD_COMPSTAT_ERROR_CRC; - } - } else { - if (comp->uc_bytes <= 60) { - /* - * The sender may have attempted to send a small frame (<64-bytes) - * that was padded out to 64-bytes by the sending VIC. - * If we posted a recv buffer >= 60 bytes then we wouldn't see - * truncation, but the bytes_written by the VIC will be larger than - * the bytes the sender actually requested to send. Fix that up - * here. 
- */ - find_rx_lengths(rq, q_index, &rcvbuf_len, &len_in_pkt); - comp->uc_bytes = len_in_pkt; - } - comp->uc_status = USD_COMPSTAT_SUCCESS; - } - - /* needs a little work in multi-SGE case, all credits currently not - * reported as released until next RX - */ - credits = (q_index - rq->urq_last_comp) & rq->urq_post_index_mask; - rq->urq_recv_credits += credits; - rq->urq_last_comp = q_index; - - return 0; -} - -static inline void -usd_desc_to_wq_comp( - struct usd_cq_impl *cq, - uint16_t qid, - uint16_t q_index, - struct usd_completion *comp) -{ - struct usd_wq *wq; - struct usd_qp_impl *qp; - struct usd_wq_post_info *info; - unsigned credits; - - wq = cq->ucq_wq_map[qid]; - qp = usd_container_of(wq, struct usd_qp_impl, uq_wq); - comp->uc_qp = &qp->uq_qp; - - info = &wq->uwq_post_info[(q_index+1)&wq->uwq_post_index_mask]; - comp->uc_context = info->wp_context; - comp->uc_bytes = info->wp_len; - comp->uc_status = USD_COMPSTAT_SUCCESS; - - credits = (q_index - wq->uwq_last_comp) & wq->uwq_post_index_mask; - wq->uwq_send_credits += credits; - wq->uwq_last_comp = q_index; -} - -int -usd_poll_cq_multi( - struct usd_cq *ucq, - int max_comps, - struct usd_completion *comps) -{ - int ret; - int n; - - for (n = 0; n < max_comps; ++n) { - ret = usd_poll_cq(ucq, comps + n); - if (ret == -EAGAIN) { - return n; - } - } - return max_comps; -} - -int -usd_poll_cq( - struct usd_cq *ucq, - struct usd_completion *comp) -{ - struct usd_cq_impl *cq; - struct cq_desc *cq_desc; - uint8_t color; - uint8_t last_color; - uint8_t type_color; - uint8_t type; - uint16_t qid; - uint16_t q_index; - - cq = to_cqi(ucq); - -retry: - /* check for a completion */ - cq_desc = (struct cq_desc *)((uint8_t *)cq->ucq_desc_ring + - (cq->ucq_next_desc << 4)); - last_color = cq->ucq_last_color; - - type_color = cq_desc->type_color; - type = type_color & 0x7f; - color = type_color >> CQ_DESC_COLOR_SHIFT; - qid = le16_to_cpu(cq_desc->q_number) & CQ_DESC_Q_NUM_MASK; - q_index = 
le16_to_cpu(cq_desc->completed_index) & CQ_DESC_COMP_NDX_MASK; - - if (color == last_color) { - return -EAGAIN; - } else { - - /* bookkeeping */ - cq->ucq_next_desc++; - cq->ucq_last_color ^= (cq->ucq_next_desc >> cq->ucq_color_shift); - cq->ucq_next_desc &= cq->ucq_cqe_mask; - - rmb(); - - comp->uc_type = (enum usd_completion_type) type; - - if (type == USD_COMPTYPE_RECV) { - if (usd_desc_to_rq_comp(cq, cq_desc, qid, q_index, comp) == -1) { - goto retry; - } - } else if (type == USD_COMPTYPE_SEND) { - usd_desc_to_wq_comp(cq, qid, q_index, comp); - } else { - comp->uc_status = USD_COMPSTAT_ERROR_INTERNAL; - } - return 0; - } -} - -/* - * Allow application to unmask interrupt explicitly - */ -int usd_poll_req_notify(struct usd_cq *ucq) -{ - struct usd_cq_impl *cq; - - cq = to_cqi(ucq); - - /* - * application uses a signal thread waiting for one completion FD, - * then calling this function to unmask the interrupt source. If multiple - * cqs are associated with the FD/interrupt, this may be unneccesarilly - * called for subsequent cqs at each poll/wait, but it's OK. A lock isn't - * used here to prevent simultaneous unmasking among multiple threads as - * it's not a valid use case. - * Also this call happens at data path, it's assumed that removing a - * interrupt source from cq happens at control path tear down stage, when - * data path is already finished. - */ - if (cq->comp_fd != -1 && cq->ucq_intr != NULL) - vnic_intr_unmask(&cq->ucq_intr->uci_vintr); - - return 0; -} diff --git a/prov/usnic/src/usnic_direct/usd_post.c b/prov/usnic/src/usnic_direct/usd_post.c deleted file mode 100644 index e1e1b30c0c0..00000000000 --- a/prov/usnic/src/usnic_direct/usd_post.c +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#include "usd.h" -#include "usd_post.h" - -unsigned -usd_get_send_credits( - struct usd_qp *uqp) -{ - struct usd_qp_impl *qp; - - qp = to_qpi(uqp); - - return qp->uq_wq.uwq_send_credits; -} - -unsigned -usd_get_recv_credits( - struct usd_qp *uqp) -{ - struct usd_qp_impl *qp; - - qp = to_qpi(uqp); - - return qp->uq_rq.urq_recv_credits; -} - -int -usd_post_recv( - struct usd_qp *uqp, - struct usd_recv_desc *recv_list) -{ - struct usd_qp_impl *qp; - struct usd_rq *rq; - struct vnic_rq *vrq; - struct rq_enet_desc *desc; - struct iovec *iovp; - uint32_t index; - uint32_t count; - unsigned i; - - qp = to_qpi(uqp); - rq = &qp->uq_rq; - vrq = &rq->urq_vnic_rq; - desc = rq->urq_next_desc; - index = rq->urq_post_index; - - count = 0; - - while (recv_list != NULL) { - iovp = recv_list->urd_iov; - rq->urq_context[index] = recv_list->urd_context; - rq_enet_desc_enc(desc, (dma_addr_t) iovp[0].iov_base, - RQ_ENET_TYPE_ONLY_SOP, iovp[0].iov_len); - count++; - - index = (index+1) & rq->urq_post_index_mask; - desc = (struct rq_enet_desc *) ((uintptr_t)rq->urq_desc_ring - + (index<<4)); - - for (i = 1; i < recv_list->urd_iov_cnt; ++i) { - rq->urq_context[index] = recv_list->urd_context; - rq_enet_desc_enc(desc, (dma_addr_t) iovp[i].iov_base, - RQ_ENET_TYPE_NOT_SOP, iovp[i].iov_len); - count++; - - index = (index+1) & rq->urq_post_index_mask; - desc = (struct rq_enet_desc *) ((uintptr_t)rq->urq_desc_ring - + (index<<4)); - } - recv_list = recv_list->urd_next; - } - - wmb(); - iowrite32(index, &vrq->ctrl->posted_index); - - rq->urq_next_desc = desc; - rq->urq_post_index = index; - rq->urq_recv_credits -= count; - - return 0; -} diff --git a/prov/usnic/src/usnic_direct/usd_post.h b/prov/usnic/src/usnic_direct/usd_post.h deleted file mode 100644 index a7bc5b5e38c..00000000000 --- a/prov/usnic/src/usnic_direct/usd_post.h +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. 
- * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#ifndef _USD_POST_H_ -#define _USD_POST_H_ - -#include - -#include "usd.h" -#include "usd_util.h" - -static inline uint32_t -_usd_post_send_one( - struct usd_wq *wq, - const void *packet, - size_t length, - u_int8_t cq_entry) -{ - struct vnic_wq *vwq; - uint32_t index; - struct wq_enet_desc *desc; - uint64_t wr; - u_int8_t offload_mode = 0, eop = 1; - u_int16_t mss = 7, header_length = 0, vlan_tag = 0; - u_int8_t vlan_tag_insert = 0, loopback = 0, fcoe_encap = 0; - - vwq = &wq->uwq_vnic_wq; - desc = wq->uwq_next_desc; - index = wq->uwq_post_index; - - wq_enet_desc_enc(desc, (uintptr_t)packet, length, - mss, header_length, offload_mode, - eop, cq_entry, fcoe_encap, - vlan_tag_insert, vlan_tag, loopback); - wmb(); - - wr = vnic_cached_posted_index((dma_addr_t)packet, length, index); - iowrite64(wr, &vwq->ctrl->posted_index); - - wq->uwq_next_desc = (struct wq_enet_desc *) - ((uintptr_t)wq->uwq_desc_ring + (index<<4)); - wq->uwq_post_index = (index+1) & wq->uwq_post_index_mask; - wq->uwq_send_credits--; - - return index; -} - -static inline uint32_t -_usd_post_send_two( - struct usd_wq *wq, - const void *hdr, - size_t hdrlen, - const void *pkt, - size_t pktlen, - u_int8_t cq_entry) -{ - struct vnic_wq *vwq; - uint32_t index; - struct wq_enet_desc *desc; - u_int8_t offload_mode = 0, eop; - u_int16_t mss = 7, header_length = 0, vlan_tag = 0; - u_int8_t vlan_tag_insert = 0, loopback = 0, fcoe_encap = 0; - - vwq = &wq->uwq_vnic_wq; - desc = wq->uwq_next_desc; - index = wq->uwq_post_index; - - eop = 0; - wq_enet_desc_enc(desc, (uintptr_t)hdr, hdrlen, - mss, header_length, offload_mode, - eop, 0, fcoe_encap, - vlan_tag_insert, vlan_tag, loopback); - - desc = (struct wq_enet_desc *) ((uintptr_t)wq->uwq_desc_ring + (index<<4)); - index = (index+1) & wq->uwq_post_index_mask; - - eop = 1; - wq_enet_desc_enc(desc, (uintptr_t)pkt, pktlen, - mss, header_length, offload_mode, - eop, cq_entry, fcoe_encap, - vlan_tag_insert, vlan_tag, loopback); - 
wmb(); - - iowrite32(index, &vwq->ctrl->posted_index); - - wq->uwq_next_desc = (struct wq_enet_desc *) - ((uintptr_t)wq->uwq_desc_ring + (index<<4)); - wq->uwq_post_index = (index+1) & wq->uwq_post_index_mask; - wq->uwq_send_credits -= 2; - - return index; -} - -static inline uint32_t -_usd_post_send_two_vlan( - struct usd_wq *wq, - const void *hdr, - size_t hdrlen, - const void *pkt, - size_t pktlen, - u_int8_t cq_entry, - u_int16_t vlan_tag) -{ - struct vnic_wq *vwq; - uint32_t index; - struct wq_enet_desc *desc; - u_int8_t offload_mode = 0, eop; - u_int16_t mss = 7, header_length = 0; - u_int8_t vlan_tag_insert = 1, loopback = 0, fcoe_encap = 0; - - vwq = &wq->uwq_vnic_wq; - desc = wq->uwq_next_desc; - index = wq->uwq_post_index; - - eop = 0; - wq_enet_desc_enc(desc, (uintptr_t)hdr, hdrlen, - mss, header_length, offload_mode, - eop, 0, fcoe_encap, - vlan_tag_insert, vlan_tag, loopback); - - desc = (struct wq_enet_desc *) ((uintptr_t)wq->uwq_desc_ring + (index<<4)); - index = (index+1) & wq->uwq_post_index_mask; - - eop = 1; - wq_enet_desc_enc(desc, (uintptr_t)pkt, pktlen, - mss, header_length, offload_mode, - eop, cq_entry, fcoe_encap, - vlan_tag_insert, vlan_tag, loopback); - wmb(); - - iowrite32(index, &vwq->ctrl->posted_index); - - wq->uwq_next_desc = (struct wq_enet_desc *) - ((uintptr_t)wq->uwq_desc_ring + (index<<4)); - wq->uwq_post_index = (index+1) & wq->uwq_post_index_mask; - wq->uwq_send_credits -= 2; - - return index; -} - -/* - * Consume iov count credits, assumes that iov[0] includes usnic header - */ -static inline uint32_t -_usd_post_send_iov( - struct usd_wq *wq, - const struct iovec *iov, - size_t count, - u_int8_t cq_entry) -{ - struct vnic_wq *vwq; - uint32_t index; - struct wq_enet_desc *desc; - u_int8_t offload_mode = 0; - u_int16_t mss = 7, header_length = 0, vlan_tag = 0; - u_int8_t vlan_tag_insert = 0, loopback = 0, fcoe_encap = 0; - unsigned i; - - vwq = &wq->uwq_vnic_wq; - desc = wq->uwq_next_desc; - index = wq->uwq_post_index; - - for 
(i = 0; i < count - 1; i++) { - wq_enet_desc_enc(desc, (uintptr_t)(iov[i].iov_base), - iov[i].iov_len, mss, header_length, offload_mode, - 0, 0, fcoe_encap, vlan_tag_insert, vlan_tag, loopback); - desc = (struct wq_enet_desc *) ((uintptr_t)wq->uwq_desc_ring - + (index<<4)); - index = (index+1) & wq->uwq_post_index_mask; - } - - wq_enet_desc_enc(desc, (uintptr_t)(iov[i].iov_base), - iov[i].iov_len, mss, header_length, offload_mode, - 1, cq_entry, fcoe_encap, vlan_tag_insert, vlan_tag, loopback); - - wmb(); - - iowrite32(index, &vwq->ctrl->posted_index); - - wq->uwq_next_desc = (struct wq_enet_desc *) - ((uintptr_t)wq->uwq_desc_ring + (index<<4)); - wq->uwq_post_index = (index+1) & wq->uwq_post_index_mask; - wq->uwq_send_credits -= count; - - return index; -} - -#endif /* _USD_POST_H_ */ diff --git a/prov/usnic/src/usnic_direct/usd_post_ud_pio_udp.c b/prov/usnic/src/usnic_direct/usd_post_ud_pio_udp.c deleted file mode 100644 index 64f3cf9180a..00000000000 --- a/prov/usnic/src/usnic_direct/usd_post_ud_pio_udp.c +++ /dev/null @@ -1,251 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#include - -#include "usd.h" -#include "usd_post.h" - -static int -usd_post_send_one_ud_pio_udp( - struct usd_qp *uqp, - struct usd_dest *dest, - const void *buf, - size_t len, - uint32_t flags, - void *context) -{ - struct usd_qp_impl *qp; - struct usd_udp_hdr *hdr; - struct usd_wq *wq; - struct usd_wq_post_info *info; - struct vnic_wq *vwq; - uint32_t index; - struct wq_enet_desc *desc; - char *v_pkt; - uint64_t p_pkt; - uint64_t *s, *d; - uint32_t copylen; - uint8_t *copybuf; - - u_int8_t offload_mode = 0, eop = 1; - u_int16_t mss = 7, header_length = 0, vlan_tag = 0; - u_int8_t vlan_tag_insert = 0, loopback = 0, fcoe_encap = 0; - - qp = to_qpi(uqp); - wq = &qp->uq_wq; - - hdr = &dest->ds_dest.ds_udp.u_hdr; - - /* adjust lengths and insert source port */ - hdr->uh_ip.tot_len = htons(len + sizeof(struct usd_udp_hdr) - - sizeof(struct ether_header)); - hdr->uh_udp.len = htons((sizeof(struct usd_udp_hdr) - - sizeof(struct ether_header) - - sizeof(struct iphdr)) + len); - hdr->uh_udp.source = - qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port; - - vwq = &wq->uwq_vnic_wq; - desc = wq->uwq_next_desc; - index = wq->uwq_post_index; 
- - v_pkt = wq->pio_v_pkt_buf + index * 256; - p_pkt = wq->pio_p_pkt_buf + index * 256; - copylen = (len + sizeof(*hdr) + 7) & ~7; -//printf("len = %lu, p_pkt = 0x%lx, index = %d\n", len, p_pkt, index); - d = (uint64_t *)v_pkt; - d[0] = ((uint64_t *)hdr)[0]; - d[1] = ((uint64_t *)hdr)[1]; - d[2] = ((uint64_t *)hdr)[2]; - d[3] = ((uint64_t *)hdr)[3]; - d[4] = ((uint64_t *)hdr)[4]; - - d += 5; - copybuf = wq->uwq_copybuf; - memcpy(copybuf + 2, buf, len); - s = (uint64_t *)copybuf; - - /* 40 bytes already copied */ - while (copylen > 40) { - *d++ = *s++; - copylen -= 8; - } - - /* encode in shadow ring and write 64 bytes */ - wq_enet_desc_enc(desc, (uintptr_t)p_pkt, len + sizeof(*hdr), - mss, header_length, offload_mode, - eop, USD_SF_ISSET(flags, SIGNAL), fcoe_encap, - vlan_tag_insert, vlan_tag, loopback); - - d = (uint64_t *)((uintptr_t)wq->pio_v_wq_addr + (uintptr_t)desc - - (uintptr_t)wq->uwq_desc_ring); - s = (uint64_t *)desc; - d[0] = s[0]; - d[1] = s[1]; - - wmb(); - -//printf("post %lu[%d] p=0x%lx\n", len + sizeof(*hdr), index, p_pkt); - iowrite32(index, &vwq->ctrl->posted_index); - - wq->uwq_next_desc = (struct wq_enet_desc *) - ((uintptr_t)wq->uwq_desc_ring + (index<<4)); - wq->uwq_post_index = (index+1) & wq->uwq_post_index_mask; - wq->uwq_send_credits--; - - info = &wq->uwq_post_info[index]; - info->wp_context = context; - info->wp_len = len; - - return 0; -} - -/* - * 2 WQEs - our header plus user header in 1st one, user packet in 2nd - */ -static int -usd_post_send_two_ud_pio_udp( - struct usd_qp *uqp, - struct usd_dest *dest, - const void *uhdr, - size_t uhdrlen, - const void *pkt, - size_t pktlen, - uint32_t flags, - void *context) -{ - struct usd_qp_impl *qp; - struct usd_udp_hdr *hdr; - struct usd_wq *wq; - struct usd_wq_post_info *info; - struct vnic_wq *vwq; - uint32_t index; - struct wq_enet_desc *desc; - char *v_pkt; - uint64_t p_pkt; - uint64_t *s, *d; - uint32_t copylen; - uint8_t *copybuf; - size_t len; - - u_int8_t offload_mode = 0, eop = 1; 
- u_int16_t mss = 7, header_length = 0, vlan_tag = 0; - u_int8_t vlan_tag_insert = 0, loopback = 0, fcoe_encap = 0; - - qp = to_qpi(uqp); - wq = &qp->uq_wq; - - hdr = &dest->ds_dest.ds_udp.u_hdr; - len = uhdrlen + pktlen; - - /* adjust lengths and insert source port */ - hdr->uh_ip.tot_len = htons(len + sizeof(struct usd_udp_hdr) - - sizeof(struct ether_header)); - hdr->uh_udp.len = htons((sizeof(struct usd_udp_hdr) - - sizeof(struct ether_header) - - sizeof(struct iphdr)) + len); - hdr->uh_udp.source = - qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port; - - vwq = &wq->uwq_vnic_wq; - desc = wq->uwq_next_desc; - index = wq->uwq_post_index; - - v_pkt = wq->pio_v_pkt_buf + index * 256; - p_pkt = wq->pio_p_pkt_buf + index * 256; - copylen = (len + sizeof(*hdr) + 7) & ~7; -//printf("len = %lu, p_pkt = 0x%lx, index = %d\n", len, p_pkt, index); - d = (uint64_t *)v_pkt; - d[0] = ((uint64_t *)hdr)[0]; - d[1] = ((uint64_t *)hdr)[1]; - d[2] = ((uint64_t *)hdr)[2]; - d[3] = ((uint64_t *)hdr)[3]; - d[4] = ((uint64_t *)hdr)[4]; - - d += 5; - copybuf = wq->uwq_copybuf; - memcpy(copybuf + 2, uhdr, uhdrlen); - memcpy(copybuf + 2 + uhdrlen, pkt, pktlen); - s = (uint64_t *)copybuf; - - /* 40 bytes already copied */ - while (copylen > 40) { - *d++ = *s++; - copylen -= 8; - } - - /* encode in shadow ring and write 64 bytes */ - wq_enet_desc_enc(desc, (uintptr_t)p_pkt, len + sizeof(*hdr), - mss, header_length, offload_mode, - eop, USD_SF_ISSET(flags, SIGNAL), fcoe_encap, - vlan_tag_insert, vlan_tag, loopback); - - d = (uint64_t *)((uintptr_t)wq->pio_v_wq_addr + (uintptr_t)desc - - (uintptr_t)wq->uwq_desc_ring); - s = (uint64_t *)desc; - d[0] = s[0]; - d[1] = s[1]; - - wmb(); - -//printf("post %lu[%d] p=0x%lx\n", len + sizeof(*hdr), index, p_pkt); - iowrite32(index, &vwq->ctrl->posted_index); - - wq->uwq_next_desc = (struct wq_enet_desc *) - ((uintptr_t)wq->uwq_desc_ring + (index<<4)); - wq->uwq_post_index = (index+1) & wq->uwq_post_index_mask; - wq->uwq_send_credits--; - - 
info = &wq->uwq_post_info[index]; - info->wp_context = context; - info->wp_len = len; - - return 0; -} - -struct usd_qp_ops usd_qp_ops_ud_pio_udp = { - .qo_post_send_one = usd_post_send_one_ud_pio_udp, - .qo_post_send_one_prefixed = usd_post_send_one_ud_pio_udp, - .qo_post_send_one_copy = usd_post_send_one_ud_pio_udp, - .qo_post_send_two_copy = usd_post_send_two_ud_pio_udp, -}; diff --git a/prov/usnic/src/usnic_direct/usd_post_ud_raw.c b/prov/usnic/src/usnic_direct/usd_post_ud_raw.c deleted file mode 100644 index 40a6e33ea7a..00000000000 --- a/prov/usnic/src/usnic_direct/usd_post_ud_raw.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#include "usd.h" -#include "usd_post.h" - -static int -usd_post_send_one_prefixed_ud_raw( - struct usd_qp *uqp, - struct usd_dest __attribute__ ((unused)) * dest, - const void *buf, - size_t len, - uint32_t flags, - void *context) -{ - struct usd_qp_impl *qp; - struct usd_wq *wq; - uint32_t last_post; - struct usd_wq_post_info *info; - - qp = to_qpi(uqp); - wq = &qp->uq_wq; - - last_post = - _usd_post_send_one(wq, buf, len, USD_SF_ISSET(flags, SIGNAL)); - - info = &wq->uwq_post_info[last_post]; - info->wp_context = context; - info->wp_len = len; - - return 0; -} - -struct usd_qp_ops usd_qp_ops_ud_raw = { - .qo_post_send_one_prefixed = usd_post_send_one_prefixed_ud_raw, -}; diff --git a/prov/usnic/src/usnic_direct/usd_post_ud_udp.c b/prov/usnic/src/usnic_direct/usd_post_ud_udp.c deleted file mode 100644 index c2511230fec..00000000000 --- a/prov/usnic/src/usnic_direct/usd_post_ud_udp.c +++ /dev/null @@ -1,325 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#include - -#include "usd.h" -#include "usd_post.h" - -static int -usd_post_send_one_ud_udp( - struct usd_qp *uqp, - struct usd_dest *dest, - const void *buf, - size_t len, - uint32_t flags, - void *context) -{ - struct usd_qp_impl *qp; - struct usd_udp_hdr *hdr; - struct usd_wq *wq; - uint32_t last_post; - uint8_t *copybuf; - struct usd_wq_post_info *info; - - qp = to_qpi(uqp); - wq = &qp->uq_wq; - copybuf = wq->uwq_copybuf + wq->uwq_post_index * USD_SEND_MAX_COPY; - - hdr = (struct usd_udp_hdr *)copybuf; - memcpy(hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*hdr)); - - /* adjust lengths and insert source port */ - hdr->uh_ip.tot_len = htons(len + sizeof(struct usd_udp_hdr) - - sizeof(struct ether_header)); - hdr->uh_udp.len = htons((sizeof(struct usd_udp_hdr) - - sizeof(struct ether_header) - - sizeof(struct iphdr)) + len); - hdr->uh_udp.source = - qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port; - - last_post = _usd_post_send_two(wq, hdr, sizeof(*hdr), buf, len, - USD_SF_ISSET(flags, SIGNAL)); - - info = &wq->uwq_post_info[last_post]; - info->wp_context = context; - info->wp_len = len; - - return 0; -} - -static int -usd_post_send_one_vlan_ud_udp( - struct usd_qp *uqp, - struct usd_dest *dest, - const void *buf, - size_t len, - uint16_t vlan, - uint32_t flags, - void *context) -{ - struct usd_qp_impl *qp; - struct usd_udp_hdr *hdr; - struct usd_wq *wq; - uint32_t last_post; - uint8_t *copybuf; - struct usd_wq_post_info *info; - - qp = to_qpi(uqp); - wq = &qp->uq_wq; - copybuf = wq->uwq_copybuf + wq->uwq_post_index * USD_SEND_MAX_COPY; - - hdr = (struct usd_udp_hdr *)copybuf; - memcpy(hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*hdr)); - - /* adjust lengths and insert source port */ - hdr->uh_ip.tot_len = htons(len + sizeof(struct usd_udp_hdr) - - sizeof(struct ether_header)); - hdr->uh_udp.len = htons((sizeof(struct usd_udp_hdr) - - sizeof(struct ether_header) - - sizeof(struct iphdr)) + len); - hdr->uh_udp.source = - 
qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port; - - last_post = _usd_post_send_two_vlan(wq, hdr, sizeof(*hdr), buf, len, - USD_SF_ISSET(flags, SIGNAL), vlan); - - info = &wq->uwq_post_info[last_post]; - info->wp_context = context; - info->wp_len = len; - - return 0; -} - -static int -usd_post_send_one_copy_ud_udp( - struct usd_qp *uqp, - struct usd_dest *dest, - const void *buf, - size_t len, - uint32_t flags, - void *context) -{ - struct usd_qp_impl *qp; - struct usd_udp_hdr *hdr; - struct usd_wq *wq; - uint8_t *copybuf; - uint32_t last_post; - struct usd_wq_post_info *info; - - qp = to_qpi(uqp); - wq = &qp->uq_wq; - copybuf = wq->uwq_copybuf + wq->uwq_post_index * USD_SEND_MAX_COPY; - - hdr = (struct usd_udp_hdr *) copybuf; - memcpy(hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*hdr)); - memcpy(hdr + 1, buf, len); - - /* adjust lengths and insert source port */ - hdr->uh_ip.tot_len = htons(len + sizeof(struct usd_udp_hdr) - - sizeof(struct ether_header)); - hdr->uh_udp.len = htons((sizeof(struct usd_udp_hdr) - - sizeof(struct ether_header) - - sizeof(struct iphdr)) + len); - hdr->uh_udp.source = - qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port; - - last_post = - _usd_post_send_one(wq, hdr, len + sizeof(struct usd_udp_hdr), - USD_SF_ISSET(flags, SIGNAL)); - - info = &wq->uwq_post_info[last_post]; - info->wp_context = context; - info->wp_len = len; - - return 0; -} - -static int -usd_post_send_one_prefixed_ud_udp( - struct usd_qp *uqp, - struct usd_dest *dest, - const void *buf, - size_t len, - uint32_t flags, - void *context) -{ - struct usd_qp_impl *qp; - struct usd_udp_hdr *hdr; - struct usd_wq *wq; - uint32_t last_post; - struct usd_wq_post_info *info; - - qp = to_qpi(uqp); - wq = &qp->uq_wq; - - hdr = (struct usd_udp_hdr *) buf - 1; - memcpy(hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*hdr)); - - /* adjust lengths and insert source port */ - hdr->uh_ip.tot_len = htons(len + sizeof(struct usd_udp_hdr) - - sizeof(struct ether_header)); - 
hdr->uh_udp.len = htons((sizeof(struct usd_udp_hdr) - - sizeof(struct ether_header) - - sizeof(struct iphdr)) + len); - hdr->uh_udp.source = - qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port; - - last_post = - _usd_post_send_one(wq, hdr, len + sizeof(struct usd_udp_hdr), - USD_SF_ISSET(flags, SIGNAL)); - - info = &wq->uwq_post_info[last_post]; - info->wp_context = context; - info->wp_len = len; - - return 0; -} - -/* - * 2 WQEs - our header plus user header in 1st one, user packet in 2nd - */ -static int -usd_post_send_two_copy_ud_udp( - struct usd_qp *uqp, - struct usd_dest *dest, - const void *uhdr, - size_t uhdrlen, - const void *pkt, - size_t pktlen, - uint32_t flags, - void *context) -{ - struct usd_qp_impl *qp; - struct usd_udp_hdr *hdr; - struct usd_wq *wq; - uint8_t *copybuf; - size_t tot_ulen; - uint32_t last_post; - struct usd_wq_post_info *info; - - qp = to_qpi(uqp); - wq = &qp->uq_wq; - copybuf = wq->uwq_copybuf + wq->uwq_post_index * USD_SEND_MAX_COPY; - - hdr = (struct usd_udp_hdr *) copybuf; - memcpy(hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*hdr)); - memcpy(hdr + 1, uhdr, uhdrlen); - memcpy((char *) (hdr + 1) + uhdrlen, pkt, pktlen); - - /* adjust lengths and insert source port */ - tot_ulen = uhdrlen + pktlen; - hdr->uh_ip.tot_len = htons(tot_ulen + sizeof(struct usd_udp_hdr) - - sizeof(struct ether_header)); - hdr->uh_udp.len = htons((sizeof(struct usd_udp_hdr) - - sizeof(struct ether_header) - - sizeof(struct iphdr)) + tot_ulen); - hdr->uh_udp.source = - qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port; - - last_post = - _usd_post_send_one(wq, hdr, uhdrlen + sizeof(*hdr) + pktlen, - USD_SF_ISSET(flags, SIGNAL)); - - info = &wq->uwq_post_info[last_post]; - info->wp_context = context; - info->wp_len = uhdrlen + pktlen; - - return 0; -} - -static int -usd_post_send_iov_ud_udp(struct usd_qp *uqp, - struct usd_dest *dest, const struct iovec* iov, - size_t iov_count, uint32_t flags, void *context) -{ - struct usd_qp_impl *qp; - 
struct usd_udp_hdr *hdr; - struct usd_wq *wq; - uint32_t last_post; - uint8_t *copybuf; - struct usd_wq_post_info *info; - struct iovec send_iov[USD_SEND_MAX_SGE + 1]; - size_t len; - unsigned i; - - qp = to_qpi(uqp); - wq = &qp->uq_wq; - copybuf = wq->uwq_copybuf + wq->uwq_post_index * USD_SEND_MAX_COPY; - - for (i = 0, len = 0; i < iov_count; i++) { - len += iov[i].iov_len; - } - - hdr = (struct usd_udp_hdr *)copybuf; - memcpy(hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*hdr)); - - /* adjust lengths and insert source port */ - hdr->uh_ip.tot_len = htons(len + sizeof(struct usd_udp_hdr) - - sizeof(struct ether_header)); - hdr->uh_udp.len = htons((sizeof(struct usd_udp_hdr) - - sizeof(struct ether_header) - - sizeof(struct iphdr)) + len); - hdr->uh_udp.source = - qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port; - - send_iov[0].iov_base = hdr; - send_iov[0].iov_len = sizeof(*hdr); - memcpy(&send_iov[1], iov, sizeof(struct iovec) * iov_count); - - last_post = _usd_post_send_iov(wq, send_iov, iov_count + 1, - USD_SF_ISSET(flags, SIGNAL)); - info = &wq->uwq_post_info[last_post]; - info->wp_context = context; - info->wp_len = len; - - return 0; -} - -struct usd_qp_ops usd_qp_ops_ud_udp = { - .qo_post_send_one = usd_post_send_one_ud_udp, - .qo_post_send_one_prefixed = usd_post_send_one_prefixed_ud_udp, - .qo_post_send_one_copy = usd_post_send_one_copy_ud_udp, - .qo_post_send_two_copy = usd_post_send_two_copy_ud_udp, - .qo_post_send_iov = usd_post_send_iov_ud_udp, - .qo_post_send_one_vlan = usd_post_send_one_vlan_ud_udp, -}; diff --git a/prov/usnic/src/usnic_direct/usd_queue.h b/prov/usnic/src/usnic_direct/usd_queue.h deleted file mode 100644 index cb912da2638..00000000000 --- a/prov/usnic/src/usnic_direct/usd_queue.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#ifndef _USD_QUEUE_H_ -#define _USD_QUEUE_H_ - -#include - -#define TAILQ_FOREACH_SAFE(var, tmpvar, head, field) \ - for ((var) = ((head)->tqh_first), \ - (tmpvar) = (var)?((var)->field.tqe_next):NULL; \ - (var); \ - (var) = (tmpvar), \ - (tmpvar) = (var)?((var)->field.tqe_next):NULL) - -#endif /* _USD_QUEUE_H_ */ diff --git a/prov/usnic/src/usnic_direct/usd_queues.c b/prov/usnic/src/usnic_direct/usd_queues.c deleted file mode 100644 index a731e0f55a7..00000000000 --- a/prov/usnic/src/usnic_direct/usd_queues.c +++ /dev/null @@ -1,1370 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "kcompat.h" -#include "cq_enet_desc.h" -#include "wq_enet_desc.h" -#include "rq_enet_desc.h" - -#include "usnic_abi.h" -#include "usnic_direct.h" -#include "usd.h" -#include "usd_ib_cmd.h" -#include "usd_util.h" -#include "usd_vnic.h" -#include "usd_device.h" - -static int usd_create_qp_ud(struct usd_qp_impl *qp); - -/* - * Remove a usecount on a VF, free it if it goes to zero - */ -static void -usd_unmap_vf( - struct usd_device *dev, - struct usd_vf *vf) -{ - uint32_t i; - --vf->vf_refcnt; - - if (vf->vf_refcnt == 0) { - - /* unlink from list (logic works for uninit struct also) */ - if (vf->vf_next != NULL) - vf->vf_next->vf_prev = vf->vf_prev; - if (vf->vf_prev != NULL) - vf->vf_prev->vf_next = vf->vf_next; - if (dev->ud_vf_list == vf) - dev->ud_vf_list = vf->vf_next; - - if (vf->vf_vdev != NULL) - vnic_dev_unregister(vf->vf_vdev); - if (vf->vf_bar0.vaddr != MAP_FAILED) { - munmap(vf->vf_bar0.vaddr, vf->vf_bar_map_len); - } - for (i = 0; i < sizeof(vf->iomaps)/sizeof(vf->iomaps[0]); i++) { - if (vf->iomaps[i].bus_addr != 0 && - vf->iomaps[i].vaddr != MAP_FAILED) { - munmap(vf->iomaps[i].vaddr, vf->iomaps[i].len); - } - } - - free(vf); - } -} - -static int -usd_map_one_res(struct usd_device *dev, struct usd_vf *vf, - struct usnic_vnic_barres_info *barres) -{ - struct vnic_dev_iomap_info* 
iomap; - off64_t offset; - uint64_t page_size = sysconf(_SC_PAGE_SIZE); - - iomap = &vf->iomaps[barres->type]; - iomap->bus_addr = barres->bus_addr; - iomap->len = (barres->len + (page_size - 1)) & (~(page_size - 1)); - - offset = USNIC_ENCODE_PGOFF(vf->vf_id, USNIC_MMAP_RES, barres->type); - iomap->vaddr = mmap64(NULL, iomap->len, PROT_READ + PROT_WRITE, - MAP_SHARED, dev->ud_ctx->ucx_ib_dev_fd, offset); - if (iomap->vaddr == MAP_FAILED) { - usd_err("Failed to map res type %d, bus_addr 0x%lx, len 0x%lx\n", - barres->type, iomap->bus_addr, iomap->len); - return -errno; - } - vnic_dev_upd_res_vaddr(vf->vf_vdev, iomap); - - return 0; -} - -static int -usd_map_vnic_res(struct usd_device *dev, struct usd_vf *vf, - struct usd_vf_info *vfip) -{ - int i, err; - - /* unmap bar0 */ - if (vf->vf_bar0.vaddr != MAP_FAILED) { - munmap(vf->vf_bar0.vaddr, vf->vf_bar_map_len); - vf->vf_bar0.vaddr = MAP_FAILED; - } - - for (i = RES_TYPE_EOL + 1; i < RES_TYPE_MAX; i++) { - if (vfip->barres[i].bus_addr != 0) { - err = usd_map_one_res(dev, vf, &vfip->barres[i]); - if (err) - return err; - } else { - /* Disable any other res not reported by kernel module */ - struct vnic_dev_iomap_info iomap; - iomap.vaddr = 0; - iomap.bus_addr = vnic_dev_get_res_bus_addr( - vf->vf_vdev, i, 0); - iomap.len = vnic_dev_get_res_type_len( - vf->vf_vdev, i); - vnic_dev_upd_res_vaddr(vf->vf_vdev, &iomap); - } - } - - return 0; -} - -/* - * Create a VF structure if we don't already have one in use, - * update refcnt - */ -static int -usd_map_vf( - struct usd_device *dev, - struct usd_vf_info *vfip, - struct usd_vf **vf_o) -{ - struct usd_vf *vf; - off64_t offset; - int ret; - - /* find matching VF */ - vf = dev->ud_vf_list; - while (vf != NULL) { - if (vf->vf_id == vfip->vi_vfid) break; - vf = vf->vf_next; - } - - /* Was VF actually found? 
If not, create and add */ - if (vf == NULL) { - vf = calloc(sizeof(*vf), 1); - if (vf == NULL) { - ret = -errno; - goto out; - } - - /* Fill in function */ - vf->vf_id = vfip->vi_vfid; - vf->vf_refcnt = 1; - vf->vf_bar0.bus_addr = vfip->vi_bar_bus_addr; - vf->vf_bar0.len = vfip->vi_bar_len; - - /* map BAR0 HEAD first to get res info */ - if (vfip->vi_barhead_len > 0) { - offset = USNIC_ENCODE_PGOFF(vf->vf_id, USNIC_MMAP_BARHEAD, 0); - vf->vf_bar_map_len = vfip->vi_barhead_len; - } else { - offset = USNIC_ENCODE_PGOFF(vf->vf_id, USNIC_MMAP_BAR, 0); - vf->vf_bar_map_len = vfip->vi_bar_len; - } - vf->vf_bar0.vaddr = mmap64(NULL, vf->vf_bar_map_len, - PROT_READ + PROT_WRITE, MAP_SHARED, - dev->ud_ctx->ucx_ib_dev_fd, - offset); - if (vf->vf_bar0.vaddr == MAP_FAILED) { - usd_err("Failed to map bar0\n"); - ret = -errno; - goto out; - } - - /* Register it */ - vf->vf_vdev = vnic_dev_alloc_discover(NULL, NULL, (void *)dev, - &vf->vf_bar0, 1); - if (vf->vf_vdev == NULL) { - ret = -ENOENT; - goto out; - } - - /* map individual vnic resource seperately */ - if (dev->ud_ctx->ucx_caps[USNIC_CAP_MAP_PER_RES] > 0) { - ret = usd_map_vnic_res(dev, vf, vfip); - if (ret) - goto out; - } - - /* link it in */ - vf->vf_next = dev->ud_vf_list; - dev->ud_vf_list = vf; - - if (vf->vf_next != NULL) - vf->vf_next->vf_prev = vf; - vf->vf_prev = NULL; - - /* Found existing VF, bump reference count */ - } else { - ++vf->vf_refcnt; - } - - *vf_o = vf; - - return 0; - - out: - if (vf != NULL) - usd_unmap_vf(dev, vf); - return ret; -} - -static void -usd_get_vf( - struct usd_vf *vf) -{ - ++vf->vf_refcnt; -} - -/* - * Get a cq interrupt source - */ -static struct usd_cq_comp_intr * -usd_get_cq_intr( - struct usd_cq_impl *cq, - struct usd_vf *vf) -{ - struct usd_context *uctx; - struct usd_cq_comp_intr *intr; - int ret; - - uctx = cq->ucq_dev->ud_ctx; - - pthread_mutex_lock(&uctx->ucx_mutex); - LIST_FOREACH(intr, &uctx->ucx_intr_list, uci_ctx_link) { - if (intr->uci_offset == cq->intr_offset) { - 
intr->uci_refcnt ++; - goto out; - } - } - - intr = calloc(sizeof(*intr), 1); - if (intr != NULL) { - ret = vnic_grpmbrintr_alloc(vf->vf_vdev, &intr->uci_vintr, - cq->intr_offset); - if (ret) { - usd_err("Failed to alloc cq completion intr\n"); - free(intr); - pthread_mutex_unlock(&uctx->ucx_mutex); - return NULL; - } - - /* init host interrupt registers */ - iowrite32(0, &intr->uci_vintr.ctrl->coalescing_timer); - iowrite32(0, &intr->uci_vintr.ctrl->coalescing_type); - iowrite32(1, &intr->uci_vintr.ctrl->mask_on_assertion); - iowrite32(0, &intr->uci_vintr.ctrl->int_credits); - iowrite32(0, &intr->uci_vintr.ctrl->mask); /* unmask */ - - intr->uci_offset = cq->intr_offset; - intr->uci_refcnt = 1; - LIST_INSERT_HEAD(&uctx->ucx_intr_list, intr, uci_ctx_link); - } - -out: - pthread_mutex_unlock(&uctx->ucx_mutex); - return intr; -} - -/* - * put a cq interrupt source - */ -static void -usd_put_cq_intr( - struct usd_cq_impl *cq) -{ - struct usd_context *uctx; - struct usd_cq_comp_intr *intr; - - uctx = cq->ucq_dev->ud_ctx; - - pthread_mutex_lock(&uctx->ucx_mutex); - LIST_FOREACH(intr, &uctx->ucx_intr_list, uci_ctx_link) { - if (intr->uci_offset == cq->intr_offset) { - intr->uci_refcnt--; - if (intr->uci_refcnt == 0) - vnic_grpmbrintr_free(&intr->uci_vintr); - break; - } - } - - if (intr != NULL) { - LIST_REMOVE(intr, uci_ctx_link); - free(intr); - } - pthread_mutex_unlock(&uctx->ucx_mutex); -} - - - -/* - * Function that does whatever is needed to make a CQ go away - */ -int -usd_destroy_cq( - struct usd_cq *ucq) -{ - struct usd_cq_impl *cq; - - cq = to_cqi(ucq); - - if (cq->ucq_intr != NULL) { - usd_put_cq_intr(cq); - cq->ucq_intr = NULL; - } - if (cq->ucq_state & USD_QS_VERBS_CREATED) - usd_ib_cmd_destroy_cq(cq->ucq_dev, cq); - - if (cq->ucq_state & USD_QS_VF_MAPPED) - usd_unmap_vf(cq->ucq_dev, cq->ucq_vf); - - if (cq->ucq_desc_ring != NULL) - usd_free_mr(cq->ucq_desc_ring); - if (cq->ucq_rq_map != NULL) - free(cq->ucq_rq_map); - if (cq->ucq_wq_map != NULL) - 
free(cq->ucq_wq_map); - free(cq); - - return 0; -} - -static int -usd_vnic_wq_init( - struct usd_wq *wq, - struct usd_vf *vf, - uint64_t desc_ring) -{ - struct vnic_wq *vwq; - int ret; - - vwq = &wq->uwq_vnic_wq; - - /* get address of control register */ - vwq->ctrl = vnic_dev_get_res(vf->vf_vdev, RES_TYPE_WQ, wq->uwq_index); - if (vwq->ctrl == NULL) - return -EINVAL; - - ret = vnic_wq_disable(vwq); - if (ret != 0) - return ret; - - writeq(desc_ring, &vwq->ctrl->ring_base); - iowrite32(wq->uwq_num_entries, &vwq->ctrl->ring_size); - iowrite32(0, &vwq->ctrl->fetch_index); - iowrite32(0, &vwq->ctrl->posted_index); - iowrite32(wq->uwq_cq->ucq_index, &vwq->ctrl->cq_index); - iowrite32(0, &vwq->ctrl->error_interrupt_enable); - iowrite32(0, &vwq->ctrl->error_interrupt_offset); - iowrite32(0, &vwq->ctrl->error_status); - - wq->uwq_state |= USD_QS_VNIC_INITIALIZED; - wq->uwq_next_desc = wq->uwq_desc_ring; - wq->uwq_send_credits = wq->uwq_num_entries - 1; - - return 0; -} - -/* - * Allocate the resources for a previously created WQ for UD QP - */ -static int -usd_create_wq_ud( - struct usd_qp_impl *qp) -{ - struct usd_wq *wq; - uint32_t ring_size; - int ret; - - wq = &qp->uq_wq; - - /* Allocate resources for WQ */ - ring_size = sizeof(struct wq_enet_desc) * wq->uwq_num_entries; - ret = usd_alloc_mr(qp->uq_dev, ring_size, (void **)&wq->uwq_desc_ring); - if (ret != 0) - return ret; - - ret = usd_vnic_wq_init(wq, qp->uq_vf, (uint64_t)wq->uwq_desc_ring); - if (ret != 0) - goto out; - - return 0; - -out: - if (wq->uwq_desc_ring != NULL) { - usd_free_mr(wq->uwq_desc_ring); - wq->uwq_desc_ring = NULL; - } - return ret; -} - -/* - * Allocate the resources for a previously created WQ - */ -static int -usd_create_wq_pio( - struct usd_qp_impl *qp) -{ - uint32_t pio_memsize; - uint32_t used_size; - uint32_t ring_size; - void *pio_vaddr; - uint64_t pio_paddr; - uint64_t ivaddr; - struct usd_wq *wq; - struct usd_device *dev; - int ret; - - dev = qp->uq_dev; - if 
(dev->ud_ctx->ucx_caps[USNIC_CAP_PIO] == 0 || - vnic_dev_get_res_bus_addr(qp->uq_vf->vf_vdev, RES_TYPE_MEM, 0) == 0) { - usd_err("dev does not support PIO\n"); - return -ENODEV; - } - - pio_memsize = vnic_dev_get_res_count(qp->uq_vf->vf_vdev, RES_TYPE_MEM); - pio_vaddr = vnic_dev_get_res(qp->uq_vf->vf_vdev, RES_TYPE_MEM, 0); - - ret = usd_get_piopa(qp); - if (ret != 0) - return ret; - pio_paddr = qp->uq_attrs.uqa_pio_paddr; - - /* 512-byte alignment must match */ - if ((((uint64_t)pio_vaddr ^ pio_paddr) & 511) != 0) { - fprintf(stderr, "Alignment mismatch, %p vs 0x%lx, cannot do PIO\n", - pio_vaddr, pio_paddr); - return -ENXIO; - } - - /* skip past size */ - ivaddr = (uintptr_t)pio_vaddr; - ivaddr += sizeof(uint64_t); - - /* round up to 512 bytes */ - ivaddr = (ivaddr + 511) & ~511; - - /* WQ ring goes first. Allow space for 64-byte write of last desc */ - wq = &qp->uq_wq; - ring_size = wq->uwq_num_entries * sizeof(struct wq_enet_desc); - ring_size += 64 - sizeof(struct wq_enet_desc); - wq->pio_v_wq_addr = (void *)ivaddr; - wq->pio_p_wq_addr = pio_paddr + ivaddr - (uint64_t)pio_vaddr; - ivaddr += ring_size; - - /* round up to 64 bytes */ - ivaddr = (ivaddr + 63) & ~63; - - /* we keep a copy of the ring, also */ - ret = usd_alloc_mr(qp->uq_dev, ring_size, (void **)&wq->uwq_desc_ring); - if (ret != 0) - return ret; - - /* packet buffer */ - wq->pio_v_pkt_buf = (void *)ivaddr; - wq->pio_p_pkt_buf = pio_paddr + ivaddr - (uint64_t)pio_vaddr; - ivaddr += wq->uwq_num_entries * 256; - - used_size = ivaddr - (uintptr_t)pio_vaddr; - if (used_size > pio_memsize) { - ret = -ENOMEM; - goto out; - } - - ret = usd_vnic_wq_init(wq, qp->uq_vf, wq->pio_p_wq_addr); - if (ret != 0) - goto out; - - return 0; - -out: - if (wq->uwq_desc_ring != NULL) { - usd_free_mr(wq->uwq_desc_ring); - wq->uwq_desc_ring = NULL; - } - return ret; -} - -/* - * Allocate the resources for a previously created WQ - */ -static int -usd_create_wq( - struct usd_qp_impl *qp) -{ - struct usd_wq *wq; - int ret; - 
- switch (qp->uq_attrs.uqa_qtype) { - case USD_QTY_UD_PIO: - ret = usd_create_wq_pio(qp); - break; - case USD_QTY_UD: - ret = usd_create_wq_ud(qp); - break; - default: - ret = -1; - break; - } - - if (ret == 0) { - wq = &qp->uq_wq; - wq->uwq_post_index_mask = (wq->uwq_num_entries-1); - wq->uwq_post_index = 1; - wq->uwq_last_comp = (wq->uwq_num_entries-1); - } - - return ret; -} - -static int -usd_vnic_rq_init( - struct usd_rq *rq, - struct usd_vf *vf, - uint64_t desc_ring) -{ - struct vnic_rq *vrq; - int ret; - - vrq = &rq->urq_vnic_rq; - - /* get address of control register */ - vrq->ctrl = vnic_dev_get_res(vf->vf_vdev, RES_TYPE_RQ, rq->urq_index); - if (vrq->ctrl == NULL) - return -EINVAL; - - ret = vnic_rq_disable(vrq); - if (ret != 0) - return ret; - - writeq(desc_ring, &vrq->ctrl->ring_base); - iowrite32(rq->urq_num_entries, &vrq->ctrl->ring_size); - iowrite32(0, &vrq->ctrl->fetch_index); - iowrite32(0, &vrq->ctrl->posted_index); - iowrite32(rq->urq_cq->ucq_index, &vrq->ctrl->cq_index); - iowrite32(0, &vrq->ctrl->error_interrupt_enable); - iowrite32(0, &vrq->ctrl->error_interrupt_offset); - iowrite32(0, &vrq->ctrl->error_status); - - rq->urq_state |= USD_QS_VNIC_INITIALIZED; - rq->urq_next_desc = rq->urq_desc_ring; - rq->urq_recv_credits = rq->urq_num_entries - 1; - - return 0; -} - -/* - * Allocate the resources for a previously created RQ - */ -static int -usd_create_rq(struct usd_qp_impl *qp) -{ - struct usd_rq *rq; - uint32_t ring_size; - int ret; - - rq = &qp->uq_rq; - - /* Allocate resources for RQ */ - ring_size = sizeof(struct rq_enet_desc) * rq->urq_num_entries; - ret = usd_alloc_mr(qp->uq_dev, ring_size, (void **)&rq->urq_desc_ring); - if (ret != 0) - return ret; - - ret = usd_vnic_rq_init(rq, qp->uq_vf, (uint64_t)rq->urq_desc_ring); - if (ret != 0) - goto out; - - rq->urq_post_index_mask = (rq->urq_num_entries-1); - rq->urq_post_index = 0; - rq->urq_last_comp = (rq->urq_num_entries-1); - - return 0; -out: - if (rq->urq_desc_ring != NULL) { - 
usd_free_mr(rq->urq_desc_ring); - rq->urq_desc_ring = NULL; - } - return ret; -} - -static int -usd_vnic_disable_qp( - struct usd_qp_impl *qp) -{ - struct usd_rq *rq; - struct usd_wq *wq; - int ret; - - wq = &qp->uq_wq; - rq = &qp->uq_rq; - - /* disable both queues */ - ret = vnic_wq_disable(&wq->uwq_vnic_wq); - if (ret != 0) - return ret; - ret = vnic_rq_disable(&rq->urq_vnic_rq); - - return ret; -} - -static void -usd_vnic_enable_qp( - struct usd_qp_impl *qp) -{ - struct usd_rq *rq; - struct usd_wq *wq; - - wq = &qp->uq_wq; - rq = &qp->uq_rq; - - vnic_rq_enable(&rq->urq_vnic_rq); - vnic_wq_enable(&wq->uwq_vnic_wq); -} - -/* - * QP has been created and resources allocated. Issue the IB commands to - * change the state to INIT/RTR/RTS to trigger filter creation and enable the - * QP to send and receive traffic. - */ -static int -usd_enable_verbs_qp( - struct usd_qp_impl *qp) -{ - struct usd_rq *rq; - struct usd_wq *wq; - struct usd_device *dev; - int ret; - - dev = qp->uq_dev; - wq = &qp->uq_wq; - rq = &qp->uq_rq; - - /* XXX is this really necessary? 
*/ - ret = usd_vnic_disable_qp(qp); - if (ret != 0) { - goto out; - } - - /* state to INIT */ - ret = usd_ib_cmd_modify_qp(dev, qp, IBV_QPS_INIT); - if (ret != 0) { - goto out; - } - - /* state to "ready to receive," enable rq */ - ret = usd_ib_cmd_modify_qp(dev, qp, IBV_QPS_RTR); - if (ret != 0) { - goto out; - } - - /* state to "ready to send," enable wq */ - ret = usd_ib_cmd_modify_qp(dev, qp, IBV_QPS_RTS); - if (ret != 0) { - goto out; - } - - usd_vnic_enable_qp(qp); - rq->urq_state |= USD_QS_READY; - wq->uwq_state |= USD_QS_READY; - - out: - return ret; -} - -/* - * Public interface to disable a QP - */ -int -usd_disable_qp( - struct usd_qp *uqp) -{ - struct usd_qp_impl *qp; - - qp = to_qpi(uqp); - usd_vnic_disable_qp(qp); - return 0; -} - -/* - * Public interface to enable a QP - */ -int -usd_enable_qp( - struct usd_qp *uqp) -{ - struct usd_qp_impl *qp; - - qp = to_qpi(uqp); - usd_vnic_enable_qp(qp); - return 0; -} - -/* - * Public interface to create a CQ - * First, issue the verbs command to create a CW instance in the driver. - * Second, allocate the data structures so that poll_cq can succeed, though - * we will not actually have VIC resources allocated until the first create_qp - * that uses this CQ. We will finish configuring the CQ at that time. 
- */ -int -usd_create_cq( - struct usd_device *dev, - struct usd_cq_init_attr *init_attr, - struct usd_cq **cq_o) -{ - unsigned num_entries; - int comp_vec; - unsigned qp_per_vf; - struct usd_cq *ucq; - struct usd_cq_impl *cq; - unsigned ring_size; - int ret; - - if (init_attr == NULL) - return -EINVAL; - - num_entries = init_attr->num_entries; - comp_vec = init_attr->comp_vec; - - /* Make sure device ready */ - ret = usd_device_ready(dev); - if (ret != 0) { - return ret; - } - - if (num_entries > dev->ud_attrs.uda_max_cqe) { - return -EINVAL; - } - - if (init_attr->comp_fd != -1) { - if (dev->ud_ctx->ucx_caps[USD_CAP_GRP_INTR] == 0) { - usd_err("CQ completion event is not supported\n"); - return -EINVAL; - } - if (comp_vec >= (int)dev->ud_attrs.uda_num_comp_vectors) { - usd_err("too large comp_vec (%d) requested, num_comp_vectors=%d\n", - comp_vec, (int)dev->ud_attrs.uda_num_comp_vectors); - return -EINVAL; - } - } - - cq = (struct usd_cq_impl *)calloc(sizeof(*cq), 1); - if (cq == NULL) { - ret = -errno; - goto out; - } - - qp_per_vf = dev->ud_attrs.uda_qp_per_vf; - - cq->ucq_wq_map = calloc(qp_per_vf, sizeof(struct usd_wq *)); - cq->ucq_rq_map = calloc(qp_per_vf, sizeof(struct usd_rq *)); - if (cq->ucq_wq_map == NULL || cq->ucq_rq_map == NULL) { - ret = -ENOMEM; - goto out; - } - - cq->ucq_dev = dev; - - /* add 1 and round up to next POW2 and min() with 64 */ - num_entries = 1 << msbit(num_entries); - if (num_entries < 64) { - num_entries = 64; - } - - cq->ucq_num_entries = num_entries; - - ring_size = sizeof(struct cq_desc) * num_entries; - ret = usd_alloc_mr(dev, ring_size, &cq->ucq_desc_ring); - if (ret != 0) - goto out; - memset(cq->ucq_desc_ring, 0, ring_size); - - /* - * kernel currently has no support for handling negative comp_vec values, - * just use 0 which is guaranteed to be available - */ - if (comp_vec < 0) - comp_vec = 0; - - ret = usd_ib_cmd_create_cq(dev, cq, init_attr->ibv_cq, init_attr->comp_fd, - comp_vec); - if (ret != 0) - goto out; - - 
cq->ucq_state |= USD_QS_VERBS_CREATED; - - /* initialize polling variables */ - cq->ucq_cqe_mask = num_entries - 1; - cq->ucq_color_shift = msbit(num_entries) - 1; - cq->comp_fd = init_attr->comp_fd; - cq->comp_vec = comp_vec; - cq->comp_req_notify = init_attr->comp_req_notify; - - ucq = to_usdcq(cq); - ucq->ucq_num_entries = num_entries - 1; - *cq_o = to_usdcq(cq); - return 0; - -out: - if (cq != NULL) { - usd_destroy_cq(to_usdcq(cq)); - } - return ret; -} - -/* - * Finish CQ creation after first QP has been created. Associate a vf - * and configure the CQ on the VIC. It's OK if CQ is already configured, but - * VFs must match. - */ -static int -usd_finish_create_cq( - struct usd_cq_impl *cq, - struct usd_vf *vf) -{ - struct vnic_cq *vcq; - - if (cq->ucq_state & USD_QS_VNIC_INITIALIZED) { - if (cq->ucq_vf == vf) { - return 0; - } else { - usd_err("Cannot share CQ across VFs\n"); - return -EINVAL; - } - } - - vcq = &cq->ucq_vnic_cq; - vcq->index = cq->ucq_index; - vcq->vdev = vf->vf_vdev; - - vcq->ctrl = vnic_dev_get_res(vcq->vdev, RES_TYPE_CQ, vcq->index); - if (vcq->ctrl == NULL) - return -EINVAL; - - cq->ucq_vf = vf; - usd_get_vf(vf); /* bump the reference count */ - cq->ucq_state |= USD_QS_VF_MAPPED; - - /* - * Tell the VIC about this CQ - */ - { - unsigned int cq_flow_control_enable = 0; - unsigned int cq_color_enable = 1; - unsigned int cq_head = 0; - unsigned int cq_tail = 0; - unsigned int cq_tail_color = 1; - unsigned int cq_entry_enable = 1; - unsigned int cq_msg_enable = 0; - unsigned int cq_intr_enable = 0; - unsigned int cq_intr_offset = 0; - uint64_t cq_msg_addr = 0; - - if (cq->comp_fd != -1) { - cq->ucq_intr = usd_get_cq_intr(cq, vf); - if (cq->ucq_intr == NULL) { - usd_err("Failed to alloc cq completion intr\n"); - return -ENOMEM; - } else { - cq_intr_enable = 1; - cq_intr_offset = cq->intr_offset; - } - } - - cq->ucq_vnic_cq.ring.base_addr = (uintptr_t)cq->ucq_desc_ring; - cq->ucq_vnic_cq.ring.desc_count = cq->ucq_num_entries; - - 
vnic_cq_init(&cq->ucq_vnic_cq, cq_flow_control_enable, - cq_color_enable, cq_head, cq_tail, cq_tail_color, - cq_intr_enable, cq_entry_enable, cq_msg_enable, - cq_intr_offset, cq_msg_addr); - } - cq->ucq_state |= USD_QS_VNIC_INITIALIZED; - - return 0; -} - -/* - * Fill in ops field for QP - */ -static int -usd_qp_get_ops( - struct usd_qp_impl *qp) -{ - int tt; - -#define USD_TT(TY,TR) ((TY)<<16|(TR)) - tt = USD_TT(qp->uq_attrs.uqa_transport, qp->uq_attrs.uqa_qtype); - - switch (tt) { - case USD_TT(USD_QTR_UDP, USD_QTY_UD): - qp->uq_qp.uq_ops = usd_qp_ops_ud_udp; - break; - case USD_TT(USD_QTR_UDP, USD_QTY_UD_PIO): - qp->uq_qp.uq_ops = usd_qp_ops_ud_pio_udp; - break; - case USD_TT(USD_QTR_RAW, USD_QTY_UD): - qp->uq_qp.uq_ops = usd_qp_ops_ud_raw; - break; - default: - return -EINVAL; - } - - return 0; -} - -/* - * Convert user's filter into internal representation - */ -static int -usd_filter_alloc( - struct usd_device *dev, - struct usd_filter *filt, - struct usd_qp_filter *qfilt) -{ - struct sockaddr_in sin; - int ret; - int s; - - switch (filt->uf_type) { - case USD_FTY_UDP_SOCK: - qfilt->qf_type = USD_FTY_UDP_SOCK; - qfilt->qf_filter.qf_udp.u_sockfd = filt->uf_filter.uf_udp_sock.u_sock; - break; - - case USD_FTY_UDP: - qfilt->qf_type = USD_FTY_UDP; - qfilt->qf_filter.qf_udp.u_sockfd = -1; - - s = socket(AF_INET, SOCK_DGRAM, 0); - if (s == -1) - return -errno; - memset(&sin, 0, sizeof(sin)); - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = dev->ud_attrs.uda_ipaddr_be; - sin.sin_port = htons(filt->uf_filter.uf_udp.u_port); - ret = bind(s, (struct sockaddr *)&sin, sizeof(sin)); - if (ret == -1) { - ret = -errno; - close(s); - return ret; - } - - /* save the socket */ - qfilt->qf_filter.qf_udp.u_sockfd = s; - break; - - default: - return -EINVAL; - } - - return 0; -} - -/* - * Fill in local address given filter and return from verbs QP create - */ -static int -usd_get_qp_local_addr( - struct usd_qp_impl *qp) -{ - socklen_t addrlen; - int ret; - - switch 
(qp->uq_attrs.uqa_transport) { - - case USD_QTR_UDP: - /* find out what address we got */ - addrlen = sizeof(qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr); - ret = getsockname(qp->uq_filter.qf_filter.qf_udp.u_sockfd, - (struct sockaddr *) &qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr, - &addrlen); - if (ret == -1) - return -errno; - break; - - default: - break; - } - return 0; -} - -static void -usd_filter_free( - struct usd_qp_filter *qfilt) -{ - switch (qfilt->qf_type) { - case USD_FTY_UDP: - close(qfilt->qf_filter.qf_udp.u_sockfd); - break; - default: - break; - } -} - -/* - * Destroy a QP - */ -int -usd_destroy_qp( - struct usd_qp *uqp) -{ - struct usd_wq *wq; - struct usd_rq *rq; - struct usd_qp_impl *qp; - - qp = to_qpi(uqp); - - wq = &qp->uq_wq; - rq = &qp->uq_rq; - - if (wq->uwq_state & USD_QS_READY) - usd_disable_qp(uqp); - - if (rq->urq_state & USD_QS_VNIC_ALLOCATED) - vnic_rq_free(&rq->urq_vnic_rq); - - if (wq->uwq_state & USD_QS_VF_MAPPED) - usd_unmap_vf(qp->uq_dev, qp->uq_vf); - - if (wq->uwq_state & USD_QS_VERBS_CREATED) - usd_ib_cmd_destroy_qp(qp->uq_dev, qp); - - if (rq->urq_state & USD_QS_FILTER_ALLOC) - usd_filter_free(&qp->uq_filter); - - if (rq->urq_context != NULL) - free(rq->urq_context); - if (wq->uwq_post_info != NULL) - free(wq->uwq_post_info); - if (wq->uwq_copybuf != NULL) - usd_free_mr(wq->uwq_copybuf); - if (wq->uwq_desc_ring != NULL) - usd_free_mr(wq->uwq_desc_ring); - if (rq->urq_desc_ring != NULL) - usd_free_mr(rq->urq_desc_ring); - - free(qp); - - return 0; -} - -/* - * Create a normal or PIO UD QP - */ -static int -usd_create_qp_ud( - struct usd_qp_impl *qp) -{ - struct usd_device *dev; - unsigned num_wq_entries; - unsigned num_rq_entries; - struct usd_vf_info vf_info; - struct usd_vf *vf; - struct usd_rq *rq; - struct usd_wq *wq; - struct usd_cq_impl *wcq; - struct usd_cq_impl *rcq; - size_t copybuf_size; - int ret; - - dev = qp->uq_dev; - vf = NULL; - - wq = &qp->uq_wq; - rq = &qp->uq_rq; - wcq = wq->uwq_cq; - rcq = 
rq->urq_cq; - - ret = usd_qp_get_ops(qp); - if (ret != 0) { - goto fail; - } - - num_wq_entries = wq->uwq_num_entries; - num_rq_entries = rq->urq_num_entries; - - rq->urq_context = calloc(sizeof(void *), num_rq_entries); - wq->uwq_post_info = calloc(sizeof(struct usd_wq_post_info), num_wq_entries); - if (rq->urq_context == NULL || wq->uwq_post_info == NULL) { - ret = -ENOMEM; - goto fail; - } - - /* - * Issue verbs command to create the QP. This does not actually - * instanstiate the filter in the VIC yet, need to bring the - * verbs QP up to RTR state for that - */ - memset(&vf_info, 0, sizeof(vf_info)); - ret = usd_ib_cmd_create_qp(dev, qp, &vf_info); - if (ret != 0) { - goto fail; - } - - /* verbs create_qp command has been completed */ - rq->urq_state |= USD_QS_VERBS_CREATED; - wq->uwq_state |= USD_QS_VERBS_CREATED; - - /* - * Create/regmr for wq copybuf after verbs QP is created - * because QP number information may be needed to register - * mr under shared PD - */ - copybuf_size = USD_SEND_MAX_COPY * num_wq_entries; - ret = usd_alloc_mr(dev, copybuf_size, (void **)&wq->uwq_copybuf); - if (ret != 0) - goto fail; - - ret = usd_map_vf(dev, &vf_info, &vf); - if (ret != 0) { - goto fail; - } - - qp->uq_vf = vf; - rq->urq_state |= USD_QS_VF_MAPPED; - wq->uwq_state |= USD_QS_VF_MAPPED; - - /* - * Now that we have a VF, we can finish creating the CQs. 
- * It's OK if rcq==wcq, finish_create_cq allows for CQ sharing - */ - ret = usd_finish_create_cq(wcq, vf); - if (ret != 0) { - goto fail; - } - ret = usd_finish_create_cq(rcq, vf); - if (ret != 0) { - goto fail; - } - - /* define the WQ and RQ to the VIC */ - ret = usd_create_wq(qp); - if (ret != 0) { - goto fail; - } - ret = usd_create_rq(qp); - if (ret != 0) { - goto fail; - } - - /* Issue commands to driver to enable the QP */ - ret = usd_enable_verbs_qp(qp); - if (ret != 0) { - goto fail; - } - - /* Attach WQ and RQ to CW */ - rcq->ucq_rq_map[rq->urq_index] = rq; - wcq->ucq_wq_map[wq->uwq_index] = wq; - - qp->uq_attrs.uqa_max_send_credits = wq->uwq_num_entries - 1; - qp->uq_attrs.uqa_max_recv_credits = rq->urq_num_entries - 1; - qp->uq_attrs.uqa_max_inline = USD_SEND_MAX_COPY - - qp->uq_attrs.uqa_hdr_len; - - /* build local address */ - ret = usd_get_qp_local_addr(qp); - if (ret != 0) { - goto fail; - } - - return 0; - - fail: - return ret; -} - -/* - * Public interface to create QP - */ -int -usd_create_qp( - struct usd_device *dev, - enum usd_qp_transport transport, - enum usd_qp_type qtype, - struct usd_cq *wucq, - struct usd_cq *rucq, - unsigned num_send_credits, - unsigned num_recv_credits, - struct usd_filter *filt, - struct usd_qp **uqp_o) -{ - struct usd_qp_impl *qp; - unsigned num_rq_entries; - unsigned num_wq_entries; - struct usd_cq_impl *wcq; - struct usd_cq_impl *rcq; - struct usd_rq *rq; - struct usd_wq *wq; - int ret; - - qp = NULL; - - /* Make sure device ready */ - ret = usd_device_ready(dev); - if (ret != 0) { - goto fail; - } - - qp = calloc(sizeof(*qp), 1); - if (qp == NULL) { - ret = -ENOMEM; - goto fail; - } - - qp->uq_dev = dev; - qp->uq_attrs.uqa_transport = transport; - qp->uq_attrs.uqa_qtype = qtype; - - ret = usd_qp_get_ops(qp); - if (ret != 0) { - goto fail; - } - - if (num_recv_credits > dev->ud_attrs.uda_max_recv_credits) { - ret = -EINVAL; - goto fail; - } - /* Add 1 and round num_entries up to POW2 and min to 32 */ - 
num_rq_entries = 1 << msbit(num_recv_credits); - if (num_rq_entries < 32) num_rq_entries = 32; - - if (num_send_credits > dev->ud_attrs.uda_max_send_credits) { - ret = -EINVAL; - goto fail; - } - num_wq_entries = 1 << msbit(num_send_credits); - if (num_wq_entries < 32) num_wq_entries = 32; - - rcq = to_cqi(rucq); - wcq = to_cqi(wucq); - - rq = &qp->uq_rq; - rq->urq_num_entries = num_rq_entries; - rq->urq_cq = rcq; - - wq = &qp->uq_wq; - wq->uwq_num_entries = num_wq_entries; - wq->uwq_cq = wcq; - - /* do filter setup */ - ret = usd_filter_alloc(dev, filt, &qp->uq_filter); - if (ret != 0) { - goto fail; - } - rq->urq_state |= USD_QS_FILTER_ALLOC; - - /* Fill in some attrs */ - switch (transport) { - case USD_QTR_UDP: - qp->uq_attrs.uqa_hdr_len = sizeof(struct usd_udp_hdr); - break; - case USD_QTR_RAW: - qp->uq_attrs.uqa_hdr_len = 0; - break; - } - - /* - * Now, do the type-specific configuration - */ - switch (qtype) { - case USD_QTY_UD: - case USD_QTY_UD_PIO: - ret = usd_create_qp_ud(qp); - if (ret != 0) { - goto fail; - } - break; - default: - ret = -EINVAL; - goto fail; - break; - } - - *uqp_o = to_usdqp(qp); - return 0; - -fail: - if (qp != NULL) { - usd_destroy_qp(to_usdqp(qp)); - } - return ret; -} - -/* - * Return attributes of a QP - */ -int -usd_get_qp_attrs( - struct usd_qp *uqp, - struct usd_qp_attrs *qattrs) -{ - struct usd_qp_impl *qp; - - qp = to_qpi(uqp); - *qattrs = qp->uq_attrs; - return 0; -} - -int usd_get_completion_fd(struct usd_device *dev, int *comp_fd_o) -{ - if (dev == NULL || comp_fd_o == NULL) - return -EINVAL; - - return usd_ib_cmd_create_comp_channel(dev, comp_fd_o); -} - -int usd_put_completion_fd(struct usd_device *dev, int comp_fd) -{ - if (dev == NULL || comp_fd < 0) - return -EINVAL; - - if (close(comp_fd) == -1) - return -errno; - - return 0; -} diff --git a/prov/usnic/src/usnic_direct/usd_socket.c b/prov/usnic/src/usnic_direct/usd_socket.c deleted file mode 100644 index c23cb8f4d83..00000000000 --- 
a/prov/usnic/src/usnic_direct/usd_socket.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "usd.h" -#include "usd_util.h" -#include "usd_socket.h" - -/* - * Get the IP address and other information associated with this - * device's interface. - */ -int -usd_get_dev_if_info( - struct usd_device *dev) -{ - struct sockaddr_in sin; - struct ifreq ifr; - struct usd_device_attrs *dp; - uint32_t netmask; - int s; - int ret; - - s = socket(AF_INET, SOCK_DGRAM, 0); - if (s == -1) - return -errno; - - dp = &dev->ud_attrs; - - dp->uda_ifindex = if_nametoindex(dp->uda_ifname); - if (dp->uda_ifindex == 0) - goto out; - - ifr.ifr_addr.sa_family = AF_INET; - strncpy(ifr.ifr_name, dp->uda_ifname, IFNAMSIZ - 1); - - ret = ioctl(s, SIOCGIFADDR, &ifr); - if (ret == 0) { - dp->uda_ipaddr_be = - ((struct sockaddr_in *) &ifr.ifr_addr)->sin_addr.s_addr; - } - - ret = ioctl(s, SIOCGIFNETMASK, &ifr); - if (ret == 0) { - dp->uda_netmask_be = - ((struct sockaddr_in *) &ifr.ifr_netmask)->sin_addr.s_addr; - netmask = ntohl(dp->uda_netmask_be); - dp->uda_prefixlen = 32 - msbit(~netmask); - } - - ret = ioctl(s, SIOCGIFMTU, &ifr); - if (ret == 0) { - dp->uda_mtu = ifr.ifr_mtu; - } - - if (dp->uda_ipaddr_be != 0) { - memset(&sin, 0, sizeof(sin)); - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = dp->uda_ipaddr_be; - sin.sin_port = 0; - ret = bind(s, (struct sockaddr *) &sin, sizeof(sin)); - if (ret == -1) - goto out; - dev->ud_arp_sockfd = s; - } else { - close(s); - } - - return 0; - out: - close(s); - return -errno; -} diff --git a/prov/usnic/src/usnic_direct/usd_socket.h b/prov/usnic/src/usnic_direct/usd_socket.h deleted file mode 100644 index a8c015f8c0a..00000000000 --- a/prov/usnic/src/usnic_direct/usd_socket.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#ifndef _USD_SOCKET_ -#define _USD_SOCKET_ - -/* - * Forward structure defs - */ -struct usd_device; - -int usd_get_dev_if_info(struct usd_device *dev); -int usd_ip_to_mac(struct usd_device *dev, uint32_t ipaddr, - uint8_t * mac_o); -#endif /* _USD_SOCKET_ */ diff --git a/prov/usnic/src/usnic_direct/usd_time.h b/prov/usnic/src/usnic_direct/usd_time.h deleted file mode 100644 index c331025dd24..00000000000 --- a/prov/usnic/src/usnic_direct/usd_time.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - * definitions about time - */ - -#ifndef _USD_TIME_H_ -#define _USD_TIME_H_ - -#include - -typedef uint64_t usd_time_t; - -static inline void usd_get_time(usd_time_t * timep) -{ - struct timespec now; - - clock_gettime(CLOCK_MONOTONIC, &now); - *timep = now.tv_sec * 1000 + now.tv_nsec / 1000000; -} - -/* - * Returns time delta in ms - */ -static inline int usd_time_diff(usd_time_t time1, usd_time_t time2) -{ - return time2 - time1; -} -#endif /* _USD_TIME_H_ */ diff --git a/prov/usnic/src/usnic_direct/usd_util.h b/prov/usnic/src/usnic_direct/usd_util.h deleted file mode 100644 index a7736309b34..00000000000 --- a/prov/usnic/src/usnic_direct/usd_util.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#ifndef _USD_UTIL_H_ -#define _USD_UTIL_H_ - -#include -#include - -static uint8_t bittab[] = { - 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, -}; - -static inline int -msbit( - uint32_t val) -{ - int bit; - - bit = 0; - - if (val & (0xffff << 16)) { - val >>= 16; - bit += 16; - } - if (val & (0xff << 8)) { - val >>= 8; - bit += 8; - } - return bittab[val] + bit; -} - -#define usd_offset_of(type, memb) \ - ((unsigned long)(&((type *)0)->memb)) -#define usd_container_of(obj, type, memb) \ - ((type *)(((char *)obj) - usd_offset_of(type, memb))) - -static inline void hex(void *vcp, int len) -{ - uint8_t *cp = vcp; - int i; - for (i = 0; i < len; ++i) { - printf("%02x%c", *cp++, ((i & 15) == 15) ? 10 : 32); - } - if (i & 15) - puts(""); -} - -/* - * 48-bit type. Byte aligned. - */ -typedef struct { - unsigned char net_data[6]; -} net48_t; - -/** - * net48_get(net) - fetch from a network-order 48-bit field. - * - * @param net pointer to type net48_t, network-order 48-bit data. - * @return the host-order value. 
- */ -static inline u_int64_t net48_get(const net48_t * net) -{ - return ((u_int64_t) net->net_data[0] << 40) | - ((u_int64_t) net->net_data[1] << 32) | - ((u_int64_t) net->net_data[2] << 24) | - ((u_int64_t) net->net_data[3] << 16) | - ((u_int64_t) net->net_data[4] << 8) | - (u_int64_t) net->net_data[5]; -} - -/** - * net48_put(net, val) - store to a network-order 48-bit field. - * - * @param net pointer to a net48_t, network-order 48-bit data. - * @param val host-order value to be stored at net. - */ -static inline void net48_put(net48_t * net, u_int64_t val) -{ - net->net_data[0] = (u_int8_t)((val >> 40) & 0xFF); - net->net_data[1] = (u_int8_t)((val >> 32) & 0xFF); - net->net_data[2] = (u_int8_t)((val >> 24) & 0xFF); - net->net_data[3] = (u_int8_t)((val >> 16) & 0xFF); - net->net_data[4] = (u_int8_t)((val >> 8) & 0xFF); - net->net_data[5] = (u_int8_t)(val & 0xFF); -} - -static inline void usd_perror(const char *s) -{ - if (USD_DEBUG) { - perror(s); - } -} -#endif /* _USD_UTIL_H_ */ diff --git a/prov/usnic/src/usnic_direct/usd_vnic.c b/prov/usnic/src/usnic_direct/usd_vnic.c deleted file mode 100644 index 3090cfeb087..00000000000 --- a/prov/usnic/src/usnic_direct/usd_vnic.c +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#include -#include - -#include "kcompat.h" -#include "vnic_dev.h" -#include "vnic_enet.h" - -#include "usd.h" -#include "usd_util.h" -#include "usd_vnic.h" -#include "usd_ib_cmd.h" - -#define GET_CONFIG(m) \ - do { \ - ret = usd_dev_spec(qp->uq_dev, \ - usd_offset_of(struct vnic_enet_config, m), \ - sizeof(c->m), &c->m); \ - if (ret) { \ - printf("Error %d getting " #m "\n", ret); \ - } else { \ - printf(#m " = 0x%lx\n", (u64)c->m); \ - } \ - } while (0) - - -int usd_vnic_dev_cmd(struct usd_device *dev, enum vnic_devcmd_cmd cmd, - u64 *a0, u64 *a1, int wait) -{ - return usd_ib_cmd_devcmd(dev, cmd, a0, a1, wait); -} - -#if 0 -/* - * Dump the devspec (for debugging) - */ -int -usd_dump_devspec( - struct usd_qp_impl *qp) -{ - struct vnic_enet_config config; - struct vnic_enet_config *c; - int ret; - - c = &config; - memset(&config, 0, sizeof(config)); - - GET_CONFIG(flags); - GET_CONFIG(wq_desc_count); - GET_CONFIG(rq_desc_count); - GET_CONFIG(mtu); - GET_CONFIG(intr_timer_deprecated); - GET_CONFIG(intr_timer_type); - GET_CONFIG(intr_mode); - GET_CONFIG(intr_timer_usec); - GET_CONFIG(loop_tag); - GET_CONFIG(vf_rq_count); - GET_CONFIG(num_arfs); - GET_CONFIG(mem_paddr); - - ret = vnic_dev_spec(qp->uq_vf->vf_vdev, - usd_offset_of(struct vnic_enet_config, devname), - 8, &c->devname[0]); - ret |= vnic_dev_spec(qp->uq_vf->vf_vdev, - usd_offset_of(struct vnic_enet_config, devname) + 8, - 8, &c->devname[8]); - printf("devname = \"%s\", ret = %d\n", c->devname, ret); - - return 0; -} -#endif - -/* - * Get some QP settings from devspec - */ -/* -int -usd_get_devspec( - struct usd_qp_impl *qp) -{ - struct vnic_enet_config config; - unsigned int offset; - int ret; - - offset = usd_offset_of(struct vnic_enet_config, mem_paddr); - ret = vnic_dev_spec(qp->uq_vf->vf_vdev, offset, - sizeof(config.mem_paddr), &config.mem_paddr); - if (ret != 0) { - return ret; - } - - qp->uq_attrs.uqa_pio_paddr = config.mem_paddr; - - return 0; -} -*/ - -/* - * 
general dev_spec function to replace vnic_dev_spec - */ -int usd_dev_spec(struct usd_device *dev, unsigned int offset, - size_t size, void *value) -{ - u64 a0, a1; - int wait = 1000; - int err; - - a0 = offset; - a1 = size; - - err = usd_vnic_dev_cmd(dev, CMD_DEV_SPEC, &a0, &a1, wait); - - switch (size) { - case 1: - *(u8 *)value = (u8)a0; - break; - case 2: - *(u16 *)value = (u16)a0; - break; - case 4: - *(u32 *)value = (u32)a0; - break; - case 8: - *(u64 *)value = a0; - break; - default: - return -EINVAL; - break; - } - - return err; -} - -int usd_get_piopa(struct usd_qp_impl *qp) -{ - struct vnic_enet_config config; - unsigned int offset; - int ret; - - offset = usd_offset_of(struct vnic_enet_config, mem_paddr); - ret = usd_dev_spec(qp->uq_dev, offset, - sizeof(config.mem_paddr), &config.mem_paddr); - if (ret != 0) { - return ret; - } - - qp->uq_attrs.uqa_pio_paddr = config.mem_paddr; - - return 0; -} - -/* - * Issue HANG_NOTIFY to the VNIC - */ -int -usd_vnic_hang_notify( - struct usd_qp *uqp) -{ - struct usd_qp_impl *qp; - u64 a0; - int ret; - - qp = to_qpi(uqp); - ret = usd_vnic_dev_cmd(qp->uq_dev, CMD_HANG_NOTIFY, - &a0, &a0, 1000); - if (ret != 0) { - fprintf(stderr, "hang_notify ret = %d\n", ret); - return ret; - } - - return 0; -} diff --git a/prov/usnic/src/usnic_direct/usd_vnic.h b/prov/usnic/src/usnic_direct/usd_vnic.h deleted file mode 100644 index 80051a8a41f..00000000000 --- a/prov/usnic/src/usnic_direct/usd_vnic.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#ifndef _USD_VNIC_H_ -#define _USD_VNIC_H_ - -int usd_vnic_dev_cmd(struct usd_device *dev, enum vnic_devcmd_cmd cmd, - u64 *a0, u64 *a1, int wait); -int usd_dev_spec(struct usd_device *dev, unsigned int offset, - size_t size, void *value); -int usd_get_piopa(struct usd_qp_impl *qp); -int usd_vnic_hang_notify(struct usd_qp *uqp); - -#endif /* _USD_VNIC_H_ */ diff --git a/prov/usnic/src/usnic_direct/usnic_abi.h b/prov/usnic/src/usnic_direct/usnic_abi.h deleted file mode 100644 index 1554a0d446a..00000000000 --- a/prov/usnic/src/usnic_direct/usnic_abi.h +++ /dev/null @@ -1,361 +0,0 @@ -/* - * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - - -#ifndef USNIC_ABI_H -#define USNIC_ABI_H - -/* ABI between userspace and kernel */ -#define USNIC_UVERBS_ABI_VERSION 4 - -#define USNIC_QP_GRP_MAX_WQS 8 -#define USNIC_QP_GRP_MAX_RQS 8 -#define USNIC_QP_GRP_MAX_CQS 16 - -#define USNIC_DECODE_PGOFF_VFID(pgoff) ((pgoff) & ((1ULL << 32) - 1)) -#define USNIC_DECODE_PGOFF_TYPE(pgoff) ((pgoff) >> 48) -#define USNIC_DECODE_PGOFF_RES(pgoff) (((pgoff) >> 32) & ((1ULL << 16) - 1)) -#define USNIC_DECODE_PGOFF_BAR(pgoff) (((pgoff) >> 32) & ((1ULL << 16) - 1)) - -#define USNIC_ENCODE_PGOFF(vfid, map_type, res_type_bar_id) \ - (((((uint64_t)map_type & 0xffff) << 48) | \ - (((uint64_t)res_type_bar_id & 0xffff) << 32) | \ - ((uint64_t)vfid & ((1ULL << 32) - 1))) * sysconf(_SC_PAGE_SIZE)) - -/* - * The kernel module eventually issues proxy the devcmd through enic and the - * maximum number of devcmd arguments supported for a vnic is VNIC_DEVCMD_NARGS - * (= 15). Among them, 2 arguments are consumed by proxy command for - * proxy_index and proxy devcmd. Hence, only a maximum of 13 arguments are - * supported on input in practice, even though the ABI between user space and - * the kernel has space for 15. - * - * Keep _NARGS as 15 for backwards compatibility (newer user space with older - * kernel), otherwise usnic_ucmd_devcmd() on the older kernel will fail with - * -EINVAL. 
- */ -#define USNIC_UDEVCMD_NARGS 15 -#define USNIC_UDEVCMD_MAX_IN_ARGS (USNIC_UDEVCMD_NARGS - 2) - -enum usnic_mmap_type { - USNIC_MMAP_BAR = 0, - USNIC_MMAP_RES = 1, - USNIC_MMAP_BARHEAD = 2, - USNIC_MMAP_GRPVECT = 3, -}; - -enum usnic_transport_type { - USNIC_TRANSPORT_UNKNOWN = 0, - USNIC_TRANSPORT_ROCE_CUSTOM = 1, - USNIC_TRANSPORT_IPV4_UDP = 2, - USNIC_TRANSPORT_IPV4_TCP_3T = 3, - USNIC_TRANSPORT_ROCEV2 = 4, - USNIC_TRANSPORT_MAX = 5, -}; - -#define ROCEV2_PORT 4791 - -enum usnic_ucmd_type { - USNIC_USER_CMD_DEVCMD, - USNIC_USER_CMD_MAX, -}; - -struct usnic_user_cmd { - u32 ucmd; - u32 pad_to_8byte; - u64 inbuf; - u64 outbuf; - u32 inlen; - u32 outlen; -}; - -struct usnic_udevcmd_cmd { - u32 vnic_idx; - u32 devcmd; - u32 wait; - u32 num_args; - u64 args[USNIC_UDEVCMD_NARGS]; -}; - -struct usnic_udevcmd_resp { - u32 num_args; - u64 args[USNIC_UDEVCMD_NARGS]; -}; - -/* - * This is the version of the transport_spec structure that is used - * in CREATE_QP versions 0..2 - */ -struct usnic_transport_spec_v2 { - enum usnic_transport_type trans_type; - union { - struct { - uint16_t port_num; - } usnic_roce; - struct { - uint32_t sock_fd; - } ip; - }; -}; - -/* - * This is the version of the transport_spec structure that is used - * in CREATE_QP versions 3.. 
- */ -struct usnic_transport_spec { - enum usnic_transport_type trans_type; - union { - struct { - uint16_t port_num; - } usnic_roce; - struct { - uint32_t sock_fd; - } ip; - struct { - uint32_t qpn; - uint32_t ipaddr_be; - } rocev2; - u_int8_t pad[256]; - }; -}; - -#define USNIC_IB_ALLOC_PD_VERSION 1 - -struct usnic_ib_alloc_pd_cmd { - u32 resp_version; /* response version requested */ - u32 pad_to_8byte; -}; - -struct usnic_ib_alloc_pd_resp { - u32 resp_version; - u32 pad_to_8byte; - union { - struct { - u32 vfid; - u32 grp_vect_buf_len; - } cur; /* v1 */ - }; -}; - -#define USNIC_IB_CREATE_QP_VERSION 3 - -struct usnic_ib_create_qp_cmd_v0 { - struct usnic_transport_spec_v2 spec_v2; -}; - -struct usnic_ib_create_qp_cmd_v2 { - struct usnic_transport_spec_v2 spec_v2; - u32 cmd_version; - union { - struct { - /* length in bytes of resources array */ - u32 resources_len; - - /* ptr to array of struct usnic_vnic_barres_info */ - u64 resources; - } cur; /* v1 and v2 cmd */ - } u; -}; - -struct usnic_ib_create_qp_cmd { - /* - * This is the old transport spec struct that must stay as the - * first member of this struct for backwards compatibility/ABI - * reasons.. It is "v2" because it is used with CREATE_QP - * versions 0, 1, and 2. - */ - struct usnic_transport_spec_v2 spec_v2; - u32 cmd_version; - union { - struct { - /* length in bytes of resources array */ - u32 resources_len; - - /* ptr to array of struct usnic_vnic_barres_info */ - u64 resources; - } cur; /* v1 and v2 cmd */ - } u; - /* - * This is the current version of the transport spec struct. 
- */ - struct usnic_transport_spec spec; -}; - - -/* - * infomation of vnic bar resource - */ -struct usnic_vnic_barres_info { - int32_t type; - uint32_t padding; - uint64_t bus_addr; - uint64_t len; -}; - -/* - * All create_qp responses must start with this for backwards compatability - */ -#define USNIC_IB_CREATE_QP_RESP_V0_FIELDS \ - u32 vfid; \ - u32 qp_grp_id; \ - u64 bar_bus_addr; \ - u32 bar_len; \ - u32 wq_cnt; \ - u32 rq_cnt; \ - u32 cq_cnt; \ - u32 wq_idx[USNIC_QP_GRP_MAX_WQS]; \ - u32 rq_idx[USNIC_QP_GRP_MAX_RQS]; \ - u32 cq_idx[USNIC_QP_GRP_MAX_CQS]; \ - u32 transport; - -struct usnic_ib_create_qp_resp_v0 { - USNIC_IB_CREATE_QP_RESP_V0_FIELDS - u32 reserved[9]; -}; - -struct usnic_ib_create_qp_resp { - USNIC_IB_CREATE_QP_RESP_V0_FIELDS - /* the above fields end on 4-byte alignment boundary */ - u32 cmd_version; - union { - struct { - u32 num_barres; - u32 pad_to_8byte; - } v1; - struct { - u32 num_barres; - u32 wq_err_intr_offset; - u32 rq_err_intr_offset; - u32 wcq_intr_offset; - u32 rcq_intr_offset; - u32 barhead_len; - } cur; /* v2 */ - } u; - - /* v0 had a "reserved[9]" field, must not shrink the response or we can - * corrupt newer clients running on older kernels */ - u32 reserved[2]; -}; - -#define USNIC_CTX_RESP_VERSION 2 - -/* - * Make this structure packed in order to make sure v1.num_caps not aligned - * at 8 byte boundary, hence still being able to support user libary - * requesting version 1 response. 
- */ -struct __attribute__((__packed__)) usnic_ib_get_context_cmd { - u32 resp_version; /* response version requested */ - union { - struct { - u32 num_caps; /* number of capabilities requested */ - } v1; - struct { - u32 encap_subcmd; /* whether encapsulate subcmd */ - union { - u32 num_caps; - struct usnic_user_cmd usnic_ucmd; - }; - } v2; - }; -}; - -/* - * Note that this enum must never have members removed or re-ordered in order - * to retain backwards compatability - */ -enum usnic_capability { - USNIC_CAP_CQ_SHARING, /* CQ sharing version */ - USNIC_CAP_MAP_PER_RES, /* Map individual RES */ - USNIC_CAP_PIO, /* PIO send */ - USNIC_CAP_CQ_INTR, /* CQ interrupts (via comp channels) */ - USNIC_CAP_GRP_INTR, /* Group interrupt */ - USNIC_CAP_DPKT, /* Direct Packet Interface */ - USNIC_CAP_CNT -}; - -/* - * If and when there become multiple versions of this struct, it will - * become a union for cross-version compatability to make sure there is always - * space for older and larger versions of the contents. 
- */ -struct usnic_ib_get_context_resp { - u32 resp_version; /* response version returned */ - u32 num_caps; /* number of capabilities returned */ - u32 cap_info[USNIC_CAP_CNT]; -}; - -#define USNIC_IB_CREATE_CQ_VERSION 2 - -struct usnic_ib_create_cq_v0 { - u64 reserved; -}; - -#define USNIC_CQ_COMP_SIGNAL_VERBS 0x1 /* whether to signal cq - * completion event via verbs - */ - -struct usnic_ib_create_cq { - u32 resp_version; /* response version requested */ - union { - struct { - u32 intr_arm_mode; - } v1; - struct { - u32 flags; - __s64 comp_event_fd; /* wait fd for cq completion */ - u64 affinity_mask_ptr; /* process affinity mask ptr*/ - u64 affinity_mask_len; - } cur; /* v2 */ - }; -}; - -struct usnic_ib_create_cq_resp_v0 { - u64 reserved; -}; - -struct usnic_ib_create_cq_resp { - u32 resp_version; /* response version returned */ - u32 pad_to_8byte; -}; - -#endif /* USNIC_ABI_H */ diff --git a/prov/usnic/src/usnic_direct/usnic_direct.h b/prov/usnic/src/usnic_direct/usnic_direct.h deleted file mode 100644 index a224e1cc0f0..00000000000 --- a/prov/usnic/src/usnic_direct/usnic_direct.h +++ /dev/null @@ -1,719 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#ifndef _USNIC_DIRECT_H_ -#define _USNIC_DIRECT_H_ - -#include -#include -#include -#include -#include -#include - -#define USD_MAX_DEVICES 8 -#define USD_MAX_DEVNAME 16 -#define USD_RECV_MAX_SGE 8 -#define USD_SEND_MAX_SGE 8 - -enum usd_link_state { - USD_LINK_DOWN, - USD_LINK_UP -}; - -/* forward structure defs */ -struct usd_context; -struct usd_qp; -struct usd_device; -struct usd_dest; -struct usd_connection; -struct usd_mr; - -struct usd_device_attrs { - char uda_devname[USD_MAX_DEVNAME]; - char uda_ifname[IFNAMSIZ]; - int uda_ifindex; - uint8_t uda_mac_addr[ETH_ALEN]; - - /* IP config */ - uint32_t uda_ipaddr_be; - uint32_t uda_netmask_be; - uint32_t uda_prefixlen; /* netmask length */ - uint32_t uda_mtu; - enum usd_link_state uda_link_state; - - /* HW info */ - uint32_t uda_vendor_id; - uint32_t uda_vendor_part_id; - uint32_t uda_device_id; - char uda_firmware[64]; - - /* usnic config */ - unsigned uda_num_vf; - unsigned uda_cq_per_vf; - unsigned uda_qp_per_vf; - unsigned uda_intr_per_vf; - unsigned uda_num_comp_vectors; - unsigned uda_max_cq; - unsigned uda_max_qp; - - /* VIC constants */ - uint32_t uda_bandwidth; - unsigned uda_max_cqe; - unsigned uda_max_send_credits; - unsigned uda_max_recv_credits; - - /* fd that can be used to poll for device events */ - int uda_event_fd; -}; - -enum usd_completion_status { - USD_COMPSTAT_SUCCESS, - USD_COMPSTAT_ERROR_CRC, - USD_COMPSTAT_ERROR_TRUNC, - USD_COMPSTAT_ERROR_TIMEOUT, - USD_COMPSTAT_ERROR_INTERNAL -}; -enum usd_completion_type { - USD_COMPTYPE_SEND=0, - USD_COMPTYPE_RECV=7, -}; - -struct usd_completion { - enum usd_completion_status uc_status; - enum usd_completion_type uc_type; - uint32_t uc_bytes; - uint16_t uc_rkey; - struct usd_qp *uc_qp; - void *uc_context; - u_int16_t uc_retrans; -}; - -struct usd_recv_desc { - void *urd_context; - struct iovec urd_iov[USD_RECV_MAX_SGE]; - size_t urd_iov_cnt; - struct usd_recv_desc *urd_next; -}; - -/* - * Operations that may vary 
based on transport/QP type - */ -struct usd_qp_ops { - int (*qo_post_send_one)(struct usd_qp *qp, - struct usd_dest *dest, const void *buf, size_t len, - uint32_t flags, void *context); - int (*qo_post_send_one_prefixed)(struct usd_qp *qp, - struct usd_dest *dest, const void *buf, size_t len, - uint32_t flags, void *context); - int (*qo_post_send_one_copy)(struct usd_qp *qp, - struct usd_dest *dest, const void *buf, size_t len, - uint32_t flags, void *context); - int (*qo_post_send_two_copy)(struct usd_qp *qp, - struct usd_dest *dest, const void *hdr, size_t hdrlen, - const void *pkt, size_t pktlen, uint32_t flags, void *context); - int (*qo_post_send_iov)(struct usd_qp *qp, - struct usd_dest *dest, const struct iovec* iov, - size_t iov_count, uint32_t flags, void *context); - int (*qo_post_send_one_vlan)(struct usd_qp *qp, - struct usd_dest *dest, const void *buf, size_t len, - u_int16_t vlan, uint32_t flags, void *context); -}; - -/* - * user's view of a CQ - */ -struct usd_cq { - unsigned ucq_num_entries; -}; - -/* - * User's view of a QP - */ -struct usd_qp { - struct usd_qp_ops uq_ops; - void *uq_context; /* place for user to scribble */ -}; - -/* - * Filters for QPs - */ -enum usd_filter_type { - USD_FTY_UDP, - USD_FTY_UDP_SOCK, - USD_FTY_TCP, - USD_FTY_MCAST, - USD_FTY_8915 -}; -struct usd_filter { - enum usd_filter_type uf_type; - union { - struct { - uint16_t u_port; - } uf_udp; - struct { - int u_sock; - } uf_udp_sock; - struct { - int t_sock; - struct sockaddr_in t_remote; - } uf_tcp; - struct { - struct sockaddr_in m_addr; - } uf_mcast; - } uf_filter; -}; - -/* - * Local address - much like a filter - * Type is defined by transport specified in create_qp - */ -struct usd_local_addr { - union { - struct { - struct sockaddr_in u_addr; - } ul_udp; - struct { - uint32_t qp_num; - } ul_8915; - } ul_addr; -}; - -enum usd_qp_transport { - USD_QTR_RAW, /* no header added */ - USD_QTR_UDP /* create UDP header based on dest */ -}; - -enum usd_qp_type { - 
USD_QTY_UD, - USD_QTY_UD_PIO, -}; - -/* - * Attributes of a queue pair - */ -struct usd_qp_attrs { - enum usd_qp_transport uqa_transport; - enum usd_qp_type uqa_qtype; - struct usd_local_addr uqa_local_addr; - - unsigned uqa_max_send_credits; - unsigned uqa_max_recv_credits; - uint64_t uqa_pio_paddr; - - unsigned uqa_max_inline; - unsigned uqa_hdr_len; /* length of header for this QP */ -}; - -/* - * Description of a device event which has occurred - */ -enum usd_device_event_type { - USD_EVENT_LINK_UP, - USD_EVENT_LINK_DOWN -}; -struct usd_device_event { - union { - void *ude_context; - } ude_context; - enum usd_device_event_type ude_type; -}; - -/* - * Returned form usd_get_available_devices() - array of currently - * available usd device names - */ -struct usd_device_entry { - char ude_devname[USD_MAX_DEVNAME]; -}; - -/* - * Send flags - */ -enum usd_send_flag_shift { - USD_SFS_SIGNAL, -}; -#define USD_SF_SIGNAL (1 << USD_SFS_SIGNAL) - - /* - * cq creation parameters - */ -struct usd_cq_init_attr { - unsigned num_entries; /* number of requested cq elements */ - int comp_fd; /* completion fd */ - int comp_vec; /* requested completion vector */ - int comp_req_notify; /* whether need to request notify for each completion */ - void *ibv_cq; /* verbs userspace cq object if signaling through uverbs */ -}; - -/* - * Headers for defined transport types - */ -struct usd_udp_hdr { - struct ether_header uh_eth; - struct iphdr uh_ip; - struct udphdr uh_udp; -} __attribute__ ((__packed__)); - -/* - * Struct and defines for usd open parameters - */ -#define UOPF_SKIP_LINK_CHECK 0x1 -#define UOPF_SKIP_PD_ALLOC 0x2 - -struct usd_open_params { - int flags; - int cmd_fd; - struct usd_context *context; -}; - -/* - **************************************************************** - * Device management - **************************************************************** - */ -int usd_get_device_list(struct usd_device_entry *dev_array, - int *num_devs); - -int usd_open(const char 
*devname, struct usd_device **dev_o); - -int usd_open_with_params(const char *dev_name, - struct usd_open_params *uop_param, - struct usd_device **dev_o); - -int usd_close(struct usd_device *dev); - -int usd_get_device_attrs(struct usd_device *dev, - struct usd_device_attrs *attr); - -int usd_get_device_event(struct usd_device *dev, - struct usd_device_event *event); - -enum usd_capability { - USD_CAP_CQ_SHARING, - USD_CAP_MAP_PER_RES, - USD_CAP_PIO, - USD_CAP_CQ_INTR, - USD_CAP_GRP_INTR, - USD_CAP_MAX -}; -int usd_get_cap(struct usd_device *dev, enum usd_capability cap); - -/* - **************************************************************** - * Queue management - **************************************************************** - */ - -/* - * Get a file descriptor which can be used to poll for completions. The - * returned file descriptor will be different on each call to - * usd_get_completion_fd, so that coordination is not needed when using these - * fds in syscalls like poll(2). - */ -int usd_get_completion_fd(struct usd_device *dev, int *comp_fd_o); - -int usd_put_completion_fd(struct usd_device *dev, int comp_fd); - -/* - * Request a CQ with specified attributes: - * dev - device on which to create this CQ - * init_attr - CQ creation parameters - */ -int usd_create_cq(struct usd_device *dev, struct usd_cq_init_attr *init_attr, - struct usd_cq **cq_o); - -int usd_destroy_cq(struct usd_cq *cq); - -int usd_cq_intr_enable(struct usd_cq *cq); -int usd_cq_intr_disable(struct usd_cq *cq); - -/* - * Get and set interrupt coalescing delay, units are in microseconds - */ -int usd_cq_set_intr_coal(struct usd_cq *cq, unsigned intr_coal_delay); -unsigned usd_cq_get_intr_coal(struct usd_cq *cq); - -/* - * IN: - * dev - device on which QP is to be created - * transport - what transport to use on this queue - * type - type of queue to create - * wcq - CQ handle for send completions - * rcq - CQ handle for receive completions - * send_credits - Number of send credits 
requested - * recv_credite - Number of receive buffer credits requested - * port - Requested local port for QP (0 lets library choose) - * qp_o - Address to receive QP handle on successful completion - * OUT: - * Returns 0 or code from errno.h - * 0 - successful completion - * EBUSY - port is in use - * XXX - */ -int usd_create_qp(struct usd_device *dev, - enum usd_qp_transport transport, - enum usd_qp_type qtype, - struct usd_cq *wcq, struct usd_cq *rcq, - unsigned send_credits, unsigned recv_credits, - struct usd_filter *filt, struct usd_qp **qp_o); - -int usd_destroy_qp(struct usd_qp *qp); - -int usd_enable_qp(struct usd_qp *qp); -int usd_disable_qp(struct usd_qp *qp); - -int usd_get_qp_attrs(struct usd_qp *qp, - struct usd_qp_attrs *qp_attrs_o); - -/* - * Add a filter to a QP - */ -int usd_qp_add_filter(struct usd_qp *qp, struct usd_filter *filter); - -/* - * Get current send credits - */ -unsigned usd_get_send_credits(struct usd_qp *uqp); - -/* - * Get current recv credits - */ -unsigned usd_get_recv_credits(struct usd_qp *uqp); - -/* - **************************************************************** - * Memory management - **************************************************************** - */ - -int usd_reg_mr(struct usd_device *dev, - void *buffer, size_t size, struct usd_mr **mr_o); -int usd_dereg_mr(struct usd_mr *mr); - -int usd_alloc_mr(struct usd_device *dev, size_t size, void **vaddr_o); -int usd_free_mr(void *vaddr); - -/* - **************************************************************** - * Destination management - **************************************************************** - */ - -/* - * Return the distance metric to a specified IP address - * Metric is: - * 0 - same VLAN - * 1..MAXINT - relative distance metric - * -1 - unreachable - */ -int usd_get_dest_distance(struct usd_device *dev, uint32_t daddr_be, - int *metric_o); - -/* - * Settings for address resolution timeout and retry - */ -struct usd_dest_params { - unsigned dp_arp_timeout; /* 
per-try timeout in ms */ - unsigned dp_max_arps; -}; - -/* - * Get address resolution settings - */ -int usd_get_dest_params(struct usd_dest_params *params); - -/* - * Set address resolution settings - * Settings may not be changed while any resolution requests are in progress. - */ -int usd_set_dest_params(struct usd_dest_params *params); - -/* - * Used to create a destination with MAC address is already known. - */ -int usd_create_dest_with_mac(struct usd_device *dev, uint32_t daddr_be, - uint16_t port_be, uint8_t *dmac, struct usd_dest **dest_o); - -/* - * Synchronously creates a destination - */ -int usd_create_dest(struct usd_device *dev, uint32_t daddr_be, - uint16_t port_be, struct usd_dest **dest_o); - -/* - * Start the necessary ARP resolution to create a destination - * Resolution progress is performed in usd_create_dest_query() and - * usd_create_dest_poll() - */ -int usd_create_dest_start(struct usd_device *dev, uint32_t daddr_be, - uint16_t dport_be, void *context); - -/* - * Cancel resolution on a not-yet-completed create_dest request - */ -int usd_create_dest_cancel(struct usd_device *dev, void *context); - -/* - * Extract dest port and IP from a destination - */ -int usd_expand_dest(struct usd_dest *dest, uint32_t *dest_ip_be_o, - uint16_t *dest_port_be_o); - -/* - * Query completion status of a given create_dest request - * If complete, newly allocated destination is returned in dest_o - * Returns: - * 0 - request completed, *status is valid - * dest_o valid if *status == 0 - * -EAGAIN - nothing is complete - * other - negative errno code - */ -int usd_create_dest_query(struct usd_device *dev, void *context, int *status, - struct usd_dest **dest_o); - -/* - * Checks for completed destination creation. 
- * context specified in call to usd_create_dest_start is returned, - * newly allocated destination is returned in dest_o - * Returns: - * 0 - request completed, status and context_o valid - * dest_o valid if *status == 0 - * -EAGAIN - nothing is complete - * other - negative errno code - */ -int usd_create_dest_poll(struct usd_device *dev, void **context_o, int *status, - struct usd_dest **dest_o); - - -int usd_destroy_dest(struct usd_dest *dest); - -/* - **************************************************************** - * Sending, receiving, and completions - **************************************************************** - */ - -/* - * Post a receive. The number of receive credits consumed is equal - * to the number of entries in the SG list of the recv_desc, or - * recv_desc.urd_iov_cnt - */ -int usd_post_recv(struct usd_qp *qp, - struct usd_recv_desc *recv_list); - -int usd_poll_cq_multi(struct usd_cq *cq, int max_comps, - struct usd_completion *comps); -int usd_poll_cq(struct usd_cq *cq, struct usd_completion *comp); -int usd_poll_req_notify(struct usd_cq *ucq); - -unsigned usd_get_send_credits(struct usd_qp *qp); - -unsigned usd_get_recv_credits(struct usd_qp *qp); - -/* - * post a single-buffer send from registered memory - * IN: - * qp - * dest - * buf - - * Requires 2 send credits - */ -static inline int -usd_post_send_one( - struct usd_qp *qp, - struct usd_dest *dest, - const void *buf, - size_t len, - uint32_t flags, - void *context) -{ - return qp->uq_ops.qo_post_send_one( - qp, dest, buf, len, flags, context); -} - -/* - * post a single-buffer send from registered memory to specified VLAN - * IN: - * qp - * dest - * buf - - * Requires 2 send credits - */ -static inline int -usd_post_send_one_vlan( - struct usd_qp *qp, - struct usd_dest *dest, - const void *buf, - size_t len, - u_int16_t vlan, - uint32_t flags, - void *context) -{ - return qp->uq_ops.qo_post_send_one_vlan( - qp, dest, buf, len, vlan, flags, context); -} - -/* - * post a single-buffer 
send from registered memory - * Caller must allow sufficient space *before* the packet for usd header - * For optimal efficieny, the buffer should be aligned on XXX boundary - * IN: - * qp - * dest - * buf - - * Requires 1 send credit - */ -static inline int -usd_post_send_one_prefixed( - struct usd_qp *qp, - struct usd_dest *dest, - const void *buf, - size_t len, - uint32_t flags, - void *context) -{ - return qp->uq_ops.qo_post_send_one_prefixed( - qp, dest, buf, len, flags, context); -} - -/* - * post a single-buffer send from anywhere - * Data is copied into registered memory by the lib for sending - * IN: - * qp - * dest - * buf - - * len - number of bytes in buffer, must be less than max_inline for the QP - * Requires 1 send credit - */ -static inline int -usd_post_send_one_copy(struct usd_qp *qp, struct usd_dest *dest, - const void *buf, size_t len, uint32_t flags, void *context) -{ - return qp->uq_ops.qo_post_send_one_copy( - qp, dest, buf, len, flags, context); -} - -/* - * post a two-buffer send, the first buffer is a usually a header and must - * allow space *before* it for our header. - * For optimal efficieny, the first buffer should be aligned XXX - * Requires 2 send credits - */ -int usd_post_send_two_prefixed(struct usd_qp *qp, struct usd_dest *dest, - const void *hdr, size_t hdr_len, const void *pkt, size_t pkt_len, - uint32_t flags, void *context); - -/* - * post a two-buffer send, the first buffer is a usually a header. - * The header and the packet will be both be copied into registered - * memory by usnic_direct and sent. - * Requires 2 send credits - */ -static inline int -usd_post_send_two_copy(struct usd_qp *qp, struct usd_dest *dest, - const void *hdr, size_t hdrlen, const void *pkt, size_t pktlen, - uint32_t flags, void *context) -{ - return qp->uq_ops.qo_post_send_two_copy( - qp, dest, hdr, hdrlen, pkt, pktlen, flags, context); -} - -/* - * Post an N-buffer send - * All buffers must be in registered memory. 
- * Requires iov_count + 1 send credits - */ -static inline int -usd_post_send_iov(struct usd_qp *qp, struct usd_dest *dest, - const struct iovec *iov, size_t iov_count, uint32_t flags, - void *context) -{ - return qp->uq_ops.qo_post_send_iov( - qp, dest, iov, iov_count, flags, context); -} - -/**************************************************************** - * enum-to-string utility functions (for prettyprinting) - ****************************************************************/ - -const char *usd_link_state_str(enum usd_link_state state); - -const char *usd_completion_status_str(enum usd_completion_status cstatus); - -const char *usd_completion_type_str(enum usd_completion_type ctype); - -const char *usd_filter_type_str(enum usd_filter_type ftype); - -const char *usd_qp_transport_str(enum usd_qp_transport qpt); - -const char *usd_qp_type_str(enum usd_qp_type); - -const char *usd_qp_event_event_type_str(enum usd_device_event_type det); - -const char *usd_send_flag_sift_str(enum usd_send_flag_shift sfs); - -const char *usd_capability(enum usd_capability cap); - -const char *usd_devid_to_nicname(uint32_t vendor_id, uint32_t device_id); - -const char *usd_devid_to_pid(uint32_t vendor_id, uint32_t device_id); - -/**************************************************************** - * special API holes punched for implementing verbs - ****************************************************************/ -/* open a context, mapped to a verbs open_device call */ -int usd_open_context(const char *dev_name, int cmd_fd, - struct usd_context **ctx_o); - -int usd_close_context(struct usd_context *ctx); - -/* modify the destination UDP port in a usd_dest */ -void usd_dest_set_udp_ports(struct usd_dest *dest, struct usd_qp *src_qp, - uint16_t dest_port_be); - -/* create a dest with only IP addresses set */ -int usd_create_ip_dest(struct usd_device *dev, uint32_t dest_ip_be, - struct usd_dest **dest_o); - -#endif /* _USNIC_DIRECT_H_ */ diff --git 
a/prov/usnic/src/usnic_direct/usnic_ib_abi.h b/prov/usnic/src/usnic_direct/usnic_ib_abi.h deleted file mode 100644 index dbf7cbc9914..00000000000 --- a/prov/usnic/src/usnic_direct/usnic_ib_abi.h +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (c) 2013-2017, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - - -#ifndef USNIC_IB_ABI_H -#define USNIC_IB_ABI_H - -#include "kcompat.h" -#include - -/* - * Pick up common file with driver - */ -#include "usnic_abi.h" - -struct usnic_query_device { - struct ib_uverbs_cmd_hdr ibv_cmd_hdr; - struct ib_uverbs_query_device ibv_cmd; -}; - -struct usnic_query_port { - struct ib_uverbs_cmd_hdr ibv_cmd_hdr; - struct ib_uverbs_query_port ibv_cmd; -}; - -struct usnic_get_context { - struct ib_uverbs_cmd_hdr ibv_cmd_hdr; - struct ib_uverbs_get_context ibv_cmd; - struct usnic_ib_get_context_cmd usnic_cmd; - __u64 reserved; -}; - -struct usnic_get_context_resp { - struct ib_uverbs_get_context_resp ibv_resp; - struct usnic_ib_get_context_resp usnic_resp; - __u64 reserved; -}; - -struct usnic_alloc_pd { - struct ib_uverbs_cmd_hdr ibv_cmd_hdr; - struct ib_uverbs_alloc_pd ibv_cmd; - struct usnic_ib_alloc_pd_cmd usnic_cmd; -}; - -struct usnic_alloc_pd_resp { - struct ib_uverbs_alloc_pd_resp ibv_resp; - struct usnic_ib_alloc_pd_resp usnic_resp; -}; - -struct usnic_dealloc_pd { - struct ib_uverbs_cmd_hdr ibv_cmd_hdr; - struct ib_uverbs_dealloc_pd ibv_cmd; -}; - -struct usnic_create_comp_channel { - struct ib_uverbs_cmd_hdr ibv_cmd_hdr; - struct ib_uverbs_create_comp_channel ibv_cmd; -}; - -struct usnic_reg_mr { - struct ib_uverbs_cmd_hdr ibv_cmd_hdr; - struct ib_uverbs_reg_mr ibv_cmd; - __u64 reserved; -}; - -struct usnic_reg_mr_resp { - struct ib_uverbs_reg_mr_resp ibv_resp; - __u64 reserved; -}; - -struct usnic_dereg_mr { - struct ib_uverbs_cmd_hdr ibv_cmd_hdr; - struct ib_uverbs_dereg_mr ibv_cmd; -}; - -struct usnic_create_qp { - struct ib_uverbs_cmd_hdr ibv_cmd_hdr; - struct ib_uverbs_create_qp ibv_cmd; - struct usnic_ib_create_qp_cmd usnic_cmd; - __u64 reserved[8]; -}; - -struct usnic_create_qp_resp { - struct ib_uverbs_create_qp_resp ibv_resp; - struct usnic_ib_create_qp_resp usnic_resp; -}; - -struct usnic_modify_qp { - struct ib_uverbs_cmd_hdr ibv_cmd_hdr; - struct ib_uverbs_modify_qp ibv_cmd; -}; - 
-struct usnic_destroy_qp { - struct ib_uverbs_cmd_hdr ibv_cmd_hdr; - struct ib_uverbs_destroy_qp ibv_cmd; -}; - -struct usnic_create_cq { - struct ib_uverbs_cmd_hdr ibv_cmd_hdr; - struct ib_uverbs_create_cq ibv_cmd; - struct usnic_ib_create_cq usnic_cmd; -}; - -struct usnic_create_cq_resp { - struct ib_uverbs_create_cq_resp ibv_resp; - struct usnic_ib_create_cq_resp usnic_resp; -}; - -struct usnic_destroy_cq { - struct ib_uverbs_cmd_hdr ibv_cmd_hdr; - struct ib_uverbs_destroy_cq ibv_cmd; -}; - -#endif /* USNIC_IB_ABI_H */ diff --git a/prov/usnic/src/usnic_direct/usnic_ip_utils.c b/prov/usnic/src/usnic_direct/usnic_ip_utils.c deleted file mode 100644 index aa8a66eae7a..00000000000 --- a/prov/usnic/src/usnic_direct/usnic_ip_utils.c +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "libnl_utils.h" -#include "usnic_user_utils.h" -#include "usnic_ip_utils.h" - -int usnic_arp_lookup(char *ifname, uint32_t ipaddr, int sockfd, uint8_t *macaddr) -{ - struct arpreq req; - struct sockaddr_in sinp; - int err; - int status; - - memset(&req, 0, sizeof req); - strncpy(req.arp_dev, ifname, sizeof(req.arp_dev) - 1); - memset(&sinp, 0, sizeof(sinp)); - sinp.sin_family = AF_INET; - sinp.sin_addr.s_addr = ipaddr; - memcpy(&req.arp_pa, &sinp, sizeof(sinp)); - - err = 0; - status = ioctl(sockfd, SIOCGARP, (char *)&req, sizeof(req)); - if (status != -1 && (req.arp_flags & ATF_COM)) - memcpy(macaddr, req.arp_ha.sa_data, 6); - else if (status != -1) /* req.arp_flags & ATF_COM == 0 */ - err = EAGAIN; - else if (errno == ENXIO) /* ENXIO means no ARP entry was found */ - err = EAGAIN; - else /* status == -1 */ - err = errno; - - return err; -} - -static int usnic_arp_lookup_index(int if_index, uint32_t ipaddr, int sockfd, uint8_t *macaddr) -{ - char ifname[IF_NAMESIZE]; - - if (if_indextoname((unsigned int)if_index, ifname) == NULL) { - usnic_perr("if_indextoname failed. 
ifindex: %d", if_index); - return errno; - } - - return usnic_arp_lookup(ifname, ipaddr, sockfd, macaddr); -} - -int usnic_arp_request(uint32_t ipaddr, int sockfd) -{ - struct sockaddr_in sin; - int err = 0; - - memset(&sin, 0, sizeof(sin)); - sin.sin_addr.s_addr = ipaddr; - sin.sin_port = htons(9); /* Send to Discard Protocol */ - err = sendto(sockfd, NULL, 0, 0, (struct sockaddr *)&sin, sizeof(sin)); - if (err == -1) { - char buf[INET_ADDRSTRLEN]; - inet_ntop(AF_INET, &ipaddr, buf, sizeof(buf)); - usnic_perr("Arp triggering socket sendto() failed. ip: %s", - buf); - } - else - err = 0; - - return err; -} - -static -int usnic_resolve_arp(int if_index, uint32_t ipaddr, uint8_t *macaddr) -{ - int sockfd; - int err; - char buf[INET_ADDRSTRLEN]; - - inet_ntop(AF_INET, &ipaddr, buf, sizeof(buf)); - sockfd = socket(AF_INET, SOCK_DGRAM, 0); - if (sockfd == -1) { - usnic_perr( - "socket() failed when creating socket for arp resolution, ip: %s", - buf); - return ENXIO; - } - - err = usnic_arp_lookup_index(if_index, ipaddr, sockfd, macaddr); - if (err == EAGAIN || err == ENXIO) { - /* entry is FAILED or INCOMPLETE or does not exist, send a dummy packet */ - err = usnic_arp_request(ipaddr, sockfd); - if (err) /* sendto failure, abort */ - err = ENXIO; - else - err = EAGAIN; - } - - close(sockfd); - return err; -} - -int usnic_resolve_dst(int if_index, uint32_t src_ip_addr, - uint32_t dst_ip_addr, uint8_t *macaddr) -{ - uint32_t nh_ip_addr = 0; - int err; - - err = usnic_nl_rt_lookup(src_ip_addr, dst_ip_addr, if_index, - &nh_ip_addr); - if (err) { - char ifname[IFNAMSIZ]; - char src_buf[INET_ADDRSTRLEN]; - char dst_buf[INET_ADDRSTRLEN]; - - if_indextoname((unsigned int)if_index, ifname); - inet_ntop(AF_INET, &src_ip_addr, src_buf, sizeof(src_buf)); - inet_ntop(AF_INET, &dst_ip_addr, dst_buf, sizeof(dst_buf)); - - usnic_err( - "ip route lookup for dst: %s on if: %d device: %s src ip: %s failed\n", - dst_buf, if_index, ifname, src_buf); - return EHOSTUNREACH; - } - - if 
(nh_ip_addr) { - char nh_buf[INET_ADDRSTRLEN]; - char src_buf[INET_ADDRSTRLEN]; - char dst_buf[INET_ADDRSTRLEN]; - - inet_ntop(AF_INET, &nh_ip_addr, nh_buf, sizeof(nh_buf)); - inet_ntop(AF_INET, &src_ip_addr, src_buf, sizeof(src_buf)); - inet_ntop(AF_INET, &dst_ip_addr, dst_buf, sizeof(dst_buf)); - - usnic_info("ip route for dest %s src %s is via %s\n", - dst_buf, src_buf, nh_buf); - } else { - char src_buf[INET_ADDRSTRLEN]; - char dst_buf[INET_ADDRSTRLEN]; - - inet_ntop(AF_INET, &src_ip_addr, src_buf, sizeof(src_buf)); - inet_ntop(AF_INET, &dst_ip_addr, dst_buf, sizeof(dst_buf)); - usnic_info("ip route for dest %s src %s is directly connected\n", - dst_buf, src_buf); - } - - if (nh_ip_addr) - return usnic_resolve_arp(if_index, nh_ip_addr, macaddr); - else - return usnic_resolve_arp(if_index, dst_ip_addr, macaddr); -} diff --git a/prov/usnic/src/usnic_direct/usnic_ip_utils.h b/prov/usnic/src/usnic_direct/usnic_ip_utils.h deleted file mode 100644 index e9f1f600da9..00000000000 --- a/prov/usnic/src/usnic_direct/usnic_ip_utils.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#ifndef USNIC_IP_UTILS_H -#define USNIC_IP_UTILS_H - -int usnic_arp_lookup(char *ifname, uint32_t ipaddr, int sockfd, - uint8_t *macaddr); -int usnic_arp_request(uint32_t ipaddr, int sockfd); -int usnic_resolve_dst(int if_index, uint32_t src_ip_addr, - uint32_t dst_ip_addr, uint8_t *macaddr); - -#endif /* USNIC_IP_UTILS_H */ diff --git a/prov/usnic/src/usnic_direct/usnic_user_utils.h b/prov/usnic/src/usnic_direct/usnic_user_utils.h deleted file mode 100644 index 2a92100d335..00000000000 --- a/prov/usnic/src/usnic_direct/usnic_user_utils.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#ifndef USNIC_USER_UTILS_H -#define USNIC_USER_UTILS_H - -#include -#include -#include - -#ifndef __CHAR_BIT__ -#define __CHAR_BIT__ 8 -#endif -#define CHAR_BIT __CHAR_BIT__ - -#define USNIC_LOG_LVL_NONE 0 -#define USNIC_LOG_LVL_ERR 1 -#define USNIC_LOG_LVL_INFO 2 -#define USNIC_LOG_LVL_VERBOSE 3 - -#if WANT_DEBUG_MSGS -#define USNIC_LOG_LVL USNIC_LOG_LVL_INFO -#else -#define USNIC_LOG_LVL USNIC_LOG_LVL_NONE -#endif - -#define usnic_printf(fd, args...) \ - do { \ - fprintf(fd, "usnic:%-22s:%5d: ", __func__, __LINE__); \ - fprintf(fd, args); \ - } while (0) - -#if USNIC_LOG_LVL >= USNIC_LOG_LVL_ERR -#define usnic_err(args...) usnic_printf(stderr, args) -#else -#define usnic_err(args...) {} -#endif - -#if USNIC_LOG_LVL >= USNIC_LOG_LVL_ERR -#define usnic_strerror(err, args, ...) \ - do { \ - char err_buf[50]; \ - char *estr = strerror_r(err, err_buf, sizeof(err_buf)); \ - fprintf(stderr, "usnic:%-22s:%5d: ", __func__, __LINE__); \ - fprintf(stderr, args " error: %s\n", ## __VA_ARGS__, \ - estr); \ - } while (0) -#else -#define usnic_strerror(err, args, ...) -#endif - -#if USNIC_LOG_LVL >= USNIC_LOG_LVL_ERR -#define usnic_perr(args, ...) \ - do { \ - char err_buf[50]; \ - char *estr = strerror_r(errno, err_buf, sizeof(err_buf)); \ - fprintf(stderr, "usnic:%-22s:%5d: ", __func__, __LINE__); \ - fprintf(stderr, args " error: %s\n", ## __VA_ARGS__, \ - estr); \ - } while (0) -#else -#define usnic_perr(args, ...) {} -#endif - -#if USNIC_LOG_LVL >= USNIC_LOG_LVL_INFO -#define usnic_info(args...) usnic_printf(stdout, args) -#else -#define usnic_info(args...) {} -#endif - -#if USNIC_LOG_LVL >= USNIC_LOG_LVL_VERBOSE -#define usnic_verbose(args...) usnic_printf(stdout, args) -#else -#define usnic_verbose(args...) 
{} -#endif - -#endif /* USNIC_USER_UTILS_H */ diff --git a/prov/usnic/src/usnic_direct/vnic_cq.c b/prov/usnic/src/usnic_direct/vnic_cq.c deleted file mode 100644 index 12fc5cdb53c..00000000000 --- a/prov/usnic/src/usnic_direct/vnic_cq.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright 2008-2018 Cisco Systems, Inc. All rights reserved. - * Copyright 2007 Nuova Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#include -#include -#include -#include - -#include "kcompat.h" -#include "vnic_dev.h" -#include "vnic_cq.h" - -#ifndef NOT_FOR_OPEN_ENIC -int vnic_cq_mem_size(struct vnic_cq *cq, unsigned int desc_count, - unsigned int desc_size) -{ - int mem_size; - - mem_size = vnic_dev_desc_ring_size(&cq->ring, desc_count, desc_size); - - return mem_size; -} - -#endif -void vnic_cq_free(struct vnic_cq *cq) -{ - vnic_dev_free_desc_ring(cq->vdev, &cq->ring); - - cq->ctrl = NULL; -} - -int vnic_cq_alloc(struct vnic_dev *vdev, struct vnic_cq *cq, unsigned int index, - unsigned int desc_count, unsigned int desc_size) -{ - int err; - - cq->index = index; - cq->vdev = vdev; - - cq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_CQ, index); - if (!cq->ctrl) { - pr_err("Failed to hook CQ[%d] resource\n", index); - return -EINVAL; - } - - err = vnic_dev_alloc_desc_ring(vdev, &cq->ring, desc_count, desc_size); - if (err) - return err; - - return 0; -} - -void vnic_cq_init(struct vnic_cq *cq, unsigned int flow_control_enable, - unsigned int color_enable, unsigned int cq_head, unsigned int cq_tail, - unsigned int cq_tail_color, unsigned int interrupt_enable, - unsigned int cq_entry_enable, unsigned int cq_message_enable, - unsigned int interrupt_offset, u64 cq_message_addr) -{ - u64 paddr; - - paddr = (u64)cq->ring.base_addr | VNIC_PADDR_TARGET; - writeq(paddr, &cq->ctrl->ring_base); - iowrite32(cq->ring.desc_count, 
&cq->ctrl->ring_size); - iowrite32(flow_control_enable, &cq->ctrl->flow_control_enable); - iowrite32(color_enable, &cq->ctrl->color_enable); - iowrite32(cq_head, &cq->ctrl->cq_head); - iowrite32(cq_tail, &cq->ctrl->cq_tail); - iowrite32(cq_tail_color, &cq->ctrl->cq_tail_color); - iowrite32(interrupt_enable, &cq->ctrl->interrupt_enable); - iowrite32(cq_entry_enable, &cq->ctrl->cq_entry_enable); - iowrite32(cq_message_enable, &cq->ctrl->cq_message_enable); - iowrite32(interrupt_offset, &cq->ctrl->interrupt_offset); - writeq(cq_message_addr, &cq->ctrl->cq_message_addr); - - cq->interrupt_offset = interrupt_offset; -} - -void vnic_cq_clean(struct vnic_cq *cq) -{ - cq->to_clean = 0; - cq->last_color = 0; - - iowrite32(0, &cq->ctrl->cq_head); - iowrite32(0, &cq->ctrl->cq_tail); - iowrite32(1, &cq->ctrl->cq_tail_color); - - vnic_dev_clear_desc_ring(&cq->ring); -} diff --git a/prov/usnic/src/usnic_direct/vnic_cq.h b/prov/usnic/src/usnic_direct/vnic_cq.h deleted file mode 100644 index af1c19f769d..00000000000 --- a/prov/usnic/src/usnic_direct/vnic_cq.h +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright 2008-2018 Cisco Systems, Inc. All rights reserved. - * Copyright 2007 Nuova Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#ifndef _VNIC_CQ_H_ -#define _VNIC_CQ_H_ - -#include "cq_desc.h" -#include "vnic_dev.h" - -/* Completion queue control */ -struct vnic_cq_ctrl { - u64 ring_base; /* 0x00 */ - u32 ring_size; /* 0x08 */ - u32 pad0; - u32 flow_control_enable; /* 0x10 */ - u32 pad1; - u32 color_enable; /* 0x18 */ - u32 pad2; - u32 cq_head; /* 0x20 */ - u32 pad3; - u32 cq_tail; /* 0x28 */ - u32 pad4; - u32 cq_tail_color; /* 0x30 */ - u32 pad5; - u32 interrupt_enable; /* 0x38 */ - u32 pad6; - u32 cq_entry_enable; /* 0x40 */ - u32 pad7; - u32 cq_message_enable; /* 0x48 */ - u32 pad8; - u32 interrupt_offset; /* 0x50 */ - u32 pad9; - u64 cq_message_addr; /* 0x58 */ - u32 pad10; -}; - -#ifdef ENIC_AIC -struct vnic_rx_bytes_counter { - unsigned int small_pkt_bytes_cnt; - unsigned int large_pkt_bytes_cnt; -}; -#endif - -struct vnic_cq { - unsigned int index; - struct vnic_dev *vdev; - struct vnic_cq_ctrl __iomem *ctrl; /* memory-mapped */ - struct vnic_dev_ring ring; - unsigned int to_clean; - unsigned int last_color; - unsigned int interrupt_offset; -#ifdef ENIC_AIC - struct vnic_rx_bytes_counter pkt_size_counter; - unsigned int cur_rx_coal_timeval; - unsigned int tobe_rx_coal_timeval; - ktime_t prev_ts; -#endif -}; - -static inline unsigned int vnic_cq_service(struct vnic_cq *cq, - unsigned int work_to_do, - int (*q_service)(struct vnic_dev *vdev, struct cq_desc *cq_desc, - u8 type, u16 q_number, u16 completed_index, void *opaque), - void *opaque) -{ - struct cq_desc *cq_desc; - unsigned int work_done = 0; - u16 q_number, completed_index; - u8 type, color; - - cq_desc = (struct cq_desc *)((u8 *)cq->ring.descs + - cq->ring.desc_size * cq->to_clean); - cq_desc_dec(cq_desc, &type, &color, - &q_number, &completed_index); - - while (color != cq->last_color) { - if ((*q_service)(cq->vdev, cq_desc, type, - q_number, completed_index, opaque)) - break; - - cq->to_clean++; - if (cq->to_clean == cq->ring.desc_count) { - cq->to_clean = 0; - cq->last_color = 
cq->last_color ? 0 : 1; - } - - cq_desc = (struct cq_desc *)((u8 *)cq->ring.descs + - cq->ring.desc_size * cq->to_clean); - cq_desc_dec(cq_desc, &type, &color, - &q_number, &completed_index); - - work_done++; - if (work_done >= work_to_do) - break; - } - - return work_done; -} - -void vnic_cq_free(struct vnic_cq *cq); -int vnic_cq_alloc(struct vnic_dev *vdev, struct vnic_cq *cq, unsigned int index, - unsigned int desc_count, unsigned int desc_size); -void vnic_cq_init(struct vnic_cq *cq, unsigned int flow_control_enable, - unsigned int color_enable, unsigned int cq_head, unsigned int cq_tail, - unsigned int cq_tail_color, unsigned int interrupt_enable, - unsigned int cq_entry_enable, unsigned int message_enable, - unsigned int interrupt_offset, u64 message_addr); -void vnic_cq_clean(struct vnic_cq *cq); -#ifndef NOT_FOR_OPEN_ENIC -int vnic_cq_mem_size(struct vnic_cq *cq, unsigned int desc_count, - unsigned int desc_size); -#endif - -#endif /* _VNIC_CQ_H_ */ diff --git a/prov/usnic/src/usnic_direct/vnic_dev.c b/prov/usnic/src/usnic_direct/vnic_dev.c deleted file mode 100644 index ca393aec83e..00000000000 --- a/prov/usnic/src/usnic_direct/vnic_dev.c +++ /dev/null @@ -1,1787 +0,0 @@ -/* - * Copyright 2008-2018 Cisco Systems, Inc. All rights reserved. - * Copyright 2007 Nuova Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#include -#include -#include -#include -#include -#include - -#include "kcompat.h" -#include "vnic_resource.h" -#include "vnic_devcmd.h" -#include "vnic_dev.h" -#include "vnic_stats.h" -#include "vnic_wq.h" - -struct devcmd2_controller { - struct vnic_wq_ctrl *wq_ctrl; - struct vnic_dev_ring results_ring; - struct vnic_wq wq; - struct vnic_devcmd2 *cmd_ring; - struct devcmd2_result *result; - u16 next_result; - u16 result_size; - int color; - u32 posted; -}; - -enum vnic_proxy_type { - PROXY_NONE, - PROXY_BY_BDF, - PROXY_BY_INDEX, -}; - -struct vnic_res { - void __iomem *vaddr; - dma_addr_t bus_addr; - unsigned int count; - u8 bar_num; - u32 bar_offset; - unsigned long len; -}; - -struct vnic_intr_coal_timer_info { - u32 mul; - u32 div; - u32 max_usec; -}; - -struct vnic_dev { - void *priv; - struct pci_dev *pdev; - struct vnic_res res[RES_TYPE_MAX]; - enum vnic_dev_intr_mode intr_mode; - struct vnic_devcmd __iomem *devcmd; - struct vnic_devcmd_notify *notify; - struct vnic_devcmd_notify notify_copy; - dma_addr_t notify_pa; - u32 notify_sz; - dma_addr_t linkstatus_pa; - struct vnic_stats *stats; - dma_addr_t stats_pa; - struct vnic_devcmd_fw_info *fw_info; - dma_addr_t fw_info_pa; - enum vnic_proxy_type proxy; - u32 proxy_index; - u64 args[VNIC_DEVCMD_NARGS]; - struct vnic_intr_coal_timer_info intr_coal_timer_info; - struct devcmd2_controller *devcmd2; - int (*devcmd_rtn)(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, int wait); -}; - -#define VNIC_MAX_RES_HDR_SIZE \ - (sizeof(struct vnic_resource_header) + \ - sizeof(struct vnic_resource) * RES_TYPE_MAX) -#define VNIC_RES_STRIDE 128 - -void *vnic_dev_priv(struct vnic_dev *vdev) -{ - return vdev->priv; -} - -int vnic_dev_get_size(void) -{ - return sizeof(struct vnic_dev); -} - -static int vnic_dev_discover_res(struct vnic_dev *vdev, - struct vnic_dev_bar *bar, unsigned int num_bars) -{ - struct vnic_resource_header __iomem *rh; - struct mgmt_barmap_hdr __iomem *mrh; - struct 
vnic_resource __iomem *r; - u8 type; - - if (num_bars == 0) - return -EINVAL; - - if (bar->len < VNIC_MAX_RES_HDR_SIZE) { - pr_err("vNIC BAR0 res hdr length error\n"); - return -EINVAL; - } - - rh = bar->vaddr; - mrh = bar->vaddr; - if (!rh) { - pr_err("vNIC BAR0 res hdr not mem-mapped\n"); - return -EINVAL; - } - - /* Check for mgmt vnic in addition to normal vnic */ - if ((ioread32(&rh->magic) != VNIC_RES_MAGIC) || - (ioread32(&rh->version) != VNIC_RES_VERSION)) { - if ((ioread32(&mrh->magic) != MGMTVNIC_MAGIC) || - (ioread32(&mrh->version) != MGMTVNIC_VERSION)) { - pr_err("vNIC BAR0 res magic/version error " - "exp (%lx/%lx) or (%lx/%lx), curr (%x/%x)\n", - VNIC_RES_MAGIC, VNIC_RES_VERSION, - MGMTVNIC_MAGIC, MGMTVNIC_VERSION, - ioread32(&rh->magic), ioread32(&rh->version)); - return -EINVAL; - } - } - - if (ioread32(&mrh->magic) == MGMTVNIC_MAGIC) - r = (struct vnic_resource __iomem *)(mrh + 1); - else - r = (struct vnic_resource __iomem *)(rh + 1); - - while ((type = ioread8(&r->type)) != RES_TYPE_EOL) { - - u8 bar_num = ioread8(&r->bar); - u32 bar_offset = ioread32(&r->bar_offset); - u32 count = ioread32(&r->count); - u32 len; - - r++; - - if (bar_num >= num_bars) - continue; - - if (!bar[bar_num].len || !bar[bar_num].vaddr) - continue; - - switch (type) { - case RES_TYPE_WQ: - case RES_TYPE_RQ: - case RES_TYPE_CQ: - case RES_TYPE_INTR_CTRL: - case RES_TYPE_GRPMBR_INTR: - /* each count is stride bytes long */ - len = count * VNIC_RES_STRIDE; - if (len + bar_offset > bar[bar_num].len) { - pr_err("vNIC BAR0 resource %d " - "out-of-bounds, offset 0x%x + " - "size 0x%x > bar len 0x%lx\n", - type, bar_offset, - len, - bar[bar_num].len); - return -EINVAL; - } - break; - case RES_TYPE_DPKT: - case RES_TYPE_MEM: - case RES_TYPE_INTR_PBA_LEGACY: -#ifdef CONFIG_MIPS - case RES_TYPE_DEV: -#endif - case RES_TYPE_DEVCMD2: - case RES_TYPE_DEVCMD: - len = count; - break; - default: - continue; - } - - vdev->res[type].count = count; - vdev->res[type].vaddr = (char __iomem 
*)bar[bar_num].vaddr + - bar_offset; - vdev->res[type].bus_addr = bar[bar_num].bus_addr + bar_offset; - vdev->res[type].bar_num = bar_num; - vdev->res[type].bar_offset = bar_offset; - vdev->res[type].len = len; - } - - return 0; -} - -/* - * Assign virtual addresses to all resources whose bus address falls - * within the specified map. - * vnic_dev_discover_res assigns res vaddrs based on the assumption that - * the entire bar is mapped once. When memory regions on the bar - * are mapped seperately, the vnic res for those regions need to be updated - * with new virutal addresses. - * Notice that the mapping and virtual address update need to be done before - * other VNIC APIs that might use the old virtual address, - * such as vdev->devcmd - */ -void vnic_dev_upd_res_vaddr(struct vnic_dev *vdev, - struct vnic_dev_iomap_info *map) -{ - int i; - - for (i = RES_TYPE_EOL; i < RES_TYPE_MAX; i++) { - if (i == RES_TYPE_EOL) - continue; - if (vdev->res[i].bus_addr >= map->bus_addr && - vdev->res[i].bus_addr < map->bus_addr + map->len) - vdev->res[i].vaddr = ((uint8_t *)map->vaddr) + - (vdev->res[i].bus_addr - map->bus_addr); - } -} -EXPORT_SYMBOL(vnic_dev_upd_res_vaddr); - -unsigned int vnic_dev_get_res_count(struct vnic_dev *vdev, - enum vnic_res_type type) -{ - return vdev->res[type].count; -} -EXPORT_SYMBOL(vnic_dev_get_res_count); - -void __iomem *vnic_dev_get_res(struct vnic_dev *vdev, enum vnic_res_type type, - unsigned int index) -{ - if (!vdev->res[type].vaddr) - return NULL; - - switch (type) { - case RES_TYPE_WQ: - case RES_TYPE_RQ: - case RES_TYPE_CQ: - case RES_TYPE_INTR_CTRL: - case RES_TYPE_GRPMBR_INTR: - return (char __iomem *)vdev->res[type].vaddr + - index * VNIC_RES_STRIDE; - default: - return (char __iomem *)vdev->res[type].vaddr; - } -} -EXPORT_SYMBOL(vnic_dev_get_res); - -dma_addr_t vnic_dev_get_res_bus_addr(struct vnic_dev *vdev, - enum vnic_res_type type, unsigned int index) -{ - switch (type) { - case RES_TYPE_WQ: - case RES_TYPE_RQ: - case 
RES_TYPE_CQ: - case RES_TYPE_INTR_CTRL: - case RES_TYPE_GRPMBR_INTR: - return vdev->res[type].bus_addr + - index * VNIC_RES_STRIDE; - default: - return vdev->res[type].bus_addr; - } -} -EXPORT_SYMBOL(vnic_dev_get_res_bus_addr); - -uint8_t vnic_dev_get_res_bar(struct vnic_dev *vdev, - enum vnic_res_type type) -{ - return vdev->res[type].bar_num; -} -EXPORT_SYMBOL(vnic_dev_get_res_bar); - -uint32_t vnic_dev_get_res_offset(struct vnic_dev *vdev, - enum vnic_res_type type, unsigned int index) -{ - switch (type) { - case RES_TYPE_WQ: - case RES_TYPE_RQ: - case RES_TYPE_CQ: - case RES_TYPE_INTR_CTRL: - case RES_TYPE_GRPMBR_INTR: - return vdev->res[type].bar_offset + - index * VNIC_RES_STRIDE; - default: - return vdev->res[type].bar_offset; - } -} -EXPORT_SYMBOL(vnic_dev_get_res_offset); - -/* - * Get the length of the res type - */ -unsigned long vnic_dev_get_res_type_len(struct vnic_dev *vdev, - enum vnic_res_type type) -{ - return vdev->res[type].len; -} -EXPORT_SYMBOL(vnic_dev_get_res_type_len); - -unsigned int vnic_dev_desc_ring_size(struct vnic_dev_ring *ring, - unsigned int desc_count, unsigned int desc_size) -{ - /* The base address of the desc rings must be 512 byte aligned. - * Descriptor count is aligned to groups of 32 descriptors. A - * count of 0 means the maximum 4096 descriptors. Descriptor - * size is aligned to 16 bytes. 
- */ - - unsigned int count_align = 32; - unsigned int desc_align = 16; - - ring->base_align = 512; - - if (desc_count == 0) - desc_count = 4096; - - ring->desc_count = ALIGN(desc_count, count_align); - - ring->desc_size = ALIGN(desc_size, desc_align); - - ring->size = ring->desc_count * ring->desc_size; - ring->size_unaligned = ring->size + ring->base_align; - - return ring->size_unaligned; -} - -void vnic_dev_clear_desc_ring(struct vnic_dev_ring *ring) -{ - memset(ring->descs, 0, ring->size); -} - -int vnic_dev_alloc_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring, - unsigned int desc_count, unsigned int desc_size) -{ - vnic_dev_desc_ring_size(ring, desc_count, desc_size); - - ring->descs_unaligned = pci_alloc_consistent(vdev->pdev, - ring->size_unaligned, - &ring->base_addr_unaligned); - - if (!ring->descs_unaligned) { - pr_err("Failed to allocate ring (size=%d), aborting\n", - (int)ring->size); - return -ENOMEM; - } - - ring->base_addr = ALIGN(ring->base_addr_unaligned, - ring->base_align); - ring->descs = (u8 *)ring->descs_unaligned + - (ring->base_addr - ring->base_addr_unaligned); - - vnic_dev_clear_desc_ring(ring); - - ring->desc_avail = ring->desc_count - 1; - - return 0; -} - -void vnic_dev_free_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring) -{ - if (ring->descs) { - pci_free_consistent(vdev->pdev, - ring->size_unaligned, - ring->descs_unaligned, - ring->base_addr_unaligned); - ring->descs = NULL; - } -} - -static int _vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, - int wait) -{ -#if defined(CONFIG_MIPS) || defined(MGMT_VNIC) - return 0; -#else - struct vnic_devcmd __iomem *devcmd = vdev->devcmd; - unsigned int i; - int delay; - u32 status; - int err; - - status = ioread32(&devcmd->status); - if (status == 0xFFFFFFFF) { - /* PCI-e target device is gone */ - return -ENODEV; - } - if (status & STAT_BUSY) { - pr_err("%s: Busy devcmd %d\n", - pci_name(vdev->pdev), _CMD_N(cmd)); - return -EBUSY; - } - - if 
(_CMD_DIR(cmd) & _CMD_DIR_WRITE) { - for (i = 0; i < VNIC_DEVCMD_NARGS; i++) - writeq(vdev->args[i], &devcmd->args[i]); - wmb(); - } - - iowrite32(cmd, &devcmd->cmd); - - if ((_CMD_FLAGS(cmd) & _CMD_FLAGS_NOWAIT)) - return 0; - - for (delay = 0; delay < wait; delay++) { - - udelay(100); - - status = ioread32(&devcmd->status); - if (status == 0xFFFFFFFF) { - /* PCI-e target device is gone */ - return -ENODEV; - } - - if (!(status & STAT_BUSY)) { - if (status & STAT_ERROR) { - err = -(int)readq(&devcmd->args[0]); - if (cmd != CMD_CAPABILITY) - pr_err("%s: Devcmd %d failed " - "with error code %d\n", - pci_name(vdev->pdev), - _CMD_N(cmd), err); - return err; - } - - if (_CMD_DIR(cmd) & _CMD_DIR_READ) { - rmb(); - for (i = 0; i < VNIC_DEVCMD_NARGS; i++) - vdev->args[i] = readq(&devcmd->args[i]); - } - - return 0; - } - } - - pr_err("%s: Timedout devcmd %d\n", - pci_name(vdev->pdev), _CMD_N(cmd)); - return -ETIMEDOUT; -#endif -} - -static int _vnic_dev_cmd2(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, - int wait) -{ -#if defined(CONFIG_MIPS) || defined(MGMT_VNIC) - return 0; -#else - struct devcmd2_controller *dc2c = vdev->devcmd2; - struct devcmd2_result *result; - u8 color; - unsigned int i; - int delay; - int err; - u32 fetch_index; - u32 posted = dc2c->posted; - u32 new_posted; - - fetch_index = ioread32(&dc2c->wq_ctrl->fetch_index); - - if (fetch_index == 0xFFFFFFFF) { /* check for hardware gone */ - /* Hardware surprise removal: return error */ - return -ENODEV; - - } - new_posted = (posted + 1) % DEVCMD2_RING_SIZE; - - if (new_posted == fetch_index) { - pr_err("%s: wq is full while issuing devcmd2 command %d, " - "fetch index: %u, posted index: %u\n", - pci_name(vdev->pdev), - _CMD_N(cmd), - fetch_index, posted); - return -EBUSY; - - } - dc2c->cmd_ring[posted].cmd = cmd; - dc2c->cmd_ring[posted].flags = 0; - - if ((_CMD_FLAGS(cmd) & _CMD_FLAGS_NOWAIT)) - dc2c->cmd_ring[posted].flags |= DEVCMD2_FNORESULT; - if (_CMD_DIR(cmd) & _CMD_DIR_WRITE) { - for (i = 0; i 
< VNIC_DEVCMD_NARGS; i++) - dc2c->cmd_ring[posted].args[i] = vdev->args[i]; - - } - - /* Adding write memory barrier prevents compiler and/or CPU - * reordering, thus avoiding descriptor posting before - * descriptor is initialized. Otherwise, hardware can read - * stale descriptor fields. - */ - wmb(); - iowrite32(new_posted, &dc2c->wq_ctrl->posted_index); - dc2c->posted = new_posted; - - if (dc2c->cmd_ring[posted].flags & DEVCMD2_FNORESULT) - return 0; - - result = dc2c->result + dc2c->next_result; - color = dc2c->color; - - dc2c->next_result++; - if (dc2c->next_result == dc2c->result_size) { - dc2c->next_result = 0; - dc2c->color = dc2c->color ? 0 : 1; - } - - for (delay = 0; delay < wait; delay++) { - udelay(100); - if (result->color == color) { - if (result->error) { - err = -(int) result->error; - if (err != ERR_ECMDUNKNOWN || cmd != CMD_CAPABILITY) - pr_err("%s:Error %d devcmd %d\n", - pci_name(vdev->pdev), - err, _CMD_N(cmd)); - return err; - } - if (_CMD_DIR(cmd) & _CMD_DIR_READ) { - for (i = 0; i < VNIC_DEVCMD_NARGS; i++) - vdev->args[i] = result->results[i]; - } - return 0; - } - } - - pr_err("%s:Timed out devcmd %d\n", pci_name(vdev->pdev), - _CMD_N(cmd)); - - return -ETIMEDOUT; -#endif -} - -int vnic_dev_init_devcmd1(struct vnic_dev *vdev) -{ -#if !defined(CONFIG_MIPS) && !defined(MGMT_VNIC) - vdev->devcmd = vnic_dev_get_res(vdev, RES_TYPE_DEVCMD, 0); - if (!vdev->devcmd) - return -ENODEV; - - vdev->devcmd_rtn = &_vnic_dev_cmd; - return 0; -#else - return 0; -#endif -} - -static int vnic_dev_init_devcmd2(struct vnic_dev *vdev) -{ -#if !defined(CONFIG_MIPS) && !defined(MGMT_VNIC) - int err; - unsigned int fetch_index; - - if (vdev->devcmd2) - return 0; - - vdev->devcmd2 = kzalloc(sizeof(*vdev->devcmd2), GFP_ATOMIC); - if (!vdev->devcmd2) - return -ENOMEM; - - vdev->devcmd2->color = 1; - vdev->devcmd2->result_size = DEVCMD2_RING_SIZE; - err = vnic_wq_devcmd2_alloc(vdev, &vdev->devcmd2->wq, - DEVCMD2_RING_SIZE, DEVCMD2_DESC_SIZE); - if (err) - goto 
err_free_devcmd2; - - fetch_index = ioread32(&vdev->devcmd2->wq.ctrl->fetch_index); - if (fetch_index == 0xFFFFFFFF) { /* check for hardware gone */ - pr_err("Fatal error in devcmd2 init - hardware surprise removal"); - return -ENODEV; - } - - /* - * Don't change fetch_index ever and - * set posted_index same as fetch_index - * when setting up the WQ for devmcd2. - */ - vnic_wq_init_start(&vdev->devcmd2->wq, 0, fetch_index, fetch_index, 0, 0); - vdev->devcmd2->posted = fetch_index; - vnic_wq_enable(&vdev->devcmd2->wq); - - err = vnic_dev_alloc_desc_ring(vdev, &vdev->devcmd2->results_ring, - DEVCMD2_RING_SIZE, DEVCMD2_DESC_SIZE); - if (err) - goto err_free_wq; - - vdev->devcmd2->result = - (struct devcmd2_result *) vdev->devcmd2->results_ring.descs; - vdev->devcmd2->cmd_ring = - (struct vnic_devcmd2 *) vdev->devcmd2->wq.ring.descs; - vdev->devcmd2->wq_ctrl = vdev->devcmd2->wq.ctrl; - vdev->args[0] = (u64) vdev->devcmd2->results_ring.base_addr | - VNIC_PADDR_TARGET; - vdev->args[1] = DEVCMD2_RING_SIZE; - - err = _vnic_dev_cmd2(vdev, CMD_INITIALIZE_DEVCMD2, 1000); - if (err) - goto err_free_desc_ring; - - vdev->devcmd_rtn = &_vnic_dev_cmd2; - - return 0; - -err_free_desc_ring: - vnic_dev_free_desc_ring(vdev, &vdev->devcmd2->results_ring); -err_free_wq: - vnic_wq_disable(&vdev->devcmd2->wq); - vnic_wq_free(&vdev->devcmd2->wq); -err_free_devcmd2: - kfree(vdev->devcmd2); - vdev->devcmd2 = NULL; - - return err; -#else - return 0; -#endif -} - -static void vnic_dev_deinit_devcmd2(struct vnic_dev *vdev) -{ -#if !defined(CONFIG_MIPS) && !defined(MGMT_VNIC) - vnic_dev_free_desc_ring(vdev, &vdev->devcmd2->results_ring); - vnic_wq_disable(&vdev->devcmd2->wq); - vnic_wq_free(&vdev->devcmd2->wq); - kfree(vdev->devcmd2); -#endif -} - -static int vnic_dev_cmd_proxy(struct vnic_dev *vdev, - enum vnic_devcmd_cmd proxy_cmd, enum vnic_devcmd_cmd cmd, - u64 *args, int nargs, int wait) -{ - u32 status; - int err; - - /* - * Proxy command consumes 2 arguments. 
One for proxy index, - * the other is for command to be proxied - */ - if (nargs > VNIC_DEVCMD_NARGS - 2) { - pr_err("number of args %d exceeds the maximum\n", nargs); - return -EINVAL; - } - memset(vdev->args, 0, sizeof(vdev->args)); - - vdev->args[0] = vdev->proxy_index; - vdev->args[1] = cmd; - memcpy(&vdev->args[2], args, nargs * sizeof(args[0])); - - err = (*vdev->devcmd_rtn)(vdev, proxy_cmd, wait); - if (err) - return err; - - status = (u32)vdev->args[0]; - if (status & STAT_ERROR) { - err = (int)vdev->args[1]; - if (err != ERR_ECMDUNKNOWN || - cmd != CMD_CAPABILITY) - pr_err("Error %d proxy devcmd %d\n", err, _CMD_N(cmd)); - return err; - } - - memcpy(args, &vdev->args[1], nargs * sizeof(args[0])); - - return 0; -} - -static int vnic_dev_cmd_no_proxy(struct vnic_dev *vdev, - enum vnic_devcmd_cmd cmd, u64 *args, int nargs, int wait) -{ - int err; - - if (nargs > VNIC_DEVCMD_NARGS) { - pr_err("number of args %d exceeds the maximum\n", nargs); - return -EINVAL; - } - memset(vdev->args, 0, sizeof(vdev->args)); - memcpy(vdev->args, args, nargs * sizeof(args[0])); - - err = (*vdev->devcmd_rtn)(vdev, cmd, wait); - - memcpy(args, vdev->args, nargs * sizeof(args[0])); - - return err; -} - -void vnic_dev_cmd_proxy_by_index_start(struct vnic_dev *vdev, u16 index) -{ - vdev->proxy = PROXY_BY_INDEX; - vdev->proxy_index = index; -} - -void vnic_dev_cmd_proxy_by_bdf_start(struct vnic_dev *vdev, u16 bdf) -{ - vdev->proxy = PROXY_BY_BDF; - vdev->proxy_index = bdf; -} - -void vnic_dev_cmd_proxy_end(struct vnic_dev *vdev) -{ - vdev->proxy = PROXY_NONE; - vdev->proxy_index = 0; -} - -int vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, - u64 *a0, u64 *a1, int wait) -{ - u64 args[2]; - int err; - - args[0] = *a0; - args[1] = *a1; - memset(vdev->args, 0, sizeof(vdev->args)); - - switch (vdev->proxy) { - case PROXY_BY_INDEX: - err = vnic_dev_cmd_proxy(vdev, CMD_PROXY_BY_INDEX, cmd, - args, ARRAY_SIZE(args), wait); - break; - case PROXY_BY_BDF: - err = 
vnic_dev_cmd_proxy(vdev, CMD_PROXY_BY_BDF, cmd, - args, ARRAY_SIZE(args), wait); - break; - case PROXY_NONE: - default: - err = vnic_dev_cmd_no_proxy(vdev, cmd, args, 2, wait); - break; - } - - if (err == 0) { - *a0 = args[0]; - *a1 = args[1]; - } - - return err; -} - -int vnic_dev_cmd_args(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, - u64 *args, int nargs, int wait) -{ - switch (vdev->proxy) { - case PROXY_BY_INDEX: - return vnic_dev_cmd_proxy(vdev, CMD_PROXY_BY_INDEX, cmd, - args, nargs, wait); - case PROXY_BY_BDF: - return vnic_dev_cmd_proxy(vdev, CMD_PROXY_BY_BDF, cmd, - args, nargs, wait); - case PROXY_NONE: - default: - return vnic_dev_cmd_no_proxy(vdev, cmd, args, nargs, wait); - } -} - -static int vnic_dev_capable(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd) -{ - u64 a0 = (u32)cmd, a1 = 0; - int wait = 1000; - int err; - - err = vnic_dev_cmd(vdev, CMD_CAPABILITY, &a0, &a1, wait); - - return !(err || a0); -} - -int vnic_dev_fw_info(struct vnic_dev *vdev, - struct vnic_devcmd_fw_info **fw_info) -{ - u64 a0, a1 = 0; - int wait = 1000; - int err = 0; - - if (!vdev->fw_info) { - vdev->fw_info = pci_alloc_consistent(vdev->pdev, - sizeof(struct vnic_devcmd_fw_info), - &vdev->fw_info_pa); - if (!vdev->fw_info) - return -ENOMEM; - - memset(vdev->fw_info, 0, sizeof(struct vnic_devcmd_fw_info)); - - a0 = vdev->fw_info_pa; - a1 = sizeof(struct vnic_devcmd_fw_info); - - /* only get fw_info once and cache it */ - if (vnic_dev_capable(vdev, CMD_MCPU_FW_INFO)) - err = vnic_dev_cmd(vdev, CMD_MCPU_FW_INFO, - &a0, &a1, wait); - else - err = vnic_dev_cmd(vdev, CMD_MCPU_FW_INFO_OLD, - &a0, &a1, wait); - } - - *fw_info = vdev->fw_info; - - return err; -} - -int vnic_dev_asic_info(struct vnic_dev *vdev, u16 *asic_type, u16 *asic_rev) -{ - struct vnic_devcmd_fw_info *fw_info; - int err; - - err = vnic_dev_fw_info(vdev, &fw_info); - if (err) - return err; - - *asic_type = fw_info->asic_type; - *asic_rev = fw_info->asic_rev; - - return 0; -} - -int vnic_dev_spec(struct 
vnic_dev *vdev, unsigned int offset, unsigned int size, - void *value) -{ -#ifdef CONFIG_MIPS - u8 *v = vnic_dev_get_res(vdev, RES_TYPE_DEV, 0); - if (!v) { - pr_err("vNIC device-specific region not found.\n"); - return -EINVAL; - } - - switch (size) { - case 1: - *(u8 *)value = ioread8(v + offset); - break; - case 2: - *(u16 *)value = ioread16(v + offset); - break; - case 4: - *(u32 *)value = ioread32(v + offset); - break; - case 8: - *(u64 *)value = readq(v + offset); - break; - default: - BUG(); - break; - } - - return 0; -#else - u64 a0, a1; - int wait = 1000; - int err; - - a0 = offset; - a1 = size; - - err = vnic_dev_cmd(vdev, CMD_DEV_SPEC, &a0, &a1, wait); - - switch (size) { - case 1: - *(u8 *)value = (u8)a0; - break; - case 2: - *(u16 *)value = (u16)a0; - break; - case 4: - *(u32 *)value = (u32)a0; - break; - case 8: - *(u64 *)value = a0; - break; - default: - BUG(); - break; - } - - return err; -#endif -} - -int vnic_dev_stats_clear(struct vnic_dev *vdev) -{ - u64 a0 = 0, a1 = 0; - int wait = 1000; - return vnic_dev_cmd(vdev, CMD_STATS_CLEAR, &a0, &a1, wait); -} - -int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats) -{ - u64 a0, a1; - int wait = 1000; - - if (!vdev->stats) { - vdev->stats = pci_alloc_consistent(vdev->pdev, - sizeof(struct vnic_stats), &vdev->stats_pa); - if (!vdev->stats) - return -ENOMEM; - } - - *stats = vdev->stats; - a0 = vdev->stats_pa; - a1 = sizeof(struct vnic_stats); - - return vnic_dev_cmd(vdev, CMD_STATS_DUMP, &a0, &a1, wait); -} - -int vnic_dev_close(struct vnic_dev *vdev) -{ - u64 a0 = 0, a1 = 0; - int wait = 1000; - return vnic_dev_cmd(vdev, CMD_CLOSE, &a0, &a1, wait); -} - -/** Deprecated. 
@see vnic_dev_enable_wait */ -int vnic_dev_enable(struct vnic_dev *vdev) -{ - u64 a0 = 0, a1 = 0; - int wait = 1000; - return vnic_dev_cmd(vdev, CMD_ENABLE, &a0, &a1, wait); -} - -int vnic_dev_enable_wait(struct vnic_dev *vdev) -{ - u64 a0 = 0, a1 = 0; - int wait = 1000; - - if (vnic_dev_capable(vdev, CMD_ENABLE_WAIT)) - return vnic_dev_cmd(vdev, CMD_ENABLE_WAIT, &a0, &a1, wait); - else - return vnic_dev_cmd(vdev, CMD_ENABLE, &a0, &a1, wait); -} - -int vnic_dev_disable(struct vnic_dev *vdev) -{ - u64 a0 = 0, a1 = 0; - int wait = 1000; - return vnic_dev_cmd(vdev, CMD_DISABLE, &a0, &a1, wait); -} - -int vnic_dev_open(struct vnic_dev *vdev, int arg) -{ - u64 a0 = (u32)arg, a1 = 0; - int wait = 1000; - return vnic_dev_cmd(vdev, CMD_OPEN, &a0, &a1, wait); -} - -int vnic_dev_open_done(struct vnic_dev *vdev, int *done) -{ - u64 a0 = 0, a1 = 0; - int wait = 1000; - int err; - - *done = 0; - - err = vnic_dev_cmd(vdev, CMD_OPEN_STATUS, &a0, &a1, wait); - if (err) - return err; - - *done = (a0 == 0); - - return 0; -} - -int vnic_dev_soft_reset(struct vnic_dev *vdev, int arg) -{ - u64 a0 = (u32)arg, a1 = 0; - int wait = 1000; - - return vnic_dev_cmd(vdev, CMD_SOFT_RESET, &a0, &a1, wait); -} - -int vnic_dev_soft_reset_done(struct vnic_dev *vdev, int *done) -{ - u64 a0 = 0, a1 = 0; - int wait = 1000; - int err; - - *done = 0; - - err = vnic_dev_cmd(vdev, CMD_SOFT_RESET_STATUS, &a0, &a1, wait); - if (err) - return err; - - *done = (a0 == 0); - - return 0; -} - -int vnic_dev_hang_reset(struct vnic_dev *vdev, int arg) -{ - u64 a0 = (u32)arg, a1 = 0; - int wait = 1000; - int err; - - if (vnic_dev_capable(vdev, CMD_HANG_RESET)) { - return vnic_dev_cmd(vdev, CMD_HANG_RESET, - &a0, &a1, wait); - } else { - err = vnic_dev_soft_reset(vdev, arg); - if (err) - return err; - return vnic_dev_init(vdev, 0); - } -} - -int vnic_dev_hang_reset_done(struct vnic_dev *vdev, int *done) -{ - u64 a0 = 0, a1 = 0; - int wait = 1000; - int err; - - *done = 0; - - if (vnic_dev_capable(vdev, 
CMD_HANG_RESET_STATUS)) { - err = vnic_dev_cmd(vdev, CMD_HANG_RESET_STATUS, - &a0, &a1, wait); - if (err) - return err; - } else { - return vnic_dev_soft_reset_done(vdev, done); - } - - *done = (a0 == 0); - - return 0; -} - -int vnic_dev_hang_notify(struct vnic_dev *vdev) -{ - u64 a0 = 0, a1 = 0; - int wait = 1000; - return vnic_dev_cmd(vdev, CMD_HANG_NOTIFY, &a0, &a1, wait); -} - -int vnic_dev_get_mac_addr(struct vnic_dev *vdev, u8 *mac_addr) -{ -#if defined(CONFIG_MIPS) || defined(MGMT_VNIC) - u64 laa = 0x02; - memcpy(mac_addr, &laa, ETH_ALEN); - return 0; -#else - u64 a0 = 0, a1 = 0; - int wait = 1000; - int err, i; - - for (i = 0; i < ETH_ALEN; i++) - mac_addr[i] = 0; - - err = vnic_dev_cmd(vdev, CMD_GET_MAC_ADDR, &a0, &a1, wait); - if (err) - return err; - - for (i = 0; i < ETH_ALEN; i++) - mac_addr[i] = ((u8 *)&a0)[i]; - - return 0; -#endif -} - -int vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast, - int broadcast, int promisc, int allmulti) -{ - u64 a0, a1 = 0; - int wait = 1000; - int err; - - a0 = (directed ? CMD_PFILTER_DIRECTED : 0) | - (multicast ? CMD_PFILTER_MULTICAST : 0) | - (broadcast ? CMD_PFILTER_BROADCAST : 0) | - (promisc ? CMD_PFILTER_PROMISCUOUS : 0) | - (allmulti ? CMD_PFILTER_ALL_MULTICAST : 0); - - err = vnic_dev_cmd(vdev, CMD_PACKET_FILTER, &a0, &a1, wait); - if (err) - pr_err("Can't set packet filter\n"); - - return err; -} - -int vnic_dev_packet_filter_all(struct vnic_dev *vdev, int directed, - int multicast, int broadcast, int promisc, int allmulti) -{ - u64 a0, a1 = 0; - int wait = 1000; - int err; - - a0 = (directed ? CMD_PFILTER_DIRECTED : 0) | - (multicast ? CMD_PFILTER_MULTICAST : 0) | - (broadcast ? CMD_PFILTER_BROADCAST : 0) | - (promisc ? CMD_PFILTER_PROMISCUOUS : 0) | - (allmulti ? 
CMD_PFILTER_ALL_MULTICAST : 0); - - err = vnic_dev_cmd(vdev, CMD_PACKET_FILTER_ALL, &a0, &a1, wait); - if (err) - pr_err("Can't set packet filter\n"); - - return err; -} - -int vnic_dev_add_addr(struct vnic_dev *vdev, u8 *addr) -{ - u64 a0 = 0, a1 = 0; - int wait = 1000; - int err; - int i; - - for (i = 0; i < ETH_ALEN; i++) - ((u8 *)&a0)[i] = addr[i]; - - err = vnic_dev_cmd(vdev, CMD_ADDR_ADD, &a0, &a1, wait); - if (err) - pr_err("Can't add addr [%02x:%02x:%02x:%02x:%02x:%02x], %d\n", - addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], - err); - - return err; -} - -int vnic_dev_del_addr(struct vnic_dev *vdev, u8 *addr) -{ - u64 a0 = 0, a1 = 0; - int wait = 1000; - int err; - int i; - - for (i = 0; i < ETH_ALEN; i++) - ((u8 *)&a0)[i] = addr[i]; - - err = vnic_dev_cmd(vdev, CMD_ADDR_DEL, &a0, &a1, wait); - if (err) - pr_err("Can't del addr [%02x:%02x:%02x:%02x:%02x:%02x], %d\n", - addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], - err); - - return err; -} - -int vnic_dev_set_ig_vlan_rewrite_mode(struct vnic_dev *vdev, - u8 ig_vlan_rewrite_mode) -{ - u64 a0 = ig_vlan_rewrite_mode, a1 = 0; - int wait = 1000; - - if (vnic_dev_capable(vdev, CMD_IG_VLAN_REWRITE_MODE)) - return vnic_dev_cmd(vdev, CMD_IG_VLAN_REWRITE_MODE, - &a0, &a1, wait); - else - return 0; -} - -int vnic_dev_raise_intr(struct vnic_dev *vdev, u16 intr) -{ - u64 a0 = intr, a1 = 0; - int wait = 1000; - int err; - - err = vnic_dev_cmd(vdev, CMD_IAR, &a0, &a1, wait); - if (err) - pr_err("Failed to raise INTR[%d], err %d\n", intr, err); - - return err; -} - -static int vnic_dev_notify_setcmd(struct vnic_dev *vdev, - void *notify_addr, dma_addr_t notify_pa, u16 intr) -{ - u64 a0, a1; - int wait = 1000; - int r; - - memset(notify_addr, 0, sizeof(struct vnic_devcmd_notify)); - vdev->notify = notify_addr; - vdev->notify_pa = notify_pa; - - a0 = (u64)notify_pa; - a1 = ((u64)intr << 32) & 0x0000ffff00000000ULL; - a1 += sizeof(struct vnic_devcmd_notify); - - r = vnic_dev_cmd(vdev, CMD_NOTIFY, &a0, &a1, 
wait); - vdev->notify_sz = (r == 0) ? (u32)a1 : 0; - return r; -} - -int vnic_dev_notify_set(struct vnic_dev *vdev, u16 intr) -{ - void *notify_addr; - dma_addr_t notify_pa; - - if (vdev->notify || vdev->notify_pa) { - pr_err("notify block %p still allocated", vdev->notify); - return -EINVAL; - } - - notify_addr = pci_alloc_consistent(vdev->pdev, - sizeof(struct vnic_devcmd_notify), - ¬ify_pa); - if (!notify_addr) - return -ENOMEM; - - return vnic_dev_notify_setcmd(vdev, notify_addr, notify_pa, intr); -} - -static int vnic_dev_notify_unsetcmd(struct vnic_dev *vdev) -{ - u64 a0, a1; - int wait = 1000; - int err; - - a0 = 0; /* paddr = 0 to unset notify buffer */ - a1 = 0x0000ffff00000000ULL; /* intr num = -1 to unreg for intr */ - a1 += sizeof(struct vnic_devcmd_notify); - - err = vnic_dev_cmd(vdev, CMD_NOTIFY, &a0, &a1, wait); - vdev->notify = NULL; - vdev->notify_pa = 0; - vdev->notify_sz = 0; - - return err; -} - -int vnic_dev_notify_unset(struct vnic_dev *vdev) -{ - if (vdev->notify) { - pci_free_consistent(vdev->pdev, - sizeof(struct vnic_devcmd_notify), - vdev->notify, - vdev->notify_pa); - } - - return vnic_dev_notify_unsetcmd(vdev); -} - -static int vnic_dev_notify_ready(struct vnic_dev *vdev) -{ - u32 *words; - unsigned int nwords = vdev->notify_sz / 4; - unsigned int i; - u32 csum; - - if (!vdev->notify || !vdev->notify_sz) - return 0; - - do { - csum = 0; - memcpy(&vdev->notify_copy, vdev->notify, vdev->notify_sz); - words = (u32 *)&vdev->notify_copy; - for (i = 1; i < nwords; i++) - csum += words[i]; - } while (csum != words[0]); - - return 1; -} - -int vnic_dev_init(struct vnic_dev *vdev, int arg) -{ - u64 a0 = (u32)arg, a1 = 0; - int wait = 1000; - int r = 0; - - if (vnic_dev_capable(vdev, CMD_INIT)) - r = vnic_dev_cmd(vdev, CMD_INIT, &a0, &a1, wait); - else { - vnic_dev_cmd(vdev, CMD_INIT_v1, &a0, &a1, wait); - if (a0 & CMD_INITF_DEFAULT_MAC) { - /* Emulate these for old CMD_INIT_v1 which - * didn't pass a0 so no CMD_INITF_*. 
- */ - vnic_dev_cmd(vdev, CMD_GET_MAC_ADDR, &a0, &a1, wait); - vnic_dev_cmd(vdev, CMD_ADDR_ADD, &a0, &a1, wait); - } - } - return r; -} - -int vnic_dev_init_done(struct vnic_dev *vdev, int *done, int *err) -{ - u64 a0 = 0, a1 = 0; - int wait = 1000; - int ret; - - *done = 0; - - ret = vnic_dev_cmd(vdev, CMD_INIT_STATUS, &a0, &a1, wait); - if (ret) - return ret; - - *done = (a0 == 0); - - *err = (a0 == 0) ? (int)a1 : 0; - - return 0; -} - -int vnic_dev_init_prov(struct vnic_dev *vdev, u8 *buf, u32 len) -{ - u64 a0, a1 = len; - int wait = 1000; - dma_addr_t prov_pa; - void *prov_buf; - int ret; - - prov_buf = pci_alloc_consistent(vdev->pdev, len, &prov_pa); - if (!prov_buf) - return -ENOMEM; - - memcpy(prov_buf, buf, len); - - a0 = prov_pa; - - ret = vnic_dev_cmd(vdev, CMD_INIT_PROV_INFO, &a0, &a1, wait); - - pci_free_consistent(vdev->pdev, len, prov_buf, prov_pa); - - return ret; -} - -int vnic_dev_deinit(struct vnic_dev *vdev) -{ - u64 a0 = 0, a1 = 0; - int wait = 1000; - - return vnic_dev_cmd(vdev, CMD_DEINIT, &a0, &a1, wait); -} - -EXPORT_SYMBOL(vnic_dev_intr_coal_timer_info_default); -void vnic_dev_intr_coal_timer_info_default(struct vnic_dev *vdev) -{ - /* Default: hardware intr coal timer is in units of 1.5 usecs */ - vdev->intr_coal_timer_info.mul = 2; - vdev->intr_coal_timer_info.div = 3; - vdev->intr_coal_timer_info.max_usec = - vnic_dev_intr_coal_timer_hw_to_usec(vdev, 0xffff); -} - -int vnic_dev_intr_coal_timer_info(struct vnic_dev *vdev) -{ - int wait = 1000; - int err; - - memset(vdev->args, 0, sizeof(vdev->args)); - - if (vnic_dev_capable(vdev, CMD_INTR_COAL_CONVERT)) - err = (*vdev->devcmd_rtn)(vdev, CMD_INTR_COAL_CONVERT, wait); - else - err = ERR_ECMDUNKNOWN; - - /* Use defaults when firmware doesn't support the devcmd at all or - * supports it for only specific hardware - */ - if ((err == ERR_ECMDUNKNOWN) || - (!err && !(vdev->args[0] && vdev->args[1] && vdev->args[2]))) { - pr_warning("Using default conversion factor for " - "interrupt coalesce 
timer\n"); - vnic_dev_intr_coal_timer_info_default(vdev); - return 0; - } - - if (!err) { - vdev->intr_coal_timer_info.mul = (u32) vdev->args[0]; - vdev->intr_coal_timer_info.div = (u32) vdev->args[1]; - vdev->intr_coal_timer_info.max_usec = (u32) vdev->args[2]; - } - - return err; -} - -int vnic_dev_link_status(struct vnic_dev *vdev) -{ -#ifdef CONFIG_MIPS - return 1; -#else - if (!vnic_dev_notify_ready(vdev)) - return 0; - - return vdev->notify_copy.link_state; -#endif -} - -u32 vnic_dev_port_speed(struct vnic_dev *vdev) -{ - if (!vnic_dev_notify_ready(vdev)) - return 0; - - return vdev->notify_copy.port_speed; -} - -u32 vnic_dev_msg_lvl(struct vnic_dev *vdev) -{ - if (!vnic_dev_notify_ready(vdev)) - return 0; - - return vdev->notify_copy.msglvl; -} - -u32 vnic_dev_mtu(struct vnic_dev *vdev) -{ -#if defined(CONFIG_MIPS) || defined(MGMT_VNIC) - return 1500; -#else - if (!vnic_dev_notify_ready(vdev)) - return 0; - - return vdev->notify_copy.mtu; -#endif -} - -u32 vnic_dev_link_down_cnt(struct vnic_dev *vdev) -{ - if (!vnic_dev_notify_ready(vdev)) - return 0; - - return vdev->notify_copy.link_down_cnt; -} - -u32 vnic_dev_notify_status(struct vnic_dev *vdev) -{ - if (!vnic_dev_notify_ready(vdev)) - return 0; - - return vdev->notify_copy.status; -} - -u32 vnic_dev_uif(struct vnic_dev *vdev) -{ - if (!vnic_dev_notify_ready(vdev)) - return 0; - - return vdev->notify_copy.uif; -} - -u32 vnic_dev_perbi_rebuild_cnt(struct vnic_dev *vdev) -{ - if (!vnic_dev_notify_ready(vdev)) - return 0; - - return vdev->notify_copy.perbi_rebuild_cnt; -} - -EXPORT_SYMBOL(vnic_dev_set_intr_mode); -void vnic_dev_set_intr_mode(struct vnic_dev *vdev, - enum vnic_dev_intr_mode intr_mode) -{ - vdev->intr_mode = intr_mode; -} - -EXPORT_SYMBOL(vnic_dev_get_intr_mode); -enum vnic_dev_intr_mode vnic_dev_get_intr_mode( - struct vnic_dev *vdev) -{ - return vdev->intr_mode; -} - -u32 vnic_dev_intr_coal_timer_usec_to_hw(struct vnic_dev *vdev, u32 usec) -{ - return (usec * vdev->intr_coal_timer_info.mul) 
/ - vdev->intr_coal_timer_info.div; -} - -u32 vnic_dev_intr_coal_timer_hw_to_usec(struct vnic_dev *vdev, u32 hw_cycles) -{ - return (hw_cycles * vdev->intr_coal_timer_info.div) / - vdev->intr_coal_timer_info.mul; -} - -u32 vnic_dev_get_intr_coal_timer_max(struct vnic_dev *vdev) -{ - return vdev->intr_coal_timer_info.max_usec; -} - -void vnic_dev_unregister(struct vnic_dev *vdev) -{ - if (vdev) { - if (vdev->notify) - pci_free_consistent(vdev->pdev, - sizeof(struct vnic_devcmd_notify), - vdev->notify, - vdev->notify_pa); - if (vdev->stats) - pci_free_consistent(vdev->pdev, - sizeof(struct vnic_stats), - vdev->stats, vdev->stats_pa); - if (vdev->fw_info) - pci_free_consistent(vdev->pdev, - sizeof(struct vnic_devcmd_fw_info), - vdev->fw_info, vdev->fw_info_pa); - if (vdev->devcmd2) - vnic_dev_deinit_devcmd2(vdev); - - kfree(vdev); - } -} -EXPORT_SYMBOL(vnic_dev_unregister); - -struct vnic_dev *vnic_dev_alloc_discover(struct vnic_dev *vdev, - void *priv, struct pci_dev *pdev, struct vnic_dev_bar *bar, - unsigned int num_bars) -{ - if (!vdev) { - vdev = kzalloc(sizeof(struct vnic_dev), GFP_ATOMIC); - if (!vdev) - return NULL; - } - - vdev->priv = priv; - vdev->pdev = pdev; - - if (vnic_dev_discover_res(vdev, bar, num_bars)) - goto err_out; - - return vdev; - -err_out: - vnic_dev_unregister(vdev); - return NULL; -} -EXPORT_SYMBOL(vnic_dev_alloc_discover); - -struct vnic_dev *vnic_dev_register(struct vnic_dev *vdev, - void *priv, struct pci_dev *pdev, struct vnic_dev_bar *bar, - unsigned int num_bars) -{ - vdev = vnic_dev_alloc_discover(vdev, priv, pdev, bar, num_bars); - if (!vdev) - goto err_out; - - if (vnic_dev_init_devcmd1(vdev)) - goto err_free; - - return vdev; - -err_free: - vnic_dev_unregister(vdev); -err_out: - return NULL; -} -EXPORT_SYMBOL(vnic_dev_register); - -struct pci_dev *vnic_dev_get_pdev(struct vnic_dev *vdev) -{ - return vdev->pdev; -} -EXPORT_SYMBOL(vnic_dev_get_pdev); - -int vnic_devcmd_init(struct vnic_dev *vdev, int fallback) -{ -#if 
!defined(CONFIG_MIPS) && !defined(MGMT_VNIC) - int err; - void *p; - - p = vnic_dev_get_res(vdev, RES_TYPE_DEVCMD2, 0); - if (p) - err = vnic_dev_init_devcmd2(vdev); - else if (fallback) { - pr_warning("DEVCMD2 resource not found, fall back to devcmd\n"); - err = vnic_dev_init_devcmd1(vdev); - } else { - pr_err("DEVCMD2 resource not found, no fall back to devcmd allowed\n"); - err = -ENODEV; - } - - return err; -#else - return 0; -#endif -} - -int vnic_dev_int13(struct vnic_dev *vdev, u64 arg, u32 op) -{ - u64 a0 = arg, a1 = op; - int wait = 1000; - int r = 0; - - r = vnic_dev_cmd(vdev, CMD_INT13, &a0, &a1, wait); - return r; -} - -int vnic_dev_perbi(struct vnic_dev *vdev, u64 arg, u32 op) -{ - u64 a0 = arg, a1 = op; - int wait = 5000; - int r = 0; - - r = vnic_dev_cmd(vdev, CMD_PERBI, &a0, &a1, wait); - - return r; -} - -int vnic_dev_init_prov2(struct vnic_dev *vdev, u8 *buf, u32 len) -{ - u64 a0, a1 = len; - int wait = 1000; - dma_addr_t prov_pa; - void *prov_buf; - int ret; - - prov_buf = pci_alloc_consistent(vdev->pdev, len, &prov_pa); - if (!prov_buf) - return -ENOMEM; - - memcpy(prov_buf, buf, len); - - a0 = prov_pa; - - ret = vnic_dev_cmd(vdev, CMD_INIT_PROV_INFO2, &a0, &a1, wait); - - pci_free_consistent(vdev->pdev, len, prov_buf, prov_pa); - - return ret; -} - -int vnic_dev_enable2(struct vnic_dev *vdev, int active) -{ - u64 a0, a1 = 0; - int wait = 1000; - - a0 = (active ? 
CMD_ENABLE2_ACTIVE : 0); - - return vnic_dev_cmd(vdev, CMD_ENABLE2, &a0, &a1, wait); -} - -static int vnic_dev_cmd_status(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, - int *status) -{ - u64 a0 = cmd, a1 = 0; - int wait = 1000; - int ret; - - ret = vnic_dev_cmd(vdev, CMD_STATUS, &a0, &a1, wait); - if (!ret) - *status = (int)a0; - - return ret; -} - -int vnic_dev_enable2_done(struct vnic_dev *vdev, int *status) -{ - return vnic_dev_cmd_status(vdev, CMD_ENABLE2, status); -} - -int vnic_dev_deinit_done(struct vnic_dev *vdev, int *status) -{ - return vnic_dev_cmd_status(vdev, CMD_DEINIT, status); -} - -int vnic_dev_set_mac_addr(struct vnic_dev *vdev, u8 *mac_addr) -{ - u64 a0 = 0, a1 = 0; - int wait = 1000; - int i; - - for (i = 0; i < ETH_ALEN; i++) - ((u8 *)&a0)[i] = mac_addr[i]; - - return vnic_dev_cmd(vdev, CMD_SET_MAC_ADDR, &a0, &a1, wait); -} - -/* - * vnic_dev_classifier: Add/Delete classifier entries - * @vdev: vdev of the device - * @cmd: CLSF_ADD for Add filter - * CLSF_DEL for Delete filter - * @entry: In case of ADD filter, the caller passes the RQ number in this variable. - * This function stores the filter_id returned by the - * firmware in the same variable before return; - * - * In case of DEL filter, the caller passes the RQ number. Return - * value is irrelevant. 
- * @data: filter data - */ -int vnic_dev_classifier(struct vnic_dev *vdev, u8 cmd, u16 *entry, struct filter *data) -{ - u64 a0, a1; - int wait = 1000; - dma_addr_t tlv_pa; - int ret = -EINVAL; - struct filter_tlv *tlv, *tlv_va; - struct filter_action *action; - u64 tlv_size; - - if (cmd == CLSF_ADD) { - tlv_size = sizeof(struct filter) + - sizeof(struct filter_action) + - 2*sizeof(struct filter_tlv); - tlv_va = pci_alloc_consistent(vdev->pdev, tlv_size, &tlv_pa); - if (!tlv_va) - return -ENOMEM; - tlv = tlv_va; - a0 = tlv_pa; - a1 = tlv_size; - memset(tlv, 0, tlv_size); - tlv->type = CLSF_TLV_FILTER; - tlv->length = sizeof(struct filter); - *(struct filter *)&tlv->val = *data; - - tlv = (struct filter_tlv *)((char *)tlv + - sizeof(struct filter_tlv) + - sizeof(struct filter)); - - tlv->type = CLSF_TLV_ACTION; - tlv->length = sizeof (struct filter_action); - action = (struct filter_action *)&tlv->val; - action->type = FILTER_ACTION_RQ_STEERING; - action->u.rq_idx = *entry; - - ret = vnic_dev_cmd(vdev, CMD_ADD_FILTER, &a0, &a1, wait); - *entry = (u16)a0; - pci_free_consistent(vdev->pdev, tlv_size, tlv_va, tlv_pa); - } else if (cmd == CLSF_DEL) { - a0 = *entry; - a1 = 0; - ret = vnic_dev_cmd(vdev, CMD_DEL_FILTER, &a0, &a1, wait); - } - - return ret; -} - -int vnic_dev_overlay_offload_ctrl(struct vnic_dev *vdev, u8 overlay, - u8 config) -{ - u64 a0, a1; - int wait = 1000; - int ret = -EINVAL; - - a0 = overlay; - a1 = config; - - ret = vnic_dev_cmd(vdev, CMD_OVERLAY_OFFLOAD_CTRL, &a0, &a1, wait); - - return ret; -} - -int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay, - u16 vxlan_udp_port_number) -{ - u64 a0, a1; - int wait = 1000; - int ret = -EINVAL; - - a0 = overlay; - a1 = vxlan_udp_port_number; - - ret = vnic_dev_cmd(vdev, CMD_OVERLAY_OFFLOAD_CFG, &a0, &a1, wait); - - return ret; -} - -int vnic_dev_get_supported_feature_ver(struct vnic_dev *vdev, u8 feature, - u64 *supported_versions) -{ - u64 a0 = feature, a1 = 0; - int wait = 1000; - int ret = 
-EINVAL; - - ret = vnic_dev_cmd(vdev, CMD_GET_SUPP_FEATURE_VER, &a0, &a1, wait); - if (!ret) - *supported_versions = a0; - - return ret; -} diff --git a/prov/usnic/src/usnic_direct/vnic_dev.h b/prov/usnic/src/usnic_direct/vnic_dev.h deleted file mode 100644 index d21f6372cad..00000000000 --- a/prov/usnic/src/usnic_direct/vnic_dev.h +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Copyright 2008-2018 Cisco Systems, Inc. All rights reserved. - * Copyright 2007 Nuova Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#ifndef _VNIC_DEV_H_ -#define _VNIC_DEV_H_ - -#ifdef __KERNEL__ -#include -#endif /* __KERNEL__ */ -#include "vnic_resource.h" -#include "vnic_devcmd.h" - -#ifndef VNIC_PADDR_TARGET -#define VNIC_PADDR_TARGET 0x0000000000000000ULL -#endif - -#ifndef readq -static inline u64 readq(void __iomem *reg) -{ - return ((u64)readl((char *)reg + 0x4UL) << 32) | - (u64)readl(reg); -} - -static inline void writeq(u64 val, void __iomem *reg) -{ - writel(val & 0xffffffff, reg); - writel(val >> 32, (char *)reg + 0x4UL); -} -#endif - -#undef pr_fmt -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -enum vnic_dev_intr_mode { - VNIC_DEV_INTR_MODE_UNKNOWN, - VNIC_DEV_INTR_MODE_INTX, - VNIC_DEV_INTR_MODE_MSI, - VNIC_DEV_INTR_MODE_MSIX, -}; - -struct vnic_dev_bar { - void __iomem *vaddr; - dma_addr_t bus_addr; - unsigned long len; -}; - -struct vnic_dev_ring { - void *descs; - size_t size; - dma_addr_t base_addr; - size_t base_align; - void *descs_unaligned; - size_t size_unaligned; - dma_addr_t base_addr_unaligned; - unsigned int desc_size; - unsigned int desc_count; - unsigned int desc_avail; -}; - -struct vnic_dev_iomap_info { - dma_addr_t bus_addr; - unsigned long len; - void __iomem *vaddr; -}; - -struct vnic_dev; -struct vnic_stats; - -void *vnic_dev_priv(struct vnic_dev *vdev); -unsigned int vnic_dev_get_res_count(struct vnic_dev *vdev, - enum vnic_res_type type); -void __iomem *vnic_dev_get_res(struct 
vnic_dev *vdev, enum vnic_res_type type, - unsigned int index); -dma_addr_t vnic_dev_get_res_bus_addr(struct vnic_dev *vdev, - enum vnic_res_type type, unsigned int index); -uint8_t vnic_dev_get_res_bar(struct vnic_dev *vdev, - enum vnic_res_type type); -uint32_t vnic_dev_get_res_offset(struct vnic_dev *vdev, - enum vnic_res_type type, unsigned int index); -unsigned long vnic_dev_get_res_type_len(struct vnic_dev *vdev, - enum vnic_res_type type); -unsigned int vnic_dev_desc_ring_size(struct vnic_dev_ring *ring, - unsigned int desc_count, unsigned int desc_size); -void vnic_dev_clear_desc_ring(struct vnic_dev_ring *ring); -int vnic_dev_alloc_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring, - unsigned int desc_count, unsigned int desc_size); -void vnic_dev_free_desc_ring(struct vnic_dev *vdev, - struct vnic_dev_ring *ring); -int vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, - u64 *a0, u64 *a1, int wait); -int vnic_dev_cmd_args(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, - u64 *args, int nargs, int wait); -void vnic_dev_cmd_proxy_by_index_start(struct vnic_dev *vdev, u16 index); -void vnic_dev_cmd_proxy_by_bdf_start(struct vnic_dev *vdev, u16 bdf); -void vnic_dev_cmd_proxy_end(struct vnic_dev *vdev); -int vnic_dev_fw_info(struct vnic_dev *vdev, - struct vnic_devcmd_fw_info **fw_info); -int vnic_dev_asic_info(struct vnic_dev *vdev, u16 *asic_type, u16 *asic_rev); -int vnic_dev_spec(struct vnic_dev *vdev, unsigned int offset, unsigned int size, - void *value); -int vnic_dev_stats_clear(struct vnic_dev *vdev); -int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats); -int vnic_dev_hang_notify(struct vnic_dev *vdev); -int vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast, - int broadcast, int promisc, int allmulti); -int vnic_dev_packet_filter_all(struct vnic_dev *vdev, int directed, - int multicast, int broadcast, int promisc, int allmulti); -int vnic_dev_add_addr(struct vnic_dev *vdev, u8 *addr); 
-int vnic_dev_del_addr(struct vnic_dev *vdev, u8 *addr); -int vnic_dev_get_mac_addr(struct vnic_dev *vdev, u8 *mac_addr); -int vnic_dev_raise_intr(struct vnic_dev *vdev, u16 intr); -int vnic_dev_notify_set(struct vnic_dev *vdev, u16 intr); -void vnic_dev_set_reset_flag(struct vnic_dev *vdev, int state); -int vnic_dev_notify_unset(struct vnic_dev *vdev); -int vnic_dev_link_status(struct vnic_dev *vdev); -u32 vnic_dev_port_speed(struct vnic_dev *vdev); -u32 vnic_dev_msg_lvl(struct vnic_dev *vdev); -u32 vnic_dev_mtu(struct vnic_dev *vdev); -u32 vnic_dev_link_down_cnt(struct vnic_dev *vdev); -u32 vnic_dev_notify_status(struct vnic_dev *vdev); -u32 vnic_dev_uif(struct vnic_dev *vdev); -int vnic_dev_close(struct vnic_dev *vdev); -int vnic_dev_enable(struct vnic_dev *vdev); -int vnic_dev_enable_wait(struct vnic_dev *vdev); -int vnic_dev_disable(struct vnic_dev *vdev); -int vnic_dev_open(struct vnic_dev *vdev, int arg); -int vnic_dev_open_done(struct vnic_dev *vdev, int *done); -int vnic_dev_init(struct vnic_dev *vdev, int arg); -int vnic_dev_init_done(struct vnic_dev *vdev, int *done, int *err); -int vnic_dev_init_prov(struct vnic_dev *vdev, u8 *buf, u32 len); -int vnic_dev_deinit(struct vnic_dev *vdev); -void vnic_dev_intr_coal_timer_info_default(struct vnic_dev *vdev); -int vnic_dev_intr_coal_timer_info(struct vnic_dev *vdev); -int vnic_dev_soft_reset(struct vnic_dev *vdev, int arg); -int vnic_dev_soft_reset_done(struct vnic_dev *vdev, int *done); -int vnic_dev_hang_reset(struct vnic_dev *vdev, int arg); -int vnic_dev_hang_reset_done(struct vnic_dev *vdev, int *done); -void vnic_dev_set_intr_mode(struct vnic_dev *vdev, - enum vnic_dev_intr_mode intr_mode); -enum vnic_dev_intr_mode vnic_dev_get_intr_mode(struct vnic_dev *vdev); -u32 vnic_dev_intr_coal_timer_usec_to_hw(struct vnic_dev *vdev, u32 usec); -u32 vnic_dev_intr_coal_timer_hw_to_usec(struct vnic_dev *vdev, u32 hw_cycles); -u32 vnic_dev_get_intr_coal_timer_max(struct vnic_dev *vdev); -void 
vnic_dev_unregister(struct vnic_dev *vdev); -int vnic_dev_set_ig_vlan_rewrite_mode(struct vnic_dev *vdev, - u8 ig_vlan_rewrite_mode); -struct vnic_dev *vnic_dev_alloc_discover(struct vnic_dev *vdev, - void *priv, struct pci_dev *pdev, struct vnic_dev_bar *bar, - unsigned int num_bars); -struct vnic_dev *vnic_dev_register(struct vnic_dev *vdev, - void *priv, struct pci_dev *pdev, struct vnic_dev_bar *bar, - unsigned int num_bars); -void vnic_dev_upd_res_vaddr(struct vnic_dev *vdev, - struct vnic_dev_iomap_info *maps); -struct pci_dev *vnic_dev_get_pdev(struct vnic_dev *vdev); -int vnic_devcmd_init(struct vnic_dev *vdev, int fallback); -int vnic_dev_get_size(void); -int vnic_dev_int13(struct vnic_dev *vdev, u64 arg, u32 op); -int vnic_dev_perbi(struct vnic_dev *vdev, u64 arg, u32 op); -u32 vnic_dev_perbi_rebuild_cnt(struct vnic_dev *vdev); -int vnic_dev_init_prov2(struct vnic_dev *vdev, u8 *buf, u32 len); -int vnic_dev_enable2(struct vnic_dev *vdev, int active); -int vnic_dev_enable2_done(struct vnic_dev *vdev, int *status); -int vnic_dev_deinit_done(struct vnic_dev *vdev, int *status); -int vnic_dev_set_mac_addr(struct vnic_dev *vdev, u8 *mac_addr); -int vnic_dev_classifier(struct vnic_dev *vdev, u8 cmd, u16 *entry, - struct filter *data); -int vnic_dev_overlay_offload_ctrl(struct vnic_dev *vdev, u8 overlay, u8 config); -int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay, - u16 vxlan_udp_port_number); -int vnic_dev_get_supported_feature_ver(struct vnic_dev *vdev, u8 feature, - u64 *supported_versions); -int vnic_dev_init_devcmd1(struct vnic_dev *vdev); -#endif /* _VNIC_DEV_H_ */ diff --git a/prov/usnic/src/usnic_direct/vnic_devcmd.h b/prov/usnic/src/usnic_direct/vnic_devcmd.h deleted file mode 100644 index 90872381c1c..00000000000 --- a/prov/usnic/src/usnic_direct/vnic_devcmd.h +++ /dev/null @@ -1,1413 +0,0 @@ -/* - * Copyright 2008-2016 Cisco Systems, Inc. All rights reserved. - * Copyright 2007 Nuova Systems, Inc. All rights reserved. 
- * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#ifndef _VNIC_DEVCMD_H_ -#define _VNIC_DEVCMD_H_ - -#define _CMD_NBITS 14 -#define _CMD_VTYPEBITS 10 -#define _CMD_FLAGSBITS 6 -#define _CMD_DIRBITS 2 - -#define _CMD_NMASK ((1 << _CMD_NBITS)-1) -#define _CMD_VTYPEMASK ((1 << _CMD_VTYPEBITS)-1) -#define _CMD_FLAGSMASK ((1 << _CMD_FLAGSBITS)-1) -#define _CMD_DIRMASK ((1 << _CMD_DIRBITS)-1) - -#define _CMD_NSHIFT 0 -#define _CMD_VTYPESHIFT (_CMD_NSHIFT+_CMD_NBITS) -#define _CMD_FLAGSSHIFT (_CMD_VTYPESHIFT+_CMD_VTYPEBITS) -#define _CMD_DIRSHIFT (_CMD_FLAGSSHIFT+_CMD_FLAGSBITS) - -/* - * Direction bits (from host perspective). - */ -#define _CMD_DIR_NONE 0U -#define _CMD_DIR_WRITE 1U -#define _CMD_DIR_READ 2U -#define _CMD_DIR_RW (_CMD_DIR_WRITE | _CMD_DIR_READ) - -/* - * Flag bits. - */ -#define _CMD_FLAGS_NONE 0U -#define _CMD_FLAGS_NOWAIT 1U - -/* - * vNIC type bits. - */ -#define _CMD_VTYPE_NONE 0U -#define _CMD_VTYPE_ENET 1U -#define _CMD_VTYPE_FC 2U -#define _CMD_VTYPE_SCSI 4U -#define _CMD_VTYPE_ALL (_CMD_VTYPE_ENET | _CMD_VTYPE_FC | _CMD_VTYPE_SCSI) - -/* - * Used to create cmds.. - */ -#define _CMDCF(dir, flags, vtype, nr) \ - (((dir) << _CMD_DIRSHIFT) | \ - ((flags) << _CMD_FLAGSSHIFT) | \ - ((vtype) << _CMD_VTYPESHIFT) | \ - ((nr) << _CMD_NSHIFT)) -#define _CMDC(dir, vtype, nr) _CMDCF(dir, 0, vtype, nr) -#define _CMDCNW(dir, vtype, nr) _CMDCF(dir, _CMD_FLAGS_NOWAIT, vtype, nr) - -/* - * Used to decode cmds.. 
- */ -#define _CMD_DIR(cmd) (((cmd) >> _CMD_DIRSHIFT) & _CMD_DIRMASK) -#define _CMD_FLAGS(cmd) (((cmd) >> _CMD_FLAGSSHIFT) & _CMD_FLAGSMASK) -#define _CMD_VTYPE(cmd) (((cmd) >> _CMD_VTYPESHIFT) & _CMD_VTYPEMASK) -#define _CMD_N(cmd) (((cmd) >> _CMD_NSHIFT) & _CMD_NMASK) - -enum vnic_devcmd_cmd { - CMD_NONE = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_NONE, 0), - - /* - * mcpu fw info in mem: - * in: - * (u64)a0=paddr to struct vnic_devcmd_fw_info - * action: - * Fills in struct vnic_devcmd_fw_info (128 bytes) - * note: - * An old definition of CMD_MCPU_FW_INFO - */ - CMD_MCPU_FW_INFO_OLD = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 1), - - /* - * mcpu fw info in mem: - * in: - * (u64)a0=paddr to struct vnic_devcmd_fw_info - * (u16)a1=size of the structure - * out: - * (u16)a1=0 for in:a1 = 0, - * data size actually written for other values. - * action: - * Fills in first 128 bytes of vnic_devcmd_fw_info for in:a1 = 0, - * first in:a1 bytes for 0 < in:a1 <= 132, - * 132 bytes for other values of in:a1. - * note: - * CMD_MCPU_FW_INFO and CMD_MCPU_FW_INFO_OLD have the same enum 1 - * for source compatibility. 
- */ - CMD_MCPU_FW_INFO = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 1), - - /* dev-specific block member: - * in: (u16)a0=offset,(u8)a1=size - * out: a0=value - */ - CMD_DEV_SPEC = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 2), - - /* stats clear */ - CMD_STATS_CLEAR = _CMDCNW(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 3), - - /* stats dump in mem: (u64)a0=paddr to stats area, - * (u16)a1=sizeof stats area */ - CMD_STATS_DUMP = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 4), - - /* set Rx packet filter: (u32)a0=filters (see CMD_PFILTER_*) */ - CMD_PACKET_FILTER = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 7), - - /* set Rx packet filter for all: (u32)a0=filters (see CMD_PFILTER_*) */ - CMD_PACKET_FILTER_ALL = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 7), - - /* hang detection notification */ - CMD_HANG_NOTIFY = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 8), - - /* MAC address in (u48)a0 */ - CMD_MAC_ADDR = _CMDC(_CMD_DIR_READ, - _CMD_VTYPE_ENET | _CMD_VTYPE_FC, 9), -#define CMD_GET_MAC_ADDR CMD_MAC_ADDR /* some uses are aliased */ - - /* add addr from (u48)a0 */ - CMD_ADDR_ADD = _CMDCNW(_CMD_DIR_WRITE, - _CMD_VTYPE_ENET | _CMD_VTYPE_FC, 12), - - /* del addr from (u48)a0 */ - CMD_ADDR_DEL = _CMDCNW(_CMD_DIR_WRITE, - _CMD_VTYPE_ENET | _CMD_VTYPE_FC, 13), - - /* add VLAN id in (u16)a0 */ - CMD_VLAN_ADD = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 14), - - /* del VLAN id in (u16)a0 */ - CMD_VLAN_DEL = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 15), - - /* nic_cfg in (u32)a0 */ - CMD_NIC_CFG = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 16), - - /* union vnic_rss_key in mem: (u64)a0=paddr, (u16)a1=len */ - CMD_RSS_KEY = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 17), - - /* union vnic_rss_cpu in mem: (u64)a0=paddr, (u16)a1=len */ - CMD_RSS_CPU = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 18), - - /* initiate softreset */ - CMD_SOFT_RESET = _CMDCNW(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 19), - - /* softreset status: - * out: a0=0 reset complete, a0=1 reset in progress */ - CMD_SOFT_RESET_STATUS = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 
20), - - /* set struct vnic_devcmd_notify buffer in mem: - * in: - * (u64)a0=paddr to notify (set paddr=0 to unset) - * (u32)a1 & 0x00000000ffffffff=sizeof(struct vnic_devcmd_notify) - * (u16)a1 & 0x0000ffff00000000=intr num (-1 for no intr) - * out: - * (u32)a1 = effective size - */ - CMD_NOTIFY = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 21), - - /* UNDI API: (u64)a0=paddr to s_PXENV_UNDI_ struct, - * (u8)a1=PXENV_UNDI_xxx */ - CMD_UNDI = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 22), - - /* initiate open sequence (u32)a0=flags (see CMD_OPENF_*) */ - CMD_OPEN = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 23), - - /* open status: - * out: a0=0 open complete, a0=1 open in progress */ - CMD_OPEN_STATUS = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 24), - - /* close vnic */ - CMD_CLOSE = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 25), - - /* initialize virtual link: (u32)a0=flags (see CMD_INITF_*) */ -/***** Replaced by CMD_INIT *****/ - CMD_INIT_v1 = _CMDCNW(_CMD_DIR_READ, _CMD_VTYPE_ALL, 26), - - /* variant of CMD_INIT, with provisioning info - * (u64)a0=paddr of vnic_devcmd_provinfo - * (u32)a1=sizeof provision info */ - CMD_INIT_PROV_INFO = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 27), - - /* enable virtual link */ - CMD_ENABLE = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 28), - - /* enable virtual link, waiting variant. 
*/ - CMD_ENABLE_WAIT = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 28), - - /* disable virtual link */ - CMD_DISABLE = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 29), - - /* stats dump sum of all vnic stats on same uplink in mem: - * (u64)a0=paddr - * (u16)a1=sizeof stats area */ - CMD_STATS_DUMP_ALL = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 30), - - /* init status: - * out: a0=0 init complete, a0=1 init in progress - * if a0=0, a1=errno */ - CMD_INIT_STATUS = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 31), - - /* INT13 API: (u64)a0=paddr to vnic_int13_params struct - * (u32)a1=INT13_CMD_xxx */ - CMD_INT13 = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_FC, 32), - - /* logical uplink enable/disable: (u64)a0: 0/1=disable/enable */ - CMD_LOGICAL_UPLINK = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 33), - - /* undo initialize of virtual link */ - CMD_DEINIT = _CMDCNW(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 34), - - /* initialize virtual link: (u32)a0=flags (see CMD_INITF_*) */ - CMD_INIT = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 35), - - /* check fw capability of a cmd: - * in: (u32)a0=cmd - * out: (u32)a0=errno, 0:valid cmd, a1=supported VNIC_STF_* bits */ - CMD_CAPABILITY = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 36), - - /* persistent binding info - * in: (u64)a0=paddr of arg - * (u32)a1=CMD_PERBI_XXX */ - CMD_PERBI = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_FC, 37), - - /* Interrupt Assert Register functionality - * in: (u16)a0=interrupt number to assert - */ - CMD_IAR = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 38), - - /* initiate hangreset, like softreset after hang detected */ - CMD_HANG_RESET = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 39), - - /* hangreset status: - * out: a0=0 reset complete, a0=1 reset in progress */ - CMD_HANG_RESET_STATUS = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 40), - - /* - * Set hw ingress packet vlan rewrite mode: - * in: (u32)a0=new vlan rewrite mode - * out: (u32)a0=old vlan rewrite mode */ - CMD_IG_VLAN_REWRITE_MODE = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 41), - - /* - * in: (u16)a0=bdf of target vnic 
- * (u32)a1=cmd to proxy - * a2-a15=args to cmd in a1 - * out: (u32)a0=status of proxied cmd - * a1-a15=out args of proxied cmd */ - CMD_PROXY_BY_BDF = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 42), - - /* - * As for BY_BDF except a0 is index of hvnlink subordinate vnic - * or SR-IOV virtual vnic - */ - CMD_PROXY_BY_INDEX = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 43), - - /* - * For HPP toggle: - * adapter-info-get - * in: (u64)a0=phsical address of buffer passed in from caller. - * (u16)a1=size of buffer specified in a0. - * out: (u64)a0=phsical address of buffer passed in from caller. - * (u16)a1=actual bytes from VIF-CONFIG-INFO TLV, or - * 0 if no VIF-CONFIG-INFO TLV was ever received. */ - CMD_CONFIG_INFO_GET = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 44), - - /* - * INT13 API: (u64)a0=paddr to vnic_int13_params struct - * (u32)a1=INT13_CMD_xxx - */ - CMD_INT13_ALL = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 45), - - /* - * Set default vlan: - * in: (u16)a0=new default vlan - * (u16)a1=zero for overriding vlan with param a0, - * non-zero for resetting vlan to the default - * out: (u16)a0=old default vlan - */ - CMD_SET_DEFAULT_VLAN = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 46), - - /* init_prov_info2: - * Variant of CMD_INIT_PROV_INFO, where it will not try to enable - * the vnic until CMD_ENABLE2 is issued. - * (u64)a0=paddr of vnic_devcmd_provinfo - * (u32)a1=sizeof provision info */ - CMD_INIT_PROV_INFO2 = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 47), - - /* enable2: - * (u32)a0=0 ==> standby - * =CMD_ENABLE2_ACTIVE ==> active - */ - CMD_ENABLE2 = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 48), - - /* - * cmd_status: - * Returns the status of the specified command - * Input: - * a0 = command for which status is being queried. 
- * Possible values are: - * CMD_SOFT_RESET - * CMD_HANG_RESET - * CMD_OPEN - * CMD_INIT - * CMD_INIT_PROV_INFO - * CMD_DEINIT - * CMD_INIT_PROV_INFO2 - * CMD_ENABLE2 - * Output: - * if status == STAT_ERROR - * a0 = ERR_ENOTSUPPORTED - status for command in a0 is - * not supported - * if status == STAT_NONE - * a0 = status of the devcmd specified in a0 as follows. - * ERR_SUCCESS - command in a0 completed successfully - * ERR_EINPROGRESS - command in a0 is still in progress - */ - CMD_STATUS = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 49), - - /* - * Returns interrupt coalescing timer conversion factors. - * After calling this devcmd, ENIC driver can convert - * interrupt coalescing timer in usec into CPU cycles as follows: - * - * intr_timer_cycles = intr_timer_usec * multiplier / divisor - * - * Interrupt coalescing timer in usecs can be be converted/obtained - * from CPU cycles as follows: - * - * intr_timer_usec = intr_timer_cycles * divisor / multiplier - * - * in: none - * out: (u32)a0 = multiplier - * (u32)a1 = divisor - * (u32)a2 = maximum timer value in usec - */ - CMD_INTR_COAL_CONVERT = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 50), - - /* - * ISCSI DUMP API: - * in: (u64)a0=paddr of the param or param itself - * (u32)a1=ISCSI_CMD_xxx - */ - CMD_ISCSI_DUMP_REQ = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 51), - - /* - * ISCSI DUMP STATUS API: - * in: (u32)a0=cmd tag - * in: (u32)a1=ISCSI_CMD_xxx - * out: (u32)a0=cmd status - */ - CMD_ISCSI_DUMP_STATUS = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 52), - - /* - * Subvnic migration from MQ <--> VF. - * Enable the LIF migration from MQ to VF and vice versa. MQ and VF - * indexes are statically bound at the time of initialization. - * Based on the direction of migration, the resources of either MQ or - * the VF shall be attached to the LIF. 
- * in: (u32)a0=Direction of Migration - * 0=> Migrate to VF - * 1=> Migrate to MQ - * (u32)a1=VF index (MQ index) - */ - CMD_MIGRATE_SUBVNIC = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 53), - - /* - * Register / Deregister the notification block for MQ subvnics - * in: - * (u64)a0=paddr to notify (set paddr=0 to unset) - * (u32)a1 & 0x00000000ffffffff=sizeof(struct vnic_devcmd_notify) - * (u16)a1 & 0x0000ffff00000000=intr num (-1 for no intr) - * out: - * (u32)a1 = effective size - */ - CMD_SUBVNIC_NOTIFY = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 54), - - /* - * Set the predefined mac address as default - * in: - * (u48)a0=mac addr - */ - CMD_SET_MAC_ADDR = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 55), - - /* Update the provisioning info of the given VIF - * (u64)a0=paddr of vnic_devcmd_provinfo - * (u32)a1=sizeof provision info */ - CMD_PROV_INFO_UPDATE = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 56), - - /* - * Initialization for the devcmd2 interface. - * in: (u64) a0=host result buffer physical address - * in: (u16) a1=number of entries in result buffer - */ - CMD_INITIALIZE_DEVCMD2 = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 57), - - /* - * Add a filter. - * in: (u64) a0= filter address - * (u32) a1= size of filter - * out: (u32) a0=filter identifier - * - * Capability query: - * out: (u64) a0= 1 if capability query supported - * (u64) a1= MAX filter type supported - */ - CMD_ADD_FILTER = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 58), - - /* - * Delete a filter. 
- * in: (u32) a0=filter identifier - */ - CMD_DEL_FILTER = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 59), - - /* - * Enable a Queue Pair in User space NIC - * in: (u32) a0=Queue Pair number - * (u32) a1= command - */ - CMD_QP_ENABLE = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 60), - - /* - * Disable a Queue Pair in User space NIC - * in: (u32) a0=Queue Pair number - * (u32) a1= command - */ - CMD_QP_DISABLE = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 61), - - /* - * Stats dump Queue Pair in User space NIC - * in: (u32) a0=Queue Pair number - * (u64) a1=host buffer addr for status dump - * (u32) a2=length of the buffer - */ - CMD_QP_STATS_DUMP = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 62), - - /* - * Clear stats for Queue Pair in User space NIC - * in: (u32) a0=Queue Pair number - */ - CMD_QP_STATS_CLEAR = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 63), - - /* - * UEFI BOOT API: (u64)a0= UEFI FLS_CMD_xxx - * (ui64)a1= paddr for the info buffer - */ - CMD_FC_REQ = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_FC, 64), - - /* - * Return the iSCSI config details required by the EFI Option ROM - * in: (u32) a0=0 Get Boot Info for PXE eNIC as per pxe_boot_config_t - * a0=1 Get Boot info for iSCSI enic as per - * iscsi_boot_efi_cfg_t - * in: (u64) a1=Host address where iSCSI config info is returned - */ - CMD_VNIC_BOOT_CONFIG_INFO = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 65), - - /* - * Create a Queue Pair (RoCE) - * in: (u32) a0 = Queue Pair number - * (u32) a1 = Remote QP - * (u32) a2 = RDMA-RQ - * (u16) a3 = RQ Res Group - * (u16) a4 = SQ Res Group - * (u32) a5 = Protection Domain - * (u64) a6 = Remote MAC - * (u32) a7 = start PSN - * (u16) a8 = MSS - * (u32) a9 = protocol version - */ - CMD_RDMA_QP_CREATE = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 66), - - /* - * Delete a Queue Pair (RoCE) - * in: (u32) a0 = Queue Pair number - */ - CMD_RDMA_QP_DELETE = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 67), - - /* - * Retrieve a Queue Pair's status information (RoCE) - * in: (u32) a0 = Queue Pair number - * (u64) 
a1 = host buffer addr for QP status struct - * (u32) a2 = length of the buffer - */ - CMD_RDMA_QP_STATUS = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 68), - - /* - * Use this devcmd for agreeing on the highest common version supported - * by both driver and fw for by features who need such a facility. - * in: (u64) a0 = feature (driver requests for the supported versions on - * this feature) - * out: (u64) a0 = bitmap of all supported versions for that feature - */ - CMD_GET_SUPP_FEATURE_VER = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 69), - - /* - * Initialize the RDMA notification work queue - * in: (u64) a0 = host buffer address - * in: (u16) a1 = number of entries in buffer - * in: (u16) a2 = resource group number - * in: (u16) a3 = CQ number to post completion - */ - CMD_RDMA_INIT_INFO_BUF = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 70), - - /* - * De-init the RDMA notification work queue - * in: (u64) a0=resource group number - */ - CMD_RDMA_DEINIT_INFO_BUF = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 71), - - /* - * Control (Enable/Disable) overlay offloads on the given vnic - * in: (u8) a0 = OVERLAY_FEATURE_NVGRE : NVGRE - * a0 = OVERLAY_FEATURE_VXLAN : VxLAN - * in: (u8) a1 = OVERLAY_OFFLOAD_ENABLE : Enable or - * a1 = OVERLAY_OFFLOAD_DISABLE : Disable or - * a1 = OVERLAY_OFFLOAD_ENABLE_V2 : Enable with version 2 - */ - CMD_OVERLAY_OFFLOAD_CTRL = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 72), - - /* - * Configuration of overlay offloads feature on a given vNIC - * in: (u8) a0 = OVERLAY_CFG_VXLAN_PORT_UPDATE : VxLAN - * in: (u16) a1 = unsigned short int port information - */ - CMD_OVERLAY_OFFLOAD_CFG = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 73), - - /* - * Return the configured name for the device - * in: (u64) a0=Host address where the name is copied - * (u32) a1=Size of the buffer - */ - CMD_GET_CONFIG_NAME = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 74), - - /* - * Enable group interrupt for the VF - * in: (u32) a0 = GRPINTR_ENABLE : enable - * a0 = GRPINTR_DISABLE : disable - * 
a0 = GRPINTR_UPD_VECT: update group vector addr - * in: (u32) a1 = interrupt group count - * in: (u64) a2 = Start of host buffer address for DMAing group - * vector bitmap - * in: (u64) a3 = Stride between group vectors - */ - CMD_CONFIG_GRPINTR = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 75), - - /* - * Set cq arrary base and size in a list of consective wqs and - * rqs for a device - * in: (u16) a0 = the wq relative index in the device. - * -1 indicates skipping wq configuration - * in: (u16) a1 = the wcq relative index in the device - * in: (u16) a2 = the rq relative index in the device - * -1 indicates skipping rq configuration - * in: (u16) a3 = the rcq relative index in the device - */ - CMD_CONFIG_CQ_ARRAY = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 76), - - /* - * Add an advanced filter. - * in: (u64) a0= filter address - * (u32) a1= size of filter - * out: (u32) a0=filter identifier - * - * Capability query: - * in: (u64) a1= supported filter capability exchange modes - * out: (u64) a0= 1 if capability query supported - * if (u64) a1 = 0: a1 = MAX filter type supported - * if (u64) a1 & FILTER_CAP_MODE_V1_FLAG: - * a1 = bitmask of supported filters - * a2 = FILTER_CAP_MODE_V1 - * a3 = bitmask of supported actions - */ - CMD_ADD_ADV_FILTER = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 77), - - /* - * Add a MAC address and VLAN pair to a LIF. This is like CMD_ADDR_ADD - * but with the ability to specify a VLAN as well. - * in: (u64) a0 = MAC address - * (u16) a1 = VLAN (0 means default VLAN) - * (u32) a2 = flags (see AVF_xxx below) - */ - CMD_ADDR_VLAN_ADD = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 78), - - /* - * Delete a MAC address and VLAN pair from a LIF. This is like CMD_ADDR_DEL - * but with the ability to specify a VLAN as well. - * in: (u64) a0 = MAC address - * (u16) a1 = VLAN (0 means default VLAN) - * (u32) a2 = flags (see AVF_xxx below) - */ - CMD_ADDR_VLAN_DEL = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 79), - - /* - * Bind resources to an MQ sub-vnic. 
To detach a sub-vnic from all - * resources, call with all 0s. A sub-vnic may not be attached to - * different resources until it is detached from current resources. - * This may only be issued as proxy-by-index on a MQ sub-vnic - * in: (u32) a0 = WQ base (relative) - * (u32) a1 = WQ count - * (u32) a2 = RQ base - * (u32) a3 = RQ count - * (u32) a4 = CQ base - * (u32) a5 = CQ count - */ - CMD_SUBVNIC_RES_BIND = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 80), - - /* - * Configure RDMA Resource - * in: (u32) a0 = sub-command - * (u32) a1 = resource domain, 0xffffffff for domain-less commands - * (u32) a2 = (command-specific) - * ... - * - * All arguments that have not been assigned a meaning should be - * initialized to 0 to allow for better driver forward compatibility. - */ - CMD_RDMA_CTRL = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 81), - - /* - * Set a rate limit on a vnic - * in: (u32) a0 = rate limit in units of Mb/s - * (u32) a1 = traffic class - */ - CMD_RATE_LIMIT_SET = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 82), - - /* - * Query rate limit on a vnic - * in: (u32) a0 = traffic class - * out:(u32) a0 = latest devcmd specified rate limit (Mb/s) - * a1 = aurrent actual rate limit (Mb/s) - */ - CMD_RATE_LIMIT_GET = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ENET, 82), - - /* - * Write QoS settings to a vnic - * in: (u32) a0 = flags - * (u32) a1 = PFC map - * (u32) a2 = PGS grouping - * ((a2 >> (PRI * 4)) & 0xf) = pri group (15 = strict priority) - * (u32) a3 = PGS BW allocation - * ((a3 >> (PG * 8)) & 0xff) = BW % for priority group - * (must sum to 100) - */ - CMD_QOS_SET = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 83), - - /* - * Read QoS settings from a vnic - * out:(u32) a0 = flags - * (u32) a1 = PFC map - * (u32) a2 = PGS grouping - * ((a2 >> (PRI * 4)) & 0xf) = pri group (15 = strict priority) - * (u32) a3 = PGS BW allocation - * ((a3 >> (PG * 8)) & 0xff) = BW % for priority group - */ - CMD_QOS_GET = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ENET, 83), - - /* - * Command for tests on 
bodega-dev - * in: (u32) a0=requested operation - * a1..aN=operation specific - * out: a0..aN=operation specific - */ - CMD_TEST_OP = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 96), -}; - -/* Modes for exchanging advanced filter capabilities. The modes supported by - * the driver are passed in the CMD_ADD_ADV_FILTER capability command and the - * mode selected is returned. - * V0: the maximum filter type supported is returned - * V1: bitmasks of supported filters and actions are returned - */ -enum filter_cap_mode { - FILTER_CAP_MODE_V0 = 0, /* Must always be 0 for legacy drivers */ - FILTER_CAP_MODE_V1 = 1, -}; -#define FILTER_CAP_MODE_V1_FLAG (1 << FILTER_CAP_MODE_V1) - -/* CMD_ENABLE2 flags */ -#define CMD_ENABLE2_STANDBY 0x0 -#define CMD_ENABLE2_ACTIVE 0x1 - -/* flags for CMD_OPEN */ -#define CMD_OPENF_OPROM 0x1 /* open coming from option rom */ -#define CMD_OPENF_RQ_ENABLE_THEN_POST 0x2 /* Enable IG DESC cache on open */ - -/* flags for CMD_INIT */ -#define CMD_INITF_DEFAULT_MAC 0x1 /* init with default mac addr */ - -/* flags for CMD_PACKET_FILTER */ -#define CMD_PFILTER_DIRECTED 0x01 -#define CMD_PFILTER_MULTICAST 0x02 -#define CMD_PFILTER_BROADCAST 0x04 -#define CMD_PFILTER_PROMISCUOUS 0x08 -#define CMD_PFILTER_ALL_MULTICAST 0x10 - -/* Commands for CMD_QP_ENABLE/CM_QP_DISABLE */ -#define CMD_QP_RQWQ 0x0 - -/* rewrite modes for CMD_IG_VLAN_REWRITE_MODE */ -#define IG_VLAN_REWRITE_MODE_DEFAULT_TRUNK 0 -#define IG_VLAN_REWRITE_MODE_UNTAG_DEFAULT_VLAN 1 -#define IG_VLAN_REWRITE_MODE_PRIORITY_TAG_DEFAULT_VLAN 2 -#define IG_VLAN_REWRITE_MODE_PASS_THRU 3 - -enum vnic_devcmd_status { - STAT_NONE = 0, - STAT_BUSY = 1 << 0, /* cmd in progress */ - STAT_ERROR = 1 << 1, /* last cmd caused error (code in a0) */ - STAT_FAILOVER = 1 << 2, /* always set on vnics in pci standby state - if seen a failover to the standby happened */ -}; - -enum vnic_devcmd_error { - ERR_SUCCESS = 0, - ERR_EINVAL = 1, - ERR_EFAULT = 2, - ERR_EPERM = 3, - ERR_EBUSY = 4, - ERR_ECMDUNKNOWN = 5, - 
ERR_EBADSTATE = 6, - ERR_ENOMEM = 7, - ERR_ETIMEDOUT = 8, - ERR_ELINKDOWN = 9, - ERR_EMAXRES = 10, - ERR_ENOTSUPPORTED = 11, - ERR_EINPROGRESS = 12, - ERR_MAX -}; - -/* - * note: hw_version and asic_rev refer to the same thing, - * but have different formats. hw_version is - * a 32-byte string (e.g. "A2") and asic_rev is - * a 16-bit integer (e.g. 0xA2). - */ -struct vnic_devcmd_fw_info { - char fw_version[32]; - char fw_build[32]; - char hw_version[32]; - char hw_serial_number[32]; - u16 asic_type; - u16 asic_rev; -}; - -#ifndef FOR_UPSTREAM_KERNEL -enum fwinfo_asic_type { - FWINFO_ASIC_TYPE_UNKNOWN, - FWINFO_ASIC_TYPE_PALO, - FWINFO_ASIC_TYPE_SERENO, - FWINFO_ASIC_TYPE_CRUZ, -}; -#endif - -struct vnic_devcmd_notify { - u32 csum; /* checksum over following words */ - - u32 link_state; /* link up == 1 */ - u32 port_speed; /* effective port speed (rate limit) */ - u32 mtu; /* MTU */ - u32 msglvl; /* requested driver msg lvl */ - u32 uif; /* uplink interface */ - u32 status; /* status bits (see VNIC_STF_*) */ - u32 error; /* error code (see ERR_*) for first ERR */ - u32 link_down_cnt; /* running count of link down transitions */ - u32 perbi_rebuild_cnt; /* running count of perbi rebuilds */ -}; -#define VNIC_STF_FATAL_ERR 0x0001 /* fatal fw error */ -#define VNIC_STF_STD_PAUSE 0x0002 /* standard link-level pause on */ -#define VNIC_STF_PFC_PAUSE 0x0004 /* priority flow control pause on */ -/* all supported status flags */ -#define VNIC_STF_ALL (VNIC_STF_FATAL_ERR |\ - VNIC_STF_STD_PAUSE |\ - VNIC_STF_PFC_PAUSE |\ - 0) - -struct vnic_devcmd_provinfo { - u8 oui[3]; - u8 type; - u8 data[]; -}; - -/* - * These are used in flags field of different filters to denote - * valid fields used. 
- */ -#define FILTER_FIELD_VALID(fld) (1 << (fld - 1)) - -#define FILTER_FIELD_USNIC_VLAN FILTER_FIELD_VALID(1) -#define FILTER_FIELD_USNIC_ETHTYPE FILTER_FIELD_VALID(2) -#define FILTER_FIELD_USNIC_PROTO FILTER_FIELD_VALID(3) -#define FILTER_FIELD_USNIC_ID FILTER_FIELD_VALID(4) - -#define FILTER_FIELDS_USNIC (FILTER_FIELD_USNIC_VLAN | \ - FILTER_FIELD_USNIC_ETHTYPE | \ - FILTER_FIELD_USNIC_PROTO | \ - FILTER_FIELD_USNIC_ID) - -struct filter_usnic_id { - u32 flags; - u16 vlan; - u16 ethtype; - u8 proto_version; - u32 usnic_id; -} __attribute__((packed)); - -#define FILTER_FIELD_5TUP_PROTO FILTER_FIELD_VALID(1) -#define FILTER_FIELD_5TUP_SRC_AD FILTER_FIELD_VALID(2) -#define FILTER_FIELD_5TUP_DST_AD FILTER_FIELD_VALID(3) -#define FILTER_FIELD_5TUP_SRC_PT FILTER_FIELD_VALID(4) -#define FILTER_FIELD_5TUP_DST_PT FILTER_FIELD_VALID(5) - -#define FILTER_FIELDS_IPV4_5TUPLE (FILTER_FIELD_5TUP_PROTO | \ - FILTER_FIELD_5TUP_SRC_AD | \ - FILTER_FIELD_5TUP_DST_AD | \ - FILTER_FIELD_5TUP_SRC_PT | \ - FILTER_FIELD_5TUP_DST_PT) - -/* Enums for the protocol field. 
*/ -enum protocol_e { - PROTO_UDP = 0, - PROTO_TCP = 1, - PROTO_IPV4 = 2, - PROTO_IPV6 = 3 -}; - -struct filter_ipv4_5tuple { - u32 flags; - u32 protocol; - u32 src_addr; - u32 dst_addr; - u16 src_port; - u16 dst_port; -} __attribute__((packed)); - -#define FILTER_FIELD_VMQ_VLAN FILTER_FIELD_VALID(1) -#define FILTER_FIELD_VMQ_MAC FILTER_FIELD_VALID(2) - -#define FILTER_FIELDS_MAC_VLAN (FILTER_FIELD_VMQ_VLAN | \ - FILTER_FIELD_VMQ_MAC) - -#define FILTER_FIELDS_NVGRE FILTER_FIELD_VMQ_MAC - -struct filter_mac_vlan { - u32 flags; - u16 vlan; - u8 mac_addr[6]; -} __attribute__((packed)); - -#define FILTER_FIELD_VLAN_IP_3TUP_VLAN FILTER_FIELD_VALID(1) -#define FILTER_FIELD_VLAN_IP_3TUP_L3_PROTO FILTER_FIELD_VALID(2) -#define FILTER_FIELD_VLAN_IP_3TUP_DST_AD FILTER_FIELD_VALID(3) -#define FILTER_FIELD_VLAN_IP_3TUP_L4_PROTO FILTER_FIELD_VALID(4) -#define FILTER_FIELD_VLAN_IP_3TUP_DST_PT FILTER_FIELD_VALID(5) - -#define FILTER_FIELDS_VLAN_IP_3TUP (FILTER_FIELD_VLAN_IP_3TUP_VLAN | \ - FILTER_FIELD_VLAN_IP_3TUP_L3_PROTO | \ - FILTER_FIELD_VLAN_IP_3TUP_DST_AD | \ - FILTER_FIELD_VLAN_IP_3TUP_L4_PROTO | \ - FILTER_FIELD_VLAN_IP_3TUP_DST_PT) - -struct filter_vlan_ip_3tuple { - u32 flags; - u16 vlan; - u16 l3_protocol; - union { - u32 dst_addr_v4; - u8 dst_addr_v6[16]; - } u; - u32 l4_protocol; - u16 dst_port; -} __attribute__((packed)); - -#define FILTER_GENERIC_1_BYTES 64 - -enum filter_generic_1_layer { - FILTER_GENERIC_1_L2, - FILTER_GENERIC_1_L3, - FILTER_GENERIC_1_L4, - FILTER_GENERIC_1_L5, - FILTER_GENERIC_1_NUM_LAYERS -}; - -#define FILTER_GENERIC_1_IPV4 (1 << 0) -#define FILTER_GENERIC_1_IPV6 (1 << 1) -#define FILTER_GENERIC_1_UDP (1 << 2) -#define FILTER_GENERIC_1_TCP (1 << 3) -#define FILTER_GENERIC_1_TCP_OR_UDP (1 << 4) -#define FILTER_GENERIC_1_IP4SUM_OK (1 << 5) -#define FILTER_GENERIC_1_L4SUM_OK (1 << 6) -#define FILTER_GENERIC_1_IPFRAG (1 << 7) - -#define FILTER_GENERIC_1_KEY_LEN 64 - -/* - * Version 1 of generic filter specification - * position is only 16 bits, 
reserving positions > 64k to be used by firmware - */ -struct filter_generic_1 { - u16 position; // lower position comes first - u32 mask_flags; - u32 val_flags; - u16 mask_vlan; - u16 val_vlan; - struct { - u8 mask[FILTER_GENERIC_1_KEY_LEN]; // 0 bit means "don't care" - u8 val[FILTER_GENERIC_1_KEY_LEN]; - } __attribute__((packed)) layer[FILTER_GENERIC_1_NUM_LAYERS]; -} __attribute__((packed)); - -/* Specifies the filter_action type. */ -enum { - FILTER_ACTION_RQ_STEERING = 0, - FILTER_ACTION_V2 = 1, - FILTER_ACTION_MAX -}; - -struct filter_action { - u32 type; - union { - u32 rq_idx; - } u; -} __attribute__((packed)); - -#define FILTER_ACTION_RQ_STEERING_FLAG (1 << 0) -#define FILTER_ACTION_FILTER_ID_FLAG (1 << 1) -#define FILTER_ACTION_DROP_FLAG (1 << 2) - -/* Version 2 of filter action must be a strict extension of struct filter_action - * where the first fields exactly match in size and meaning. - */ -struct filter_action_v2 { - u32 type; - u32 rq_idx; - u32 flags; // use FILTER_ACTION_XXX_FLAG defines - u16 filter_id; - u_int8_t reserved[32]; // for future expansion -} __attribute__((packed)); - -/* Specifies the filter type. 
*/ -enum filter_type { - FILTER_USNIC_ID = 0, - FILTER_IPV4_5TUPLE = 1, - FILTER_MAC_VLAN = 2, - FILTER_VLAN_IP_3TUPLE = 3, - FILTER_NVGRE_VMQ = 4, - FILTER_USNIC_IP = 5, - FILTER_DPDK_1 = 6, - FILTER_MAX -}; - -#define FILTER_USNIC_ID_FLAG (1 << FILTER_USNIC_ID) -#define FILTER_IPV4_5TUPLE_FLAG (1 << FILTER_IPV4_5TUPLE) -#define FILTER_MAC_VLAN_FLAG (1 << FILTER_MAC_VLAN) -#define FILTER_VLAN_IP_3TUPLE_FLAG (1 << FILTER_VLAN_IP_3TUPLE) -#define FILTER_NVGRE_VMQ_FLAG (1 << FILTER_NVGRE_VMQ) -#define FILTER_USNIC_IP_FLAG (1 << FILTER_USNIC_IP) -#define FILTER_DPDK_1_FLAG (1 << FILTER_DPDK_1) - -struct filter { - u32 type; - union { - struct filter_usnic_id usnic; - struct filter_ipv4_5tuple ipv4; - struct filter_mac_vlan mac_vlan; - struct filter_vlan_ip_3tuple vlan_3tuple; - } u; -} __attribute__((packed)); - -/* - * This is a strict superset of "struct filter" and exists only - * because many drivers use "sizeof (struct filter)" in deciding TLV size. - * This new, larger struct filter would cause any code that uses that method - * to not work with older firmware, so we add filter_v2 to hold the - * new filter types. Drivers should use vnic_filter_size() to determine - * the TLV size instead of sizeof (struct fiter_v2) to guard against future - * growth. - */ -struct filter_v2 { - u32 type; - union { - struct filter_usnic_id usnic; - struct filter_ipv4_5tuple ipv4; - struct filter_mac_vlan mac_vlan; - struct filter_vlan_ip_3tuple vlan_3tuple; - struct filter_generic_1 generic_1; - } u; -} __attribute__((packed)); - -enum { - CLSF_TLV_FILTER = 0, - CLSF_TLV_ACTION = 1, -}; - -struct filter_tlv { - u_int32_t type; - u_int32_t length; - u_int32_t val[]; -}; - -/* Data for CMD_ADD_FILTER is 2 TLV and filter + action structs */ -#define FILTER_MAX_BUF_SIZE 100 -#define FILTER_V2_MAX_BUF_SIZE (sizeof (struct filter_v2) + \ - sizeof (struct filter_action_v2) + \ - (2 * sizeof (struct filter_tlv))) - -/* - * Compute actual structure size given filter type. 
To be "future-proof," - * drivers should use this instead of "sizeof (struct filter_v2)" when - * computing length for TLV. - */ -static inline u_int32_t -vnic_filter_size( - struct filter_v2 *fp) -{ - u_int32_t size; - - switch (fp->type) { - case FILTER_USNIC_ID: - size = sizeof (fp->u.usnic); - break; - case FILTER_IPV4_5TUPLE: - size = sizeof (fp->u.ipv4); - break; - case FILTER_MAC_VLAN: - case FILTER_NVGRE_VMQ: - size = sizeof (fp->u.mac_vlan); - break; - case FILTER_VLAN_IP_3TUPLE: - size = sizeof (fp->u.vlan_3tuple); - break; - case FILTER_USNIC_IP: - case FILTER_DPDK_1: - size = sizeof (fp->u.generic_1); - break; - default: - size = sizeof (fp->u); - break; - } - size += sizeof (fp->type); - return (size); -} - - -enum { - CLSF_ADD = 0, - CLSF_DEL = 1, -}; - -/* - * Get the action structure size given action type. To be "future-proof," - * drivers should use this instead of "sizeof (struct filter_action_v2)" - * when computing length for TLV. - */ -static inline u_int32_t -vnic_action_size(struct filter_action_v2 *fap) -{ - u_int32_t size; - - switch (fap->type) { - case FILTER_ACTION_RQ_STEERING: - size = sizeof (struct filter_action); - break; - case FILTER_ACTION_V2: - size = sizeof (struct filter_action_v2); - break; - default: - /* this should never happen and will cause a devcmd error */ - size = sizeof (struct filter_action); - break; - } - return (size); -} - -/* - * Writing cmd register causes STAT_BUSY to get set in status register. - * When cmd completes, STAT_BUSY will be cleared. - * - * If cmd completed successfully STAT_ERROR will be clear - * and args registers contain cmd-specific results. - * - * If cmd error, STAT_ERROR will be set and args[0] contains error code. - * - * status register is read-only. While STAT_BUSY is set, - * all other register contents are read-only. - */ - -/* Make sizeof(vnic_devcmd) a power-of-2 for I/O BAR. 
*/ -#define VNIC_DEVCMD_NARGS 15 -struct vnic_devcmd { - u32 status; /* RO */ - u32 cmd; /* RW */ - u64 args[VNIC_DEVCMD_NARGS]; /* RW cmd args (little-endian) */ -}; - -/* - * Version 2 of the interface. - * - * Some things are carried over, notably the vnic_devcmd_cmd enum. - */ - -/* - * Flags for vnic_devcmd2.flags - */ - -#define DEVCMD2_FNORESULT 0x1 /* Don't copy result to host */ - -#define VNIC_DEVCMD2_NARGS VNIC_DEVCMD_NARGS -struct vnic_devcmd2 { - u16 pad; - u16 flags; - u32 cmd; /* same command #defines as original */ - u64 args[VNIC_DEVCMD2_NARGS]; -}; - -#define VNIC_DEVCMD2_NRESULTS VNIC_DEVCMD_NARGS -struct devcmd2_result { - u64 results[VNIC_DEVCMD2_NRESULTS]; - u32 pad; - u16 completed_index; /* into copy WQ */ - u8 error; /* same error codes as original */ - u8 color; /* 0 or 1 as with completion queues */ -}; - -#define DEVCMD2_RING_SIZE 32 -#define DEVCMD2_DESC_SIZE 128 - -#define DEVCMD2_RESULTS_SIZE_MAX ((1 << 16) - 1) - -/* Overlay related definitions */ - -/* - * This enum lists the flag associated with each of the overlay features - */ -typedef enum { - OVERLAY_FEATURE_NVGRE = 1, - OVERLAY_FEATURE_VXLAN, - OVERLAY_FEATURE_MAX, -} overlay_feature_t; - -typedef enum { - OVERLAY_OFFLOAD_ENABLE, - OVERLAY_OFFLOAD_DISABLE, - OVERLAY_OFFLOAD_ENABLE_V2, - OVERLAY_OFFLOAD_MAX, -} overlay_ofld_cmd; - -#define OVERLAY_CFG_VXLAN_PORT_UPDATE 0 - -/* - * Use this enum to get the supported versions for each of these features - * If you need to use the devcmd_get_supported_feature_version(), add - * the new feature into this enum and install function handler in devcmd.c - */ -typedef enum { - VIC_FEATURE_VXLAN, - VIC_FEATURE_RDMA, - VIC_FEATURE_VXLAN_PATCH, - VIC_FEATURE_MAX, -} vic_feature_t; - -/* this previously lived in vnic_rdma.h */ -#define MK_RDMA_FW_VER(ver) (1 << (ver)) -enum vnic_rdma_fw_versions { - RDMA_FW_VER_1, - RDMA_FW_VER_2 -}; - -/* - * CMD_CONFIG_GRPINTR subcommands - */ -typedef enum { - GRPINTR_ENABLE = 1, - GRPINTR_DISABLE, - 
GRPINTR_UPD_VECT, -} grpintr_subcmd_t; - -/* - * CMD_RDMA_CTRL subcommands - * - * Unless otherwise stated, all arguments are in little endian (as with regular - * devcmds). - * - * MAC address arguments are encoded in u64 arguments. A little endian host - * should encode 11:22:33:44:55:66 as 0x0000112233445566. The high order bytes - * of the u64 value must be 0 or the argument will be considered an invalid MAC - * address. - */ - -#define RDMA_QP_STATE_INVALID 0 -#define RDMA_QP_STATE_RESET (1<<0) -#define RDMA_QP_STATE_INIT (1<<1) -#define RDMA_QP_STATE_RTR (1<<2) -#define RDMA_QP_STATE_RTS (1<<3) -#define RDMA_QP_STATE_SQD (1<<4) -#define RDMA_QP_STATE_SQE (1<<5) -#define RDMA_QP_STATE_ERR (1<<6) -#define RDMA_QP_STATE_RSRV1 (1<<7) - -#define RDMA_QP_STATE_VALID_RQ (RDMA_QP_STATE_INIT | \ - RDMA_QP_STATE_RTR | \ - RDMA_QP_STATE_RTS | \ - RDMA_QP_STATE_SQD | \ - RDMA_QP_STATE_SQE) - -#define RDMA_QP_STATE_VALID_RESP (RDMA_QP_STATE_RTR | \ - RDMA_QP_STATE_RTS | \ - RDMA_QP_STATE_SQD) - -#define RDMA_QP_STATE_SQD_SQE (RDMA_QP_STATE_SQD | \ - RDMA_QP_STATE_SQE) - -#define RDMA_QP_TYPE_INVALID 0 -#define RDMA_QP_TYPE_RC 1 -#define RDMA_QP_TYPE_UD 2 - -#define RDMA_INTR_NULL_IDX 0xffffffff -#define RDMA_ANY_QPN 0xffffffff -#define RDMA_NULL_QP_ID 0xffffffff -#define RDMA_PSN_UNCHANGED 0xffffffff - -#define RDMA_PROTO_ROCEV2 0 - -/* - * Initialize a specific resource domain associated with the current vNIC. The - * number of resource domains for the current vNIC is specified in the vNIC - * devspec. 
- * - * in: (u32) a0 = RDMA_SUBCMD_CFG_RESOURCE_DOMAIN - * (u32) a1 = resource domain id (0-indexed) - * (u32) a2 = protocol type (only RDMA_PROTO_ROCEV2 for now) - * (u64) a3 = source MAC address (see note above about MAC encoding) - * (u64) a4 = ring base addr of rdma_reg_cmd_result ring - * (u32) a5 = result ring size, should equal command WQ ring size - * (u32) a6 = rcmd soft cq interrupt vector (idx w/in vnic's intr range) - * (pass RDMA_INTR_NULL_IDX for no interrupt) - */ -#define RDMA_SUBCMD_CFG_RESOURCE_DOMAIN 0 - -/* - * Allocate a soft CQ from the resource domain. - * - * in: (u32) a0 = RDMA_SUBCMD_CREATE_CQ - * (u32) a1 = resource domain ID - * (u64) a2 = ring base address - * (u32) a3 = ring size - * (u32) a4 = interrupt vector (idx w/in vnic's intr range) - * (pass RDMA_INTR_NULL_IDX for no interrupt) - * - * out: (u32) a0 = CQ ID - */ -#define RDMA_SUBCMD_CREATE_CQ 1 - -/* - * Deallocate a soft CQ. - * - * in: (u32) a0 = RDMA_SUBCMD_DESTROY_CQ - * (u32) a1 = resource domain ID - * (u32) a2 = CQ ID - */ -#define RDMA_SUBCMD_DESTROY_CQ 2 - -/* - * Allocate a QP (with one SQ and one RQ) from the resource domain. - * - * in: (u32) a0 = RDMA_SUBCMD_CREATE_QP - * (u32) a1 = resource domain ID - * (u32) a2 = QP type (see RDMA_QP_TYPE_xxx) - * (u32) a3 = max SQ WRs - * (u32) a4 = max RQ WRs - * (u32) a5 = SQ CQ ID - * (u32) a6 = RQ CQ ID - * (u32) a7 = desired QPN (or RDMA_ANY_QPN if don't care) - * (u32) a8 = QP flags - * (u64) a9 = SQ ring base ptr - * (u64) a10 = RQ ring base ptr - * - * out: (u32) a0 = QP ID - * (u32) a1 = actual QPN (XXX could just obtain from QUERY_QP) - */ -#define RDMA_SUBCMD_CREATE_QP 3 - -/* - * Modify the state of an existing QP. This is primarily used to transition - * the QP from one state to the next. The "current state" argument must match - * the QP's actual current state or the command will fail. If the driver and - * firmware get out of sync, the actual current state can be queried with - * RDMA_SUBCMD_QUERY_QP. 
- * - * The next-hop MAC, peer IP, and peer QPN arguments are ignored if the new - * state is not RTR. - * - * in: (u32) a0 = RDMA_SUBCMD_MODIFY_QP - * (u32) a1 = resource domain ID - * (u32) a2 = QP ID - * (u32) a3 = current state - * (u32) a4 = new state - * (u64) a5 = next-hop MAC to destination IP (see MAC encoding note above) - * (u64) a6 = peer IP address - * (u32) a7 = peer QPN - * (u32) a8 = path MTU (one of: 512/1024/2048/4096, 0 means no change) - * (u64) a9 = upper 32-bits: SQ PSN (RDMA_PSN_UNCHANGED means no change) - * lower 32-bits: RQ PSN (RDMA_PSN_UNCHANGED means no change) - * (u32) a10 = Q_Key (UD QPs only) - * (u32) a11 = source IPv4 address in network byte order - */ -#define RDMA_SUBCMD_MODIFY_QP 4 - -/* - * Query current QP status. - * - * in: (u32) a0 = RDMA_SUBCMD_QUERY_QP - * (u32) a1 = resource domain ID - * (u32) a2 = QP ID - * - * out: (u32) a0 = QPN - * (u32) a1 = current QP state - * (u32) a2 = path MTU - * (u32) a3 = current SQ PSN - * (u32) a4 = current RQ PSN - */ -#define RDMA_SUBCMD_QUERY_QP 5 - -/* - * Deallocate a QP. - * - * in: (u32) a0 = RDMA_SUBCMD_DESTROY_QP - * (u32) a1 = resource domain ID - * (u32) a2 = QP ID - */ -#define RDMA_SUBCMD_DESTROY_QP 6 - -/* - * Retrieve a snapshot of current statistics for this vnic's - * rdma engine - * - * in: (u32) a0 = RDMA_SUBCMD_GET_STATS - * - * out: (u64) a0 = IG packet count - * (u64) a1 = IG byte count - * (u64) a2 = EG packet count - * (u64) a3 = EG byte count - */ -#define RDMA_SUBCMD_GET_STATS 7 - -/* - * in: (u32) a0 = RDMA_SUBCMD_RST_RESOURCE_DOMAIN - * (u32) a1 = resource domain ID - */ -#define RDMA_SUBCMD_RST_RESOURCE_DOMAIN 8 - -/* - * Status for deallocate QP dev_cmd. 
- * - * in: (u32) a0 = RDMA_SUBCMD_DESTROY_QP_STATUS - * (u32) a1 = resource domain ID - * (u32) a2 = QP ID - * - * out: (u32) a0 = ERR_EINPROGRESS/ERR_EBADSTATE/ERR_SUCCESS - */ -#define RDMA_SUBCMD_DESTROY_QP_STATUS 9 - -/* - * Flags for CMD_ADDR_VLAN_ADD and CMD_ADDR_VLAN_DEL - */ -#define AVF_VLAN_VALID 0x0001 // use VLAN from a1 in match - // (else VLAN is wildcard) -#define AVF_INNER_PKT 0x0002 // match on inner packet - -#endif /* _VNIC_DEVCMD_H_ */ diff --git a/prov/usnic/src/usnic_direct/vnic_enet.h b/prov/usnic/src/usnic_direct/vnic_enet.h deleted file mode 100644 index 94ce66702f2..00000000000 --- a/prov/usnic/src/usnic_direct/vnic_enet.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright 2008-2018 Cisco Systems, Inc. All rights reserved. - * Copyright 2007 Nuova Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#ifndef _VNIC_ENIC_H_ -#define _VNIC_ENIC_H_ - -/* Device-specific region: enet configuration */ -struct vnic_enet_config { - u32 flags; - u32 wq_desc_count; - u32 rq_desc_count; - u16 mtu; - u16 intr_timer_deprecated; - u8 intr_timer_type; - u8 intr_mode; - char devname[16]; - u32 intr_timer_usec; - u16 loop_tag; - u16 vf_rq_count; - u16 num_arfs; - u64 mem_paddr; -}; - -#define VENETF_TSO 0x1 /* TSO enabled */ -#define VENETF_LRO 0x2 /* LRO enabled */ -#define VENETF_RXCSUM 0x4 /* RX csum enabled */ -#define VENETF_TXCSUM 0x8 /* TX csum enabled */ -#define VENETF_RSS 0x10 /* RSS enabled */ -#define VENETF_RSSHASH_IPV4 0x20 /* Hash on IPv4 fields */ -#define VENETF_RSSHASH_TCPIPV4 0x40 /* Hash on TCP + IPv4 fields */ -#define VENETF_RSSHASH_IPV6 0x80 /* Hash on IPv6 fields */ -#define VENETF_RSSHASH_TCPIPV6 0x100 /* Hash on TCP + IPv6 fields */ -#define VENETF_RSSHASH_IPV6_EX 0x200 /* Hash on IPv6 extended fields */ -#define VENETF_RSSHASH_TCPIPV6_EX 0x400 /* Hash on TCP + IPv6 ext. 
fields */ -#define VENETF_LOOP 0x800 /* Loopback enabled */ -#define VENETF_VMQ 0x4000 /* using VMQ flag for VMware NETQ */ -#define VENETF_VXLAN 0x10000 /* VxLAN offload */ -#define VENETF_NVGRE 0x20000 /* NVGRE offload */ -#define VENET_INTR_TYPE_MIN 0 /* Timer specs min interrupt spacing */ -#define VENET_INTR_TYPE_IDLE 1 /* Timer specs idle time before irq */ - -#define VENET_INTR_MODE_ANY 0 /* Try MSI-X, then MSI, then INTx */ -#define VENET_INTR_MODE_MSI 1 /* Try MSI then INTx */ -#define VENET_INTR_MODE_INTX 2 /* Try INTx only */ - -#endif /* _VNIC_ENIC_H_ */ diff --git a/prov/usnic/src/usnic_direct/vnic_intr.c b/prov/usnic/src/usnic_direct/vnic_intr.c deleted file mode 100644 index 51f98b327f3..00000000000 --- a/prov/usnic/src/usnic_direct/vnic_intr.c +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright 2008-2018 Cisco Systems, Inc. All rights reserved. - * Copyright 2007 Nuova Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#include -#include -#include -#include -#include - -#include "kcompat.h" -#include "vnic_dev.h" -#include "vnic_intr.h" - -EXPORT_SYMBOL(vnic_intr_free); -void vnic_intr_free(struct vnic_intr *intr) -{ - intr->ctrl = NULL; -} - -EXPORT_SYMBOL(vnic_intr_alloc); -int vnic_intr_alloc(struct vnic_dev *vdev, struct vnic_intr *intr, - unsigned int index) -{ - intr->index = index; - intr->vdev = vdev; - - intr->ctrl = vnic_dev_get_res(vdev, RES_TYPE_INTR_CTRL, index); - if (!intr->ctrl) { - pr_err("Failed to hook INTR[%d].ctrl resource\n", index); - return -EINVAL; - } - - return 0; -} - -EXPORT_SYMBOL(vnic_intr_init); -void vnic_intr_init(struct vnic_intr *intr, u32 coalescing_timer, - unsigned int coalescing_type, unsigned int mask_on_assertion) -{ - vnic_intr_coalescing_timer_set(intr, coalescing_timer); - iowrite32(coalescing_type, &intr->ctrl->coalescing_type); - iowrite32(mask_on_assertion, &intr->ctrl->mask_on_assertion); - iowrite32(0, &intr->ctrl->int_credits); -} - -void vnic_intr_coalescing_timer_set(struct vnic_intr *intr, - u32 coalescing_timer) -{ - iowrite32(vnic_dev_intr_coal_timer_usec_to_hw(intr->vdev, - coalescing_timer), 
&intr->ctrl->coalescing_timer); -} - -void vnic_intr_clean(struct vnic_intr *intr) -{ - iowrite32(0, &intr->ctrl->int_credits); -} - -void vnic_intr_raise(struct vnic_intr *intr) -{ - vnic_dev_raise_intr(intr->vdev, (u16)intr->index); -} - -EXPORT_SYMBOL(vnic_grpmbrintr_free); -void vnic_grpmbrintr_free(struct vnic_intr *intr) -{ - intr->ctrl = NULL; -} - -EXPORT_SYMBOL(vnic_grpmbrintr_alloc); -int vnic_grpmbrintr_alloc(struct vnic_dev *vdev, struct vnic_intr *intr, - unsigned int index) -{ - intr->index = index; - intr->vdev = vdev; - - intr->ctrl = vnic_dev_get_res(vdev, RES_TYPE_GRPMBR_INTR, index); - if (!intr->ctrl) { - pr_err("Failed to hook INTR[%d].ctrl resource\n", index); - return -EINVAL; - } - - return 0; -} diff --git a/prov/usnic/src/usnic_direct/vnic_intr.h b/prov/usnic/src/usnic_direct/vnic_intr.h deleted file mode 100644 index cc8152134e4..00000000000 --- a/prov/usnic/src/usnic_direct/vnic_intr.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright 2008-2018 Cisco Systems, Inc. All rights reserved. - * Copyright 2007 Nuova Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#ifndef _VNIC_INTR_H_ -#define _VNIC_INTR_H_ - -#include - -#include "vnic_dev.h" - -#define VNIC_INTR_TIMER_TYPE_ABS 0 -#define VNIC_INTR_TIMER_TYPE_QUIET 1 - -/* Interrupt control */ -struct vnic_intr_ctrl { - u32 coalescing_timer; /* 0x00 */ - u32 pad0; - u32 coalescing_value; /* 0x08 */ - u32 pad1; - u32 coalescing_type; /* 0x10 */ - u32 pad2; - u32 mask_on_assertion; /* 0x18 */ - u32 pad3; - u32 mask; /* 0x20 */ - u32 pad4; - u32 int_credits; /* 0x28 */ - u32 pad5; - u32 int_credit_return; /* 0x30 */ - u32 pad6; -}; - -struct vnic_intr { - unsigned int index; - struct vnic_dev *vdev; - struct vnic_intr_ctrl __iomem *ctrl; /* memory-mapped */ -}; - -static inline void vnic_intr_unmask(struct vnic_intr *intr) -{ - iowrite32(0, &intr->ctrl->mask); -} - -static inline void vnic_intr_mask(struct vnic_intr *intr) -{ - iowrite32(1, &intr->ctrl->mask); -} - -static inline int vnic_intr_masked(struct vnic_intr *intr) -{ - return ioread32(&intr->ctrl->mask); -} - -static inline void vnic_intr_return_credits(struct vnic_intr *intr, - unsigned int credits, int unmask, int reset_timer) -{ -#define VNIC_INTR_UNMASK_SHIFT 16 -#define 
VNIC_INTR_RESET_TIMER_SHIFT 17 - - u32 int_credit_return = (credits & 0xffff) | - (unmask ? (1 << VNIC_INTR_UNMASK_SHIFT) : 0) | - (reset_timer ? (1 << VNIC_INTR_RESET_TIMER_SHIFT) : 0); - - iowrite32(int_credit_return, &intr->ctrl->int_credit_return); -} - -static inline unsigned int vnic_intr_credits(struct vnic_intr *intr) -{ - return ioread32(&intr->ctrl->int_credits); -} - -static inline void vnic_intr_return_all_credits(struct vnic_intr *intr) -{ - unsigned int credits = vnic_intr_credits(intr); - int unmask = 1; - int reset_timer = 1; - - vnic_intr_return_credits(intr, credits, unmask, reset_timer); -} - -static inline u32 vnic_intr_legacy_pba(u32 __iomem *legacy_pba) -{ - /* read PBA without clearing */ - return ioread32(legacy_pba); -} - -void vnic_intr_free(struct vnic_intr *intr); -int vnic_intr_alloc(struct vnic_dev *vdev, struct vnic_intr *intr, - unsigned int index); -void vnic_intr_init(struct vnic_intr *intr, u32 coalescing_timer, - unsigned int coalescing_type, unsigned int mask_on_assertion); -void vnic_intr_coalescing_timer_set(struct vnic_intr *intr, - u32 coalescing_timer); -void vnic_intr_clean(struct vnic_intr *intr); - -void vnic_grpmbrintr_free(struct vnic_intr *intr); -int vnic_grpmbrintr_alloc(struct vnic_dev *vdev, struct vnic_intr *intr, - unsigned int index); - - -#endif /* _VNIC_INTR_H_ */ diff --git a/prov/usnic/src/usnic_direct/vnic_resource.h b/prov/usnic/src/usnic_direct/vnic_resource.h deleted file mode 100644 index ebfe3cc329d..00000000000 --- a/prov/usnic/src/usnic_direct/vnic_resource.h +++ /dev/null @@ -1,119 +0,0 @@ -/* -* Copyright 2008-2010 Cisco Systems, Inc. All rights reserved. - * Copyright 2007 Nuova Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#ifndef _VNIC_RESOURCE_H_ -#define _VNIC_RESOURCE_H_ - -#define VNIC_RES_MAGIC 0x766E6963L /* 'vnic' */ -#define VNIC_RES_VERSION 0x00000000L -#define MGMTVNIC_MAGIC 0x544d474dL /* 'MGMT' */ -#define MGMTVNIC_VERSION 0x00000000L - -/* The MAC address assigned to the CFG vNIC is fixed. 
*/ -#define MGMTVNIC_MAC { 0x02, 0x00, 0x54, 0x4d, 0x47, 0x4d } - -/* vNIC resource types */ -enum vnic_res_type { - RES_TYPE_EOL, /* End-of-list */ - RES_TYPE_WQ, /* Work queues */ - RES_TYPE_RQ, /* Receive queues */ - RES_TYPE_CQ, /* Completion queues */ - RES_TYPE_MEM, /* Window to dev memory */ - RES_TYPE_NIC_CFG, /* Enet NIC config registers */ - RES_TYPE_RSS_KEY, /* Enet RSS secret key */ - RES_TYPE_RSS_CPU, /* Enet RSS indirection table */ - RES_TYPE_TX_STATS, /* Netblock Tx statistic regs */ - RES_TYPE_RX_STATS, /* Netblock Rx statistic regs */ - RES_TYPE_INTR_CTRL, /* Interrupt ctrl table */ - RES_TYPE_INTR_TABLE, /* MSI/MSI-X Interrupt table */ - RES_TYPE_INTR_PBA, /* MSI/MSI-X PBA table */ - RES_TYPE_INTR_PBA_LEGACY, /* Legacy intr status */ - RES_TYPE_DEBUG, /* Debug-only info */ - RES_TYPE_DEV, /* Device-specific region */ - RES_TYPE_DEVCMD, /* Device command region */ - RES_TYPE_PASS_THRU_PAGE, /* Pass-thru page */ - RES_TYPE_SUBVNIC, /* subvnic resource type */ - RES_TYPE_MQ_WQ, /* MQ Work queues */ - RES_TYPE_MQ_RQ, /* MQ Receive queues */ - RES_TYPE_MQ_CQ, /* MQ Completion queues */ - RES_TYPE_DEPRECATED1, /* Old version of devcmd 2 */ - RES_TYPE_DEPRECATED2, /* Old version of devcmd 2 */ - RES_TYPE_DEVCMD2, /* Device control region */ - RES_TYPE_RDMA_WQ, /* RDMA WQ */ - RES_TYPE_RDMA_RQ, /* RDMA RQ */ - RES_TYPE_RDMA_CQ, /* RDMA CQ */ - RES_TYPE_RDMA_RKEY_TABLE, /* RDMA RKEY table */ - RES_TYPE_RDMA_RQ_HEADER_TABLE, /* RDMA RQ Header Table */ - RES_TYPE_RDMA_RQ_TABLE, /* RDMA RQ Table */ - RES_TYPE_RDMA_RD_RESP_HEADER_TABLE, /* RDMA Read Response Header Table */ - RES_TYPE_RDMA_RD_RESP_TABLE, /* RDMA Read Response Table */ - RES_TYPE_RDMA_QP_STATS_TABLE, /* RDMA per QP stats table */ - RES_TYPE_WQ_MREGS, /* XXX snic proto only */ - RES_TYPE_GRPMBR_INTR, /* Group member interrupt control */ - RES_TYPE_DPKT, /* Direct Packet memory region */ - - RES_TYPE_MAX, /* Count of resource types */ -}; - -struct vnic_resource_header { - u32 magic; - u32 
version; -}; - -struct mgmt_barmap_hdr { - u32 magic; /* magic number */ - u32 version; /* header format version */ - u16 lif; /* loopback lif for mgmt frames */ - u16 pci_slot; /* installed pci slot */ - char serial[16]; /* card serial number */ -}; - -struct vnic_resource { - u8 type; - u8 bar; - u8 pad[2]; - u32 bar_offset; - u32 count; -}; - -#endif /* _VNIC_RESOURCE_H_ */ diff --git a/prov/usnic/src/usnic_direct/vnic_rq.c b/prov/usnic/src/usnic_direct/vnic_rq.c deleted file mode 100644 index 7e8624d351b..00000000000 --- a/prov/usnic/src/usnic_direct/vnic_rq.c +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Copyright 2008-2018 Cisco Systems, Inc. All rights reserved. - * Copyright 2007 Nuova Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#include -#include -#include -#include -#ifdef __KERNEL__ -#include -#include -#endif - -#include "kcompat.h" -#include "vnic_dev.h" -#include "vnic_rq.h" - -static int vnic_rq_alloc_bufs(struct vnic_rq *rq) -{ - struct vnic_rq_buf *buf; - unsigned int i, j, count = rq->ring.desc_count; - unsigned int blks = VNIC_RQ_BUF_BLKS_NEEDED(count); - - for (i = 0; i < blks; i++) { - rq->bufs[i] = kzalloc(VNIC_RQ_BUF_BLK_SZ(count), GFP_ATOMIC); - if (!rq->bufs[i]) - return -ENOMEM; - } - - for (i = 0; i < blks; i++) { - buf = rq->bufs[i]; - for (j = 0; j < VNIC_RQ_BUF_BLK_ENTRIES(count); j++) { - buf->index = i * VNIC_RQ_BUF_BLK_ENTRIES(count) + j; - buf->desc = (u8 *)rq->ring.descs + - rq->ring.desc_size * buf->index; - if (buf->index + 1 == count) { - buf->next = rq->bufs[0]; - break; - } else if (j + 1 == VNIC_RQ_BUF_BLK_ENTRIES(count)) { - buf->next = rq->bufs[i + 1]; - } else { - buf->next = buf + 1; - buf++; - } - } - } - - rq->to_use = rq->to_clean = rq->bufs[0]; - - return 0; -} - -#ifndef NOT_FOR_OPEN_ENIC -int vnic_rq_mem_size(struct vnic_rq *rq, unsigned int desc_count, - unsigned int desc_size) -{ - int mem_size = 0; - - mem_size += vnic_dev_desc_ring_size(&rq->ring, desc_count, desc_size); - - mem_size += VNIC_RQ_BUF_BLKS_NEEDED(rq->ring.desc_count) * - VNIC_RQ_BUF_BLK_SZ(rq->ring.desc_count); - - return mem_size; -} - -#endif -void vnic_rq_free(struct vnic_rq *rq) -{ - struct vnic_dev 
*vdev; - unsigned int i; - - vdev = rq->vdev; - - vnic_dev_free_desc_ring(vdev, &rq->ring); - - for (i = 0; i < VNIC_RQ_BUF_BLKS_MAX; i++) { - if (rq->bufs[i]) { - kfree(rq->bufs[i]); - rq->bufs[i] = NULL; - } - } - - rq->ctrl = NULL; -} - -int vnic_rq_alloc(struct vnic_dev *vdev, struct vnic_rq *rq, unsigned int index, - unsigned int desc_count, unsigned int desc_size) -{ - int err; - - rq->index = index; - rq->vdev = vdev; - - rq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_RQ, index); - if (!rq->ctrl) { - pr_err("Failed to hook RQ[%d] resource\n", index); - return -EINVAL; - } - - vnic_rq_disable(rq); - - err = vnic_dev_alloc_desc_ring(vdev, &rq->ring, desc_count, desc_size); - if (err) - return err; - - err = vnic_rq_alloc_bufs(rq); - if (err) { - vnic_rq_free(rq); - return err; - } - - return 0; -} - -static void vnic_rq_init_start(struct vnic_rq *rq, unsigned int cq_index, - unsigned int fetch_index, unsigned int posted_index, - unsigned int error_interrupt_enable, - unsigned int error_interrupt_offset) -{ - u64 paddr; - unsigned int count = rq->ring.desc_count; - - paddr = (u64)rq->ring.base_addr | VNIC_PADDR_TARGET; - writeq(paddr, &rq->ctrl->ring_base); - iowrite32(count, &rq->ctrl->ring_size); - iowrite32(cq_index, &rq->ctrl->cq_index); - iowrite32(error_interrupt_enable, &rq->ctrl->error_interrupt_enable); - iowrite32(error_interrupt_offset, &rq->ctrl->error_interrupt_offset); - iowrite32(0, &rq->ctrl->data_ring); - iowrite32(0, &rq->ctrl->header_split); - iowrite32(0, &rq->ctrl->error_status); - iowrite32(fetch_index, &rq->ctrl->fetch_index); - iowrite32(posted_index, &rq->ctrl->posted_index); - - rq->to_use = rq->to_clean = - &rq->bufs[fetch_index / VNIC_RQ_BUF_BLK_ENTRIES(count)] - [fetch_index % VNIC_RQ_BUF_BLK_ENTRIES(count)]; -} - -void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index, - unsigned int error_interrupt_enable, - unsigned int error_interrupt_offset) -{ - vnic_rq_init_start(rq, cq_index, 0, 0, error_interrupt_enable, - 
error_interrupt_offset); -} - -void vnic_rq_error_out(struct vnic_rq *rq, unsigned int error) -{ - iowrite32(error, &rq->ctrl->error_status); -} - -unsigned int vnic_rq_error_status(struct vnic_rq *rq) -{ - return vnic_rq_ctrl_error_status(rq->ctrl); -} - -EXPORT_SYMBOL(vnic_rq_ctrl_error_status); -unsigned int vnic_rq_ctrl_error_status(struct vnic_rq_ctrl *ctrl) -{ - return ioread32(&ctrl->error_status); -} - -void vnic_rq_enable(struct vnic_rq *rq) -{ - iowrite32(1, &rq->ctrl->enable); -} - -int vnic_rq_disable(struct vnic_rq *rq) -{ - unsigned int wait; - int i; - - /* - * Due to a race condition with clearing RQ "mini-cache", we need to - * disable the RQ twice to guarantee that stale descriptors are not - * used when this RQ is re-enabled. - */ - for (i = 0; i < 2; ++i) { - iowrite32(0, &rq->ctrl->enable); - - /* Wait for HW to ACK disable request */ - for (wait = 20000; wait > 0; wait--) { - if (ioread32(&rq->ctrl->running) == 0) - break; - } - - if (wait == 0) { - pr_err("Failed to disable RQ[%d]\n", rq->index); - return -ETIMEDOUT; - } - } - return 0; -} - -void vnic_rq_clean(struct vnic_rq *rq, - void (*buf_clean)(struct vnic_rq *rq, struct vnic_rq_buf *buf)) -{ - struct vnic_rq_buf *buf; - u32 fetch_index; - unsigned int count = rq->ring.desc_count; - size_t i; - - buf = rq->to_clean; - - for (i = 0; i < rq->ring.desc_count; i++) { - (*buf_clean)(rq, buf); - buf = buf->next; - } - rq->ring.desc_avail = rq->ring.desc_count - 1; - - /* Use current fetch_index as the ring starting point */ - fetch_index = ioread32(&rq->ctrl->fetch_index); - - if (fetch_index == 0xFFFFFFFF) { /* check for hardware gone */ - /* Hardware surprise removal: reset fetch_index */ - fetch_index = 0; - } - rq->to_use = rq->to_clean = - &rq->bufs[fetch_index / VNIC_RQ_BUF_BLK_ENTRIES(count)] - [fetch_index % VNIC_RQ_BUF_BLK_ENTRIES(count)]; - iowrite32(fetch_index, &rq->ctrl->posted_index); - - /* - * Anytime we write fetch_index, we need to re-write 0 to RQ.enable - * to re-sync 
internal VIC state on Sereno. - */ - iowrite32(0, &rq->ctrl->enable); - - vnic_dev_clear_desc_ring(&rq->ring); -} - diff --git a/prov/usnic/src/usnic_direct/vnic_rq.h b/prov/usnic/src/usnic_direct/vnic_rq.h deleted file mode 100644 index 0625760ae07..00000000000 --- a/prov/usnic/src/usnic_direct/vnic_rq.h +++ /dev/null @@ -1,296 +0,0 @@ -/* - * Copyright 2008-2018 Cisco Systems, Inc. All rights reserved. - * Copyright 2007 Nuova Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#ifndef _VNIC_RQ_H_ -#define _VNIC_RQ_H_ - -#include "vnic_dev.h" -#include "vnic_cq.h" - -/* Receive queue control */ -struct vnic_rq_ctrl { - u64 ring_base; /* 0x00 */ - u32 ring_size; /* 0x08 */ - u32 pad0; - u32 posted_index; /* 0x10 */ - u32 pad1; - u32 cq_index; /* 0x18 */ - u32 pad2; - u32 enable; /* 0x20 */ - u32 pad3; - u32 running; /* 0x28 */ - u32 pad4; - u32 fetch_index; /* 0x30 */ - u32 pad5; - u32 error_interrupt_enable; /* 0x38 */ - u32 pad6; - u32 error_interrupt_offset; /* 0x40 */ - u32 pad7; - u32 error_status; /* 0x48 */ - u32 pad8; - u32 tcp_sn; /* 0x50 */ - u32 pad9[3]; - u32 dca_select; /* 0x60 */ - u32 pad10[3]; - u32 data_ring; /* 0x70 */ - u32 pad11; - u32 header_split; /* 0x78 */ - u32 pad12; -}; - -/* Break the vnic_rq_buf allocations into blocks of 32/64 entries */ -#define VNIC_RQ_BUF_MIN_BLK_ENTRIES 32 -#define VNIC_RQ_BUF_DFLT_BLK_ENTRIES 64 -#define VNIC_RQ_BUF_BLK_ENTRIES(entries) \ - ((unsigned int)((entries < VNIC_RQ_BUF_DFLT_BLK_ENTRIES) ? 
\ - VNIC_RQ_BUF_MIN_BLK_ENTRIES : VNIC_RQ_BUF_DFLT_BLK_ENTRIES)) -#define VNIC_RQ_BUF_BLK_SZ(entries) \ - (VNIC_RQ_BUF_BLK_ENTRIES(entries) * sizeof(struct vnic_rq_buf)) -#define VNIC_RQ_BUF_BLKS_NEEDED(entries) \ - DIV_ROUND_UP(entries, VNIC_RQ_BUF_BLK_ENTRIES(entries)) -#define VNIC_RQ_BUF_BLKS_MAX VNIC_RQ_BUF_BLKS_NEEDED(4096) - -struct vnic_rq_buf { - struct vnic_rq_buf *next; - dma_addr_t dma_addr; - void *os_buf; - unsigned int os_buf_index; - unsigned int len; - unsigned int index; - void *desc; - uint64_t wr_id; -}; - -struct vnic_rq { - unsigned int index; - struct vnic_dev *vdev; - struct vnic_rq_ctrl __iomem *ctrl; /* memory-mapped */ - struct vnic_dev_ring ring; - struct vnic_rq_buf *bufs[VNIC_RQ_BUF_BLKS_MAX]; - struct vnic_rq_buf *to_use; - struct vnic_rq_buf *to_clean; - void *os_buf_head; - unsigned int pkts_outstanding; -#if defined(__LIBUSNIC__) - uint32_t qp_num; -#endif - -#ifdef ENIC_BUSY_POLL - atomic_t bpoll_state; -#endif /*ENIC_BUSY_POLL*/ -}; - -static inline unsigned int vnic_rq_desc_avail(struct vnic_rq *rq) -{ - /* how many does SW own? */ - return rq->ring.desc_avail; -} - -static inline unsigned int vnic_rq_desc_used(struct vnic_rq *rq) -{ - /* how many does HW own? 
*/ - return rq->ring.desc_count - rq->ring.desc_avail - 1; -} - -static inline void *vnic_rq_next_desc(struct vnic_rq *rq) -{ - return rq->to_use->desc; -} - -static inline unsigned int vnic_rq_next_index(struct vnic_rq *rq) -{ - return rq->to_use->index; -} - -static inline void vnic_rq_post(struct vnic_rq *rq, - void *os_buf, unsigned int os_buf_index, - dma_addr_t dma_addr, unsigned int len, - uint64_t wrid) -{ - struct vnic_rq_buf *buf = rq->to_use; - - buf->os_buf = os_buf; - buf->os_buf_index = os_buf_index; - buf->dma_addr = dma_addr; - buf->len = len; - buf->wr_id = wrid; - - buf = buf->next; - rq->to_use = buf; - rq->ring.desc_avail--; - - /* Move the posted_index every nth descriptor - */ -#if defined(__LIBUSNIC__) -#define VNIC_RQ_RETURN_RATE 0x0 -#endif - -#ifndef VNIC_RQ_RETURN_RATE -#define VNIC_RQ_RETURN_RATE 0xf /* keep 2^n - 1 */ -#endif - - if ((buf->index & VNIC_RQ_RETURN_RATE) == 0) { - /* Adding write memory barrier prevents compiler and/or CPU - * reordering, thus avoiding descriptor posting before - * descriptor is initialized. Otherwise, hardware can read - * stale descriptor fields. - */ - wmb(); - iowrite32(buf->index, &rq->ctrl->posted_index); - } -} - -static inline void vnic_rq_post_commit(struct vnic_rq *rq, - void *os_buf, unsigned int os_buf_index, - dma_addr_t dma_addr, unsigned int len) -{ - struct vnic_rq_buf *buf = rq->to_use; - - buf->os_buf = os_buf; - buf->os_buf_index = os_buf_index; - buf->dma_addr = dma_addr; - buf->len = len; - - buf = buf->next; - rq->to_use = buf; - rq->ring.desc_avail--; - - /* Move the posted_index every descriptor - */ - - /* Adding write memory barrier prevents compiler and/or CPU - * reordering, thus avoiding descriptor posting before - * descriptor is initialized. Otherwise, hardware can read - * stale descriptor fields. 
- */ - wmb(); - iowrite32(buf->index, &rq->ctrl->posted_index); -} - -static inline void vnic_rq_return_descs(struct vnic_rq *rq, unsigned int count) -{ - rq->ring.desc_avail += count; -} - -enum desc_return_options { - VNIC_RQ_RETURN_DESC, - VNIC_RQ_DEFER_RETURN_DESC, -}; - -static inline void vnic_rq_service(struct vnic_rq *rq, - struct cq_desc *cq_desc, u16 completed_index, - int desc_return, void (*buf_service)(struct vnic_rq *rq, - struct cq_desc *cq_desc, struct vnic_rq_buf *buf, - int skipped, void *opaque), void *opaque) -{ - struct vnic_rq_buf *buf; - int skipped; - - buf = rq->to_clean; - while (1) { - - skipped = (buf->index != completed_index); - - (*buf_service)(rq, cq_desc, buf, skipped, opaque); - - if (desc_return == VNIC_RQ_RETURN_DESC) - rq->ring.desc_avail++; - - rq->to_clean = buf->next; - - if (!skipped) - break; - - buf = rq->to_clean; - } -} - -static inline int vnic_rq_fill(struct vnic_rq *rq, - int (*buf_fill)(struct vnic_rq *rq)) -{ - int err; - - while (vnic_rq_desc_avail(rq) > 0) { - - err = (*buf_fill)(rq); - if (err) - return err; - } - - return 0; -} - -static inline int vnic_rq_fill_count(struct vnic_rq *rq, - int (*buf_fill)(struct vnic_rq *rq), unsigned int count) -{ - int err; - - while ((vnic_rq_desc_avail(rq) > 0) && (count--)) { - - err = (*buf_fill)(rq); - if (err) - return err; - } - - return 0; -} - -void vnic_rq_free(struct vnic_rq *rq); -int vnic_rq_alloc(struct vnic_dev *vdev, struct vnic_rq *rq, unsigned int index, - unsigned int desc_count, unsigned int desc_size); -void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index, - unsigned int error_interrupt_enable, - unsigned int error_interrupt_offset); -void vnic_rq_error_out(struct vnic_rq *rq, unsigned int error); -unsigned int vnic_rq_error_status(struct vnic_rq *rq); -unsigned int vnic_rq_ctrl_error_status(struct vnic_rq_ctrl *ctrl); -void vnic_rq_enable(struct vnic_rq *rq); -int vnic_rq_disable(struct vnic_rq *rq); -void vnic_rq_clean(struct vnic_rq *rq, - void 
(*buf_clean)(struct vnic_rq *rq, struct vnic_rq_buf *buf)); -#ifndef NOT_FOR_OPEN_ENIC -int vnic_rq_mem_size(struct vnic_rq *rq, unsigned int desc_count, - unsigned int desc_size); -#endif - -#endif /* _VNIC_RQ_H_ */ diff --git a/prov/usnic/src/usnic_direct/vnic_stats.h b/prov/usnic/src/usnic_direct/vnic_stats.h deleted file mode 100644 index c42074b058d..00000000000 --- a/prov/usnic/src/usnic_direct/vnic_stats.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright 2008-2018 Cisco Systems, Inc. All rights reserved. - * Copyright 2007 Nuova Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#ifndef _VNIC_STATS_H_ -#define _VNIC_STATS_H_ - -/* Tx statistics */ -struct vnic_tx_stats { - u64 tx_frames_ok; - u64 tx_unicast_frames_ok; - u64 tx_multicast_frames_ok; - u64 tx_broadcast_frames_ok; - u64 tx_bytes_ok; - u64 tx_unicast_bytes_ok; - u64 tx_multicast_bytes_ok; - u64 tx_broadcast_bytes_ok; - u64 tx_drops; - u64 tx_errors; - u64 tx_tso; - u64 rsvd[16]; -}; - -/* Rx statistics */ -struct vnic_rx_stats { - u64 rx_frames_ok; - u64 rx_frames_total; - u64 rx_unicast_frames_ok; - u64 rx_multicast_frames_ok; - u64 rx_broadcast_frames_ok; - u64 rx_bytes_ok; - u64 rx_unicast_bytes_ok; - u64 rx_multicast_bytes_ok; - u64 rx_broadcast_bytes_ok; - u64 rx_drop; - u64 rx_no_bufs; - u64 rx_errors; - u64 rx_rss; - u64 rx_crc_errors; - u64 rx_frames_64; - u64 rx_frames_127; - u64 rx_frames_255; - u64 rx_frames_511; - u64 rx_frames_1023; - u64 rx_frames_1518; - u64 rx_frames_to_max; - u64 rsvd[16]; -}; - -/* Generic statistics */ -struct vnic_gen_stats { - u64 dma_map_error; -}; - -struct vnic_stats { - struct vnic_tx_stats tx; - struct vnic_rx_stats rx; -}; - -#endif /* _VNIC_STATS_H_ */ diff --git a/prov/usnic/src/usnic_direct/vnic_wq.c b/prov/usnic/src/usnic_direct/vnic_wq.c deleted file mode 100644 index 5711dff8b07..00000000000 --- a/prov/usnic/src/usnic_direct/vnic_wq.c +++ /dev/null @@ -1,288 +0,0 @@ -/* - * Copyright 2008-2018 Cisco Systems, Inc. All rights reserved. 
- * Copyright 2007 Nuova Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#include -#include -#include -#include -#include -#include - -#include "kcompat.h" -#include "vnic_dev.h" -#include "vnic_wq.h" - -static inline -int vnic_wq_get_ctrl(struct vnic_dev *vdev, struct vnic_wq *wq, - unsigned int index, enum vnic_res_type res_type) -{ - wq->ctrl = vnic_dev_get_res(vdev, res_type, index); - if (!wq->ctrl) - return -EINVAL; - return 0; -} - -static inline -int vnic_wq_alloc_ring(struct vnic_dev *vdev, struct vnic_wq *wq, - unsigned int desc_count, unsigned int desc_size) -{ - return vnic_dev_alloc_desc_ring(vdev, &wq->ring, desc_count, desc_size); -} - -static int vnic_wq_alloc_bufs(struct vnic_wq *wq) -{ - struct vnic_wq_buf *buf; - unsigned int i, j, count = wq->ring.desc_count; - unsigned int blks = VNIC_WQ_BUF_BLKS_NEEDED(count); - - for (i = 0; i < blks; i++) { - wq->bufs[i] = kzalloc(VNIC_WQ_BUF_BLK_SZ(count), GFP_ATOMIC); - if (!wq->bufs[i]) - return -ENOMEM; - } - - for (i = 0; i < blks; i++) { - buf = wq->bufs[i]; - for (j = 0; j < VNIC_WQ_BUF_BLK_ENTRIES(count); j++) { - buf->index = i * VNIC_WQ_BUF_BLK_ENTRIES(count) + j; - buf->desc = (u8 *)wq->ring.descs + - wq->ring.desc_size * buf->index; - if (buf->index + 1 == count) { - buf->next = wq->bufs[0]; - buf->next->prev = buf; - break; - } else if (j + 1 == VNIC_WQ_BUF_BLK_ENTRIES(count)) { - buf->next = wq->bufs[i + 1]; - buf->next->prev = buf; - } else { - buf->next = buf + 1; - buf->next->prev = buf; - buf++; - } - } - } - - wq->to_use = wq->to_clean = wq->bufs[0]; - - return 0; -} - -void vnic_wq_free(struct vnic_wq *wq) -{ - struct vnic_dev *vdev; - unsigned int i; - - vdev = wq->vdev; - - vnic_dev_free_desc_ring(vdev, &wq->ring); - - for (i = 0; i < VNIC_WQ_BUF_BLKS_MAX; i++) { - if (wq->bufs[i]) { - kfree(wq->bufs[i]); - wq->bufs[i] = NULL; - } - } - - wq->ctrl = NULL; -} - -#ifndef NOT_FOR_OPEN_ENIC -int vnic_wq_mem_size(struct vnic_wq *wq, unsigned int desc_count, - unsigned int desc_size) -{ - int mem_size = 0; - - mem_size += 
vnic_dev_desc_ring_size(&wq->ring, desc_count, desc_size); - - mem_size += VNIC_WQ_BUF_BLKS_NEEDED(wq->ring.desc_count) * - VNIC_WQ_BUF_BLK_SZ(wq->ring.desc_count); - - return mem_size; -} - -#endif - -int vnic_wq_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int index, - unsigned int desc_count, unsigned int desc_size) -{ - int err; - - wq->index = index; - wq->vdev = vdev; - - err = vnic_wq_get_ctrl(vdev, wq, index, RES_TYPE_WQ); - if (err) { - pr_err("Failed to hook WQ[%d] resource, err %d\n", index, err); - return err; - } - - vnic_wq_disable(wq); - - err = vnic_wq_alloc_ring(vdev, wq, desc_count, desc_size); - if (err) - return err; - - err = vnic_wq_alloc_bufs(wq); - if (err) { - vnic_wq_free(wq); - return err; - } - - return 0; -} - -int vnic_wq_devcmd2_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, - unsigned int desc_count, unsigned int desc_size) -{ - int err; - - wq->index = 0; - wq->vdev = vdev; - - err = vnic_wq_get_ctrl(vdev, wq, 0, RES_TYPE_DEVCMD2); - if (err) { - pr_err("Failed to get devcmd2 resource\n"); - return err; - } - vnic_wq_disable(wq); - - err = vnic_wq_alloc_ring(vdev, wq, desc_count, desc_size); - if (err) - return err; - return 0; -} - -void vnic_wq_init_start(struct vnic_wq *wq, unsigned int cq_index, - unsigned int fetch_index, unsigned int posted_index, - unsigned int error_interrupt_enable, - unsigned int error_interrupt_offset) -{ - u64 paddr; - unsigned int count = wq->ring.desc_count; - - paddr = (u64)wq->ring.base_addr | VNIC_PADDR_TARGET; - writeq(paddr, &wq->ctrl->ring_base); - iowrite32(count, &wq->ctrl->ring_size); - iowrite32(fetch_index, &wq->ctrl->fetch_index); - iowrite32(posted_index, &wq->ctrl->posted_index); - iowrite32(cq_index, &wq->ctrl->cq_index); - iowrite32(error_interrupt_enable, &wq->ctrl->error_interrupt_enable); - iowrite32(error_interrupt_offset, &wq->ctrl->error_interrupt_offset); - iowrite32(0, &wq->ctrl->error_status); - - wq->to_use = wq->to_clean = - &wq->bufs[fetch_index / 
VNIC_WQ_BUF_BLK_ENTRIES(count)] - [fetch_index % VNIC_WQ_BUF_BLK_ENTRIES(count)]; -} - -void vnic_wq_init(struct vnic_wq *wq, unsigned int cq_index, - unsigned int error_interrupt_enable, - unsigned int error_interrupt_offset) -{ - vnic_wq_init_start(wq, cq_index, 0, 0, - error_interrupt_enable, - error_interrupt_offset); -} - -void vnic_wq_error_out(struct vnic_wq *wq, unsigned int error) -{ - iowrite32(error, &wq->ctrl->error_status); -} - -unsigned int vnic_wq_error_status(struct vnic_wq *wq) -{ - return vnic_wq_ctrl_error_status(wq->ctrl); -} - -EXPORT_SYMBOL(vnic_wq_ctrl_error_status); -unsigned int vnic_wq_ctrl_error_status(struct vnic_wq_ctrl *ctrl) -{ - return ioread32(&ctrl->error_status); -} - -void vnic_wq_enable(struct vnic_wq *wq) -{ - iowrite32(1, &wq->ctrl->enable); -} - -int vnic_wq_disable(struct vnic_wq *wq) -{ - unsigned int wait; - - iowrite32(0, &wq->ctrl->enable); - - /* Wait for HW to ACK disable request */ - for (wait = 0; wait < 1000; wait++) { - if (!(ioread32(&wq->ctrl->running))) - return 0; - udelay(10); - } - - pr_err("Failed to disable WQ[%d]\n", wq->index); - - return -ETIMEDOUT; -} - -void vnic_wq_clean(struct vnic_wq *wq, - void (*buf_clean)(struct vnic_wq *wq, struct vnic_wq_buf *buf)) -{ - struct vnic_wq_buf *buf; - - buf = wq->to_clean; - - while (vnic_wq_desc_used(wq) > 0) { - - (*buf_clean)(wq, buf); - - buf = wq->to_clean = buf->next; - wq->ring.desc_avail++; - } - - wq->to_use = wq->to_clean = wq->bufs[0]; - - iowrite32(0, &wq->ctrl->fetch_index); - iowrite32(0, &wq->ctrl->posted_index); - iowrite32(0, &wq->ctrl->error_status); - - vnic_dev_clear_desc_ring(&wq->ring); -} diff --git a/prov/usnic/src/usnic_direct/vnic_wq.h b/prov/usnic/src/usnic_direct/vnic_wq.h deleted file mode 100644 index c979a7d9404..00000000000 --- a/prov/usnic/src/usnic_direct/vnic_wq.h +++ /dev/null @@ -1,302 +0,0 @@ -/* - * Copyright 2008-2018 Cisco Systems, Inc. All rights reserved. - * Copyright 2007 Nuova Systems, Inc. All rights reserved. 
- * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * LICENSE_END - * - * - */ - -#ifndef _VNIC_WQ_H_ -#define _VNIC_WQ_H_ - -#include - -#include "vnic_dev.h" -#include "vnic_cq.h" - -/* Work queue control */ -struct vnic_wq_ctrl { - u64 ring_base; /* 0x00 */ - u32 ring_size; /* 0x08 */ - u32 pad0; - u32 posted_index; /* 0x10 */ - u32 pad1; - u32 cq_index; /* 0x18 */ - u32 pad2; - u32 enable; /* 0x20 */ - u32 pad3; - u32 running; /* 0x28 */ - u32 pad4; - u32 fetch_index; /* 0x30 */ - u32 pad5; - u32 dca_value; /* 0x38 */ - u32 pad6; - u32 error_interrupt_enable; /* 0x40 */ - u32 pad7; - u32 error_interrupt_offset; /* 0x48 */ - u32 pad8; - u32 error_status; /* 0x50 */ - u32 pad9; -}; - -struct vnic_wq_buf { - struct vnic_wq_buf *next; - dma_addr_t dma_addr; - void *os_buf; - unsigned int len; - unsigned int index; - int sop; - void *desc; - uint64_t wr_id; /* Cookie */ - uint8_t cq_entry; /* Gets completion event from hw */ - uint8_t desc_skip_cnt; /* Num descs to occupy */ - uint8_t compressed_send; /* Both hdr and payload in one desc */ - struct vnic_wq_buf *prev; -}; - -/* Break the vnic_wq_buf allocations into blocks of 32/64 entries */ -#define VNIC_WQ_BUF_MIN_BLK_ENTRIES 32 -#define VNIC_WQ_BUF_DFLT_BLK_ENTRIES 64 -#define VNIC_WQ_BUF_BLK_ENTRIES(entries) \ - ((unsigned int)((entries < VNIC_WQ_BUF_DFLT_BLK_ENTRIES) ? 
\ - VNIC_WQ_BUF_MIN_BLK_ENTRIES : VNIC_WQ_BUF_DFLT_BLK_ENTRIES)) -#define VNIC_WQ_BUF_BLK_SZ(entries) \ - (VNIC_WQ_BUF_BLK_ENTRIES(entries) * sizeof(struct vnic_wq_buf)) -#define VNIC_WQ_BUF_BLKS_NEEDED(entries) \ - DIV_ROUND_UP(entries, VNIC_WQ_BUF_BLK_ENTRIES(entries)) -#define VNIC_WQ_BUF_BLKS_MAX VNIC_WQ_BUF_BLKS_NEEDED(4096) - -struct vnic_wq { - unsigned int index; - struct vnic_dev *vdev; - struct vnic_wq_ctrl __iomem *ctrl; /* memory-mapped */ - struct vnic_dev_ring ring; - struct vnic_wq_buf *bufs[VNIC_WQ_BUF_BLKS_MAX]; - struct vnic_wq_buf *to_use; - struct vnic_wq_buf *to_clean; - unsigned int pkts_outstanding; -#if defined(__LIBUSNIC__) - uint32_t qp_num; -#endif -}; - -static inline unsigned int vnic_wq_desc_avail(struct vnic_wq *wq) -{ - /* how many does SW own? */ - return wq->ring.desc_avail; -} - -static inline unsigned int vnic_wq_desc_used(struct vnic_wq *wq) -{ - /* how many does HW own? */ - return wq->ring.desc_count - wq->ring.desc_avail - 1; -} - -static inline void *vnic_wq_next_desc(struct vnic_wq *wq) -{ - return wq->to_use->desc; -} - -#define PI_LOG2_CACHE_LINE_SIZE 5 -#define PI_INDEX_BITS 12 -#define PI_INDEX_MASK ((1U << PI_INDEX_BITS) - 1) -#define PI_PREFETCH_LEN_MASK ((1U << PI_LOG2_CACHE_LINE_SIZE) - 1) -#define PI_PREFETCH_LEN_OFF 16 -#define PI_PREFETCH_ADDR_BITS 43 -#define PI_PREFETCH_ADDR_MASK ((1ULL << PI_PREFETCH_ADDR_BITS) - 1) -#define PI_PREFETCH_ADDR_OFF 21 - -/** How many cache lines are touched by buffer (addr, len). */ -static inline unsigned int num_cache_lines_touched(dma_addr_t addr, - unsigned int len) -{ - const unsigned long mask = PI_PREFETCH_LEN_MASK; - const unsigned long laddr = (unsigned long)addr; - unsigned long lines, equiv_len; - /* A. If addr is aligned, our solution is just to round up len to the - next boundary. - - e.g. addr = 0, len = 48 - +--------------------+ - |XXXXXXXXXXXXXXXXXXXX| 32-byte cacheline a - +--------------------+ - |XXXXXXXXXX | cacheline b - +--------------------+ - - B. 
If addr is not aligned, however, we may use an extra - cacheline. e.g. addr = 12, len = 22 - - +--------------------+ - | XXXXXXXXXXXXX| - +--------------------+ - |XX | - +--------------------+ - - Our solution is to make the problem equivalent to case A - above by adding the empty space in the first cacheline to the length: - unsigned long len; - - +--------------------+ - |eeeeeeeXXXXXXXXXXXXX| "e" is empty space, which we add to len - +--------------------+ - |XX | - +--------------------+ - - */ - equiv_len = len + (laddr & mask); - - /* Now we can just round up this len to the next 32-byte boundary. */ - lines = (equiv_len + mask) & (~mask); - - /* Scale bytes -> cachelines. */ - return lines >> PI_LOG2_CACHE_LINE_SIZE; -} - -static inline u64 vnic_cached_posted_index(dma_addr_t addr, unsigned int len, - unsigned int index) -{ - unsigned int num_cache_lines = num_cache_lines_touched(addr, len); - /* Wish we could avoid a branch here. We could have separate - * vnic_wq_post() and vinc_wq_post_inline(), the latter - * only supporting < 1k (2^5 * 2^5) sends, I suppose. This would - * eliminate the if (eop) branch as well. - */ - if (num_cache_lines > PI_PREFETCH_LEN_MASK) - num_cache_lines = 0; - return (index & PI_INDEX_MASK) | - ((num_cache_lines & PI_PREFETCH_LEN_MASK) << PI_PREFETCH_LEN_OFF) | - (((addr >> PI_LOG2_CACHE_LINE_SIZE) & - PI_PREFETCH_ADDR_MASK) << PI_PREFETCH_ADDR_OFF); -} - -static inline void vnic_wq_post(struct vnic_wq *wq, - void *os_buf, dma_addr_t dma_addr, - unsigned int len, int sop, int eop, - uint8_t desc_skip_cnt, uint8_t cq_entry, - uint8_t compressed_send, uint64_t wrid) -{ - struct vnic_wq_buf *buf = wq->to_use; - - buf->sop = sop; - buf->cq_entry = cq_entry; - buf->compressed_send = compressed_send; - buf->desc_skip_cnt = desc_skip_cnt; - buf->os_buf = eop ? 
os_buf : NULL; - buf->dma_addr = dma_addr; - buf->len = len; - buf->wr_id = wrid; - - buf = buf->next; - if (eop) { -#ifdef DO_PREFETCH - uint64_t wr = vnic_cached_posted_index(dma_addr, len, - buf->index); -#endif - /* Adding write memory barrier prevents compiler and/or CPU - * reordering, thus avoiding descriptor posting before - * descriptor is initialized. Otherwise, hardware can read - * stale descriptor fields. - */ - wmb(); -#ifdef DO_PREFETCH - /* Intel chipsets seem to limit the rate of PIOs that we can - * push on the bus. Thus, it is very important to do a single - * 64 bit write here. With two 32-bit writes, my maximum - * pkt/sec rate was cut almost in half. -AJF - */ - iowrite64((uint64_t)wr, &wq->ctrl->posted_index); -#else - iowrite32(buf->index, &wq->ctrl->posted_index); -#endif - } - wq->to_use = buf; - - wq->ring.desc_avail -= desc_skip_cnt; -} - -static inline void vnic_wq_service(struct vnic_wq *wq, - struct cq_desc *cq_desc, u16 completed_index, - void (*buf_service)(struct vnic_wq *wq, - struct cq_desc *cq_desc, struct vnic_wq_buf *buf, void *opaque), - void *opaque) -{ - struct vnic_wq_buf *buf; - - buf = wq->to_clean; - while (1) { - - (*buf_service)(wq, cq_desc, buf, opaque); - - wq->ring.desc_avail++; - - wq->to_clean = buf->next; - - if (buf->index == completed_index) - break; - - buf = wq->to_clean; - } -} - -void vnic_wq_free(struct vnic_wq *wq); -int vnic_wq_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int index, - unsigned int desc_count, unsigned int desc_size); -int vnic_wq_devcmd2_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, - unsigned int desc_count, unsigned int desc_size); -#ifndef FOR_UPSTREAM_KERNEL -void vnic_wq_init_start(struct vnic_wq *wq, unsigned int cq_index, - unsigned int fetch_index, unsigned int posted_index, - unsigned int error_interrupt_enable, - unsigned int error_interrupt_offset); -#endif -void vnic_wq_init(struct vnic_wq *wq, unsigned int cq_index, - unsigned int error_interrupt_enable, - 
unsigned int error_interrupt_offset); -void vnic_wq_error_out(struct vnic_wq *wq, unsigned int error); -unsigned int vnic_wq_error_status(struct vnic_wq *wq); -unsigned int vnic_wq_ctrl_error_status(struct vnic_wq_ctrl *ctrl); -void vnic_wq_enable(struct vnic_wq *wq); -int vnic_wq_disable(struct vnic_wq *wq); -void vnic_wq_clean(struct vnic_wq *wq, - void (*buf_clean)(struct vnic_wq *wq, struct vnic_wq_buf *buf)); -#ifndef NOT_FOR_OPEN_ENIC -int vnic_wq_mem_size(struct vnic_wq *wq, unsigned int desc_count, - unsigned int desc_size); -#endif - -#endif /* _VNIC_WQ_H_ */ diff --git a/prov/usnic/src/usnic_direct/wq_enet_desc.h b/prov/usnic/src/usnic_direct/wq_enet_desc.h deleted file mode 100644 index 0f6077891fa..00000000000 --- a/prov/usnic/src/usnic_direct/wq_enet_desc.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright 2008-2010 Cisco Systems, Inc. All rights reserved. - * Copyright 2007 Nuova Systems, Inc. All rights reserved. - * - * LICENSE_BEGIN - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * LICENSE_END - * - * - */ - -#ifndef _WQ_ENET_DESC_H_ -#define _WQ_ENET_DESC_H_ - -/* Ethernet work queue descriptor: 16B */ -struct wq_enet_desc { - __le64 address; - __le16 length; - __le16 mss_loopback; - __le16 header_length_flags; - __le16 vlan_tag; -}; - -#define WQ_ENET_ADDR_BITS 64 -#define WQ_ENET_LEN_BITS 14 -#define WQ_ENET_LEN_MASK ((1 << WQ_ENET_LEN_BITS) - 1) -#define WQ_ENET_MSS_BITS 14 -#define WQ_ENET_MSS_MASK ((1 << WQ_ENET_MSS_BITS) - 1) -#define WQ_ENET_MSS_SHIFT 2 -#define WQ_ENET_LOOPBACK_SHIFT 1 -#define WQ_ENET_HDRLEN_BITS 10 -#define WQ_ENET_HDRLEN_MASK ((1 << WQ_ENET_HDRLEN_BITS) - 1) -#define WQ_ENET_FLAGS_OM_BITS 2 -#define WQ_ENET_FLAGS_OM_MASK ((1 << WQ_ENET_FLAGS_OM_BITS) - 1) -#define WQ_ENET_FLAGS_EOP_SHIFT 12 -#define WQ_ENET_FLAGS_CQ_ENTRY_SHIFT 13 -#define WQ_ENET_FLAGS_FCOE_ENCAP_SHIFT 14 -#define WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT 15 - -#define WQ_ENET_OFFLOAD_MODE_CSUM 0 -#define WQ_ENET_OFFLOAD_MODE_RESERVED 1 -#define WQ_ENET_OFFLOAD_MODE_CSUM_L4 2 -#define WQ_ENET_OFFLOAD_MODE_TSO 3 - -static inline void wq_enet_desc_enc(struct wq_enet_desc *desc, - u64 address, u16 length, u16 mss, u16 header_length, - u8 offload_mode, u8 
eop, u8 cq_entry, u8 fcoe_encap, - u8 vlan_tag_insert, u16 vlan_tag, u8 loopback) -{ - desc->address = cpu_to_le64(address); - desc->length = cpu_to_le16(length & WQ_ENET_LEN_MASK); - desc->mss_loopback = cpu_to_le16((mss & WQ_ENET_MSS_MASK) << - WQ_ENET_MSS_SHIFT | (loopback & 1) << WQ_ENET_LOOPBACK_SHIFT); - desc->header_length_flags = cpu_to_le16( - (header_length & WQ_ENET_HDRLEN_MASK) | - (offload_mode & WQ_ENET_FLAGS_OM_MASK) << WQ_ENET_HDRLEN_BITS | - (eop & 1) << WQ_ENET_FLAGS_EOP_SHIFT | - (cq_entry & 1) << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT | - (fcoe_encap & 1) << WQ_ENET_FLAGS_FCOE_ENCAP_SHIFT | - (vlan_tag_insert & 1) << WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT); - desc->vlan_tag = cpu_to_le16(vlan_tag); -} - -static inline void wq_enet_desc_dec(struct wq_enet_desc *desc, - u64 *address, u16 *length, u16 *mss, u16 *header_length, - u8 *offload_mode, u8 *eop, u8 *cq_entry, u8 *fcoe_encap, - u8 *vlan_tag_insert, u16 *vlan_tag, u8 *loopback) -{ - *address = le64_to_cpu(desc->address); - *length = le16_to_cpu(desc->length) & WQ_ENET_LEN_MASK; - *mss = (le16_to_cpu(desc->mss_loopback) >> WQ_ENET_MSS_SHIFT) & - WQ_ENET_MSS_MASK; - *loopback = (u8)((le16_to_cpu(desc->mss_loopback) >> - WQ_ENET_LOOPBACK_SHIFT) & 1); - *header_length = le16_to_cpu(desc->header_length_flags) & - WQ_ENET_HDRLEN_MASK; - *offload_mode = (u8)((le16_to_cpu(desc->header_length_flags) >> - WQ_ENET_HDRLEN_BITS) & WQ_ENET_FLAGS_OM_MASK); - *eop = (u8)((le16_to_cpu(desc->header_length_flags) >> - WQ_ENET_FLAGS_EOP_SHIFT) & 1); - *cq_entry = (u8)((le16_to_cpu(desc->header_length_flags) >> - WQ_ENET_FLAGS_CQ_ENTRY_SHIFT) & 1); - *fcoe_encap = (u8)((le16_to_cpu(desc->header_length_flags) >> - WQ_ENET_FLAGS_FCOE_ENCAP_SHIFT) & 1); - *vlan_tag_insert = (u8)((le16_to_cpu(desc->header_length_flags) >> - WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT) & 1); - *vlan_tag = le16_to_cpu(desc->vlan_tag); -} - -#endif /* _WQ_ENET_DESC_H_ */ diff --git a/prov/util/src/util_attr.c b/prov/util/src/util_attr.c index 
a334934602d..f40e2bf8330 100644 --- a/prov/util/src/util_attr.c +++ b/prov/util/src/util_attr.c @@ -247,12 +247,7 @@ static int ofi_info_to_util(uint32_t version, const struct fi_provider *prov, if (ofi_dup_addr(core_info, *util_info)) goto err; - /* Release 1.4 brought standardized domain names across IP based - * providers. Before this release, the usNIC provider would return a - * NULL domain name from fi_getinfo. For compatibility reasons, allow a - * NULL domain name when apps are requesting version < 1.4. - */ - assert(FI_VERSION_LT(1, 4) || core_info->domain_attr->name); + assert(core_info->domain_attr->name); if (core_info->domain_attr->name) { (*util_info)->domain_attr->name = diff --git a/prov/verbs/src/verbs_info.c b/prov/verbs/src/verbs_info.c index fadf7f00007..b692788e82d 100644 --- a/prov/verbs/src/verbs_info.c +++ b/prov/verbs/src/verbs_info.c @@ -689,7 +689,6 @@ static int vrb_get_device_attrs(struct ibv_context *ctx, } /* - * USNIC plugs into the verbs framework, but is not a usable device. * Manually check for devices and fail gracefully if none are present. * This avoids the lower libraries (libibverbs and librdmacm) from * reporting error messages to stderr. 
diff --git a/src/fabric.c b/src/fabric.c index 1ceb4219cde..fb2000ff415 100644 --- a/src/fabric.c +++ b/src/fabric.c @@ -445,7 +445,7 @@ static struct fi_provider *ofi_get_hook(const char *name) static void ofi_ordered_provs_init(void) { char *ordered_prov_names[] = { - "efa", "psm2", "opx", "usnic", "gni", "verbs", + "efa", "psm2", "opx", "gni", "verbs", "netdir", "psm3", "ucx", "ofi_rxm", "ofi_rxd", "shm", /* Initialize the socket based providers last of the @@ -889,7 +889,6 @@ void fi_ini(void) ofi_register_provider(PSM3_INIT, NULL); ofi_register_provider(PSM2_INIT, NULL); - ofi_register_provider(USNIC_INIT, NULL); ofi_register_provider(GNI_INIT, NULL); ofi_register_provider(NETDIR_INIT, NULL); ofi_register_provider(SHM_INIT, NULL); From bbcf4ac758ccf768dca2c0ef79c0da51cca95534 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 18 Sep 2023 16:54:18 -0700 Subject: [PATCH 03/34] prov/rstream: Remove unfinished provider Signed-off-by: Sean Hefty --- .travis.yml | 1 - Makefile.am | 1 - configure.ac | 1 - contrib/intel/jenkins/common.py | 1 - include/ofi_prov.h | 11 - man/fi_rstream.7.md | 84 ---- man/man7/fi_rstream.7 | 84 ---- prov/psm3/configure.ac | 2 - prov/rstream/Makefile.include | 28 -- prov/rstream/configure.m4 | 15 - prov/rstream/src/rstream.h | 232 ---------- prov/rstream/src/rstream_attr.c | 96 ---- prov/rstream/src/rstream_cm.c | 188 -------- prov/rstream/src/rstream_domain.c | 126 ----- prov/rstream/src/rstream_ep.c | 420 ----------------- prov/rstream/src/rstream_eq.c | 234 ---------- prov/rstream/src/rstream_fabric.c | 144 ------ prov/rstream/src/rstream_init.c | 181 -------- prov/rstream/src/rstream_msg.c | 742 ------------------------------ src/fabric.c | 1 - src/fi_tostr.c | 1 - 21 files changed, 2593 deletions(-) delete mode 100644 man/fi_rstream.7.md delete mode 100644 man/man7/fi_rstream.7 delete mode 100644 prov/rstream/Makefile.include delete mode 100644 prov/rstream/configure.m4 delete mode 100644 prov/rstream/src/rstream.h delete mode 
100644 prov/rstream/src/rstream_attr.c delete mode 100644 prov/rstream/src/rstream_cm.c delete mode 100644 prov/rstream/src/rstream_domain.c delete mode 100644 prov/rstream/src/rstream_ep.c delete mode 100644 prov/rstream/src/rstream_eq.c delete mode 100644 prov/rstream/src/rstream_fabric.c delete mode 100644 prov/rstream/src/rstream_init.c delete mode 100644 prov/rstream/src/rstream_msg.c diff --git a/.travis.yml b/.travis.yml index 42443fedc8c..8f23303a816 100644 --- a/.travis.yml +++ b/.travis.yml @@ -95,7 +95,6 @@ install: --disable-psm --disable-psm2 --disable-psm3 - --disable-rstream --disable-rxd --disable-rxm --disable-shm diff --git a/Makefile.am b/Makefile.am index 7b2941c9283..3727164cf8e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -462,7 +462,6 @@ include prov/opx/Makefile.include include prov/shm/Makefile.include include prov/sm2/Makefile.include include prov/tcp/Makefile.include -include prov/rstream/Makefile.include include prov/ucx/Makefile.include include prov/hook/Makefile.include include prov/hook/perf/Makefile.include diff --git a/configure.ac b/configure.ac index 8552ed0bced..254f0f47347 100644 --- a/configure.ac +++ b/configure.ac @@ -958,7 +958,6 @@ FI_PROVIDER_SETUP([mrail]) FI_PROVIDER_SETUP([rxd]) FI_PROVIDER_SETUP([shm]) FI_PROVIDER_SETUP([sm2]) -FI_PROVIDER_SETUP([rstream]) FI_PROVIDER_SETUP([ucx]) FI_PROVIDER_SETUP([perf]) FI_PROVIDER_SETUP([trace]) diff --git a/contrib/intel/jenkins/common.py b/contrib/intel/jenkins/common.py index d456578a33f..ff3bd9e11c4 100755 --- a/contrib/intel/jenkins/common.py +++ b/contrib/intel/jenkins/common.py @@ -131,7 +131,6 @@ def run(self): common_disable_list = [ 'efa', 'perf', - 'rstream', 'hook_debug', 'mrail', 'opx' diff --git a/include/ofi_prov.h b/include/ofi_prov.h index 38ee97f74df..b657ef89e70 100644 --- a/include/ofi_prov.h +++ b/include/ofi_prov.h @@ -215,17 +215,6 @@ MRAIL_INI ; # define MRAIL_INIT NULL #endif -#if (HAVE_RSTREAM) && (HAVE_RSTREAM_DL) -# define RSTREAM_INI FI_EXT_INI -# 
define RSTREAM_INIT NULL -#elif (HAVE_RSTREAM) -# define RSTREAM_INI INI_SIG(fi_rstream_ini) -# define RSTREAM_INIT fi_rstream_ini() -RSTREAM_INI ; -#else -# define RSTREAM_INIT NULL -#endif - #if (HAVE_PERF) && (HAVE_PERF_DL) # define HOOK_PERF_INI FI_EXT_INI # define HOOK_PERF_INIT NULL diff --git a/man/fi_rstream.7.md b/man/fi_rstream.7.md deleted file mode 100644 index ee77ddeb3c1..00000000000 --- a/man/fi_rstream.7.md +++ /dev/null @@ -1,84 +0,0 @@ ---- -layout: page -title: fi_rstream(7) -tagline: Libfabric Programmer's Manual ---- -{% include JB/setup %} - -# NAME - -fi_rstream - -# OVERVIEW - -The rstream provider supports stream messaging over - message based RMA. It maps stream to message over - a core RMA-based OFI provider. Only Endpoints and EQs - are needed for connection start-up and messaging. Unlike other - OFI providers, rstream does not support CQs or memory registration - of any kind. In order to asynchronously wait for a completion (cm/msg), - one can use fi_control on the endpoint/eq to get an fd to use in a poll call. - For messaging completions, use FI_PEEK on send/recv after poll to see what type of - transaction has transpired. - -# SUPPORTED FEATURES - -The rstream provider currently supports *FI_MSG* capabilities. - -*Endpoint types* -: The provider supports only endpoint type *FI_EP_SOCK_STREAM*. - -*Endpoint capabilities* : The following data transfer interface is -supported: *fi_msg*. - -*Modes* -: The provider does not require the use of any mode bits but supports - core providers that require FI_CONTEXT and FI_RX_CQ_DATA. - -*Progress* -: The rstream provider only supports *FI_PROGRESS_MANUAL*. - -*Threading Model* -: The provider supports FI_THREAD_SAFE - -*Verbs-iWarp* -: The provider has added features to enable iWarp. To use this feature, the - ep protocol iWarp must be requested in an fi_getinfo call. - -# LIMITATIONS - -The rstream provider is experimental and lacks performance validation and - extensive testing. 
The iWarp protocol may need extra initialization work to re-enable. - Currently the rstream provider is used to by the rsockets-OFI library as a ULP and - hooks into the core provider verbs. It is not interoperable with the previous rsockets(v1) - protocol. There are default settings that limit the message stream (provider - memory region size and CQ size). These can be modified by fi_setopt. - -# SETTINGS - -The *rstream* provider settings can be modified via fi_setopt on the - endpoint (FI_OPT_ENDPOINT) along with the following parameters: - -*FI_OPT_SEND_BUF_SIZE* -: Size of the send buffer. Default is 32KB. - -*FI_OPT_RECV_BUF_SIZE* -: Size of the recv buffer. Default is 32KB. - -*FI_OPT_TX_SIZE* -: Size of the send queue. Default is 384. - -*FI_OPT_RX_SIZE* -: Size of the recv queue. Default is 384. - -# OFI EXTENSIONS - -The rstream provider has extended the current OFI API set in order to enable a - user implementation of Poll. Specifically sendmsg(FI_PEEK) is supported - which replicates the behavior of the recvmsg(FI_PEEK) feature. - -# SEE ALSO - -[`fabric`(7)](fabric.7.html), -[`fi_provider`(7)](fi_provider.7.html), -[`fi_getinfo`(3)](fi_getinfo.3.html) diff --git a/man/man7/fi_rstream.7 b/man/man7/fi_rstream.7 deleted file mode 100644 index a8754186b56..00000000000 --- a/man/man7/fi_rstream.7 +++ /dev/null @@ -1,84 +0,0 @@ -.\" Automatically generated by Pandoc 2.9.2.1 -.\" -.TH "fi_rstream" "7" "2022\-12\-09" "Libfabric Programmer\[cq]s Manual" "#VERSION#" -.hy -.SH NAME -.PP -fi_rstream -.SH OVERVIEW -.PP -The rstream provider supports stream messaging over message based RMA. -It maps stream to message over a core RMA-based OFI provider. -Only Endpoints and EQs are needed for connection start-up and messaging. -Unlike other OFI providers, rstream does not support CQs or memory -registration of any kind. -In order to asynchronously wait for a completion (cm/msg), one can use -fi_control on the endpoint/eq to get an fd to use in a poll call. 
-For messaging completions, use FI_PEEK on send/recv after poll to see -what type of transaction has transpired. -.SH SUPPORTED FEATURES -.PP -The rstream provider currently supports \f[I]FI_MSG\f[R] capabilities. -.TP -\f[I]Endpoint types\f[R] -The provider supports only endpoint type \f[I]FI_EP_SOCK_STREAM\f[R]. -.PP -\f[I]Endpoint capabilities\f[R] : The following data transfer interface -is supported: \f[I]fi_msg\f[R]. -.TP -\f[I]Modes\f[R] -The provider does not require the use of any mode bits but supports core -providers that require FI_CONTEXT and FI_RX_CQ_DATA. -.TP -\f[I]Progress\f[R] -The rstream provider only supports \f[I]FI_PROGRESS_MANUAL\f[R]. -.TP -\f[I]Threading Model\f[R] -The provider supports FI_THREAD_SAFE -.TP -\f[I]Verbs-iWarp\f[R] -The provider has added features to enable iWarp. -To use this feature, the ep protocol iWarp must be requested in an -fi_getinfo call. -.SH LIMITATIONS -.PP -The rstream provider is experimental and lacks performance validation -and extensive testing. -The iWarp protocol may need extra initialization work to re-enable. -Currently the rstream provider is used to by the rsockets-OFI library as -a ULP and hooks into the core provider verbs. -It is not interoperable with the previous rsockets(v1) protocol. -There are default settings that limit the message stream (provider -memory region size and CQ size). -These can be modified by fi_setopt. -.SH SETTINGS -.PP -The \f[I]rstream\f[R] provider settings can be modified via fi_setopt on -the endpoint (FI_OPT_ENDPOINT) along with the following parameters: -.TP -\f[I]FI_OPT_SEND_BUF_SIZE\f[R] -Size of the send buffer. -Default is 32KB. -.TP -\f[I]FI_OPT_RECV_BUF_SIZE\f[R] -Size of the recv buffer. -Default is 32KB. -.TP -\f[I]FI_OPT_TX_SIZE\f[R] -Size of the send queue. -Default is 384. -.TP -\f[I]FI_OPT_RX_SIZE\f[R] -Size of the recv queue. -Default is 384. 
-.SH OFI EXTENSIONS -.PP -The rstream provider has extended the current OFI API set in order to -enable a user implementation of Poll. -Specifically sendmsg(FI_PEEK) is supported which replicates the behavior -of the recvmsg(FI_PEEK) feature. -.SH SEE ALSO -.PP -\f[C]fabric\f[R](7), \f[C]fi_provider\f[R](7), \f[C]fi_getinfo\f[R](3) -.SH AUTHORS -OpenFabrics. diff --git a/prov/psm3/configure.ac b/prov/psm3/configure.ac index decca9b4a85..9ff226d8d28 100644 --- a/prov/psm3/configure.ac +++ b/prov/psm3/configure.ac @@ -880,8 +880,6 @@ AC_DEFINE([HAVE_PSM2_DL], 0, [Ignore HAVE_PSM2_DL]) dnl FI_PROVIDER_SETUP([psm3]) AC_DEFINE([HAVE_OPX], 0, [Ignore HAVE_OPX]) AC_DEFINE([HAVE_OPX_DL], 0, [Ignore HAVE_OPX_DL]) -AC_DEFINE([HAVE_RSTREAM], 0, [Ignore HAVE_RSTREAM]) -AC_DEFINE([HAVE_RSTREAM_DL], 0, [Ignore HAVE_RSTREAM_DL]) AC_DEFINE([HAVE_RXD], 0, [Ignore HAVE_RXD]) AC_DEFINE([HAVE_RXD_DL], 0, [Ignore HAVE_RXD_DL]) AC_DEFINE([HAVE_RXM], 0, [Ignore HAVE_RXM]) diff --git a/prov/rstream/Makefile.include b/prov/rstream/Makefile.include deleted file mode 100644 index 6eac7c0d51a..00000000000 --- a/prov/rstream/Makefile.include +++ /dev/null @@ -1,28 +0,0 @@ -if HAVE_RSTREAM -_rstream_files = \ - prov/rstream/src/rstream_domain.c \ - prov/rstream/src/rstream_fabric.c \ - prov/rstream/src/rstream_attr.c \ - prov/rstream/src/rstream_init.c \ - prov/rstream/src/rstream_cm.c \ - prov/rstream/src/rstream_msg.c \ - prov/rstream/src/rstream_eq.c \ - prov/rstream/src/rstream_ep.c \ - prov/rstream/src/rstream.h - -if HAVE_RSTREAM_DL -pkglib_LTLIBRARIES += librstream-fi.la -librstream_fi_la_SOURCES = $(_rstream_files) $(common_srcs) -librstream_fi_la_LIBADD = $(linkback) $(rstream_LIBS) -librstream_fi_la_LDFLAGS = -module -avoid-version -shared -export-dynamic -librstream_fi_la_DEPENDENCIES = $(linkback) -else !HAVE_RSTREAM_DL -src_libfabric_la_SOURCES += $(_rstream_files) -src_libfabric_la_LIBADD += $(rstream_LIBS) -endif !HAVE_RSTREAM_DL - -prov_install_man_pages += man/man7/fi_rstream.7 
- -endif HAVE_RSTREAM - -prov_dist_man_pages += man/man7/fi_rstream.7 diff --git a/prov/rstream/configure.m4 b/prov/rstream/configure.m4 deleted file mode 100644 index 2543f472b02..00000000000 --- a/prov/rstream/configure.m4 +++ /dev/null @@ -1,15 +0,0 @@ -dnl Configury specific to the libfabric rstream provider - -dnl Called to configure this provider -dnl -dnl Arguments: -dnl -dnl $1: action if configured successfully -dnl $2: action if not configured successfully -dnl -AC_DEFUN([FI_RSTREAM_CONFIGURE],[ - # Determine if we can support the rxd provider - rstream_h_happy=0 - AS_IF([test x"$enable_rstream" != x"no"], [rstream_h_happy=1]) - AS_IF([test $rstream_h_happy -eq 1], [$1], [$2]) -]) diff --git a/prov/rstream/src/rstream.h b/prov/rstream/src/rstream.h deleted file mode 100644 index 80e457e0761..00000000000 --- a/prov/rstream/src/rstream.h +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright (c) 2017-2018 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _RSTREAM_H_ -#define _RSTREAM_H_ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - - -#define RSTREAM_CAPS (FI_MSG | FI_SEND | FI_RECV | FI_LOCAL_COMM | FI_REMOTE_COMM) -#define RSTREAM_DEFAULT_QP_SIZE 384 -#define RSTREAM_MAX_CTRL 2 -#define RSTREAM_MR_BITS 15 -#define RSTREAM_DEFAULT_MR_SEG_SIZE (1 << RSTREAM_MR_BITS) - -#define RSTREAM_MAX_POLL_TIME 10 - -#define RSTREAM_MAX_MR_BITS 20 -#define RSTREAM_MR_MAX (1ULL << RSTREAM_MAX_MR_BITS) -#define RSTREAM_MR_LEN_MASK (RSTREAM_MR_MAX - 1) -#define RSTREAM_CREDIT_OFFSET RSTREAM_MAX_MR_BITS -#define RSTREAM_CREDIT_BITS 9 -#define RSTREAM_CREDITS_MAX (1ULL << RSTREAM_CREDIT_BITS) -#define RSTREAM_CREDIT_MASK ((RSTREAM_CREDITS_MAX - 1) << RSTREAM_CREDIT_OFFSET) -#define RSTREAM_RSOCKETV2 2 - -/*iWARP, have to also track msg len [msglen, target_credits, target_mr_len]*/ -#define RSTREAM_USING_IWARP (rstream_info.ep_attr->protocol == FI_PROTO_IWARP) -#define RSTREAM_IWARP_DATA_SIZE sizeof(uint32_t) - -#define RSTREAM_IWARP_MSG_BIT (1ULL << 31) -#define RSTREAM_IWARP_MSG_BIT_MASK (RSTREAM_IWARP_MSG_BIT - 1) -#define RSTREAM_IWARP_IMM_MSG_LEN (1ULL << RSTREAM_MAX_MR_BITS) /* max transmission size */ - -extern struct fi_info rstream_info; -extern struct fi_provider rstream_prov; -extern struct util_prov rstream_util_prov; -extern struct fi_fabric_attr 
rstream_fabric_attr; - -/* util structs ~ user layer fds */ - -struct rstream_fabric { - struct util_fabric util_fabric; - struct fid_fabric *msg_fabric; -}; - -struct rstream_domain { - struct util_domain util_domain; - struct fid_domain *msg_domain; -}; - -enum rstream_msg_type { - RSTREAM_CTRL_MSG, - RSTREAM_RX_MSG_COMP, - RSTREAM_TX_MSG_COMP, - RSTREAM_MSG_UNKNOWN -}; - -struct rstream_mr_seg { - void *data_start; - uint32_t size; - uint32_t avail_size; - uint64_t start_offset; - uint64_t end_offset; -}; - -struct rstream_lmr_data { - void *base_addr; - void *ldesc; - uint64_t rkey; - struct fid_mr *mr; - struct rstream_mr_seg tx; - struct rstream_mr_seg rx; - uint64_t recv_buffer_offset; -}; - -struct rstream_rmr_data { - struct rstream_mr_seg mr; - uint64_t rkey; -}; - -struct rstream_cm_data { - uint64_t base_addr; - uint64_t rkey; - uint32_t rmr_size; - uint16_t max_rx_credits; - uint8_t version; - uint8_t reserved; -}; - -struct rstream_ctx_data { - struct fi_context ctx; - size_t len; -}; - -OFI_DECLARE_FREESTACK(struct rstream_ctx_data, rstream_tx_ctx_fs); - -struct rstream_tx_ctx { - struct rstream_ctx_data *tx_ctxs; - uint32_t num_in_use; - uint32_t free_index; - uint32_t front; -}; - -struct rstream_window { - uint16_t max_tx_credits; - uint16_t tx_credits; - uint16_t ctrl_credits; - uint16_t max_target_rx_credits; - uint16_t target_rx_credits; - uint16_t max_rx_credits; -}; - -struct rstream_cq_data { - uint32_t total_len; - uint16_t num_completions; -}; - -struct rstream_ep { - struct util_ep util_ep; - struct fid_ep *ep_fd; - struct fid_domain *msg_domain; - struct rstream_lmr_data local_mr; - struct rstream_rmr_data remote_data; - struct fid_cq *cq; - struct rstream_window qp_win; - struct fi_context *rx_ctxs; - uint32_t rx_ctx_index; - struct rstream_tx_ctx_fs *tx_ctxs; - struct rstream_cq_data rx_cq_data; - ofi_mutex_t send_lock; - ofi_mutex_t recv_lock; - /* must take send/recv lock before cq_lock */ - ofi_mutex_t cq_lock; -}; - -struct 
rstream_pep { - struct util_pep util_pep; - struct fid_pep *pep_fd; -}; - -struct rstream_eq { - struct util_eq util_eq; - struct fid_eq *eq_fd; - uint32_t cm_data_len; - struct fi_eq_cm_entry *cm_entry; - uint32_t prev_cm_state; - RbtHandle ep_map; -}; - -struct rstream_timer { - struct timeval start; - struct timeval end; - uint32_t poll_time; -}; - -extern ssize_t rstream_post_cq_data_recv(struct rstream_ep *ep, - const struct fi_cq_data_entry *cq_entry); - -extern int rstream_info_to_rstream(uint32_t version, const struct fi_info *core_info, - const struct fi_info *base_info, struct fi_info *info); -extern int rstream_info_to_core(uint32_t version, const struct fi_info *rstream_info, - const struct fi_info *base_info, struct fi_info *core_info); -extern void rstream_set_info(struct fi_info *info); -extern struct fi_ops_cm rstream_ops_cm; -extern struct fi_ops_cm rstream_ops_pep_cm; -extern struct fi_ops_msg rstream_ops_msg; -extern int rstream_passive_ep(struct fid_fabric *fabric, struct fi_info *info, - struct fid_pep **pep, void *context); -extern void rstream_process_cm_event(struct rstream_ep *ep, void *cm_data); - -int rstream_fabric_open(struct fi_fabric_attr *attr, struct fid_fabric **fabric, - void *context); -int rstream_domain_open(struct fid_fabric *fabric, struct fi_info *info, - struct fid_domain **domain, void *context); -int rstream_ep_open(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **ep_fid, void *context); -int rstream_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr, - struct fid_eq **eq, void *context); -int rstream_info_to_core(uint32_t version, const struct fi_info *rstream_info, - const struct fi_info *base_info, struct fi_info *core_info); - -#endif /* _RSTREAM_H_ */ diff --git a/prov/rstream/src/rstream_attr.c b/prov/rstream/src/rstream_attr.c deleted file mode 100644 index 467dc36ee0b..00000000000 --- a/prov/rstream/src/rstream_attr.c +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2017-2018 Intel 
Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "rstream.h" - - -struct fi_tx_attr rstream_tx_attr = { - .caps = RSTREAM_CAPS, - .msg_order = FI_ORDER_SAS, - .size = RSTREAM_DEFAULT_QP_SIZE, -}; - -struct fi_rx_attr rstream_rx_attr = { - .caps = RSTREAM_CAPS, - .msg_order = FI_ORDER_SAS, - .size = RSTREAM_DEFAULT_QP_SIZE, -}; - -struct fi_ep_attr rstream_ep_attr = { - .type = FI_EP_SOCK_STREAM, - .protocol = FI_PROTO_RSTREAM, - .protocol_version = 1, - .tx_ctx_cnt = 1, - .rx_ctx_cnt = 1, -}; - -struct fi_domain_attr rstream_domain_attr = { - .caps = FI_LOCAL_COMM | FI_REMOTE_COMM, - .threading = FI_THREAD_SAFE, - .control_progress = FI_PROGRESS_AUTO, - .data_progress = FI_PROGRESS_MANUAL, - .resource_mgmt = FI_RM_ENABLED, - .av_type = FI_AV_UNSPEC, - /* for the ofi mr_check */ - .mr_mode = 0, - .tx_ctx_cnt = 1, - .rx_ctx_cnt = 1, - .max_ep_tx_ctx = 1, - .mr_iov_limit = 1, -}; - -struct fi_fabric_attr rstream_fabric_attr = { - .prov_version = FI_VERSION(1, 8), -}; - -struct fi_info rstream_info = { - .caps = RSTREAM_CAPS, - .addr_format = FI_SOCKADDR, - .tx_attr = &rstream_tx_attr, - .rx_attr = &rstream_rx_attr, - .ep_attr = &rstream_ep_attr, - .domain_attr = &rstream_domain_attr, - .fabric_attr = &rstream_fabric_attr -}; - -/* settings post CONNREQ for users */ -void rstream_set_info(struct fi_info *info) -{ - info->caps = RSTREAM_CAPS; - info->mode = 0; - info->ep_attr->type = FI_EP_SOCK_STREAM; - info->ep_attr->protocol = rstream_info.ep_attr->protocol; - info->domain_attr->mr_mode = 0; - info->domain_attr->mr_cnt = 0; - *info->rx_attr = rstream_rx_attr; - *info->tx_attr = rstream_tx_attr; -} diff --git a/prov/rstream/src/rstream_cm.c b/prov/rstream/src/rstream_cm.c deleted file mode 100644 index 97635a05f9b..00000000000 --- a/prov/rstream/src/rstream_cm.c +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright (c) 2017-2018 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "rstream.h" - - -static void rstream_format_data(struct rstream_cm_data *cm, - const struct rstream_ep *ep) -{ - assert(cm && ep->local_mr.rx.data_start); - - cm->version = RSTREAM_RSOCKETV2; - cm->max_rx_credits = htons(ep->qp_win.max_rx_credits); - cm->base_addr = htonll((uintptr_t)ep->local_mr.rx.data_start); - cm->rkey = htonll(ep->local_mr.rkey); - cm->rmr_size = htonl(ep->local_mr.rx.size); -} - -static int rstream_setname(fid_t fid, void *addr, size_t addrlen) -{ - fid_t rstream_fid; - struct rstream_pep *rstream_pep; - struct rstream_ep *rstream_ep; - - if (fid->fclass == FI_CLASS_PEP) { - rstream_pep = container_of(fid, struct rstream_pep, - util_pep.pep_fid); - rstream_fid = &rstream_pep->pep_fd->fid; - } else if (fid->fclass == FI_CLASS_EP) { - rstream_ep = container_of(fid, struct rstream_ep, - util_ep.ep_fid); - rstream_fid = &rstream_ep->ep_fd->fid; - } else { - return -FI_ENOSYS; - } - - return fi_setname(rstream_fid, addr, addrlen); -} - -static int rstream_getname(fid_t fid, void *addr, size_t *addrlen) -{ - fid_t rstream_fid; - struct rstream_pep *rstream_pep; - struct rstream_ep *rstream_ep; - - if (fid->fclass == FI_CLASS_PEP) { - rstream_pep = container_of(fid, struct rstream_pep, - util_pep.pep_fid); - rstream_fid = &rstream_pep->pep_fd->fid; - } else if (fid->fclass == FI_CLASS_EP) { - rstream_ep = container_of(fid, struct rstream_ep, - util_ep.ep_fid); - rstream_fid = &rstream_ep->ep_fd->fid; - } else { - return -FI_ENOSYS; - } - - return fi_getname(rstream_fid, addr, addrlen); -} - -static int rstream_getpeer(struct fid_ep *ep, void *addr, size_t *addrlen) -{ - struct rstream_ep *rstream_ep = - container_of(ep, struct rstream_ep, util_ep.ep_fid); - - return fi_getpeer(rstream_ep->ep_fd, addr, addrlen); -} - -static int rstream_check_cm_size(struct rstream_ep *ep) -{ - int ret; - size_t cm_max_size = 0, opt_size = sizeof(size_t); - - ret = fi_getopt(&ep->ep_fd->fid, FI_OPT_ENDPOINT, FI_OPT_CM_DATA_SIZE, - &cm_max_size, 
&opt_size); - if (ret < 0) - return ret; - if (cm_max_size < sizeof(struct rstream_cm_data)) - return -FI_ETOOSMALL; - return ret; -} - -static int rstream_connect(struct fid_ep *ep, const void *addr, - const void *param, size_t paramlen) -{ - struct rstream_ep *rstream_ep = - container_of(ep, struct rstream_ep, util_ep.ep_fid); - struct rstream_cm_data cm_data; - - if (param || paramlen > 0 || rstream_check_cm_size(rstream_ep) != 0) - return -FI_ENOSYS; - - rstream_format_data(&cm_data, rstream_ep); - - return fi_connect(rstream_ep->ep_fd, addr, &cm_data, sizeof(cm_data)); -} - -static int rstream_listen(struct fid_pep *pep) -{ - struct rstream_pep *rstream_pep = container_of(pep, - struct rstream_pep, util_pep.pep_fid); - - return fi_listen(rstream_pep->pep_fd); -} - -static int rstream_accept(struct fid_ep *ep, const void *param, - size_t paramlen) -{ - struct rstream_cm_data cm_data; - struct rstream_ep *rstream_ep = - container_of(ep, struct rstream_ep, util_ep.ep_fid); - - if (param || paramlen > 0 || rstream_check_cm_size(rstream_ep) != 0) - return -FI_ENOSYS; - - rstream_format_data(&cm_data, rstream_ep); - - return fi_accept(rstream_ep->ep_fd, &cm_data, sizeof(cm_data)); -} - -static int rstream_reject(struct fid_pep *pep, fid_t handle, - const void *param, size_t paramlen) -{ - return -FI_ENOSYS; -} - -static int rstream_shutdown(struct fid_ep *ep, uint64_t flags) -{ - struct rstream_ep *rstream_ep = - container_of(ep, struct rstream_ep, util_ep.ep_fid); - - return fi_shutdown(rstream_ep->ep_fd, flags); -} - -struct fi_ops_cm rstream_ops_pep_cm = { - .size = sizeof(struct fi_ops_cm), - .setname = rstream_setname, - .getname = rstream_getname, - .getpeer = fi_no_getpeer, - .connect = fi_no_connect, - .listen = rstream_listen, - .accept = fi_no_accept, - .reject = rstream_reject, - .shutdown = rstream_shutdown, - .join = fi_no_join, -}; - -struct fi_ops_cm rstream_ops_cm = { - .size = sizeof(struct fi_ops_cm), - .setname = fi_no_setname, - .getname = 
fi_no_getname, - .getpeer = rstream_getpeer, - .connect = rstream_connect, - .listen = fi_no_listen, - .accept = rstream_accept, - .reject = fi_no_reject, - .shutdown = rstream_shutdown, - .join = fi_no_join, -}; diff --git a/prov/rstream/src/rstream_domain.c b/prov/rstream/src/rstream_domain.c deleted file mode 100644 index 8072d9d2d31..00000000000 --- a/prov/rstream/src/rstream_domain.c +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2017-2018 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "rstream.h" - - -static int rstream_domain_close(fid_t fid) -{ - struct rstream_domain *rstream_domain = - container_of(fid, struct rstream_domain, - util_domain.domain_fid.fid); - int ret; - - ret = fi_close(&rstream_domain->msg_domain->fid); - if (ret) - return ret; - - ret = ofi_domain_close(&rstream_domain->util_domain); - if (ret) - return ret; - - free(rstream_domain); - - return 0; -} - -static struct fi_ops_mr rstream_domain_mr_ops = { - .size = sizeof(struct fi_ops_mr), - .reg = fi_no_mr_reg, - .regv = fi_no_mr_regv, - .regattr = fi_no_mr_regattr, -}; - -static struct fi_ops rstream_domain_fi_ops = { - .size = sizeof(struct fi_ops), - .close = rstream_domain_close, - .bind = fi_no_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open, -}; - -static struct fi_ops_domain rstream_domain_ops = { - .size = sizeof(struct fi_ops_domain), - .av_open = fi_no_av_open, - .cq_open = fi_no_cq_open, - .endpoint = rstream_ep_open, - .scalable_ep = fi_no_scalable_ep, - .cntr_open = fi_no_cntr_open, - .poll_open = fi_no_poll_open, - .stx_ctx = fi_no_stx_context, - .srx_ctx = fi_no_srx_context, - .query_atomic = fi_no_query_atomic, - .query_collective = fi_no_query_collective, -}; - -int rstream_domain_open(struct fid_fabric *fabric, struct fi_info *info, - struct fid_domain **domain, void *context) -{ - struct rstream_domain *rstream_domain; - struct rstream_fabric *rstream_fabric; - int ret; - struct fi_info *cinfo = NULL; - - rstream_domain = calloc(1, sizeof(*rstream_domain)); - if (!rstream_domain) - return -FI_ENOMEM; - - rstream_fabric = container_of(fabric, struct rstream_fabric, - util_fabric.fabric_fid); - - ret = ofi_get_core_info(FI_VERSION(1, 8), NULL, NULL, 0, - &rstream_util_prov, info, NULL, rstream_info_to_core, &cinfo); - if (ret) - goto err1; - - ret = fi_domain(rstream_fabric->msg_fabric, cinfo, - &rstream_domain->msg_domain, context); - if (ret) - goto err1; - - ret = ofi_domain_init(fabric, info, &rstream_domain->util_domain, 
- context, OFI_LOCK_MUTEX); - if (ret) - goto err1; - - *domain = &rstream_domain->util_domain.domain_fid; - (*domain)->fid.ops = &rstream_domain_fi_ops; - (*domain)->mr = &rstream_domain_mr_ops; - (*domain)->ops = &rstream_domain_ops; - - return 0; -err1: - if (cinfo) - fi_freeinfo(cinfo); - free(rstream_domain); - return ret; -} diff --git a/prov/rstream/src/rstream_ep.c b/prov/rstream/src/rstream_ep.c deleted file mode 100644 index 8976c7706a6..00000000000 --- a/prov/rstream/src/rstream_ep.c +++ /dev/null @@ -1,420 +0,0 @@ -/* - * Copyright (c) 2017-2018 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "rstream.h" - - -static int rstream_ep_close(fid_t fid) -{ - int ret; - struct rstream_ep *rstream_ep = - container_of(fid, struct rstream_ep, util_ep.ep_fid.fid); - - ret = fi_close(&rstream_ep->local_mr.mr->fid); - if (ret) - return ret; - free(rstream_ep->local_mr.base_addr); - - ret = fi_close(&rstream_ep->ep_fd->fid); - if (ret) - return ret; - - ret = fi_close(&rstream_ep->cq->fid); - if (ret) - return ret; - - ofi_endpoint_close(&rstream_ep->util_ep); - - rstream_tx_ctx_fs_free(rstream_ep->tx_ctxs); - - ofi_mutex_destroy(&rstream_ep->send_lock); - ofi_mutex_destroy(&rstream_ep->recv_lock); - ofi_mutex_destroy(&rstream_ep->cq_lock); - free(rstream_ep->rx_ctxs); - free(rstream_ep); - return 0; -} - -static int rstream_ep_bind(struct fid *ep_fid, struct fid *bfid, uint64_t flags) -{ - int ret; - struct rstream_ep *rstream_ep = - container_of(ep_fid, struct rstream_ep, util_ep.ep_fid.fid); - struct rstream_eq *rstream_eq = NULL; - - switch (bfid->fclass) { - case FI_CLASS_EQ: - rstream_eq = container_of(bfid, struct rstream_eq, - util_eq.eq_fid.fid); - ret = fi_ep_bind(rstream_ep->ep_fd, &rstream_eq->eq_fd->fid, - flags); - rbtInsert(rstream_eq->ep_map, &rstream_ep->ep_fd->fid, - rstream_ep); - break; - default: - FI_WARN(&rstream_prov, FI_LOG_EP_CTRL, "invalid fid class\n"); - ret = -FI_EINVAL; - break; - } - return ret; -} - -static int rstream_reg_mrs(struct fid_domain *domain, - struct rstream_lmr_data *lmr) -{ - int ret; - uint64_t rx_meta_data_offset = 0; - uint32_t full_mr_size = lmr->tx.size + lmr->rx.size; - - if (RSTREAM_USING_IWARP) - rx_meta_data_offset = RSTREAM_IWARP_DATA_SIZE * lmr->rx.size; - - full_mr_size = full_mr_size + rx_meta_data_offset; - lmr->base_addr = malloc(full_mr_size); - - ret = fi_mr_reg(domain, lmr->base_addr, full_mr_size, - FI_READ | FI_WRITE | FI_REMOTE_READ | FI_REMOTE_WRITE, - 0, 0, 0, &lmr->mr, NULL); - if (ret) - return ret; - - lmr->ldesc = fi_mr_desc(lmr->mr); - lmr->rkey = fi_mr_key(lmr->mr); - 
lmr->tx.data_start = (char *)lmr->base_addr; - lmr->tx.avail_size = lmr->tx.size; - lmr->rx.data_start = (char *)lmr->tx.data_start + - lmr->tx.size + rx_meta_data_offset; - - return ret; -} - -static int rstream_cq_init(struct fid_domain *domain, struct rstream_ep *rep) -{ - int ret; - struct fi_cq_attr attr; - - memset(&attr, 0, sizeof(attr)); - attr.format = FI_CQ_FORMAT_DATA; - attr.wait_obj = FI_WAIT_FD; - attr.size = rep->qp_win.max_rx_credits + rep->qp_win.max_tx_credits; - - ret = fi_cq_open(domain, &attr, &rep->cq, NULL); - if (ret) - return ret; - - ret = fi_ep_bind(rep->ep_fd, &rep->cq->fid, FI_TRANSMIT | FI_RECV); - if (ret) - return ret; - - rep->qp_win.tx_credits = - rep->qp_win.max_tx_credits - RSTREAM_MAX_CTRL; - - return ret; -} - -static int rstream_ep_ctrl(struct fid *fid, int command, void *arg) -{ - struct rstream_ep *rstream_ep; - int ret = 0; - rstream_ep = container_of(fid, struct rstream_ep, util_ep.ep_fid.fid); - - switch (command) { - case FI_ENABLE: - ret = rstream_reg_mrs(rstream_ep->msg_domain, - &rstream_ep->local_mr); - if (ret) - goto err1; - ret = rstream_cq_init(rstream_ep->msg_domain, rstream_ep); - if (ret) - goto err1; - ret = fi_enable(rstream_ep->ep_fd); - break; - case FI_GETWAIT: - ret = fi_control(&rstream_ep->cq->fid, FI_GETWAIT, arg); - if (ret) - return ret; - break; - default: - return -FI_ENOSYS; - } - - return ret; - -err1: - if (rstream_ep->local_mr.base_addr) - free(rstream_ep->local_mr.base_addr); - if (rstream_ep->local_mr.mr) - fi_close(&rstream_ep->local_mr.mr->fid); - - return ret; -} - -static struct fi_ops rstream_ep_fi_ops = { - .size = sizeof(struct fi_ops), - .close = rstream_ep_close, - .bind = rstream_ep_bind, - .control = rstream_ep_ctrl, - .ops_open = fi_no_ops_open, -}; - -static int rstream_ep_setopt(fid_t fid, int level, int optname, - const void *optval, size_t optlen) -{ - struct rstream_ep *rstream_ep = - container_of(fid, struct rstream_ep, util_ep.ep_fid.fid); - - if (level != FI_OPT_ENDPOINT) 
- return -FI_ENOPROTOOPT; - - if (optname == FI_OPT_SEND_BUF_SIZE) { - if(sizeof(rstream_ep->local_mr.tx.size) != optlen) - return -FI_EINVAL; - rstream_ep->local_mr.tx.size = *((uint32_t *)optval); - } else if (optname == FI_OPT_RECV_BUF_SIZE) { - if(sizeof(rstream_ep->local_mr.rx.size) != optlen) - return -FI_EINVAL; - rstream_ep->local_mr.rx.size = *((uint32_t *)optval); - } else if (optname == FI_OPT_TX_SIZE) { - if(sizeof(rstream_ep->qp_win.max_tx_credits) != optlen) - return -FI_EINVAL; - rstream_ep->qp_win.max_tx_credits = *((uint16_t *)optval); - } else if (optname == FI_OPT_RX_SIZE) { - if(sizeof(rstream_ep->qp_win.max_rx_credits) != optlen) - return -FI_EINVAL; - rstream_ep->qp_win.max_rx_credits = *((uint16_t *)optval); - } else { - return -FI_ENOPROTOOPT; - } - - return 0; -} - -static struct fi_ops_ep rstream_ops_ep = { - .size = sizeof(struct fi_ops_ep), - .cancel = fi_no_cancel, - .getopt = fi_no_getopt, - .setopt = rstream_ep_setopt, - .tx_ctx = fi_no_tx_ctx, - .rx_ctx = fi_no_rx_ctx, - .rx_size_left = fi_no_rx_size_left, - .tx_size_left = fi_no_tx_size_left, -}; - -int rstream_ep_open(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **ep_fid, void *context) -{ - struct rstream_ep *rstream_ep; - struct rstream_domain *rstream_domain; - struct rstream_pep *rstream_pep = NULL; - int ret; - - rstream_domain = container_of(domain, struct rstream_domain, - util_domain.domain_fid); - - rstream_ep = calloc(1, sizeof(*rstream_ep)); - if (!rstream_ep) - return -FI_ENOMEM; - - /* manual progress */ - ret = ofi_endpoint_init(domain, &rstream_util_prov, info, - &rstream_ep->util_ep, context, NULL); - if (ret) - goto err1; - - rstream_info_to_core(FI_VERSION(1, 8), NULL, NULL, info); - - if (info->handle && info->handle->fclass == FI_CLASS_PEP) { - rstream_pep = container_of(info->handle, - struct rstream_pep, util_pep.pep_fid); - info->handle = &rstream_pep->pep_fd->fid; - } - - ret = fi_endpoint(rstream_domain->msg_domain, info, - 
&rstream_ep->ep_fd, NULL); - if (ret) - goto err1; - - if (rstream_pep) - free(rstream_pep); - - rstream_ep->msg_domain = rstream_domain->msg_domain; - rstream_ep->local_mr.tx.size = RSTREAM_DEFAULT_MR_SEG_SIZE; - rstream_ep->local_mr.rx.size = RSTREAM_DEFAULT_MR_SEG_SIZE; - - rstream_ep->qp_win.max_tx_credits = rstream_info.tx_attr->size; - rstream_ep->qp_win.ctrl_credits = RSTREAM_MAX_CTRL; - rstream_ep->qp_win.max_rx_credits = rstream_info.rx_attr->size; - - rstream_ep->tx_ctxs = - rstream_tx_ctx_fs_create(rstream_ep->qp_win.max_tx_credits, - NULL, NULL); - - assert(rstream_ep->tx_ctxs); - rstream_ep->rx_ctxs = (struct fi_context *) - calloc(rstream_ep->qp_win.max_rx_credits, - sizeof(*rstream_ep->rx_ctxs)); - assert(rstream_ep->rx_ctxs); - - *ep_fid = &rstream_ep->util_ep.ep_fid; - (*ep_fid)->fid.ops = &rstream_ep_fi_ops; - (*ep_fid)->ops = &rstream_ops_ep; - (*ep_fid)->cm = &rstream_ops_cm; - (*ep_fid)->msg = &rstream_ops_msg; - ofi_mutex_init(&rstream_ep->send_lock); - ofi_mutex_init(&rstream_ep->recv_lock); - ofi_mutex_init(&rstream_ep->cq_lock); - return 0; - -err1: - free(rstream_ep); - return ret; -} - -static int rstream_pep_bind(struct fid *pep_fid, struct fid *bfid, - uint64_t flags) -{ - struct rstream_pep *rstream_pep = container_of(pep_fid, - struct rstream_pep, util_pep.pep_fid); - struct rstream_eq *rstream_eq = NULL; - int ret; - - switch (bfid->fclass) { - case FI_CLASS_EQ: - rstream_eq = container_of(bfid, struct rstream_eq, - util_eq.eq_fid.fid); - ret = fi_pep_bind(rstream_pep->pep_fd, &rstream_eq->eq_fd->fid, - flags); - break; - default: - FI_WARN(&rstream_prov, FI_LOG_EP_CTRL, "invalid fid class\n"); - ret = -FI_EINVAL; - break; - } - return ret; -} - -static int rstream_pep_ctrl(struct fid *fid, int command, void *arg) -{ - struct rstream_pep *rstream_pep; - int ret = 0; - - switch (fid->fclass) { - case FI_CLASS_PEP: - if (command != FI_BACKLOG) - return -FI_EINVAL; - rstream_pep = container_of(fid, struct rstream_pep, - 
util_pep.pep_fid.fid); - ret = fi_control(&rstream_pep->pep_fd->fid, command, arg); - break; - default: - return -FI_ENOSYS; - } - - return ret; -} - -static int rstream_pep_close(fid_t fid) -{ - struct rstream_pep *rstream_pep = - container_of(fid, struct rstream_pep, util_pep.pep_fid.fid); - int ret; - - ret = fi_close(&rstream_pep->pep_fd->fid); - if (ret) - return ret; - - ofi_pep_close(&rstream_pep->util_pep); - free(rstream_pep); - - return ret; -} - -static struct fi_ops rstream_pep_fi_ops = { - .size = sizeof(struct fi_ops), - .close = rstream_pep_close, - .bind = rstream_pep_bind, - .control = rstream_pep_ctrl, - .ops_open = fi_no_ops_open, -}; - -int rstream_passive_ep(struct fid_fabric *fabric, struct fi_info *info, - struct fid_pep **pep, void *context) -{ - int ret; - struct rstream_fabric *rstream_fabric = container_of(fabric, - struct rstream_fabric, util_fabric.fabric_fid); - struct rstream_pep *rstream_pep; - - rstream_pep = calloc(1, sizeof(*rstream_pep)); - if (!rstream_pep) - return -FI_ENOMEM; - - rstream_info_to_core(FI_VERSION(1, 8), NULL, NULL, info); - - ret = fi_passive_ep(rstream_fabric->msg_fabric, info, - &rstream_pep->pep_fd, NULL); - if (ret) - goto err1; - - *pep = &rstream_pep->util_pep.pep_fid; - (*pep)->fid.fclass = FI_CLASS_PEP; - (*pep)->fid.ops = &rstream_pep_fi_ops; - (*pep)->ops = &rstream_ops_ep; - (*pep)->cm = &rstream_ops_pep_cm; - - return 0; - -err1: - free(rstream_pep); - return ret; -} - -void rstream_process_cm_event(struct rstream_ep *ep, void *cm_data) -{ - assert(ep && cm_data); - - int i; - struct rstream_cm_data *rcv_data = (struct rstream_cm_data *)cm_data; - - assert(rcv_data->version == RSTREAM_RSOCKETV2); - - ep->qp_win.target_rx_credits = ntohs(rcv_data->max_rx_credits); - ep->qp_win.max_target_rx_credits = ep->qp_win.target_rx_credits; - ep->remote_data.rkey = ntohll(rcv_data->rkey); - ep->remote_data.mr.data_start = (void *)ntohll(rcv_data->base_addr); - ep->remote_data.mr.size = ntohl(rcv_data->rmr_size); 
- ep->remote_data.mr.avail_size = ep->remote_data.mr.size; - - for(i = 0; i < ep->qp_win.max_rx_credits; i++) { - rstream_post_cq_data_recv(ep, NULL); - } -} diff --git a/prov/rstream/src/rstream_eq.c b/prov/rstream/src/rstream_eq.c deleted file mode 100644 index 8aee3145217..00000000000 --- a/prov/rstream/src/rstream_eq.c +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Copyright (c) 2017-2018 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "rstream.h" - -static int rstream_eq_events(uint32_t *event, struct fi_eq_cm_entry *cm_entry, - struct fi_eq_cm_entry *usr_cm_entry, struct rstream_eq *rstream_eq) -{ - int ret = 0; - - if (*event == FI_CONNREQ) { - /* have to store to transfer to ep during FI_CONNECT */ - if (cm_entry->info) { - usr_cm_entry->info = cm_entry->info; - rstream_set_info(usr_cm_entry->info); - } - } else if (*event == FI_CONNECTED) { - struct rstream_ep *rstream_ep = NULL; - void *itr = rbtFind(rstream_eq->ep_map, cm_entry->fid); - assert(itr); - rbtKeyValue(rstream_eq->ep_map, itr, - (void **) &cm_entry->fid, (void **) &rstream_ep); - rstream_process_cm_event(rstream_ep, cm_entry->data); - usr_cm_entry->fid = &rstream_ep->util_ep.ep_fid.fid; - } else { - ret = -FI_ENODATA; - } - rstream_eq->prev_cm_state = *event; - return ret; -} - -static ssize_t rstream_read(struct fid_eq *eq, uint32_t *event, - void *buf, size_t len, uint64_t flags) -{ - uint32_t rlen = sizeof(struct fi_eq_cm_entry); - assert(len == rlen && event); - struct fi_eq_cm_entry *usr_cm_entry = (struct fi_eq_cm_entry *) buf; - ssize_t ret; - struct fi_eq_cm_entry *cm_entry = NULL; - - struct rstream_eq *rstream_eq = container_of(eq, - struct rstream_eq, util_eq.eq_fid); - - cm_entry = rstream_eq->cm_entry; - assert(cm_entry); - - if (rstream_eq->prev_cm_state != FI_CONNREQ) { - rlen = rlen + rstream_eq->cm_data_len; - } - - ret = fi_eq_read(rstream_eq->eq_fd, event, cm_entry, rlen, flags); - if (ret == rlen) { - ret = rstream_eq_events(event, cm_entry, usr_cm_entry, rstream_eq); - if (ret) - return ret; - } else { - return ret; - } - - return len; -} - -static ssize_t rstream_readerr(struct fid_eq *eq, struct fi_eq_err_entry *buf, - uint64_t flags) -{ - struct rstream_eq *rstream_eq = container_of(eq, - struct rstream_eq, util_eq.eq_fid); - - return fi_eq_readerr(rstream_eq->eq_fd, buf, flags); -} - -static ssize_t rstream_sread(struct fid_eq *eq, uint32_t *event, - void *buf, size_t len, int timeout, 
uint64_t flags) -{ - uint32_t rlen = sizeof(struct fi_eq_cm_entry); - assert(len == rlen && event); - struct fi_eq_cm_entry *usr_cm_entry = (struct fi_eq_cm_entry *) buf; - ssize_t ret; - struct fi_eq_cm_entry *cm_entry = NULL; - - struct rstream_eq *rstream_eq = container_of(eq, - struct rstream_eq, util_eq.eq_fid); - - cm_entry = rstream_eq->cm_entry; - assert(cm_entry); - - if (rstream_eq->prev_cm_state != FI_CONNREQ) { - rlen = rlen + rstream_eq->cm_data_len; - } - - ret = fi_eq_sread(rstream_eq->eq_fd, event, cm_entry, rlen, timeout, - flags); - if (ret == rlen) { - ret = rstream_eq_events(event, cm_entry, usr_cm_entry, rstream_eq); - if (ret) - return ret; - } else { - return ret; - } - - return len; -} - -static const char *rstream_strerror(struct fid_eq *eq, int prov_errno, - const void *err_data, char *buf, size_t len) -{ - struct rstream_eq *rstream_eq = container_of(eq, struct rstream_eq, - util_eq.eq_fid); - - return fi_eq_strerror(rstream_eq->eq_fd, prov_errno, err_data, buf, len); -} - -static int rstream_eq_control(fid_t fid, int command, void *arg) -{ - struct rstream_eq *rstream_eq = container_of(fid, struct rstream_eq, - util_eq.eq_fid.fid); - int ret; - - switch (command) { - case FI_GETWAIT: - ret = fi_control(&rstream_eq->eq_fd->fid, FI_GETWAIT, arg); - break; - default: - return -FI_ENOSYS; - } - return ret; -} - -static int rstream_eq_close(fid_t fid) -{ - struct rstream_eq *rstream_eq = - container_of(fid, struct rstream_eq, util_eq.eq_fid.fid); - int ret; - - ret = fi_close(&rstream_eq->eq_fd->fid); - if (ret) - return ret; - - free(rstream_eq->cm_entry); - free(rstream_eq); - return ret; -} - -static struct fi_ops_eq rstream_ops_eq = { - .size = sizeof(struct fi_ops_eq), - .read = rstream_read, - .readerr = rstream_readerr, - .write = fi_no_eq_write, - .sread = rstream_sread, - .strerror = rstream_strerror, -}; - -static struct fi_ops rstream_fid_ops_eq = { - .size = sizeof(struct fi_ops), - .close = rstream_eq_close, - .bind = fi_no_bind, 
- .control = rstream_eq_control, - .ops_open = fi_no_ops_open, -}; - -/* assumes uint64_t keys */ -static int compare_mr_keys(void *key1, void *key2) -{ - uint64_t k1 = *((uint64_t *) key1); - uint64_t k2 = *((uint64_t *) key2); - - return (k1 < k2) ? -1 : (k1 > k2); -} - -int rstream_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr, - struct fid_eq **eq, void *context) -{ - struct rstream_fabric *rstream_fabric = NULL; - struct rstream_eq *rstream_eq; - int ret; - - rstream_eq = calloc(1, sizeof(*rstream_eq)); - if (!rstream_eq) - return -FI_ENOMEM; - - rstream_fabric = container_of(fabric, struct rstream_fabric, - util_fabric.fabric_fid); - - ret = fi_eq_open(rstream_fabric->msg_fabric, attr, &rstream_eq->eq_fd, - NULL); - if (ret) - goto err1; - - (*eq) = &rstream_eq->util_eq.eq_fid; - (*eq)->fid.fclass = FI_CLASS_EQ; - (*eq)->fid.context = context; - (*eq)->ops = &rstream_ops_eq; - (*eq)->fid.ops = &rstream_fid_ops_eq; - rstream_eq->cm_data_len = sizeof(struct rstream_cm_data); - rstream_eq->cm_entry = calloc(1, sizeof(struct fi_eq_cm_entry) + - rstream_eq->cm_data_len); - rstream_eq->ep_map = rbtNew(compare_mr_keys); - rstream_eq->prev_cm_state = FI_NOTIFY; - - return ret; -err1: - free(rstream_eq); - - return ret; -} diff --git a/prov/rstream/src/rstream_fabric.c b/prov/rstream/src/rstream_fabric.c deleted file mode 100644 index 632ae6cd855..00000000000 --- a/prov/rstream/src/rstream_fabric.c +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (c) 2017-2018 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "rstream.h" - - -static int rstream_fabric_close(fid_t fid) -{ - struct rstream_fabric *rstream_fabric = - container_of(fid, struct rstream_fabric, - util_fabric.fabric_fid.fid); - int ret; - - ret = fi_close(&rstream_fabric->msg_fabric->fid); - if (ret) - return ret; - - ret = ofi_fabric_close(&rstream_fabric->util_fabric); - if (ret) - return ret; - - free(rstream_fabric); - return 0; -} - -static int rstream_control(struct fid *fid, int command, void *arg) -{ - return -FI_ENOSYS; -} - -int rstream_trywait(struct fid_fabric *fabric, struct fid **fids, int count) -{ - int ret; - struct rstream_ep *rstream_ep; - struct rstream_fabric *rstream_fabric; - int num_fids = 1; - struct fid *rstream_fids[num_fids]; - - if (count != num_fids) - return -FI_ENOSYS; - - if (fids[0]->fclass == FI_CLASS_EP) { - rstream_ep = container_of(fids[0], struct rstream_ep, - util_ep.ep_fid.fid); - rstream_fabric = container_of(fabric, struct rstream_fabric, - util_fabric.fabric_fid); - rstream_fids[0] = &rstream_ep->cq->fid; - ret = fi_trywait(rstream_fabric->msg_fabric, rstream_fids, - num_fids); - return ret; - } - - return -FI_EINVAL; -} - -static struct fi_ops rstream_fabric_fi_ops = { - .size = sizeof(struct fi_ops), - .close = rstream_fabric_close, - .bind = fi_no_bind, - .control = rstream_control, - .ops_open = fi_no_ops_open, -}; - -static struct fi_ops_fabric rstream_fabric_ops = { - .size = sizeof(struct fi_ops_fabric), - .domain = rstream_domain_open, - .passive_ep = rstream_passive_ep, - .eq_open = rstream_eq_open, - .wait_open = fi_no_wait_open, - .trywait = rstream_trywait -}; - -int rstream_fabric_open(struct fi_fabric_attr *attr, struct fid_fabric **fabric, - void *context) -{ - struct rstream_fabric *rstream_fabric; - int ret; - struct fi_info *info = NULL; - - rstream_fabric = calloc(1, sizeof(*rstream_fabric)); - if (!rstream_fabric) - return -FI_ENOMEM; - - ret = ofi_fabric_init(&rstream_prov, &rstream_fabric_attr, attr, - 
&rstream_fabric->util_fabric, context); - if (ret) - goto err1; - - ret = ofi_get_core_info_fabric(&rstream_prov, attr, &info); - if (ret) { - FI_WARN(&rstream_prov, FI_LOG_FABRIC, "core info failed\n"); - ret = -FI_EINVAL; - goto err1; - } - - ret = fi_fabric(info->fabric_attr, &rstream_fabric->msg_fabric, context); - if (ret) { - FI_WARN(&rstream_prov, FI_LOG_FABRIC, "fi_fabric failed\n"); - ret = -FI_EINVAL; - goto err1; - } - - *fabric = &rstream_fabric->util_fabric.fabric_fid; - (*fabric)->fid.ops = &rstream_fabric_fi_ops; - (*fabric)->ops = &rstream_fabric_ops; - - fi_freeinfo(info); - return 0; -err1: - free(rstream_fabric); - if (info) - fi_freeinfo(info); - - return ret; -} diff --git a/prov/rstream/src/rstream_init.c b/prov/rstream/src/rstream_init.c deleted file mode 100644 index 15d09302c14..00000000000 --- a/prov/rstream/src/rstream_init.c +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (c) 2017-2018 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "rstream.h" -#include -#include -#include - - -static void rstream_iwarp_settings(struct fi_info *core_info) -{ - core_info->ep_attr->max_msg_size = 2147483647; - core_info->domain_attr->cq_data_size = 0; - core_info->domain_attr->mr_cnt = 2289662; - core_info->mode = FI_CONTEXT; -} - -static void rstream_default_settings(struct fi_info *core_info) -{ - core_info->mode = FI_RX_CQ_DATA | FI_CONTEXT; - core_info->rx_attr->mode = FI_RX_CQ_DATA; -} - -int rstream_info_to_core(uint32_t version, const struct fi_info *irstream_info, - const struct fi_info *base_info, struct fi_info *core_info) -{ - core_info->ep_attr->type = FI_EP_MSG; - core_info->ep_attr->protocol = FI_PROTO_UNSPEC; - core_info->caps = FI_RMA | FI_MSG; - core_info->domain_attr->caps = FI_LOCAL_COMM | FI_REMOTE_COMM; - core_info->domain_attr->mr_mode = FI_MR_LOCAL | OFI_MR_BASIC_MAP; - core_info->tx_attr->op_flags = FI_COMPLETION; - core_info->rx_attr->op_flags = FI_COMPLETION; - core_info->fabric_attr->api_version = FI_VERSION(1, 8); - core_info->fabric_attr->prov_version = FI_VERSION(1, 0); - (RSTREAM_USING_IWARP) ? 
rstream_iwarp_settings(core_info): - rstream_default_settings(core_info); - - return 0; -} - -static void update_rstream_info(const struct fi_info *core_info) -{ - rstream_info.tx_attr->iov_limit = core_info->tx_attr->iov_limit; - rstream_info.rx_attr->iov_limit = core_info->rx_attr->iov_limit; - rstream_info.tx_attr->size = core_info->tx_attr->size; - rstream_info.rx_attr->size = core_info->rx_attr->size; - rstream_info.domain_attr->max_ep_rx_ctx = - core_info->domain_attr->max_ep_rx_ctx; - rstream_info.domain_attr->max_ep_srx_ctx = - core_info->domain_attr->max_ep_srx_ctx; - rstream_info.ep_attr->max_msg_size = - core_info->ep_attr->max_msg_size; - rstream_info.rx_attr->iov_limit = core_info->rx_attr->iov_limit; - rstream_info.domain_attr->cq_data_size = - core_info->domain_attr->cq_data_size; - rstream_info.domain_attr->cq_cnt = core_info->domain_attr->cq_cnt; - rstream_info.domain_attr->ep_cnt = core_info->domain_attr->ep_cnt; - rstream_info.domain_attr->max_err_data = - core_info->domain_attr->max_err_data; -} - -int rstream_info_to_rstream(uint32_t version, const struct fi_info *core_info, - const struct fi_info *base_info, struct fi_info *info) -{ - info->caps = RSTREAM_CAPS; - info->mode = 0; - - *info->tx_attr = *rstream_info.tx_attr; - *info->rx_attr = *rstream_info.rx_attr; - *info->domain_attr = *rstream_info.domain_attr; - *info->ep_attr = *rstream_info.ep_attr; - info->fabric_attr->api_version = FI_VERSION(1, 8); - info->fabric_attr->prov_version = FI_VERSION(1, 0); - update_rstream_info(core_info); - - return 0; -} - -static int rstream_getinfo(uint32_t version, const char *node, - const char *service, uint64_t flags, const struct fi_info *hints, - struct fi_info **info) -{ - struct fi_info *cur; - struct addrinfo *ai; - uint16_t port_save = 0; - int ret; - - if (!info) - return -FI_EINVAL; - - if (hints && hints->ep_attr->protocol == FI_PROTO_IWARP) { - rstream_info.ep_attr->protocol = FI_PROTO_IWARP; - rstream_info.tx_attr->iov_limit = 3; - 
rstream_info.rx_attr->iov_limit = 3; - rstream_info.domain_attr->max_ep_srx_ctx = 0; - } - - /* Avoid getting wild card address from MSG provider */ - if (ofi_is_wildcard_listen_addr(node, service, flags, hints)) { - if (service) { - ret = getaddrinfo(NULL, service, NULL, &ai); - if (ret) { - FI_WARN(&rstream_prov, FI_LOG_CORE, - "Unable to getaddrinfo\n"); - return ret; - } - port_save = ofi_addr_get_port(ai->ai_addr); - freeaddrinfo(ai); - service = NULL; - } - } - - ret = ofix_getinfo(version, node, service, flags, &rstream_util_prov, - hints, rstream_info_to_core, rstream_info_to_rstream, info); - if (ret) - return ret; - - if (port_save) { - for (cur = *info; cur; cur = cur->next) { - assert(cur->src_addr); - ofi_addr_set_port(cur->src_addr, port_save); - } - } - - return ret; -} - -static void rstream_fini(void) -{ - /* yawn */ -} - -struct fi_provider rstream_prov = { - .name = OFI_UTIL_PREFIX "rstream", - .version = OFI_VERSION_DEF_PROV, - .fi_version = OFI_VERSION_LATEST, - .getinfo = rstream_getinfo, - .fabric = rstream_fabric_open, - .cleanup = rstream_fini -}; - -struct util_prov rstream_util_prov = { - .prov = &rstream_prov, - .info = &rstream_info, - .flags = 0, -}; - -RSTREAM_INI -{ - return &rstream_prov; -} diff --git a/prov/rstream/src/rstream_msg.c b/prov/rstream/src/rstream_msg.c deleted file mode 100644 index 37f0fc6de41..00000000000 --- a/prov/rstream/src/rstream_msg.c +++ /dev/null @@ -1,742 +0,0 @@ -/* - * Copyright (c) 2017-2018 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "rstream.h" -#include -#include - - -ssize_t rstream_process_cq(struct rstream_ep *ep, enum rstream_msg_type type); - -static uint32_t rstream_cq_data_get_len(uint32_t cq_data) -{ - return (cq_data & RSTREAM_MR_LEN_MASK); -} - -static uint32_t rstream_cq_data_set(struct rstream_cq_data cq_data) -{ - uint32_t credits = cq_data.num_completions; - - assert(cq_data.num_completions < RSTREAM_CREDITS_MAX); - assert(cq_data.total_len < RSTREAM_MR_MAX); - - credits = credits << RSTREAM_CREDIT_OFFSET; - return credits | cq_data.total_len; -} - -static uint16_t rstream_cq_data_get_credits(uint32_t cq_data) -{ - uint32_t credits = cq_data & RSTREAM_CREDIT_MASK; - - credits = (credits >> RSTREAM_CREDIT_OFFSET); - assert(credits < RSTREAM_CREDITS_MAX); - - return credits; -} - -static uint32_t rstream_iwarp_cq_data_is_msg(uint32_t cq_data) { - return cq_data & RSTREAM_IWARP_MSG_BIT; -} - - -static uint32_t rstream_iwarp_cq_data_set_msg_len(uint32_t msg_len) -{ - assert(msg_len < RSTREAM_IWARP_IMM_MSG_LEN); - - uint32_t cq_data = msg_len; - - return cq_data | RSTREAM_IWARP_MSG_BIT; -} - -static uint32_t rstream_iwarp_cq_data_get_msg_len(uint32_t cq_data) -{ - uint32_t msg_len = cq_data & RSTREAM_IWARP_MSG_BIT_MASK; - - assert(msg_len < RSTREAM_IWARP_IMM_MSG_LEN); - - return msg_len; -} - -static char *rstream_get_next_recv_buffer(struct rstream_ep *ep) -{ - char *base_ptr = (char *)ep->local_mr.tx.data_start + - ep->local_mr.tx.size; - uint64_t *offset = &ep->local_mr.recv_buffer_offset; - const uint32_t full_size = RSTREAM_IWARP_DATA_SIZE * - ep->qp_win.max_rx_credits; - char *buffer = base_ptr + *offset; - - assert((void *)buffer < ep->local_mr.rx.data_start); - *offset = (*offset + RSTREAM_IWARP_DATA_SIZE) % full_size; - - return buffer; -} - -/*assuming rx_ctxs are always fully used */ -static struct fi_context *rstream_get_rx_ctx(struct rstream_ep *ep) -{ - struct fi_context *ctx; - - if (ep->rx_ctx_index == ep->qp_win.max_rx_credits) - return NULL; - - ctx 
= &ep->rx_ctxs[ep->rx_ctx_index]; - ep->rx_ctx_index = ep->rx_ctx_index + 1; - - return ctx; -} - -static struct fi_context *rstream_get_tx_ctx(struct rstream_ep *ep, int len) -{ - struct rstream_tx_ctx_fs *fs = ep->tx_ctxs; - struct rstream_ctx_data *rtn_ctx = ofi_freestack_pop(fs); - - if (!rtn_ctx) - return NULL; - - rtn_ctx->len = len; - return &rtn_ctx->ctx; -} - -static int rstream_return_tx_ctx(struct fi_context *ctx_ptr, - struct rstream_ep *ep) -{ - int len; - struct rstream_tx_ctx_fs *fs = ep->tx_ctxs; - - struct rstream_ctx_data *ctx_data = (struct rstream_ctx_data *)ctx_ptr; - len = ctx_data->len; - ofi_freestack_push(fs, ctx_data); - - return len; -} - -static ssize_t rstream_inject(struct fid_ep *ep_fid, const void *buf, size_t len, - fi_addr_t dest_addr) -{ - return -FI_ENOSYS; -} - -static ssize_t rstream_print_cq_error(struct fid_cq *cq) -{ - ssize_t ret; - struct fi_cq_err_entry cq_entry = {0}; - const char *errmsg; - - ret = fi_cq_readerr(cq, &cq_entry, 0); - if (cq_entry.err == FI_ENOMSG) { - ret = FI_ENOMSG; - return ret; - } - - errmsg = fi_cq_strerror(cq, cq_entry.prov_errno, - cq_entry.err_data, NULL, 0); - fprintf(stderr, "CQ error msg: %s\n", errmsg); - - return ret; -} - -static void rstream_update_tx_credits(struct rstream_ep *ep, - uint16_t num_completions) -{ - assert(num_completions == 1); - - if(ep->qp_win.ctrl_credits < RSTREAM_MAX_CTRL) - ep->qp_win.ctrl_credits++; - else - ep->qp_win.tx_credits++; - - assert(ep->qp_win.tx_credits <= ep->qp_win.max_tx_credits); -} - -static int rstream_timer_completed(struct rstream_timer *timer) -{ - if (!timer->poll_time) - gettimeofday(&timer->start, NULL); - - gettimeofday(&timer->end, NULL); - timer->poll_time = (timer->end.tv_sec - timer->start.tv_sec) * 1000000 + - (timer->end.tv_usec - timer->start.tv_usec); - - return (timer->poll_time > RSTREAM_MAX_POLL_TIME); -} - -static int rstream_tx_mr_full(struct rstream_ep *ep) -{ - return !(ep->local_mr.tx.avail_size); -} - -static int 
rstream_target_mr_full(struct rstream_ep *ep) -{ - return !(ep->remote_data.mr.avail_size); -} - -static int rstream_tx_full(struct rstream_ep *ep) -{ - return (ep->qp_win.tx_credits == 0); -} - -static int rstream_target_rx_full(struct rstream_ep *ep) -{ - return ((ep->qp_win.target_rx_credits - RSTREAM_MAX_CTRL) == 0); -} - -static uint32_t rstream_calc_contig_len(struct rstream_mr_seg *mr) -{ - if (!mr->avail_size) { - assert(mr->start_offset == mr->end_offset); - return 0; - } else if (mr->start_offset < mr->end_offset) { - return (mr->end_offset - mr->start_offset); - } else { - return (mr->size - mr->start_offset); - } -} - -static uint32_t rstream_alloc_contig_len_available(struct rstream_mr_seg *mr, - char **data_addr, uint32_t req_len) -{ - uint32_t len_available = rstream_calc_contig_len(mr); - uint32_t len; - - *data_addr = (char *)mr->data_start; - assert(len_available <= mr->avail_size); - - if (!len_available) - return 0; - - *data_addr = *data_addr + mr->start_offset; - len = (len_available < req_len) ? 
len_available : req_len; - assert(mr->avail_size >= len); - mr->avail_size = mr->avail_size - len; - mr->start_offset = (mr->start_offset + len) % mr->size; - - return len; -} - -static void rstream_free_contig_len(struct rstream_mr_seg *mr, uint32_t len) -{ - assert((mr->avail_size + len) <= mr->size); - mr->avail_size = mr->avail_size + len; - mr->end_offset = (mr->end_offset + len) % mr->size; -} - -static ssize_t rstream_send_ctrl_msg(struct rstream_ep *ep, uint32_t cq_data) -{ - ssize_t ret = 0; - struct fi_msg msg; - - if (!ep->qp_win.ctrl_credits || (ep->qp_win.target_rx_credits == 0)) { - ret = -FI_EAGAIN; - goto out; - } - - if (RSTREAM_USING_IWARP) { - ret = fi_inject(ep->ep_fd, &cq_data, RSTREAM_IWARP_DATA_SIZE, 0); - if (ret != 0) - goto out; - } else { - msg.msg_iov = NULL; - msg.desc = NULL; - msg.iov_count = 0; - msg.context = rstream_get_tx_ctx(ep, 0); - msg.data = cq_data; - - ret = fi_sendmsg(ep->ep_fd, &msg, FI_REMOTE_CQ_DATA); - if (ret != 0) - goto out; - - if (ep->qp_win.tx_credits > 0) - ep->qp_win.tx_credits--; - else - ep->qp_win.ctrl_credits--; - } - - assert(ep->qp_win.target_rx_credits > 0); - ep->qp_win.target_rx_credits--; - -out: - return ret; -} - -/* accumulate data in tx_cq exhaustion case */ -static ssize_t rstream_update_target(struct rstream_ep *ep, - uint16_t num_completions, uint32_t len) -{ - uint32_t cq_data; - ssize_t ret = 0; - - ep->rx_cq_data.num_completions = - ep->rx_cq_data.num_completions + num_completions; - ep->rx_cq_data.total_len = ep->rx_cq_data.total_len + len; - - if ((ep->rx_cq_data.num_completions >= ep->qp_win.max_rx_credits / 2) || - (ep->rx_cq_data.total_len >= ep->local_mr.rx.size / 2)) { - - cq_data = rstream_cq_data_set(ep->rx_cq_data); - - ret = rstream_send_ctrl_msg(ep, cq_data); - if (ret == 0) { - FI_DBG(&rstream_prov, FI_LOG_EP_CTRL, - "ctrl msg update %u = completions %u = len \n", - ep->rx_cq_data.num_completions, - ep->rx_cq_data.total_len); - ep->rx_cq_data.num_completions = 0; - 
ep->rx_cq_data.total_len = 0; - } - } - - return ret; -} - -ssize_t rstream_process_rx_cq_data(struct rstream_ep *ep, - const struct fi_cq_data_entry *cq_entry) -{ - uint16_t recvd_credits; - uint32_t recvd_len; - - if (cq_entry->data != 0) { - recvd_credits = rstream_cq_data_get_credits(cq_entry->data); - recvd_len = rstream_cq_data_get_len(cq_entry->data); - - ep->qp_win.target_rx_credits += recvd_credits; - assert(ep->qp_win.target_rx_credits <= - ep->qp_win.max_target_rx_credits); - - rstream_free_contig_len(&ep->remote_data.mr, recvd_len); - FI_DBG(&rstream_prov, FI_LOG_EP_CTRL, - "recvd: ctrl msg %u = completions %u = len \n", - recvd_credits, recvd_len); - } else { - rstream_free_contig_len(&ep->local_mr.rx, cq_entry->len); - } - - return rstream_post_cq_data_recv(ep, cq_entry); -} - -static void format_iwarp_cq_data(struct rstream_ep *ep, - struct fi_cq_data_entry *cq_entry) -{ - uint32_t cq_data; - - cq_entry->buf = rstream_get_next_recv_buffer(ep); - cq_data = *((uint32_t *)cq_entry->buf); - - if(rstream_iwarp_cq_data_is_msg(cq_data)) { - cq_entry->data = 0; - cq_entry->len = rstream_iwarp_cq_data_get_msg_len(cq_data); - } else { - cq_entry->data = cq_data; - cq_entry->len = 0; - } -} - -static enum rstream_msg_type rstream_cqe_msg_type(struct rstream_ep *ep, - struct fi_cq_data_entry *cq_entry) -{ - enum rstream_msg_type type = RSTREAM_MSG_UNKNOWN; - - if (cq_entry->flags & FI_REMOTE_WRITE || cq_entry->flags & FI_RECV || - cq_entry->flags & FI_REMOTE_CQ_DATA) { - if (RSTREAM_USING_IWARP) - format_iwarp_cq_data(ep, cq_entry); - - if (cq_entry->data) { - type = RSTREAM_CTRL_MSG; - } else { - type = RSTREAM_RX_MSG_COMP; - } - } else if (cq_entry->flags & FI_WRITE || cq_entry->flags & FI_SEND) { - type = RSTREAM_TX_MSG_COMP; - } - - return type; -} - -static ssize_t rstream_check_cq(struct rstream_ep *ep, - struct fi_cq_data_entry *completion_entry) -{ - const int max_num = 1; - ssize_t ret; - - ret = fi_cq_read(ep->cq, completion_entry, max_num); - if (ret 
< 0 && ret != -FI_EAGAIN) { - if (ret == -FI_EAVAIL) { - ret = rstream_print_cq_error(ep->cq); - fprintf(stderr, "error from %s:%d\n", __FILE__, __LINE__); - return ret; - } - } - assert(ret == -FI_EAGAIN || ret == max_num); - - return ret; -} - -ssize_t rstream_process_cq(struct rstream_ep *ep, enum rstream_msg_type type) -{ - struct fi_cq_data_entry cq_entry; - ssize_t ret, data_ret; - ssize_t found_msg_type = 0; - uint16_t rx_completions = 0; - struct rstream_timer timer = {.poll_time = 0}; - enum rstream_msg_type comp_type; - int len; - - ofi_mutex_lock(&ep->cq_lock); - do { - ret = rstream_check_cq(ep, &cq_entry); - if (ret == 1) { - comp_type = rstream_cqe_msg_type(ep, &cq_entry); - - if (comp_type == type) - found_msg_type++; - - if (comp_type == RSTREAM_CTRL_MSG || - comp_type == RSTREAM_RX_MSG_COMP) { - data_ret = rstream_process_rx_cq_data(ep, &cq_entry); - if (data_ret) { - fprintf(stderr, "error from %s:%d\n", - __FILE__, __LINE__); - ret = data_ret; - goto out; - } - rx_completions++; - } else if (comp_type == RSTREAM_TX_MSG_COMP) { - len = rstream_return_tx_ctx(cq_entry.op_context, ep); - rstream_update_tx_credits(ep, ret); - rstream_free_contig_len(&ep->local_mr.tx, len); - } else { - ret = -FI_ENOMSG; - goto out; - } - } else if (ret != -FI_EAGAIN) { - goto out; - } - } while ((ret == -FI_EAGAIN && !rstream_timer_completed(&timer) && - !found_msg_type) || (found_msg_type && ret > 0)); - - ret = rstream_update_target(ep, rx_completions, 0); - ofi_mutex_unlock(&ep->cq_lock); - if (ret) - return ret; - - if (found_msg_type) - return found_msg_type; - else - return -FI_EAGAIN; -out: - ofi_mutex_unlock(&ep->cq_lock); - return ret; -} - -static uint32_t get_send_addrs_and_len(struct rstream_ep *ep, char **tx_addr, - char **dest_addr, uint32_t requested_len) -{ - uint32_t available_len = 0; - - requested_len = MIN(MIN(requested_len, - rstream_calc_contig_len(&ep->local_mr.tx)), - rstream_calc_contig_len(&ep->remote_data.mr)); - if (requested_len == 0) - 
return available_len; - - available_len = rstream_alloc_contig_len_available(&ep->local_mr.tx, - tx_addr, requested_len); - available_len = rstream_alloc_contig_len_available(&ep->remote_data.mr, - dest_addr, requested_len); - - return available_len; -} - -static ssize_t rstream_can_send(struct rstream_ep *ep) -{ - ssize_t ret; - - if (rstream_tx_mr_full(ep) || rstream_target_mr_full(ep) || - rstream_target_rx_full(ep)) { - ret = rstream_process_cq(ep, RSTREAM_CTRL_MSG); - if (ret < 0) - return ret; - } - - if (rstream_tx_full(ep)) { - ret = rstream_process_cq(ep, RSTREAM_TX_MSG_COMP); - if (ret < 0) - return ret; - } - - return 0; -} - -static ssize_t rstream_send(struct fid_ep *ep_fid, const void *buf, size_t len, - void *desc, fi_addr_t dest_addr, void *context) -{ - struct rstream_ep *ep = container_of(ep_fid, struct rstream_ep, - util_ep.ep_fid); - uint32_t cq_data = 0; - ssize_t ret; - char *tx_addr = NULL; - char *remote_addr = NULL; - size_t sent_len = 0; - uint32_t curr_avail_len = len; - void *ctx; - - ofi_mutex_lock(&ep->send_lock); - do { - ret = rstream_can_send(ep); - if (ret < 0) { - if (ret < 0 && ret != -FI_EAGAIN) { - goto err; - } else { - ofi_mutex_unlock(&ep->send_lock); - return ((sent_len) ? 
sent_len : ret); - } - } - - curr_avail_len = get_send_addrs_and_len(ep, &tx_addr, - &remote_addr, curr_avail_len); - if (curr_avail_len == 0) - break; - - memcpy(tx_addr, ((char *)buf + sent_len), curr_avail_len); - sent_len = sent_len + curr_avail_len; - ctx = rstream_get_tx_ctx(ep, curr_avail_len); - - if (RSTREAM_USING_IWARP) { - ret = fi_write(ep->ep_fd, tx_addr, curr_avail_len, - ep->local_mr.ldesc, 0, (uint64_t)remote_addr, - ep->remote_data.rkey, ctx); - ret = rstream_send_ctrl_msg(ep, - rstream_iwarp_cq_data_set_msg_len(curr_avail_len)); - } else { - ret = fi_writedata(ep->ep_fd, tx_addr, curr_avail_len, - ep->local_mr.ldesc, cq_data, 0, (uint64_t)remote_addr, - ep->remote_data.rkey, ctx); - } - if (ret != 0) { - FI_DBG(&rstream_prov, FI_LOG_EP_DATA, - "error: fi_write failed: %zd", ret); - goto err; - } - curr_avail_len = len - sent_len; - - if (!RSTREAM_USING_IWARP) - ep->qp_win.target_rx_credits--; - - ep->qp_win.tx_credits--; - - } while(curr_avail_len); /* circle buffer rollover requires two loops */ - - ofi_mutex_unlock(&ep->send_lock); - return sent_len; - -err: - ofi_mutex_unlock(&ep->send_lock); - return ret; -} - -static ssize_t rstream_sendv(struct fid_ep *ep_fid, const struct iovec *iov, - void **desc, size_t count, fi_addr_t dest_addr, void *context) -{ - return -FI_ENOSYS; -} - -static ssize_t rstream_sendmsg(struct fid_ep *ep_fid, const struct fi_msg *msg, - uint64_t flags) -{ - int ret; - struct rstream_ep *ep = container_of(ep_fid, struct rstream_ep, - util_ep.ep_fid); - - if (flags == FI_PEEK) { - ofi_mutex_lock(&ep->send_lock); - ret = rstream_can_send(ep); - ofi_mutex_unlock(&ep->send_lock); - return ret; - } else { - return -FI_ENOSYS; - } -} - -/* either posting everything at once or reposting after cq completion */ -ssize_t rstream_post_cq_data_recv(struct rstream_ep *ep, - const struct fi_cq_data_entry *cq_entry) -{ - struct fi_context *context = NULL; - struct fi_msg msg; - struct iovec imsg; - void *buffer; - ssize_t ret; - - if 
(!cq_entry || !cq_entry->op_context) - context = rstream_get_rx_ctx(ep); - else if (cq_entry && cq_entry->op_context) - context = cq_entry->op_context; - - if (RSTREAM_USING_IWARP) { - buffer = (cq_entry && cq_entry->buf) ? cq_entry->buf : - rstream_get_next_recv_buffer(ep); - assert(buffer); - imsg.iov_base = buffer; - imsg.iov_len = RSTREAM_IWARP_DATA_SIZE; - msg.msg_iov = &imsg; - msg.desc = &ep->local_mr.ldesc; - msg.iov_count = 1; - msg.context = context; - } else { - msg.msg_iov = NULL; - msg.desc = NULL; - msg.iov_count = 0; - msg.context = context; - } - - ret = fi_recvmsg(ep->ep_fd, &msg, 0); - if (ret != 0) - return ret; - - return ret; -} - -static uint32_t rstream_copy_out_chunk(struct rstream_ep *ep, void *buf, - uint32_t len_left) -{ - char *rx_data_ptr = NULL; - uint32_t current_chunk = - rstream_alloc_contig_len_available(&ep->local_mr.rx, &rx_data_ptr, - len_left); - - if (current_chunk) { - memcpy(buf, rx_data_ptr, current_chunk); - } - - return current_chunk; -} - -static ssize_t rstream_recv(struct fid_ep *ep_fid, void *buf, size_t len, - void *desc, fi_addr_t src_addr, void *context) -{ - struct rstream_ep *ep = container_of(ep_fid, struct rstream_ep, - util_ep.ep_fid); - uint32_t copy_out_len = 0; - ssize_t ret; - - ofi_mutex_lock(&ep->recv_lock); - - copy_out_len = rstream_copy_out_chunk(ep, buf, len); - - if ((len - copy_out_len)) { - ret = rstream_process_cq(ep, RSTREAM_RX_MSG_COMP); - if(ret < 0 && ret != -FI_EAGAIN) { - ofi_mutex_unlock(&ep->recv_lock); - return ret; - } - - copy_out_len = copy_out_len + rstream_copy_out_chunk(ep, - ((char *)buf + copy_out_len), (len - copy_out_len)); - } - - ofi_mutex_lock(&ep->send_lock); - ret = rstream_update_target(ep, 0, copy_out_len); - ofi_mutex_unlock(&ep->send_lock); - ofi_mutex_unlock(&ep->recv_lock); - if(ret < 0 && ret != -FI_EAGAIN) { - return ret; - } - - if (copy_out_len) { - return copy_out_len; - } - - return -FI_EAGAIN; -} - -static ssize_t rstream_recvv(struct fid_ep *ep_fid, const 
struct iovec *iov, - void **desc, size_t count, fi_addr_t src_addr, void *context) -{ - return -FI_ENOSYS; -} - -/* can't recv if you can't send a ctrl message -- only way to force user - * to progress ctrl msg, but...Continue to receive any queued data even - * if the remote side has disconnected (TODO) */ -static ssize_t rstream_recvmsg(struct fid_ep *ep_fid, const struct fi_msg *msg, - uint64_t flags) -{ - int ret; - struct rstream_ep *ep = container_of(ep_fid, struct rstream_ep, - util_ep.ep_fid); - - if (flags == FI_PEEK) { - ofi_mutex_lock(&ep->recv_lock); - if (!ep->local_mr.rx.avail_size) { - ret = rstream_process_cq(ep, RSTREAM_RX_MSG_COMP); - if (ret < 0) { - ofi_mutex_unlock(&ep->recv_lock); - return ret; - } - } - ofi_mutex_unlock(&ep->recv_lock); - - ofi_mutex_lock(&ep->send_lock); - if (rstream_target_rx_full(ep)) { - ret = rstream_process_cq(ep, RSTREAM_RX_MSG_COMP); - if (ret < 0) { - ofi_mutex_unlock(&ep->send_lock); - return ret; - } - } - - if (!ep->qp_win.ctrl_credits) { - ret = rstream_process_cq(ep, RSTREAM_TX_MSG_COMP); - ofi_mutex_unlock(&ep->send_lock); - return ret; - } - - ofi_mutex_unlock(&ep->send_lock); - return 0; - } else { - return -FI_ENOSYS; - } -} - -struct fi_ops_msg rstream_ops_msg = { - .size = sizeof(struct fi_ops_msg), - .recv = rstream_recv, - .recvv = rstream_recvv, - .recvmsg = rstream_recvmsg, - .send = rstream_send, - .sendv = rstream_sendv, - .sendmsg = rstream_sendmsg, - .inject = rstream_inject, - .senddata = fi_no_msg_senddata, - .injectdata = fi_no_msg_injectdata, -}; diff --git a/src/fabric.c b/src/fabric.c index fb2000ff415..46ba69e65b0 100644 --- a/src/fabric.c +++ b/src/fabric.c @@ -896,7 +896,6 @@ void fi_ini(void) ofi_register_provider(RXM_INIT, NULL); ofi_register_provider(VERBS_INIT, NULL); - /* ofi_register_provider(RSTREAM_INIT, NULL); - no support */ ofi_register_provider(MRAIL_INIT, NULL); ofi_register_provider(RXD_INIT, NULL); ofi_register_provider(EFA_INIT, NULL); diff --git a/src/fi_tostr.c 
b/src/fi_tostr.c index 5f9e5032f80..d578e092e44 100644 --- a/src/fi_tostr.c +++ b/src/fi_tostr.c @@ -269,7 +269,6 @@ static void ofi_tostr_protocol(char *buf, size_t len, uint32_t protocol) CASEENUMSTRN(FI_PROTO_UCX, len); CASEENUMSTRN(FI_PROTO_NETWORKDIRECT, len); CASEENUMSTRN(FI_PROTO_SHM, len); - CASEENUMSTRN(FI_PROTO_RSTREAM, len); CASEENUMSTRN(FI_PROTO_RDMA_CM_IB_XRC, len); CASEENUMSTRN(FI_PROTO_EFA, len); CASEENUMSTRN(FI_PROTO_PSMX3, len); From c1ae77e7e80dc1a3891645358d46e5e81be6218d Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 18 Sep 2023 18:46:47 -0700 Subject: [PATCH 04/34] prov/gni: Remove provider GNI is only supported by the v1.x series Signed-off-by: Sean Hefty --- .travis.yml | 1 - Makefile.am | 1 - configure.ac | 3 +- include/ofi_prov.h | 11 - man/fi_endpoint.3.md | 3 - man/fi_getinfo.3.md | 4 - man/fi_gni.7.md | 398 -- man/fi_provider.7.md | 5 - man/man7/fi_gni.7 | 447 -- prov/gni/Makefile.include | 170 - prov/gni/README.md | 21 - prov/gni/configure.m4 | 209 - prov/gni/contrib/gnitest.supp | 578 -- prov/gni/gnix.map | 108 - prov/gni/include/fi_ext_gni.h | 191 - prov/gni/include/gnix.h | 1204 ---- prov/gni/include/gnix_atomic.h | 57 - prov/gni/include/gnix_auth_key.h | 205 - prov/gni/include/gnix_av.h | 149 - prov/gni/include/gnix_bitmap.h | 203 - prov/gni/include/gnix_buddy_allocator.h | 211 - prov/gni/include/gnix_cm.h | 171 - prov/gni/include/gnix_cm_nic.h | 214 - prov/gni/include/gnix_cntr.h | 96 - prov/gni/include/gnix_cq.h | 97 - prov/gni/include/gnix_datagram.h | 374 -- prov/gni/include/gnix_ep.h | 466 -- prov/gni/include/gnix_eq.h | 114 - prov/gni/include/gnix_freelist.h | 197 - prov/gni/include/gnix_hashtable.h | 248 - prov/gni/include/gnix_mbox_allocator.h | 201 - prov/gni/include/gnix_mr.h | 237 - prov/gni/include/gnix_mr_cache.h | 270 - prov/gni/include/gnix_mr_notifier.h | 183 - prov/gni/include/gnix_msg.h | 57 - prov/gni/include/gnix_nameserver.h | 58 - prov/gni/include/gnix_nic.h | 518 -- prov/gni/include/gnix_poll.h | 86 - 
prov/gni/include/gnix_priv.h | 160 - prov/gni/include/gnix_progress.h | 57 - prov/gni/include/gnix_queue.h | 67 - prov/gni/include/gnix_rma.h | 63 - prov/gni/include/gnix_shmem.h | 65 - prov/gni/include/gnix_smrn.h | 133 - prov/gni/include/gnix_tags.h | 494 -- prov/gni/include/gnix_trigger.h | 53 - prov/gni/include/gnix_util.h | 288 - prov/gni/include/gnix_vc.h | 328 -- prov/gni/include/gnix_vector.h | 427 -- prov/gni/include/gnix_wait.h | 89 - prov/gni/include/gnix_xpmem.h | 207 - prov/gni/include/rdma/fi_direct.h | 45 - prov/gni/include/rdma/fi_direct_atomic.h | 267 - prov/gni/include/rdma/fi_direct_atomic_def.h | 81 - prov/gni/include/rdma/fi_direct_cm.h | 107 - prov/gni/include/rdma/fi_direct_domain.h | 218 - prov/gni/include/rdma/fi_direct_endpoint.h | 275 - prov/gni/include/rdma/fi_direct_eq.h | 257 - prov/gni/include/rdma/fi_direct_rma.h | 147 - prov/gni/include/rdma/fi_direct_tagged.h | 145 - prov/gni/include/rdma/fi_direct_trigger.h | 49 - prov/gni/provider_FABRIC_1.0.map | 113 - prov/gni/src/gnix_atomic.c | 703 --- prov/gni/src/gnix_auth_key.c | 555 -- prov/gni/src/gnix_av.c | 996 ---- prov/gni/src/gnix_bitmap.c | 277 - prov/gni/src/gnix_buddy_allocator.c | 399 -- prov/gni/src/gnix_cm.c | 1337 ----- prov/gni/src/gnix_cm_nic.c | 736 --- prov/gni/src/gnix_cntr.c | 510 -- prov/gni/src/gnix_cq.c | 766 --- prov/gni/src/gnix_datagram.c | 820 --- prov/gni/src/gnix_dom.c | 766 --- prov/gni/src/gnix_ep.c | 3301 ----------- prov/gni/src/gnix_eq.c | 700 --- prov/gni/src/gnix_fabric.c | 1057 ---- prov/gni/src/gnix_freelist.c | 161 - prov/gni/src/gnix_hashtable.c | 852 --- prov/gni/src/gnix_init.c | 177 - prov/gni/src/gnix_mbox_allocator.c | 805 --- prov/gni/src/gnix_mr.c | 1269 ---- prov/gni/src/gnix_mr_cache.c | 1640 ------ prov/gni/src/gnix_mr_notifier.c | 279 - prov/gni/src/gnix_msg.c | 3777 ------------ prov/gni/src/gnix_nameserver.c | 437 -- prov/gni/src/gnix_nic.c | 1430 ----- prov/gni/src/gnix_poll.c | 98 - prov/gni/src/gnix_progress.c | 159 - 
prov/gni/src/gnix_queue.c | 133 - prov/gni/src/gnix_rma.c | 1586 ----- prov/gni/src/gnix_sep.c | 1680 ------ prov/gni/src/gnix_shmem.c | 187 - prov/gni/src/gnix_smrn.c | 176 - prov/gni/src/gnix_tags.c | 964 --- prov/gni/src/gnix_trigger.c | 128 - prov/gni/src/gnix_util.c | 799 --- prov/gni/src/gnix_vc.c | 2236 ------- prov/gni/src/gnix_vector.c | 539 -- prov/gni/src/gnix_wait.c | 590 -- prov/gni/src/gnix_xpmem.c | 616 -- prov/gni/test/allocator.c | 683 --- prov/gni/test/api.c | 1173 ---- prov/gni/test/api_cntr.c | 686 --- prov/gni/test/api_cq.c | 614 -- prov/gni/test/auth_key.c | 184 - prov/gni/test/av.c | 984 ---- prov/gni/test/bitmap.c | 615 -- prov/gni/test/buddy_allocator.c | 270 - prov/gni/test/cancel.c | 371 -- prov/gni/test/cm.c | 480 -- prov/gni/test/cntr.c | 1096 ---- prov/gni/test/common.c | 74 - prov/gni/test/common.h | 122 - prov/gni/test/cq.c | 1041 ---- prov/gni/test/datagram.c | 457 -- prov/gni/test/dlist-utils.c | 223 - prov/gni/test/dom.c | 322 - prov/gni/test/ep.c | 359 -- prov/gni/test/eq.c | 366 -- prov/gni/test/fabric.c | 389 -- prov/gni/test/fi_addr_str.c | 1700 ------ prov/gni/test/freelist.c | 217 - prov/gni/test/gnix_rdma_headers.h | 53 - prov/gni/test/hashtable.c | 757 --- prov/gni/test/mr.c | 1854 ------ prov/gni/test/mr_notifier.c | 503 -- prov/gni/test/nic.c | 153 - prov/gni/test/pmi_utils.c | 57 - prov/gni/test/queue.c | 223 - prov/gni/test/rdm_addr_str_sr.c | 344 -- prov/gni/test/rdm_atomic.c | 5519 ------------------ prov/gni/test/rdm_dgram_rma.c | 3235 ---------- prov/gni/test/rdm_dgram_stx.c | 2676 --------- prov/gni/test/rdm_fi_pcd_trecv_msg.c | 1912 ------ prov/gni/test/rdm_multi_recv.c | 955 --- prov/gni/test/rdm_rx_overrun.c | 364 -- prov/gni/test/rdm_sr.c | 2312 -------- prov/gni/test/rdm_tagged_sr.c | 961 --- prov/gni/test/run_gnitest | 85 - prov/gni/test/sep.c | 2689 --------- prov/gni/test/shmem.c | 133 - prov/gni/test/smrn.c | 260 - prov/gni/test/tags.c | 1925 ------ prov/gni/test/utils.c | 172 - prov/gni/test/vc.c | 965 
--- prov/gni/test/vc_lookup.c | 216 - prov/gni/test/vector.c | 384 -- prov/gni/test/wait.c | 376 -- prov/psm3/configure.ac | 2 - src/common.c | 7 - src/fabric.c | 3 +- src/fi_tostr.c | 2 - util/info.c | 1 - 153 files changed, 2 insertions(+), 87064 deletions(-) delete mode 100644 man/fi_gni.7.md delete mode 100644 man/man7/fi_gni.7 delete mode 100644 prov/gni/Makefile.include delete mode 100644 prov/gni/README.md delete mode 100644 prov/gni/configure.m4 delete mode 100644 prov/gni/contrib/gnitest.supp delete mode 100644 prov/gni/gnix.map delete mode 100644 prov/gni/include/fi_ext_gni.h delete mode 100644 prov/gni/include/gnix.h delete mode 100644 prov/gni/include/gnix_atomic.h delete mode 100644 prov/gni/include/gnix_auth_key.h delete mode 100644 prov/gni/include/gnix_av.h delete mode 100644 prov/gni/include/gnix_bitmap.h delete mode 100644 prov/gni/include/gnix_buddy_allocator.h delete mode 100644 prov/gni/include/gnix_cm.h delete mode 100644 prov/gni/include/gnix_cm_nic.h delete mode 100644 prov/gni/include/gnix_cntr.h delete mode 100644 prov/gni/include/gnix_cq.h delete mode 100644 prov/gni/include/gnix_datagram.h delete mode 100644 prov/gni/include/gnix_ep.h delete mode 100644 prov/gni/include/gnix_eq.h delete mode 100644 prov/gni/include/gnix_freelist.h delete mode 100644 prov/gni/include/gnix_hashtable.h delete mode 100644 prov/gni/include/gnix_mbox_allocator.h delete mode 100644 prov/gni/include/gnix_mr.h delete mode 100644 prov/gni/include/gnix_mr_cache.h delete mode 100644 prov/gni/include/gnix_mr_notifier.h delete mode 100644 prov/gni/include/gnix_msg.h delete mode 100644 prov/gni/include/gnix_nameserver.h delete mode 100644 prov/gni/include/gnix_nic.h delete mode 100644 prov/gni/include/gnix_poll.h delete mode 100644 prov/gni/include/gnix_priv.h delete mode 100644 prov/gni/include/gnix_progress.h delete mode 100644 prov/gni/include/gnix_queue.h delete mode 100644 prov/gni/include/gnix_rma.h delete mode 100644 prov/gni/include/gnix_shmem.h delete mode 
100644 prov/gni/include/gnix_smrn.h delete mode 100644 prov/gni/include/gnix_tags.h delete mode 100644 prov/gni/include/gnix_trigger.h delete mode 100644 prov/gni/include/gnix_util.h delete mode 100644 prov/gni/include/gnix_vc.h delete mode 100644 prov/gni/include/gnix_vector.h delete mode 100644 prov/gni/include/gnix_wait.h delete mode 100644 prov/gni/include/gnix_xpmem.h delete mode 100644 prov/gni/include/rdma/fi_direct.h delete mode 100644 prov/gni/include/rdma/fi_direct_atomic.h delete mode 100644 prov/gni/include/rdma/fi_direct_atomic_def.h delete mode 100644 prov/gni/include/rdma/fi_direct_cm.h delete mode 100644 prov/gni/include/rdma/fi_direct_domain.h delete mode 100644 prov/gni/include/rdma/fi_direct_endpoint.h delete mode 100644 prov/gni/include/rdma/fi_direct_eq.h delete mode 100644 prov/gni/include/rdma/fi_direct_rma.h delete mode 100644 prov/gni/include/rdma/fi_direct_tagged.h delete mode 100644 prov/gni/include/rdma/fi_direct_trigger.h delete mode 100644 prov/gni/provider_FABRIC_1.0.map delete mode 100644 prov/gni/src/gnix_atomic.c delete mode 100644 prov/gni/src/gnix_auth_key.c delete mode 100644 prov/gni/src/gnix_av.c delete mode 100644 prov/gni/src/gnix_bitmap.c delete mode 100644 prov/gni/src/gnix_buddy_allocator.c delete mode 100644 prov/gni/src/gnix_cm.c delete mode 100644 prov/gni/src/gnix_cm_nic.c delete mode 100644 prov/gni/src/gnix_cntr.c delete mode 100644 prov/gni/src/gnix_cq.c delete mode 100644 prov/gni/src/gnix_datagram.c delete mode 100644 prov/gni/src/gnix_dom.c delete mode 100644 prov/gni/src/gnix_ep.c delete mode 100644 prov/gni/src/gnix_eq.c delete mode 100644 prov/gni/src/gnix_fabric.c delete mode 100644 prov/gni/src/gnix_freelist.c delete mode 100644 prov/gni/src/gnix_hashtable.c delete mode 100644 prov/gni/src/gnix_init.c delete mode 100644 prov/gni/src/gnix_mbox_allocator.c delete mode 100644 prov/gni/src/gnix_mr.c delete mode 100644 prov/gni/src/gnix_mr_cache.c delete mode 100644 prov/gni/src/gnix_mr_notifier.c delete mode 
100644 prov/gni/src/gnix_msg.c delete mode 100644 prov/gni/src/gnix_nameserver.c delete mode 100644 prov/gni/src/gnix_nic.c delete mode 100644 prov/gni/src/gnix_poll.c delete mode 100644 prov/gni/src/gnix_progress.c delete mode 100644 prov/gni/src/gnix_queue.c delete mode 100644 prov/gni/src/gnix_rma.c delete mode 100644 prov/gni/src/gnix_sep.c delete mode 100644 prov/gni/src/gnix_shmem.c delete mode 100644 prov/gni/src/gnix_smrn.c delete mode 100644 prov/gni/src/gnix_tags.c delete mode 100644 prov/gni/src/gnix_trigger.c delete mode 100644 prov/gni/src/gnix_util.c delete mode 100644 prov/gni/src/gnix_vc.c delete mode 100644 prov/gni/src/gnix_vector.c delete mode 100644 prov/gni/src/gnix_wait.c delete mode 100644 prov/gni/src/gnix_xpmem.c delete mode 100644 prov/gni/test/allocator.c delete mode 100644 prov/gni/test/api.c delete mode 100644 prov/gni/test/api_cntr.c delete mode 100644 prov/gni/test/api_cq.c delete mode 100644 prov/gni/test/auth_key.c delete mode 100644 prov/gni/test/av.c delete mode 100644 prov/gni/test/bitmap.c delete mode 100644 prov/gni/test/buddy_allocator.c delete mode 100644 prov/gni/test/cancel.c delete mode 100644 prov/gni/test/cm.c delete mode 100644 prov/gni/test/cntr.c delete mode 100644 prov/gni/test/common.c delete mode 100644 prov/gni/test/common.h delete mode 100644 prov/gni/test/cq.c delete mode 100644 prov/gni/test/datagram.c delete mode 100644 prov/gni/test/dlist-utils.c delete mode 100644 prov/gni/test/dom.c delete mode 100644 prov/gni/test/ep.c delete mode 100644 prov/gni/test/eq.c delete mode 100644 prov/gni/test/fabric.c delete mode 100644 prov/gni/test/fi_addr_str.c delete mode 100644 prov/gni/test/freelist.c delete mode 100644 prov/gni/test/gnix_rdma_headers.h delete mode 100644 prov/gni/test/hashtable.c delete mode 100644 prov/gni/test/mr.c delete mode 100644 prov/gni/test/mr_notifier.c delete mode 100644 prov/gni/test/nic.c delete mode 100644 prov/gni/test/pmi_utils.c delete mode 100644 prov/gni/test/queue.c delete mode 
100644 prov/gni/test/rdm_addr_str_sr.c delete mode 100644 prov/gni/test/rdm_atomic.c delete mode 100644 prov/gni/test/rdm_dgram_rma.c delete mode 100644 prov/gni/test/rdm_dgram_stx.c delete mode 100644 prov/gni/test/rdm_fi_pcd_trecv_msg.c delete mode 100644 prov/gni/test/rdm_multi_recv.c delete mode 100644 prov/gni/test/rdm_rx_overrun.c delete mode 100644 prov/gni/test/rdm_sr.c delete mode 100644 prov/gni/test/rdm_tagged_sr.c delete mode 100755 prov/gni/test/run_gnitest delete mode 100644 prov/gni/test/sep.c delete mode 100644 prov/gni/test/shmem.c delete mode 100644 prov/gni/test/smrn.c delete mode 100644 prov/gni/test/tags.c delete mode 100644 prov/gni/test/utils.c delete mode 100644 prov/gni/test/vc.c delete mode 100644 prov/gni/test/vc_lookup.c delete mode 100644 prov/gni/test/vector.c delete mode 100644 prov/gni/test/wait.c diff --git a/.travis.yml b/.travis.yml index 8f23303a816..5a4f669a5be 100644 --- a/.travis.yml +++ b/.travis.yml @@ -88,7 +88,6 @@ install: # List of providers current as of Jan 2020 - ./configure --prefix=$PREFIX --enable-tcp=dl --disable-efa - --disable-gni --disable-hook_debug --disable-mrail --disable-perf diff --git a/Makefile.am b/Makefile.am index 3727164cf8e..2696ed9c243 100644 --- a/Makefile.am +++ b/Makefile.am @@ -454,7 +454,6 @@ include prov/verbs/Makefile.include include prov/efa/Makefile.include include prov/psm2/Makefile.include include prov/psm3/Makefile.include -include prov/gni/Makefile.include include prov/rxm/Makefile.include include prov/mrail/Makefile.include include prov/rxd/Makefile.include diff --git a/configure.ac b/configure.ac index 254f0f47347..06ec6ea489f 100644 --- a/configure.ac +++ b/configure.ac @@ -410,7 +410,7 @@ AM_CONDITIONAL(HAVE_LD_VERSION_SCRIPT, test "$ac_cv_version_script" = "yes") xpmem_happy=0 AC_ARG_ENABLE([xpmem], [AS_HELP_STRING([--enable-xpmem@<:@=yes|no|PATH@:>@], - [Enable xpmem (gni and shm providers) @<:@default=yes@:>@ + [Enable xpmem (shm provider) @<:@default=yes@:>@ (yes: enable xpmem; 
no: disable xpmem; PATH: enable xpmem and use xpmem installed under PATH)])], ) @@ -950,7 +950,6 @@ FI_PROVIDER_SETUP([psm3]) FI_PROVIDER_SETUP([sockets]) FI_PROVIDER_SETUP([verbs]) FI_PROVIDER_SETUP([efa]) -FI_PROVIDER_SETUP([gni]) FI_PROVIDER_SETUP([udp]) FI_PROVIDER_SETUP([tcp]) FI_PROVIDER_SETUP([rxm]) diff --git a/include/ofi_prov.h b/include/ofi_prov.h index b657ef89e70..506c1fd8f08 100644 --- a/include/ofi_prov.h +++ b/include/ofi_prov.h @@ -48,17 +48,6 @@ * not built: no-op call for ctor */ -#if (HAVE_GNI) && (HAVE_GNI_DL) -# define GNI_INI FI_EXT_INI -# define GNI_INIT NULL -#elif (HAVE_GNI) -# define GNI_INI INI_SIG(fi_gni_ini) -# define GNI_INIT fi_gni_ini() -GNI_INI ; -#else -# define GNI_INIT NULL -#endif - /* If HAVE_EFA is defined on Windows, then the VisualStudio project configures * MSBuild to include the efa related files and exclude the verbs related files. * With the verbs related files excluded from the build, we need only ensure diff --git a/man/fi_endpoint.3.md b/man/fi_endpoint.3.md index b57dba4bc12..19188184c84 100644 --- a/man/fi_endpoint.3.md +++ b/man/fi_endpoint.3.md @@ -687,9 +687,6 @@ protocol value set to one. : Proprietary protocol on Elastic Fabric Adapter fabric. It supports both DGRAM and RDM endpoints. -*FI_PROTO_GNI* -: Protocol runs over Cray GNI low-level interface. - *FI_PROTO_IB_RDM* : Reliable-datagram protocol implemented over InfiniBand reliable-connected queue pairs. diff --git a/man/fi_getinfo.3.md b/man/fi_getinfo.3.md index 75be2058590..cf3a0ffdc26 100644 --- a/man/fi_getinfo.3.md +++ b/man/fi_getinfo.3.md @@ -628,10 +628,6 @@ fabric. See [`fi_av`(3)](fi_av.3.html). *FI_ADDR_EFA* : Address is an Amazon Elastic Fabric Adapter (EFA) proprietary format. -*FI_ADDR_GNI* -: Address is a Cray proprietary format that is used with their GNI - protocol. - *FI_ADDR_PSMX2* : Address is an Intel proprietary format used with their Performance Scaled Messaging protocol version 2. 
diff --git a/man/fi_gni.7.md b/man/fi_gni.7.md deleted file mode 100644 index 29ffed29790..00000000000 --- a/man/fi_gni.7.md +++ /dev/null @@ -1,398 +0,0 @@ ---- -layout: page -title: fi_gni(7) -tagline: Libfabric Programmer's Manual ---- -{% include JB/setup %} - -# NAME - -fi_gni \- The GNI Fabric Provider - -# OVERVIEW - -The GNI provider runs on Cray XC (TM) systems utilizing the user-space -Generic Network Interface (uGNI) which provides low-level access to -the Aries interconnect. The Aries interconnect is designed for -low-latency one-sided messaging and also includes direct hardware -support for common atomic operations and optimized collectives. - -# REQUIREMENTS - -The GNI provider runs on Cray XC systems running CLE 5.2 UP04 or higher -using gcc version 4.9 or higher. - -When using the fi_mr_regattr() and fi_mr_regv() functions to register -multiple region, users must register the memory region with 4K -page alignment. Any other page address alignment will result in a return -value of -FI_EINVAL. - -When using the scalable memory registration mode, applications must make -registration requests on 4K page alignment boundaries. Any other value -will result in a return value of -FI_EINVAL. - -# SUPPORTED FEATURES - -The GNI provider supports the following features defined for the -libfabric API: - -*Endpoint types* -: The provider supports the *FI_EP_RDM*, *FI_EP_DGRAM*, *FI_EP_MSG* endpoint - types, including scalable endpoints. - -*Address vectors* -: The provider implements both the *FI_AV_MAP* and *FI_AV_TABLE* - address vector types. FI_EVENT is unsupported. - -*Memory registration modes* -: The provider implements basic and scalable memory - registration modes. - -*Data transfer operations* -: The following data transfer interfaces are supported for all - endpoint types: *FI_ATOMIC*, *FI_MSG*, *FI_RMA*, *FI_TAGGED*. See - DATA TRANSFER OPERATIONS below for more details. 
- -*Completion events* -: The GNI provider supports *FI_CQ_FORMAT_CONTEXT*, *FI_CQ_FORMAT_MSG*, - *FI_CQ_FORMAT_DATA* and *FI_CQ_FORMAT_TAGGED* with wait objects of type - *FI_WAIT_NONE*, *FI_WAIT_UNSPEC*, *FI_WAIT_SET*. - -*Modes* -: The GNI provider does not require any operation modes. - -*Progress* -: For both control and data progress, the GNI provider supports both - *FI_PROGRESS_AUTO* and *FI_PROGRESS_MANUAL*, with a default set to - *FI_PROGRESS_AUTO*. - Note that for data progress, progression is only performed - when data transfers use the rendezvous protocol. - -*Wait Objects* -: The GNI provider specifically supports wait object types *FI_WAIT_UNSPEC*, - and *FI_WAIT_SET*. A wait object must be used when calling fi_cntr_wait, - fi_cq_sread/from, fi_eq_sread/from, fi_wait. - The GNI provider spawns an internal wait progress thread that is woken up - when clients utilize the wait system (e.g., calling fi_wait). - -*Additional Features* -: The GNI provider also supports the following capabilities and features: -- *FI_MULTI_RECV* -- *FI_SOURCE* -- *FI_FENCE* -- *FI_RM_ENABLED* -- *FI_RMA_EVENT* -- *FI_REMOTE_CQ_DATA* -- *FABRIC_DIRECT* compilation mode -- *FI_MORE* (For FI_RMA) - -# DATA TRANSFER OPERATIONS - -## FI_ATOMIC - -Currently, the GNI provider only supports atomic operations supported -directly by the Aries NIC. These include operations on 32- and -64-bit, signed and unsigned integer and floating point values. -Specifically, - -### Basic (fi_atomic, etc.) -- *FI_MIN*, *FI_MAX* (no unsigned) -- *FI_SUM* (no 64-bit floating point) -- *FI_BOR*, *FI_BAND*, *FI_BXOR* (no floating point) -- *FI_ATOMIC_WRITE* - -### Fetching (fi_fetch_atomic, etc.) -- All of the basic operations as above -- FI_ATOMIC_READ - -### Comparison (fi_compare_atomic, etc.) -- FI_CSWAP -- FI_MSWAP - -## FI_MSG - -All *FI_MSG* operations are supported. - -## FI_RMA - -All *FI_RMA* operations are supported. 
- -## FI_TAGGED - -All *FI_TAGGED* operations are supported except `fi_tinjectdata`. - -# GNI EXTENSIONS - -The GNI provider exposes low-level tuning parameters via domain, endpoint -and fabric level `fi_open_ops` interfaces. -The domain extensions have been named *FI_GNI_DOMAIN_OPS_1*. The endpoint -extensions have been named *FI_GNI_EP_OPS_1*. The fabric extensions have -been named *FI_GNI_FABRIC_OPS_1* and *FI_GNI_FABRIC_OPS_2*. -The flags parameter is currently ignored. The fi_open_ops function takes -a `struct fi_gni_ops_domain` or a `struct fi_gni_ops_ep` parameter -respectively and populates it with the following: - -```c -struct fi_gni_ops_fab { - int (*set_val)(struct fid *fid, fab_ops_val_t t, void *val); - int (*get_val)(struct fid *fid, fab_ops_val_t t, void *val); -}; - -struct fi_gni_auth_key_ops_fab { - int (*set_val)(uint8_t *auth_key, size_t auth_keylen, gnix_auth_key_opt_t opt, void *val); - int (*get_val)(uint8_t *auth_key, size_t auth_keylen, gnix_auth_key_opt_t opt, void *val); -}; - -struct fi_gni_ops_domain { - int (*set_val)(struct fid *fid, dom_ops_val_t t, void *val); - int (*get_val)(struct fid *fid, dom_ops_val_t t, void *val); - int (*flush_cache)(struct fid *fid); -}; - -struct fi_gni_ops_ep { - int (*set_val)(struct fid *fid, dom_ops_val_t t, void *val); - int (*get_val)(struct fid *fid, dom_ops_val_t t, void *val); - size_t (*native_amo)(struct fid_ep *ep, const void *buf, - size_t count,void *desc, - fi_addr_t dest_addr, uint64_t addr, - uint64_t key, enum fi_datatype datatype, - enum gnix_fab_req_type req_type, - void *context); -}; -``` - -The `set_val` function sets the value of a given parameter; the -`get_val` function returns the current value. - -For *FI_GNI_FABRIC_OPS_1*, the currently supported values are: - -*GNI_WAIT_THREAD_SLEEP* -: Time in seconds for which the progress thread will sleep between -periods of inactivity. 
- -*GNI_DEFAULT_USER_REGISTRATION_LIMIT* -: The number of user registrations that an authorization key is limited -to when using the scalable memory mode, if not specified by -the user during init. - -*GNI_DEFAULT_PROV_REGISTRATION_LIMIT* -: The number of provider registration that an authorization key is -limited to when using the scalable memory mode, if not specified -by the user during init. - -*GNI_WAIT_SHARED_MEMORY_TIMEOUT* -: The number of seconds that the provider should wait when -attempting to open mmap'd shared memory files for internal -mappings. - -For *FI_GNI_FABRIC_OPS_2*, the currently supported values are: - -*GNIX_USER_KEY_LIMIT* -: The number of user registrations that an authorization key is limited -to when using the scalable memory mode. This may only be set prior -to the first use of an authorization key in the initialization of a -domain, endpoint, or memory registration. - -*GNIX_PROV_KEY_LIMIT* -: The number of provider registrations that an authorization key is -limited to when using the scalable memory mode. This may only be -set prior to the first use of an authorization key in the initialization -of a domain, endpoint, or memory registration. - -For *FI_GNI_DOMAIN_OPS_1*, the currently supported values are: - -*GNI_MSG_RENDEZVOUS_THRESHOLD* -: Threshold message size at which a rendezvous protocol is used for - *FI_MSG* data transfers. The value is of type uint32_t. - -*GNI_RMA_RDMA_THRESHOLD* -: Threshold message size at which RDMA is used for *FI_RMA* data - transfers. The value is of type uint32_t. - -*GNI_CONN_TABLE_INITIAL_SIZE* -: Initial size of the internal table data structure used to manage - connections. The value is of type uint32_t. - -*GNI_CONN_TABLE_MAX_SIZE* -: Maximum size of the internal table data structure used to manage - connections. The value is of type uint32_t. - -*GNI_CONN_TABLE_STEP_SIZE* -: Step size for increasing the size of the internal table data - structure used to manage internal GNI connections. 
The value is of - type uint32_t. - -*GNI_VC_ID_TABLE_CAPACITY* -: Size of the virtual channel (VC) table used for managing remote - connections. The value is of type uint32_t. - -*GNI_MBOX_PAGE_SIZE* -: Page size for GNI SMSG mailbox allocations. The value is of type - uint32_t. - -*GNI_MBOX_NUM_PER_SLAB* -: Number of GNI SMSG mailboxes per allocation slab. The value is of - type uint32_t. - -*GNI_MBOX_MAX_CREDIT* -: Maximum number of credits per GNI SMSG mailbox. The value is of - type uint32_t. - -*GNI_MBOX_MSG_MAX_SIZE* -: Maximum size of GNI SMSG messages. The value is of type uint32_t. - -*GNI_RX_CQ_SIZE* -: Recommended GNI receive CQ size. The value is of type uint32_t. - -*GNI_TX_CQ_SIZE* -: Recommended GNI transmit CQ size. The value is of type uint32_t. - -*GNI_MAX_RETRANSMITS* -: Maximum number of message retransmits before failure. The value is - of type uint32_t. - -*GNI_MR_CACHE_LAZY_DEREG* -: Enable or disable lazy deregistration of memory. The value is of - type int32_t. - -*GNI_MR_CACHE* -: Select the type of cache that the domain will use. Valid choices are - the following: 'internal', 'udreg', or 'none'. 'internal' refers to the GNI - provider internal registration cache. 'udreg' refers to a user level dreg - library based cache. Lastly, 'none' refers to device direct registration - without a provider cache. - -*GNI_MR_HARD_REG_LIMIT* -: Maximum number of registrations. Applies only to the GNI provider cache. The value is of type int32_t (-1 for no limit). - -*GNI_MR_SOFT_REG_LIMIT* -: Soft cap on the registration limit. Applies only to the GNI provider cache. The value is of type int32_t (-1 for no limit). - -*GNI_MR_HARD_STALE_REG_LIMIT* -: Maximum number of stale registrations to be held in cache. This applies to the GNI provider cache and the udreg cache. The value is of type int32_t (-1 for no limit for the GNI provider cache and udreg cache values must be greater than 0). - -*GNI_MR_UDREG_LIMIT* -: Maximum number of registrations. 
Applies only to the udreg cache. The value is of type int32_t. The value must be greater than 0. - -*GNI_XPMEM_ENABLE* -: Enable or disable use of XPMEM for on node messages using the GNI provider internal rendezvous protocol. The value is of type bool. - -*GNI_DGRAM_PROGRESS_TIMEOUT* -: Controls timeout value in milliseconds for the control progress thread. The value is of type uint32_t. - -The `flush_cache` function allows the user to flush any stale registration -cache entries from the cache. This has the effect of removing registrations -from the cache that have been deregistered with the provider, but still -exist in case that they may be reused in the near future. Flushing the stale -registrations forces hardware-level deregistration of the stale memory -registrations and frees any memory related to those stale registrations. Only -the provider-level registration struct is freed, not the user buffer -associated with the registration. -The parameter for `flush_cache` is a struct fid pointer to a fi_domain. The -memory registration cache is tied to the domain, so issuing a `flush_cache` to -the domain will flush the registration cache of the domain. - -For *FI_GNI_EP_OPS_1*, the currently supported values are: -*GNI_HASH_TAG_IMPL* -: Use a hashlist for the tag list implementation. The value is of type uint32_t. - -The `native_amo` function allows the user to call GNI native atomics -that are not implemented in the libfabric API. -The parameters for native_amo are the same as the fi_atomic function -but adds the following parameter: - -*enum gnix_fab_req_type req_type* -: The req_type's supported with this call are GNIX_FAB_RQ_NAMO_AX - (AND and XOR), and GNIX_FAB_RQ_NAMO_AX_S (AND and XOR 32 bit), -GNIX_FAB_RQ_NAMO_FAX (Fetch AND and XOR) and GNIX_FAB_RQ_NAMO_FAX_S - (Fetch AND and XOR 32 bit). - -# NOTES - -The default address format is FI_ADDR_GNI. This is the only address format -used within the GNI provider for message passing. 
FI_ADDR_STR is always -parsed and converted to FI_ADDR_GNI for use within the GNI provider. - -*FI_ADDR_STR* is formatted as follows: -gni;node;service;GNIX_AV_STR_ADDR_VERSION;device_addr;cdm_id;name_type;cm_nic_cdm_id;cookie;rx_ctx_cnt;key_offset - -The GNI provider sets the domain attribute *cntr_cnt* to the CQ limit divided by 2. - -The GNI provider sets the domain attribute *cq_cnt* to the CQ limit divided by 2. - -The GNI provider sets the domain attribute *ep_cnt* to SIZE_MAX. - -Completion queue events may report unknown source address information when -using *FI_SOURCE*. If *FI_SOURCE_ERR* is also specified, the source address -information will be reported in the -err_data member of the struct fi_cq_err_entry populated by fi_cq_readerr. The -err_data member will contain the source address information in the FI_ADDR_GNI -address format. In order to populate the remote peer's address vector -with this mechanism, the application must call fi_cq_readerr to get the -source address followed by fi_av_insert on the populated err_data member. - -For FI_MULTI_RECV, the GNI provider generates a separate FI_MULTI_RECV CQ event -once the receive buffer has been consumed. Also, owing to the out-or-order nature -of the Cray network, the CQ events associated with individual messages arriving in the -receive buffer may be generated out of order with respect to the offset into the buffer -into which the messages were received. - -The GNI provider can use a maximum of 4K memory registrations per *node* when using scalable memory registration. -Please consider this limitation when placing multiple processes on each node. - -The GNI provider sets the default user registration limit to 192 when using scalable memory registration, -and sets the default provider registration limit to 64. These limits are directly associated -with the authorization key in use when creating the registration. 
If no authorization key -is used when creating the registration, the registration is automatically bound to the same -authorization key as the domain to which the registration belongs. - -When using scalable memory registration, the provider may make registrations which consume some of the -registrations set aside for the provider. This impacts the performance of FI_LOCAL_MR, which -relies on provider-created registrations. - -All memory registrations are associated with an authorization key, whether it is the provider -default key(keylen=0) or a user-acquired key (key!=NULL, keylen!=0). Each authorization -key is associated with a unique GNI network key. A GNI network key can only accommodate a single -memory mode, whether it is basic, or scalable memory registration. If a user attempts to open multiple -domains using different memory modes with the same authorization key, the provider will return --FI_EINVAL. - -When using scalable memory registration, the user may request keys beginning at 0, and ending at the -user registration limit for a given authorization key. - -When using scalable memory registration and fi_mr_refresh(), only refresh the updated pages, not the entire -registration. If the entire registration is refreshed and some of the pages are not mapped, -then refresh will return -FI_EFAULT. - -Registration IDs for scalable memory registration are local to the node. This means that the application is -responsible for handing the coordination of key selection. - -The location of the authorization key mapping file can be controlled through two environment variables, -TMPDIR and GNIX_AK_FILENAME. Setting TMPDIR to a non-NULL value with change the directory for the -authorization key mapping file, and setting GNIX_AK_FILENAME to a non-NULL value will change the filename. -The default path for the authorization key mapping file is '/tmp/gnix_vmdh_info'. The recommendation is that -the user should not change these environment variables unless necessary. 
- -# KNOWN BUGS - -The GNI provider currently treats the fi_shutdown() interface as a strictly -local operation. That is, fi_shutdown() causes the local endpoint to be shut -down, and a shutdown event to be generated on the local EQ. However, a -connected remote peer endpoint is not notified of a call to fi_shutdown(). - -The GNI provider does not currently handle the case when FI_OPT_MULTI_RECV is set to 0 -and will return -FI_EINVAL if an application attempts to set this value to zero. - -# SEE ALSO - -[`fabric`(7)](fabric.7.html), -[`fi_open_ops`(3)](fi_open_ops.3.html), -[`fi_provider`(7)](fi_provider.7.html), -[`fi_getinfo`(3)](fi_getinfo.3.html) -[`fi_atomic`(3)](fi_atomic.3.html) - -For more information on uGNI, see *Using the GNI and DMAPP APIs* -(S-2446-3103, Cray Inc.). For more information on the GNI provider, -see *An Implementation of OFI libfabric in Support of Multithreaded -PGAS Solutions* (PGAS '15). - diff --git a/man/fi_provider.7.md b/man/fi_provider.7.md index b83f826303c..40184b225a8 100644 --- a/man/fi_provider.7.md +++ b/man/fi_provider.7.md @@ -73,11 +73,6 @@ operating system support is available, etc. This list is not exhaustive. hardware interface for inter-instance communication on EC2. See [`fi_efa`(7)](fi_efa.7.html) for more information. -*GNI* -: A provider for the Aries interconnect in Cray XC(TM) systems - utilizing the user-space *Generic Networking Interface*. See - [`fi_gni`(7)](fi_gni.7.html) for more information. - *OPX* : Supports Omni-Path networking from Cornelis Networks. See [`fi_opx`(7)](fi_opx.7.html) for more information. 
diff --git a/man/man7/fi_gni.7 b/man/man7/fi_gni.7 deleted file mode 100644 index bc607c272e8..00000000000 --- a/man/man7/fi_gni.7 +++ /dev/null @@ -1,447 +0,0 @@ -.\" Automatically generated by Pandoc 2.9.2.1 -.\" -.TH "fi_gni" "7" "2022\-12\-09" "Libfabric Programmer\[cq]s Manual" "#VERSION#" -.hy -.SH NAME -.PP -fi_gni - The GNI Fabric Provider -.SH OVERVIEW -.PP -The GNI provider runs on Cray XC (TM) systems utilizing the user-space -Generic Network Interface (uGNI) which provides low-level access to the -Aries interconnect. -The Aries interconnect is designed for low-latency one-sided messaging -and also includes direct hardware support for common atomic operations -and optimized collectives. -.SH REQUIREMENTS -.PP -The GNI provider runs on Cray XC systems running CLE 5.2 UP04 or higher -using gcc version 4.9 or higher. -.PP -When using the fi_mr_regattr() and fi_mr_regv() functions to register -multiple region, users must register the memory region with 4K page -alignment. -Any other page address alignment will result in a return value of --FI_EINVAL. -.PP -When using the scalable memory registration mode, applications must make -registration requests on 4K page alignment boundaries. -Any other value will result in a return value of -FI_EINVAL. -.SH SUPPORTED FEATURES -.PP -The GNI provider supports the following features defined for the -libfabric API: -.TP -\f[I]Endpoint types\f[R] -The provider supports the \f[I]FI_EP_RDM\f[R], \f[I]FI_EP_DGRAM\f[R], -\f[I]FI_EP_MSG\f[R] endpoint types, including scalable endpoints. -.TP -\f[I]Address vectors\f[R] -The provider implements both the \f[I]FI_AV_MAP\f[R] and -\f[I]FI_AV_TABLE\f[R] address vector types. -FI_EVENT is unsupported. -.TP -\f[I]Memory registration modes\f[R] -The provider implements basic and scalable memory registration modes. 
-.TP -\f[I]Data transfer operations\f[R] -The following data transfer interfaces are supported for all endpoint -types: \f[I]FI_ATOMIC\f[R], \f[I]FI_MSG\f[R], \f[I]FI_RMA\f[R], -\f[I]FI_TAGGED\f[R]. -See DATA TRANSFER OPERATIONS below for more details. -.TP -\f[I]Completion events\f[R] -The GNI provider supports \f[I]FI_CQ_FORMAT_CONTEXT\f[R], -\f[I]FI_CQ_FORMAT_MSG\f[R], \f[I]FI_CQ_FORMAT_DATA\f[R] and -\f[I]FI_CQ_FORMAT_TAGGED\f[R] with wait objects of type -\f[I]FI_WAIT_NONE\f[R], \f[I]FI_WAIT_UNSPEC\f[R], \f[I]FI_WAIT_SET\f[R]. -.TP -\f[I]Modes\f[R] -The GNI provider does not require any operation modes. -.TP -\f[I]Progress\f[R] -For both control and data progress, the GNI provider supports both -\f[I]FI_PROGRESS_AUTO\f[R] and \f[I]FI_PROGRESS_MANUAL\f[R], with a -default set to \f[I]FI_PROGRESS_AUTO\f[R]. -Note that for data progress, progression is only performed when data -transfers use the rendezvous protocol. -.TP -\f[I]Wait Objects\f[R] -The GNI provider specifically supports wait object types -\f[I]FI_WAIT_UNSPEC\f[R], and \f[I]FI_WAIT_SET\f[R]. -A wait object must be used when calling fi_cntr_wait, fi_cq_sread/from, -fi_eq_sread/from, fi_wait. -The GNI provider spawns an internal wait progress thread that is woken -up when clients utilize the wait system (e.g., calling fi_wait). -.TP -\f[I]Additional Features\f[R] -The GNI provider also supports the following capabilities and features: -- \f[I]FI_MULTI_RECV\f[R] - \f[I]FI_SOURCE\f[R] - \f[I]FI_FENCE\f[R] - -\f[I]FI_RM_ENABLED\f[R] - \f[I]FI_RMA_EVENT\f[R] - -\f[I]FI_REMOTE_CQ_DATA\f[R] - \f[I]FABRIC_DIRECT\f[R] compilation mode - -\f[I]FI_MORE\f[R] (For FI_RMA) -.SH DATA TRANSFER OPERATIONS -.SS FI_ATOMIC -.PP -Currently, the GNI provider only supports atomic operations supported -directly by the Aries NIC. -These include operations on 32- and 64-bit, signed and unsigned integer -and floating point values. -Specifically, -.SS Basic (fi_atomic, etc.) 
-.IP \[bu] 2 -\f[I]FI_MIN\f[R], \f[I]FI_MAX\f[R] (no unsigned) -.IP \[bu] 2 -\f[I]FI_SUM\f[R] (no 64-bit floating point) -.IP \[bu] 2 -\f[I]FI_BOR\f[R], \f[I]FI_BAND\f[R], \f[I]FI_BXOR\f[R] (no floating -point) -.IP \[bu] 2 -\f[I]FI_ATOMIC_WRITE\f[R] -.SS Fetching (fi_fetch_atomic, etc.) -.IP \[bu] 2 -All of the basic operations as above -.IP \[bu] 2 -FI_ATOMIC_READ -.SS Comparison (fi_compare_atomic, etc.) -.IP \[bu] 2 -FI_CSWAP -.IP \[bu] 2 -FI_MSWAP -.SS FI_MSG -.PP -All \f[I]FI_MSG\f[R] operations are supported. -.SS FI_RMA -.PP -All \f[I]FI_RMA\f[R] operations are supported. -.SS FI_TAGGED -.PP -All \f[I]FI_TAGGED\f[R] operations are supported except -\f[C]fi_tinjectdata\f[R]. -.SH GNI EXTENSIONS -.PP -The GNI provider exposes low-level tuning parameters via domain, -endpoint and fabric level \f[C]fi_open_ops\f[R] interfaces. -The domain extensions have been named \f[I]FI_GNI_DOMAIN_OPS_1\f[R]. -The endpoint extensions have been named \f[I]FI_GNI_EP_OPS_1\f[R]. -The fabric extensions have been named \f[I]FI_GNI_FABRIC_OPS_1\f[R] and -\f[I]FI_GNI_FABRIC_OPS_2\f[R]. -The flags parameter is currently ignored. 
-The fi_open_ops function takes a \f[C]struct fi_gni_ops_domain\f[R] or a -\f[C]struct fi_gni_ops_ep\f[R] parameter respectively and populates it -with the following: -.IP -.nf -\f[C] -struct fi_gni_ops_fab { - int (*set_val)(struct fid *fid, fab_ops_val_t t, void *val); - int (*get_val)(struct fid *fid, fab_ops_val_t t, void *val); -}; - -struct fi_gni_auth_key_ops_fab { - int (*set_val)(uint8_t *auth_key, size_t auth_keylen, gnix_auth_key_opt_t opt, void *val); - int (*get_val)(uint8_t *auth_key, size_t auth_keylen, gnix_auth_key_opt_t opt, void *val); -}; - -struct fi_gni_ops_domain { - int (*set_val)(struct fid *fid, dom_ops_val_t t, void *val); - int (*get_val)(struct fid *fid, dom_ops_val_t t, void *val); - int (*flush_cache)(struct fid *fid); -}; - -struct fi_gni_ops_ep { - int (*set_val)(struct fid *fid, dom_ops_val_t t, void *val); - int (*get_val)(struct fid *fid, dom_ops_val_t t, void *val); - size_t (*native_amo)(struct fid_ep *ep, const void *buf, - size_t count,void *desc, - fi_addr_t dest_addr, uint64_t addr, - uint64_t key, enum fi_datatype datatype, - enum gnix_fab_req_type req_type, - void *context); -}; -\f[R] -.fi -.PP -The \f[C]set_val\f[R] function sets the value of a given parameter; the -\f[C]get_val\f[R] function returns the current value. -.PP -For \f[I]FI_GNI_FABRIC_OPS_1\f[R], the currently supported values are: -.TP -\f[I]GNI_WAIT_THREAD_SLEEP\f[R] -Time in seconds for which the progress thread will sleep between periods -of inactivity. -.TP -\f[I]GNI_DEFAULT_USER_REGISTRATION_LIMIT\f[R] -The number of user registrations that an authorization key is limited to -when using the scalable memory mode, if not specified by the user during -init. -.TP -\f[I]GNI_DEFAULT_PROV_REGISTRATION_LIMIT\f[R] -The number of provider registration that an authorization key is limited -to when using the scalable memory mode, if not specified by the user -during init. 
-.TP -\f[I]GNI_WAIT_SHARED_MEMORY_TIMEOUT\f[R] -The number of seconds that the provider should wait when attempting to -open mmap\[cq]d shared memory files for internal mappings. -.PP -For \f[I]FI_GNI_FABRIC_OPS_2\f[R], the currently supported values are: -.TP -\f[I]GNIX_USER_KEY_LIMIT\f[R] -The number of user registrations that an authorization key is limited to -when using the scalable memory mode. -This may only be set prior to the first use of an authorization key in -the initialization of a domain, endpoint, or memory registration. -.TP -\f[I]GNIX_PROV_KEY_LIMIT\f[R] -The number of provider registrations that an authorization key is -limited to when using the scalable memory mode. -This may only be set prior to the first use of an authorization key in -the initialization of a domain, endpoint, or memory registration. -.PP -For \f[I]FI_GNI_DOMAIN_OPS_1\f[R], the currently supported values are: -.TP -\f[I]GNI_MSG_RENDEZVOUS_THRESHOLD\f[R] -Threshold message size at which a rendezvous protocol is used for -\f[I]FI_MSG\f[R] data transfers. -The value is of type uint32_t. -.TP -\f[I]GNI_RMA_RDMA_THRESHOLD\f[R] -Threshold message size at which RDMA is used for \f[I]FI_RMA\f[R] data -transfers. -The value is of type uint32_t. -.TP -\f[I]GNI_CONN_TABLE_INITIAL_SIZE\f[R] -Initial size of the internal table data structure used to manage -connections. -The value is of type uint32_t. -.TP -\f[I]GNI_CONN_TABLE_MAX_SIZE\f[R] -Maximum size of the internal table data structure used to manage -connections. -The value is of type uint32_t. -.TP -\f[I]GNI_CONN_TABLE_STEP_SIZE\f[R] -Step size for increasing the size of the internal table data structure -used to manage internal GNI connections. -The value is of type uint32_t. -.TP -\f[I]GNI_VC_ID_TABLE_CAPACITY\f[R] -Size of the virtual channel (VC) table used for managing remote -connections. -The value is of type uint32_t. -.TP -\f[I]GNI_MBOX_PAGE_SIZE\f[R] -Page size for GNI SMSG mailbox allocations. 
-The value is of type uint32_t. -.TP -\f[I]GNI_MBOX_NUM_PER_SLAB\f[R] -Number of GNI SMSG mailboxes per allocation slab. -The value is of type uint32_t. -.TP -\f[I]GNI_MBOX_MAX_CREDIT\f[R] -Maximum number of credits per GNI SMSG mailbox. -The value is of type uint32_t. -.TP -\f[I]GNI_MBOX_MSG_MAX_SIZE\f[R] -Maximum size of GNI SMSG messages. -The value is of type uint32_t. -.TP -\f[I]GNI_RX_CQ_SIZE\f[R] -Recommended GNI receive CQ size. -The value is of type uint32_t. -.TP -\f[I]GNI_TX_CQ_SIZE\f[R] -Recommended GNI transmit CQ size. -The value is of type uint32_t. -.TP -\f[I]GNI_MAX_RETRANSMITS\f[R] -Maximum number of message retransmits before failure. -The value is of type uint32_t. -.TP -\f[I]GNI_MR_CACHE_LAZY_DEREG\f[R] -Enable or disable lazy deregistration of memory. -The value is of type int32_t. -.TP -\f[I]GNI_MR_CACHE\f[R] -Select the type of cache that the domain will use. -Valid choices are the following: `internal', `udreg', or `none'. -`internal' refers to the GNI provider internal registration cache. -`udreg' refers to a user level dreg library based cache. -Lastly, `none' refers to device direct registration without a provider -cache. -.TP -\f[I]GNI_MR_HARD_REG_LIMIT\f[R] -Maximum number of registrations. -Applies only to the GNI provider cache. -The value is of type int32_t (-1 for no limit). -.TP -\f[I]GNI_MR_SOFT_REG_LIMIT\f[R] -Soft cap on the registration limit. -Applies only to the GNI provider cache. -The value is of type int32_t (-1 for no limit). -.TP -\f[I]GNI_MR_HARD_STALE_REG_LIMIT\f[R] -Maximum number of stale registrations to be held in cache. -This applies to the GNI provider cache and the udreg cache. -The value is of type int32_t (-1 for no limit for the GNI provider cache -and udreg cache values must be greater than 0). -.TP -\f[I]GNI_MR_UDREG_LIMIT\f[R] -Maximum number of registrations. -Applies only to the udreg cache. -The value is of type int32_t. -The value must be greater than 0. 
-.TP -\f[I]GNI_XPMEM_ENABLE\f[R] -Enable or disable use of XPMEM for on node messages using the GNI -provider internal rendezvous protocol. -The value is of type bool. -.TP -\f[I]GNI_DGRAM_PROGRESS_TIMEOUT\f[R] -Controls timeout value in milliseconds for the control progress thread. -The value is of type uint32_t. -.PP -The \f[C]flush_cache\f[R] function allows the user to flush any stale -registration cache entries from the cache. -This has the effect of removing registrations from the cache that have -been deregistered with the provider, but still exist in case that they -may be reused in the near future. -Flushing the stale registrations forces hardware-level deregistration of -the stale memory registrations and frees any memory related to those -stale registrations. -Only the provider-level registration struct is freed, not the user -buffer associated with the registration. -The parameter for \f[C]flush_cache\f[R] is a struct fid pointer to a -fi_domain. -The memory registration cache is tied to the domain, so issuing a -\f[C]flush_cache\f[R] to the domain will flush the registration cache of -the domain. -.PP -For \f[I]FI_GNI_EP_OPS_1\f[R], the currently supported values are: -\f[I]GNI_HASH_TAG_IMPL\f[R] : Use a hashlist for the tag list -implementation. -The value is of type uint32_t. -.PP -The \f[C]native_amo\f[R] function allows the user to call GNI native -atomics that are not implemented in the libfabric API. -The parameters for native_amo are the same as the fi_atomic function but -adds the following parameter: -.TP -\f[I]enum gnix_fab_req_type req_type\f[R] -The req_type\[cq]s supported with this call are GNIX_FAB_RQ_NAMO_AX (AND -and XOR), and GNIX_FAB_RQ_NAMO_AX_S (AND and XOR 32 bit), -GNIX_FAB_RQ_NAMO_FAX (Fetch AND and XOR) and GNIX_FAB_RQ_NAMO_FAX_S -(Fetch AND and XOR 32 bit). -.SH NOTES -.PP -The default address format is FI_ADDR_GNI. -This is the only address format used within the GNI provider for message -passing. 
-FI_ADDR_STR is always parsed and converted to FI_ADDR_GNI for use within -the GNI provider. -.PP -\f[I]FI_ADDR_STR\f[R] is formatted as follows: -gni;node;service;GNIX_AV_STR_ADDR_VERSION;device_addr;cdm_id;name_type;cm_nic_cdm_id;cookie;rx_ctx_cnt;key_offset -.PP -The GNI provider sets the domain attribute \f[I]cntr_cnt\f[R] to the CQ -limit divided by 2. -.PP -The GNI provider sets the domain attribute \f[I]cq_cnt\f[R] to the CQ -limit divided by 2. -.PP -The GNI provider sets the domain attribute \f[I]ep_cnt\f[R] to SIZE_MAX. -.PP -Completion queue events may report unknown source address information -when using \f[I]FI_SOURCE\f[R]. -If \f[I]FI_SOURCE_ERR\f[R] is also specified, the source address -information will be reported in the err_data member of the struct -fi_cq_err_entry populated by fi_cq_readerr. -The err_data member will contain the source address information in the -FI_ADDR_GNI address format. -In order to populate the remote peer\[cq]s address vector with this -mechanism, the application must call fi_cq_readerr to get the source -address followed by fi_av_insert on the populated err_data member. -.PP -For FI_MULTI_RECV, the GNI provider generates a separate FI_MULTI_RECV -CQ event once the receive buffer has been consumed. -Also, owing to the out-or-order nature of the Cray network, the CQ -events associated with individual messages arriving in the receive -buffer may be generated out of order with respect to the offset into the -buffer into which the messages were received. -.PP -The GNI provider can use a maximum of 4K memory registrations per -\f[I]node\f[R] when using scalable memory registration. -Please consider this limitation when placing multiple processes on each -node. -.PP -The GNI provider sets the default user registration limit to 192 when -using scalable memory registration, and sets the default provider -registration limit to 64. 
-These limits are directly associated with the authorization key in use -when creating the registration. -If no authorization key is used when creating the registration, the -registration is automatically bound to the same authorization key as the -domain to which the registration belongs. -.PP -When using scalable memory registration, the provider may make -registrations which consume some of the registrations set aside for the -provider. -This impacts the performance of FI_LOCAL_MR, which relies on -provider-created registrations. -.PP -All memory registrations are associated with an authorization key, -whether it is the provider default key(keylen=0) or a user-acquired key -(key!=NULL, keylen!=0). -Each authorization key is associated with a unique GNI network key. -A GNI network key can only accommodate a single memory mode, whether it -is basic, or scalable memory registration. -If a user attempts to open multiple domains using different memory modes -with the same authorization key, the provider will return -FI_EINVAL. -.PP -When using scalable memory registration, the user may request keys -beginning at 0, and ending at the user registration limit for a given -authorization key. -.PP -When using scalable memory registration and fi_mr_refresh(), only -refresh the updated pages, not the entire registration. -If the entire registration is refreshed and some of the pages are not -mapped, then refresh will return -FI_EFAULT. -.PP -Registration IDs for scalable memory registration are local to the node. -This means that the application is responsible for handing the -coordination of key selection. -.PP -The location of the authorization key mapping file can be controlled -through two environment variables, TMPDIR and GNIX_AK_FILENAME. -Setting TMPDIR to a non-NULL value with change the directory for the -authorization key mapping file, and setting GNIX_AK_FILENAME to a -non-NULL value will change the filename. 
-The default path for the authorization key mapping file is -`/tmp/gnix_vmdh_info'. -The recommendation is that the user should not change these environment -variables unless necessary. -.SH KNOWN BUGS -.PP -The GNI provider currently treats the fi_shutdown() interface as a -strictly local operation. -That is, fi_shutdown() causes the local endpoint to be shut down, and a -shutdown event to be generated on the local EQ. -However, a connected remote peer endpoint is not notified of a call to -fi_shutdown(). -.PP -The GNI provider does not currently handle the case when -FI_OPT_MULTI_RECV is set to 0 and will return -FI_EINVAL if an -application attempts to set this value to zero. -.SH SEE ALSO -.PP -\f[C]fabric\f[R](7), \f[C]fi_open_ops\f[R](3), \f[C]fi_provider\f[R](7), -\f[C]fi_getinfo\f[R](3) \f[C]fi_atomic\f[R](3) -.PP -For more information on uGNI, see \f[I]Using the GNI and DMAPP APIs\f[R] -(S-2446-3103, Cray Inc.). -For more information on the GNI provider, see \f[I]An Implementation of -OFI libfabric in Support of Multithreaded PGAS Solutions\f[R] (PGAS -\[cq]15). -.SH AUTHORS -OpenFabrics. 
diff --git a/prov/gni/Makefile.include b/prov/gni/Makefile.include deleted file mode 100644 index c3492856b55..00000000000 --- a/prov/gni/Makefile.include +++ /dev/null @@ -1,170 +0,0 @@ -# Makefile.am for gni provider - -if HAVE_GNI - -# -# want to keep ccan in the include path seen in the -# source code, so just add prov/gni to the include path -# rather than prov/gni/ccan -# -AM_CPPFLAGS += -I$(top_srcdir)/prov/gni/include -I$(top_srcdir)/prov/gni - -_gni_files = \ - prov/gni/src/gnix_atomic.c \ - prov/gni/src/gnix_auth_key.c \ - prov/gni/src/gnix_av.c \ - prov/gni/src/gnix_bitmap.c \ - prov/gni/src/gnix_buddy_allocator.c \ - prov/gni/src/gnix_cm.c \ - prov/gni/src/gnix_cm_nic.c \ - prov/gni/src/gnix_cntr.c \ - prov/gni/src/gnix_cq.c \ - prov/gni/src/gnix_datagram.c \ - prov/gni/src/gnix_dom.c \ - prov/gni/src/gnix_ep.c \ - prov/gni/src/gnix_eq.c \ - prov/gni/src/gnix_fabric.c \ - prov/gni/src/gnix_freelist.c \ - prov/gni/src/gnix_hashtable.c \ - prov/gni/src/gnix_init.c \ - prov/gni/src/gnix_mbox_allocator.c \ - prov/gni/src/gnix_mr.c \ - prov/gni/src/gnix_mr_cache.c \ - prov/gni/src/gnix_mr_notifier.c \ - prov/gni/src/gnix_msg.c \ - prov/gni/src/gnix_nameserver.c \ - prov/gni/src/gnix_nic.c \ - prov/gni/src/gnix_poll.c \ - prov/gni/src/gnix_progress.c \ - prov/gni/src/gnix_queue.c \ - prov/gni/src/gnix_rma.c \ - prov/gni/src/gnix_sep.c \ - prov/gni/src/gnix_shmem.c \ - prov/gni/src/gnix_smrn.c \ - prov/gni/src/gnix_tags.c \ - prov/gni/src/gnix_trigger.c \ - prov/gni/src/gnix_util.c \ - prov/gni/src/gnix_vc.c \ - prov/gni/src/gnix_vector.c \ - prov/gni/src/gnix_xpmem.c \ - prov/gni/src/gnix_wait.c - -_gni_headers = \ - prov/gni/include/fi_ext_gni.h \ - prov/gni/include/gnix_atomic.h \ - prov/gni/include/gnix_auth_key.h \ - prov/gni/include/gnix_av.h \ - prov/gni/include/gnix_bitmap.h \ - prov/gni/include/gnix_buddy_allocator.h \ - prov/gni/include/gnix_cm.h \ - prov/gni/include/gnix_cm_nic.h \ - prov/gni/include/gnix_cntr.h \ - prov/gni/include/gnix_cq.h \ - 
prov/gni/include/gnix_datagram.h \ - prov/gni/include/gnix_ep.h \ - prov/gni/include/gnix_eq.h \ - prov/gni/include/gnix_freelist.h \ - prov/gni/include/gnix.h \ - prov/gni/include/gnix_hashtable.h \ - prov/gni/include/gnix_mbox_allocator.h \ - prov/gni/include/gnix_mr.h \ - prov/gni/include/gnix_mr_cache.h \ - prov/gni/include/gnix_mr_notifier.h \ - prov/gni/include/gnix_msg.h \ - prov/gni/include/gnix_nameserver.h \ - prov/gni/include/gnix_nic.h \ - prov/gni/include/gnix_poll.h \ - prov/gni/include/gnix_progress.h \ - prov/gni/include/gnix_priv.h \ - prov/gni/include/gnix_queue.h \ - prov/gni/include/gnix_rma.h \ - prov/gni/include/gnix_shmem.h \ - prov/gni/include/gnix_smrn.h \ - prov/gni/include/gnix_tags.h \ - prov/gni/include/gnix_trigger.h \ - prov/gni/include/gnix_util.h \ - prov/gni/include/gnix_vc.h \ - prov/gni/include/gnix_vector.h \ - prov/gni/include/gnix_xpmem.h \ - prov/gni/include/gnix_wait.h - - -if HAVE_CRITERION -bin_PROGRAMS += prov/gni/test/gnitest -bin_SCRIPTS += prov/gni/test/run_gnitest -nodist_prov_gni_test_gnitest_SOURCES = \ - prov/gni/test/allocator.c \ - prov/gni/test/api.c \ - prov/gni/test/api_cq.c \ - prov/gni/test/api_cntr.c \ - prov/gni/test/av.c \ - prov/gni/test/auth_key.c \ - prov/gni/test/bitmap.c \ - prov/gni/test/buddy_allocator.c \ - prov/gni/test/cancel.c \ - prov/gni/test/cntr.c \ - prov/gni/test/cm.c \ - prov/gni/test/common.c \ - prov/gni/test/cq.c \ - prov/gni/test/datagram.c \ - prov/gni/test/dlist-utils.c \ - prov/gni/test/dom.c \ - prov/gni/test/ep.c \ - prov/gni/test/eq.c \ - prov/gni/test/fabric.c \ - prov/gni/test/fi_addr_str.c \ - prov/gni/test/freelist.c \ - prov/gni/test/hashtable.c \ - prov/gni/test/mr.c \ - prov/gni/test/mr_notifier.c \ - prov/gni/test/nic.c \ - prov/gni/test/pmi_utils.c \ - prov/gni/test/queue.c \ - prov/gni/test/rdm_atomic.c \ - prov/gni/test/rdm_fi_pcd_trecv_msg.c \ - prov/gni/test/rdm_dgram_rma.c \ - prov/gni/test/rdm_dgram_stx.c \ - prov/gni/test/rdm_rx_overrun.c \ - 
prov/gni/test/rdm_sr.c \ - prov/gni/test/rdm_addr_str_sr.c \ - prov/gni/test/rdm_multi_recv.c \ - prov/gni/test/rdm_tagged_sr.c \ - prov/gni/test/sep.c \ - prov/gni/test/shmem.c \ - prov/gni/test/smrn.c \ - prov/gni/test/tags.c \ - prov/gni/test/utils.c \ - prov/gni/test/vc.c \ - prov/gni/test/vc_lookup.c \ - prov/gni/test/vector.c \ - prov/gni/test/wait.c - -prov_gni_test_gnitest_LDFLAGS = $(CRAY_PMI_LIBS) $(gnitest_LDFLAGS) -static -prov_gni_test_gnitest_CPPFLAGS = $(AM_CPPFLAGS) $(CRAY_PMI_CFLAGS) $(CRAY_XPMEM_CFLAGS) $(gnitest_CPPFLAGS) -prov_gni_test_gnitest_LDADD = $(gnitest_LIBS) $(linkback) -endif HAVE_CRITERION - -if HAVE_GNI_DL -pkglib_LTLIBRARIES += libgnix-fi.la -libgnix_fi_la_CPPFLAGS = $(AM_CPPFLAGS) $(gni_CPPFLAGS) -libgnix_fi_la_SOURCES = $(_gni_files) $(_gni_headers) $(common_srcs) -libgnix_fi_la_LDFLAGS = \ - $(gni_LDFLAGS) \ - -module -avoid-version -shared -export-dynamic -libgnix_fi_la_LIBADD = $(linkback) -libgnix_fi_la_DEPENDENCIES = $(linkback) -else !HAVE_GNI_DL -src_libfabric_la_SOURCES += $(_gni_files) $(_gni_headers) -src_libfabric_la_CPPFLAGS += $(gni_CPPFLAGS) -src_libfabric_la_LDFLAGS += $(gni_LDFLAGS) -src_libfabric_la_LIBADD += $(gni_LIBS) -endif !HAVE_GNI_DL - -rdmainclude_HEADERS += \ - prov/gni/include/fi_ext_gni.h - -prov_install_man_pages += man/man7/fi_gni.7 - -endif HAVE_GNI - -prov_dist_man_pages += man/man7/fi_gni.7 diff --git a/prov/gni/README.md b/prov/gni/README.md deleted file mode 100644 index f77427239e8..00000000000 --- a/prov/gni/README.md +++ /dev/null @@ -1,21 +0,0 @@ -# OFI libfabric GNI provider - -The GNI provider is a research prototype provider layer for OFI -libfabric running on Cray XC (TM) systems. It is being developed as a -collaboration between Los Alamos National Laboratory and Cray Inc. -The goals of the collaboration are to enable libfabric clients such as -OpenMPI to experiment at scale on today's hardware. 
As such, we have -initially tried to address requirements of MPI, SHMEM and PGAS -language and library implementation. As with any provider, there -features that are difficult to support efficiently (e.g., -FI_MR_SCALABLE) and have been omitted for the time being. - -Bugs should be filed as GitHub issues on the ofi-cray/libfabric-cray -repository (https://github.com/ofi-cray/libfabric-cray). - -## For Developers - -See the wiki pages at the ofi-cray/libfabric-cray repository -(https://github.com/ofi-cray/libfabric-cray) for tips and other useful -information. - diff --git a/prov/gni/configure.m4 b/prov/gni/configure.m4 deleted file mode 100644 index 44a8e444df1..00000000000 --- a/prov/gni/configure.m4 +++ /dev/null @@ -1,209 +0,0 @@ -dnl -dnl Copyright (c) 2015-2019 Cray Inc. All rights reserved. -dnl Copyright (c) 2015-2018 Los Alamos National Security, LLC. -dnl All rights reserved. -dnl Copyright (c) 2021 Triad National Security, LLC. All rights -dnl reserved. -dnl -dnl This software is available to you under a choice of one of two -dnl licenses. You may choose to be licensed under the terms of the GNU -dnl General Public License (GPL) Version 2, available from the file -dnl COPYING in the main directory of this source tree, or the -dnl BSD license below: -dnl -dnl Redistribution and use in source and binary forms, with or -dnl without modification, are permitted provided that the following -dnl conditions are met: -dnl -dnl - Redistributions of source code must retain the above -dnl copyright notice, this list of conditions and the following -dnl disclaimer. -dnl -dnl - Redistributions in binary form must reproduce the above -dnl copyright notice, this list of conditions and the following -dnl disclaimer in the documentation and/or other materials -dnl provided with the distribution. 
-dnl -dnl THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -dnl "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -dnl LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -dnl FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -dnl COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -dnl INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -dnl BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -dnl LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -dnl CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -dnl LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -dnl ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -dnl POSSIBILITY OF SUCH DAMAGE. -dnl - -dnl Configury specific to the libfabrics GNI provider - -dnl Called to configure this provider - -m4_include([config/fi_pkg.m4]) - -AC_DEFUN([FI_GNI_CONFIGURE],[ - # Determine if we can support the gni provider - # have to pull in pkg.m4 manually - ugni_lib_happy=0 - udreg_lib_happy=0 - gni_header_happy=0 - alps_lli_happy=0 - alps_util_happy=0 - have_criterion=false - criterion_tests_present=true - gni_CPPFLAGS= - gni_LDFLAGS= - gnitest_CPPFLAGS= - gnitest_LDFLAGS= - gnitest_LIBS= - kdreg_happy=0 - - - AC_ARG_ENABLE([ugni-static], - [AS_HELP_STRING([--enable-ugni-static], - [Enable static linking with uGNI. 
Recommended for KNL.])], - ) - - AS_IF([test x"$enable_gni" != x"no"], - [FI_PKG_CHECK_MODULES([CRAY_GNI_HEADERS], [cray-gni-headers], - [gni_header_happy=1 - gni_CPPFLAGS="$CRAY_GNI_HEADERS_CFLAGS $gni_CPPFLAGS" - ], - [gni_header_happy=0]) - ]) - - AS_IF([test "$gni_header_happy" -eq 1], - [FI_PKG_CHECK_MODULES_STATIC([CRAY_UGNI], [cray-ugni], - [ugni_lib_happy=1 - gni_CPPFLAGS=$CRAY_UGNI_CFLAGS - gni_LDFLAGS=$CRAY_UGNI_LIBS - ], - [ugni_lib_happy=0]) - - AS_IF([test x"$enable_ugni_static" = x"yes" && test $ugni_lib_happy -eq 1], - [gni_LDFLAGS=$(echo $gni_LDFLAGS | sed -e 's/lugni/l:libugni.a/')],[]) - - FI_PKG_CHECK_MODULES_STATIC([CRAY_ALPS_LLI], [cray-alpslli], - [alps_lli_happy=1 - gni_CPPFLAGS="$CRAY_ALPS_LLI_CFLAGS $gni_CPPFLAGS" - gni_LDFLAGS="$CRAY_ALPS_LLI_LIBS $gni_LDFLAGS" - ], - [alps_lli_happy=0]) - FI_PKG_CHECK_MODULES([CRAY_ALPS_UTIL], [cray-alpsutil], - [alps_util_happy=1 - gni_CPPFLAGS="$CRAY_ALPS_UTIL_CFLAGS $gni_CPPFLAGS" - gni_LDFLAGS="$CRAY_ALPS_UTIL_LIBS $gni_LDFLAGS" - ], - [alps_util_happy=0]) - FI_PKG_CHECK_MODULES([CRAY_UDREG], [cray-udreg], - [udreg_lib_happy=1 - gni_CPPFLAGS="-DHAVE_UDREG $CRAY_UDREG_CFLAGS $gni_CPPFLAGS" - gni_LDFLAGS="$CRAY_UDREG_LIBS $gni_LDFLAGS" - ], - [udreg_lib_happy=0]) - - CPPFLAGS_SAVE=$CPPFLAGS - CPPFLAGS="$gni_CPPFLAGS $CPPFLAGS" - AC_CHECK_TYPES([gni_ct_cqw_post_descriptor_t], [], - [AC_MSG_WARN([GNI provider requires CLE 5.2.UP04 or higher. 
Disabling gni provider.]) - gni_header_happy=0 - ], - [[#include "gni_pub.h"]]) - CPPFLAGS=$CPPFLAGS_SAVE - - AS_IF([test -d $srcdir/prov/gni/test], - [AC_ARG_WITH([criterion], [AS_HELP_STRING([--with-criterion], - [Location for criterion unit testing framework])])], - [criterion_tests_present=false]) - - if test "$with_criterion" != "" && test "$with_criterion" != "no"; then - if test "$enable_direct" != "" && test "$enable_direct" != "no"; then - gnitest_CPPFLAGS="-I$srcdir/prov/gni/include" - fi - - AS_IF([test "$criterion_tests_present" = "true"], - [AC_MSG_CHECKING([criterion path]) - if test -d "$with_criterion"; then - AC_MSG_RESULT([yes]) - gnitest_CPPFLAGS="-I$with_criterion/include -DHAVE_UDREG $CRAY_UDREG_CFLAGS $gnitest_CPPFLAGS" - gnitest_LIBS="-lcriterion -ludreg $gnitest_LIBS" - - if test -d "$with_criterion/lib"; then - gnitest_LDFLAGS="$CRAY_ALPS_LLI_STATIC_LIBS -L$with_criterion/lib -Wl,-rpath=$with_criterion/lib $gnitest_LDFLAGS" - have_criterion=true - elif test -d "$with_criterion/lib64"; then - gnitest_LDFLAGS="$CRAY_ALPS_LLI_STATIC_LIBS -L$with_criterion/lib64 -Wl,-rpath=$with_criterion/lib64 $gnitest_LDFLAGS" - have_criterion=true - else - have_criterion=false - fi - - gnitest_LDFLAGS="$CRAY_UDREG_LIBS $gnitest_LDFLAGS" - FI_PKG_CHECK_MODULES([CRAY_PMI], [cray-pmi], - [], - [have_criterion=false]) - else - AC_MSG_RESULT([no]) - AC_MSG_ERROR([criterion requested but invalid path given]) - fi], - [AC_MSG_ERROR([criterion requested tests not available])]) - fi - - AC_CHECK_DECL([HAVE_ATOMICS], - [], - [cc_version=`$CC --version | head -n1` - AC_MSG_WARN(["$cc_version" doesn't support native atomics. Disabling GNI provider.]) - ugni_lib_happy=0]) - - - -dnl kdreg configury handling: -dnl First check to see config line has --with-kdreg arg. If yes and something other than -dnl no, use the old way, otherwise if with_kdreg is not equal to no, try pkg-config method. 
-dnl Note kdreg only supplies an include file, no library - - AC_ARG_WITH([kdreg], [AS_HELP_STRING([--with-kdreg], - [Install directory for kdreg headers])]) - - AS_IF([test "$with_kdreg" != "" && test "$with_kdreg" != "no"], - [gni_CPPFLAGS="-I$with_kdreg/include $gni_CPPFLAGS" - gnitest_CPPFLAGS="-I$with_kdreg/include $gnitest_CPPFLAGS" - kdreg_happy=1], - [AS_IF([test "$with_kdreg" != "no"], - [FI_PKG_CHECK_MODULES([CRAY_KDREG], [cray-kdreg], - [kdreg_happy=1 - gni_CPPFLAGS="$CRAY_KDREG_CFLAGS $gni_CPPFLAGS" - gnitest_CPPFLAGS="$CRAY_KDREG_CFLAGS $gnitest_CPPFLAGS"], - [kdreg_happy=0])])]) - -dnl -dnl double check that kdreg_pub.h is available -dnl - AS_IF([test "$kdreg_happy" = "1"], - [CPPFLAGS="$CPPFLAGS $gni_CPPFLAGS" - AC_CHECK_HEADER([kdreg_pub.h], - [], - [kdreg_happy=0])]) - AC_DEFINE_UNQUOTED([HAVE_KDREG],[$kdreg_happy], [Define to 1 if kdreg available]) - - ]) - - - AM_CONDITIONAL([HAVE_CRITERION], [test "x$have_criterion" = "xtrue"]) - AS_IF([test "x$have_criterion" = "xtrue"], - [AC_DEFINE_UNQUOTED([HAVE_CRITERION], [1], [Define to 1 if criterion requested and available])], - [AC_DEFINE_UNQUOTED([HAVE_CRITERION], [0], [Define to 1 if criterion requested and available])]) - - AC_SUBST(gni_CPPFLAGS) - AC_SUBST(gni_LDFLAGS) - AC_SUBST(gnitest_CPPFLAGS) - AC_SUBST(gnitest_LDFLAGS) - AC_SUBST(gnitest_LIBS) - - AS_IF([test $gni_header_happy -eq 1 -a $ugni_lib_happy -eq 1 \ - -a $alps_lli_happy -eq 1 -a $alps_util_happy -eq 1 \ - -a $udreg_lib_happy -eq 1], [$1], [$2]) -]) diff --git a/prov/gni/contrib/gnitest.supp b/prov/gni/contrib/gnitest.supp deleted file mode 100644 index cc1ef4a7229..00000000000 --- a/prov/gni/contrib/gnitest.supp +++ /dev/null @@ -1,578 +0,0 @@ -# -# These are from Criterion and should be fixed in a subsequent version -# - -{ - calloc_criterion_run_all_tests - Memcheck:Leak - match-leak-kinds: definite - fun:calloc - fun:_dl_allocate_tls - fun:pthread_create@@GLIBC_2.2.5 - fun:init_proc_compat - fun:criterion_run_all_tests_impl - 
fun:criterion_run_all_tests - fun:main -} - -{ - malloc_criterion_runn_all_tests - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:alloc_entry - fun:smalloc_impl - fun:smalloc - fun:test_stats_init - fun:run_next_test - fun:run_tests_async - fun:criterion_run_all_tests_impl - fun:criterion_run_all_tests - fun:main -} - -# -# These are leaks from getifaddrs in the sockets provider. -# -{ - getifaddrs_criterion_run_all_tests - Memcheck:Leak - match-leak-kinds: definite - fun:calloc - fun:getifaddrs_internal - fun:getifaddrs -} - - -# -# This is an actual memory leak in uGNI. A bug has been submitted -# -{ - cq_vector_wait_event_leak - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:cq_vector_wait_event - ... -} - -# -# These are benign -# -{ - dgram_handle_alloc - Memcheck:Leak - match-leak-kinds: definite - fun:calloc - fun:_dl_allocate_tls - fun:pthread_create@@GLIBC_2.2.5 - fun:_gnix_dgram_hndl_alloc - fun:_gnix_cm_nic_alloc - fun:_gnix_ep_nic_init - fun:gnix_ep_open - fun:fi_endpoint -} - -# -# This is due to not initializing unused attr fields in struct -# gnix_pep_sock_connreq -# -# { -# fi_connect_memcheck -# Memcheck:Param -# write(buf) -# obj:/lib64/libpthread-2.11.3.so -# fun:gnix_connect -# fun:fi_connect -# ... -# } - -# -# This is due to not initializing all or part the eqe_buf field in -# struct gnix_pep_sock_connresp. This is fine, as the amount data -# needed is given by the cm_data_len field. -# -{ - fi_accept_memcheck - Memcheck:Param - write(buf) - obj:/lib64/libpthread-2.11.3.so - fun:gnix_accept - fun:fi_accept - ... -} - -# -# These are from specifying an additional .init function for a test -# (there's no way to specify a replacement .init function with -# Criterion). -# -{ - dg_allocation::dgram_wc_post_exchg_manual-1 - Memcheck:Leak - match-leak-kinds: definite - fun:calloc - fun:fi_allocinfo_internal - fun:fi_dupinfo@@FABRIC_1.0 - fun:fi_allocinfo - fun:dg_setup - ... 
-} - -{ - dg_allocation::dgram_wc_post_exchg_manual-2 - Memcheck:Leak - match-leak-kinds: definite - fun:calloc - fun:fi_allocinfo_internal - fun:fi_dupinfo@@FABRIC_1.0 - fun:fi_allocinfo - fun:gnix_getinfo - fun:fi_getinfo@@FABRIC_1.0 - fun:dg_setup - ... -} - -{ - cq_msg::multi_sread_setup-1 - Memcheck:Leak - match-leak-kinds: definite - fun:calloc - fun:gnix_fabric_open - fun:setup - fun:cq_wait_none_setup - ... -} - -{ - cq_msg::multi_sread_setup-2 - Memcheck:Leak - match-leak-kinds: definite - fun:calloc - fun:fi_allocinfo_internal - fun:fi_dupinfo@@FABRIC_1.0 - fun:fi_allocinfo - fun:setup - fun:cq_wait_none_setup - ... -} - -{ - cq_msg::multi_sread_setup-3 - Memcheck:Leak - match-leak-kinds: definite - fun:calloc - fun:fi_allocinfo_internal - fun:fi_dupinfo@@FABRIC_1.0 - fun:fi_allocinfo - fun:gnix_getinfo - fun:fi_getinfo@@FABRIC_1.0 - fun:setup - fun:cq_wait_none_setup - ... -} - -{ - cq_msg::multi_sread_setup-4 - Memcheck:Leak - match-leak-kinds: definite - fun:calloc - fun:gnix_cq_open - fun:fi_cq_open - fun:cq_create - fun:criterion_internal_test_setup - ... -} - -# -# This is from the av multithreaded tests that pass void * args via pthread-create -# -{ - atomic_cas_weak_continuous_remove - Memcheck:Cond - fun:atomic_cas_weak - fun:continuous_remove - fun:start_thread -} - -# -# This is from reading kernel initialized memory -# -{ - gnix_notifier_get_event - Memcheck:Addr8 - fun:_gnix_notifier_get_event - ... -} - -# -# These are due to writing a subset of the bytes in a word, but the -# compiler reading a whole word in the generated code. -# -{ - rdm_rma_readmsg_check_data - Memcheck:Cond - fun:check_data - fun:do_readmsg_more - fun:do_read_alignment_more - ... -} - -{ - rdm_rma_read_alignment_check_data - Memcheck:Cond - fun:check_data - fun:do_read_buf - fun:do_read_alignment - fun:xfer_for_each_size - ... -} - -{ - rdm_src_check_data_multirecv - Memcheck:Cond - fun:rdm_sr_check_data - fun:do_multirecv - fun:rdm_sr_xfer_for_each_size - ... 
-} - -{ - rdm_sr_check_data_multirecv_send_first - Memcheck:Cond - fun:rdm_sr_check_data - fun:do_multirecv_send_first - fun:rdm_sr_xfer_for_each_size - ... -} - -{ - rdm_sr_check_data_sendrecv_buf - Memcheck:Cond - fun:rdm_sr_check_data - fun:do_sendrecv_buf - fun:do_sendrecv_alignment - fun:rdm_sr_xfer_for_each_size - ... -} - -# -# These are from uGNI itself -# -{ - ioctl_cq_create - Memcheck:Param - ioctl(generic) - fun:ioctl - fun:cq_create - ... -} -{ - ioctl_GNI_MemRegister - Memcheck:Param - ioctl(generic) - fun:ioctl - fun:GNI_MemRegister - ... -} -{ - ioctl_GNI_EpPostDataWId - Memcheck:Param - ioctl(generic) - fun:ioctl - fun:GNI_EpPostDataWId - ... -} -{ - ioctl_gni_fma_assign - Memcheck:Param - ioctl(generic) - fun:ioctl - fun:gni_fma_assign - ... -} -{ - GNI_EpPostDataTestById - Memcheck:Addr4 - fun:GNI_EpPostDataTestById - ... -} -{ - GNI_PostDataProbeById - Memcheck:Addr4 - fun:GNI_PostDataProbeById - ... -} - -{ - GNI_CqGetEvent - Memcheck:Addr8 - fun:GNI_CqGetEvent - ... -} - -{ - GNII_DlaProgress - Memcheck:Addr8 - fun:GNII_DlaProgress - ... -} - -{ - GNII_DLA_PROGRESS_NOLOCK - Memcheck:Addr8 - fun:GNII_DLA_PROGRESS_NOLOCK - ... -} - -{ - gni_fma_assign - Memcheck:Addr4 - fun:gni_fma_assign - ... -} - -{ - GNII_POST_FMA_GET - Memcheck:Addr8 - fun:GNII_POST_FMA_GET - ... -} - -{ - GNII_FmaGetWithMode - Memcheck:Addr4 - fun:GNII_FmaGetWithMode - ... -} - -{ - GNII_FmaGetWithMode - Memcheck:Addr8 - fun:GNII_FmaGetWithMode - ... -} - -{ - GNII_GenAllocSeqid - Memcheck:Addr8 - fun:GNII_GenAllocSeqid - ... -} - -{ - GNII_PostRdma - Memcheck:Addr4 - fun:GNII_PostRdma - ... -} - -{ - GNII_PostRdma - Memcheck:Addr8 - fun:GNII_PostRdma - ... -} - -{ - GNII_PostFlbte - Memcheck:Addr4 - fun:GNII_PostFlbte - ... -} - -{ - GNII_PostFlbte - Memcheck:Addr8 - fun:GNII_PostFlbte - ... -} - -{ - GNII_FmaPut - Memcheck:Addr8 - fun:GNII_FmaPut - ... -} - -{ - GNII_FmaPut - Memcheck:Addr4 - fun:GNII_FmaPut - ... 
-} - -{ - GNII_SmsgSend - Memcheck:Addr8 - fun:GNII_SmsgSend - ... -} - -{ - GNII_SmsgSend - Memcheck:Addr4 - fun:GNII_SmsgSend - ... -} - -{ - GNII_SmsgSend - Memcheck:Addr2 - fun:GNII_SmsgSend - ... -} - -{ - GNII_SmsgSend - Memcheck:Addr1 - fun:GNII_SmsgSend - ... -} - -{ - GNII_POST_FMA_PUT - Memcheck:Addr8 - fun:GNII_POST_FMA_PUT - ... -} - -{ - GNII_POST_FMA_PUT - Memcheck:Addr4 - fun:GNII_POST_FMA_PUT - ... -} - -{ - GNII_POST_FMA_PUT - Memcheck:Addr2 - fun:GNII_POST_FMA_PUT - ... -} - -{ - GNII_POST_FMA_PUT - Memcheck:Addr1 - fun:GNII_POST_FMA_PUT - ... -} - -{ - GNI_PostFma - Memcheck:Addr8 - fun:GNI_PostFma - ... -} - -{ - GNII_POST_AMO - Memcheck:Addr4 - fun:GNII_POST_AMO - ... -} - -{ - GNII_POST_AMO - Memcheck:Addr8 - fun:GNII_POST_AMO - ... -} - -{ - GNI_PostFma - Memcheck:Addr8 - fun:GNI_PostFma - ... -} - -{ - return_back_credits - Memcheck:Addr8 - fun:return_back_credits - ... -} - -{ - GNI_CqTestEvent - Memcheck:Addr8 - fun:GNI_CqTestEvent - ... -} - -{ - cq_vector_wait_event - Memcheck:Addr4 - fun:cq_vector_wait_event - ... -} - -{ - cq_vector_wait_event - Memcheck:Addr8 - fun:cq_vector_wait_event - ... -} - -{ - ioctl_cq_vector_wait_event - Memcheck:Param - ioctl(generic) - fun:ioctl - ... -} - -{ - GNII_CqPeek - Memcheck:Addr8 - fun:GNII_CqPeek - ... -} - -{ - GNII_PostCqWrite - Memcheck:Addr8 - fun:GNII_PostCqWrite - fun:GNI_PostCqWrite - ... -} - -{ - GNII_PostCqWrite - Memcheck:Addr4 - fun:GNII_PostCqWrite - fun:GNI_PostCqWrite - ... -} - - -{ - UDREG_BUG_841637_1 - Memcheck:Leak - match-leak-kinds: reachable - fun:malloc - fun:ckalloc - fun:avl_insert - ... -} - -{ - UDREG_BUG_841637_2 - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:ckalloc - fun:avlinit - fun:UDREG_CacheCreate - ... -} - -{ - UDRED_BUG_841637_3 - Memcheck:Leak - match-leak-kinds: reachable - fun:malloc - fun:UDREG_Register - ... 
-} - diff --git a/prov/gni/gnix.map b/prov/gni/gnix.map deleted file mode 100644 index 29a5758aa3f..00000000000 --- a/prov/gni/gnix.map +++ /dev/null @@ -1,108 +0,0 @@ -/* - * used for exporting GNI provider - * symbols when building to support FI_DIRECT - */ - gnix_av_straddr; - gnix_cq_strerror; - gnix_eq_strerror; - gnix_accept; - gnix_av_bind; - gnix_av_insertsvc; - gnix_av_insertsym; - gnix_av_open; - gnix_cntr_open; - gnix_cntr_wait; - gnix_connect; - gnix_cq_open; - gnix_domain_bind; - gnix_domain_open; - gnix_ep_atomic_compwrite; - gnix_ep_atomic_compwritemsg; - gnix_ep_atomic_compwritev; - gnix_ep_atomic_inject; - gnix_ep_atomic_readwrite; - gnix_ep_atomic_readwritemsg; - gnix_ep_atomic_readwritev; - gnix_ep_atomic_write; - gnix_ep_atomic_writemsg; - gnix_ep_atomic_writev; - gnix_ep_msg_injectdata; - gnix_ep_open; - gnix_ep_send; - gnix_ep_senddata; - gnix_ep_tinjectdata; - gnix_ep_tsenddata; - gnix_eq_open; - gnix_eq_read; - gnix_eq_sread; - gnix_getpeer; - gnix_listen; - gnix_mr_bind; - gnix_mr_reg; - gnix_pep_open; - gnix_pep_bind; - gnix_poll_add; - gnix_poll_del; - gnix_poll_open; - gnix_poll_poll; - gnix_reject; - gnix_rma_inject_write; - gnix_rma_inject_writedata; - gnix_rma_read; - gnix_rma_readmsg; - gnix_rma_readv; - gnix_rma_write; - gnix_rma_writedata; - gnix_rma_writemsg; - gnix_rma_writev; - gnix_scalable_ep_bind; - gnix_scalable_ep_open; - gnix_setname; - gnix_shutdown; - gnix_srx_context; - gnix_stx_open; - gnix_wait_open; - gnix_wait_wait; - gnix_av_insert; - gnix_av_lookup; - gnix_av_remove; - gnix_cntr_add; - gnix_cntr_set; - gnix_cq_signal; - gnix_ep_atomic_valid; - gnix_ep_bind; - gnix_ep_cmp_atomic_valid; - gnix_ep_control; - gnix_ep_fetch_atomic_valid; - gnix_ep_getopt; - gnix_ep_setopt; - gnix_eq_close; - gnix_eq_control; - gnix_getname; - gnix_rx_context; - gnix_tx_context; - gnix_cq_read; - gnix_cq_readerr; - gnix_cq_readfrom; - gnix_cq_sread; - gnix_cq_sreadfrom; - gnix_ep_cancel; - gnix_ep_msg_inject; - gnix_ep_recv; - 
gnix_ep_recvmsg; - gnix_ep_recvv; - gnix_ep_rx_size_left; - gnix_ep_sendmsg; - gnix_ep_sendv; - gnix_ep_tinject; - gnix_ep_trecv; - gnix_ep_trecvmsg; - gnix_ep_trecvv; - gnix_ep_tsend; - gnix_ep_tsendmsg; - gnix_ep_tsendv; - gnix_ep_tx_size_left; - gnix_eq_readerr; - gnix_eq_write; - gnix_cntr_read; - gnix_cntr_readerr; diff --git a/prov/gni/include/fi_ext_gni.h b/prov/gni/include/fi_ext_gni.h deleted file mode 100644 index 26939de34b8..00000000000 --- a/prov/gni/include/fi_ext_gni.h +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _FI_EXT_GNI_H_ -#define _FI_EXT_GNI_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#if HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include - -#define FI_GNI_DOMAIN_OPS_1 "domain ops 1" -typedef enum dom_ops_val { GNI_MSG_RENDEZVOUS_THRESHOLD, - GNI_RMA_RDMA_THRESHOLD, - GNI_CONN_TABLE_INITIAL_SIZE, - GNI_CONN_TABLE_MAX_SIZE, - GNI_CONN_TABLE_STEP_SIZE, - GNI_VC_ID_TABLE_CAPACITY, - GNI_MBOX_PAGE_SIZE, - GNI_MBOX_NUM_PER_SLAB, - GNI_MBOX_MAX_CREDIT, - GNI_MBOX_MSG_MAX_SIZE, - GNI_RX_CQ_SIZE, - GNI_TX_CQ_SIZE, - GNI_MAX_RETRANSMITS, - GNI_ERR_INJECT_COUNT, - GNI_MR_CACHE_LAZY_DEREG, - GNI_MR_CACHE, - GNI_MR_UDREG_REG_LIMIT, - GNI_MR_SOFT_REG_LIMIT, - GNI_MR_HARD_REG_LIMIT, - GNI_MR_HARD_STALE_REG_LIMIT, - GNI_XPMEM_ENABLE, - GNI_DGRAM_PROGRESS_TIMEOUT, - GNI_EAGER_AUTO_PROGRESS, - GNI_NUM_DOM_OPS -} dom_ops_val_t; - -#define FI_GNI_EP_OPS_1 "ep ops 1" -typedef enum ep_ops_val { - GNI_HASH_TAG_IMPL = 0, - GNI_NUM_EP_OPS, -} ep_ops_val_t; - -#define FI_GNI_FAB_OPS_1 "fab ops 1" -typedef enum fab_ops_val { - GNI_WAIT_THREAD_SLEEP = 0, - GNI_DEFAULT_USER_REGISTRATION_LIMIT, - GNI_DEFAULT_PROV_REGISTRATION_LIMIT, - GNI_WAIT_SHARED_MEMORY_TIMEOUT, - GNI_NUM_FAB_OPS, -} fab_ops_val_t; - -/* per domain gni provider specific ops */ -struct fi_gni_ops_domain { - int (*set_val)(struct fid *fid, dom_ops_val_t t, void *val); - int (*get_val)(struct fid *fid, dom_ops_val_t t, void *val); - int (*flush_cache)(struct fid *fid); -}; - -#include -enum gnix_native_amo_types { - GNIX_NAMO_AX = 0x20, - GNIX_NAMO_AX_S, - GNIX_NAMO_FAX, - GNIX_NAMO_FAX_S, -}; - -struct fi_gni_ops_ep { - int (*set_val)(struct fid *fid, ep_ops_val_t t, void *val); - int (*get_val)(struct fid *fid, ep_ops_val_t t, void *val); - size_t (*native_amo)(struct fid_ep *ep, const void *buf, size_t count, - void *desc, void *result, void *result_desc, - /*void *desc,*/ fi_addr_t dest_addr, uint64_t addr, - uint64_t key, enum fi_datatype datatype, - int req_type, - void *context); -}; - 
-/* per domain parameters */ -struct gnix_ops_domain { - uint32_t msg_rendezvous_thresh; - uint32_t rma_rdma_thresh; - uint32_t ct_init_size; - uint32_t ct_max_size; - uint32_t ct_step; - uint32_t vc_id_table_capacity; - uint32_t mbox_page_size; - uint32_t mbox_num_per_slab; - uint32_t mbox_maxcredit; - uint32_t mbox_msg_maxsize; - uint32_t rx_cq_size; - uint32_t tx_cq_size; - uint32_t max_retransmits; - int32_t err_inject_count; - bool xpmem_enabled; - uint32_t dgram_progress_timeout; - uint32_t eager_auto_progress; -}; - -struct fi_gni_ops_fab { - int (*set_val)(struct fid *fid, fab_ops_val_t t, void *val); - int (*get_val)(struct fid *fid, fab_ops_val_t t, void *val); -}; - -typedef enum gnix_auth_key_opt { - GNIX_USER_KEY_LIMIT = 0, - GNIX_PROV_KEY_LIMIT, - GNIX_TOTAL_KEYS_NEEDED, - GNIX_USER_KEY_MAX_PER_RANK, - GNIX_MAX_AUTH_KEY_OPTS, -} gnix_auth_key_opt_t; - -struct gnix_auth_key_attr { - int user_key_limit; - int prov_key_limit; -}; - -enum { - GNIX_AKT_RAW = 0, - GNIX_MAX_AKT_TYPES, -}; - -struct fi_gni_raw_auth_key { - uint32_t protection_key; -}; - -struct fi_gni_auth_key { - uint32_t type; - union { - struct fi_gni_raw_auth_key raw; - }; -}; - -extern uint8_t* gnix_default_auth_key; -#define GNIX_PROV_DEFAULT_AUTH_KEY gnix_default_auth_key -#define GNIX_PROV_DEFAULT_AUTH_KEYLEN sizeof(struct fi_gni_auth_key) - -#define FI_GNI_FAB_OPS_2 "fab ops 2" -struct fi_gni_auth_key_ops_fab { - int (*set_val)( - uint8_t *auth_key, - size_t auth_key_size, - gnix_auth_key_opt_t opt, - void *val); - int (*get_val)( - uint8_t *auth_key, - size_t auth_key_size, - gnix_auth_key_opt_t opt, - void *val); -}; - -#ifdef __cplusplus -} -#endif - -#endif /* _FI_EXT_GNI_H_ */ diff --git a/prov/gni/include/gnix.h b/prov/gni/include/gnix.h deleted file mode 100644 index c96fb029699..00000000000 --- a/prov/gni/include/gnix.h +++ /dev/null @@ -1,1204 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2015-2018 Los Alamos National Security, LLC. 
- * All rights reserved. - * Copyright (c) 2015-2016 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2020 Triad National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _GNIX_H_ -#define _GNIX_H_ - -#if HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#ifdef HAVE_UDREG -#include -#endif - -#include "gni_pub.h" -#include "gnix_util.h" -#include "gnix_freelist.h" -#include "gnix_mr.h" -#include "gnix_cq.h" -#include "fi_ext_gni.h" -#include "gnix_tags.h" -#include "gnix_mr_cache.h" -#include "gnix_mr_notifier.h" -#include "gnix_nic.h" -#include "gnix_auth_key.h" - -#define GNI_MAJOR_VERSION 1 -#define GNI_MINOR_VERSION 1 - -/* - * useful macros - */ -#ifndef FLOOR -#define FLOOR(a, b) ((long long)(a) - (((long long)(a)) % (b))) -#endif - -#ifndef CEILING -#define CEILING(a, b) ((long long)(a) <= 0LL ? 0 : (FLOOR((a)-1, b) + (b))) -#endif - -#ifndef IN -#define IN -#endif - -#ifndef OUT -#define OUT -#endif - -#ifndef INOUT -#define INOUT -#endif - -#ifndef compiler_barrier -#define compiler_barrier() asm volatile ("" ::: "memory") -#endif - -#define GNIX_MAX_MSG_IOV_LIMIT 8 -#define GNIX_MAX_RMA_IOV_LIMIT 1 -#define GNIX_MAX_ATOMIC_IOV_LIMIT 1 -#define GNIX_ADDR_CACHE_SIZE 5 - -/* - * GNI GET alignment - */ - -#define GNI_READ_ALIGN 4 -#define GNI_READ_ALIGN_MASK (GNI_READ_ALIGN - 1) - -/* - * GNI IOV GET alignment - * - * We always pull 4byte chucks for unaligned GETs. To prevent stomping on - * someone else's head or tail data, each segment must be four bytes - * (i.e. GNI_READ_ALIGN bytes). 
- * - * Note: "* 2" for head and tail - */ -#define GNIX_INT_TX_BUF_SZ (GNIX_MAX_MSG_IOV_LIMIT * GNI_READ_ALIGN * 2) - -/* - * Flags - * The 64-bit flag field is used as follows: - * 1-grow up common (usable with multiple operations) - * 59-grow down operation specific (used for single call/class) - * 60 - 63 provider specific - */ - -#define GNIX_SUPPRESS_COMPLETION (1ULL << 60) /* TX only flag */ - -#define GNIX_RMA_RDMA (1ULL << 61) /* RMA only flag */ -#define GNIX_RMA_INDIRECT (1ULL << 62) /* RMA only flag */ -#define GNIX_RMA_CHAINED (1ULL << 63) /* RMA only flag */ - -#define GNIX_MSG_RENDEZVOUS (1ULL << 61) /* MSG only flag */ -#define GNIX_MSG_GET_TAIL (1ULL << 62) /* MSG only flag */ - -/* - * Cray gni provider supported flags for fi_getinfo argument for now, needs - * refining (see fi_getinfo.3 man page) - */ -#define GNIX_SUPPORTED_FLAGS (FI_NUMERICHOST | FI_SOURCE) - -#define GNIX_DEFAULT_FLAGS (0) - -/* - * Cray gni provider will try to support the fabric interface capabilities (see - * fi_getinfo.3 man page) - * for RDM and MSG (future) endpoint types. - */ - -/* - * See capabilities section in fi_getinfo.3. - */ - -#define GNIX_DOM_CAPS (FI_REMOTE_COMM) - -/* Primary capabilities. Each must be explicitly requested (unless the full - * set is requested by setting input hints->caps to NULL). */ -#define GNIX_EP_PRIMARY_CAPS \ - (FI_MSG | FI_RMA | FI_TAGGED | FI_ATOMICS | \ - FI_DIRECTED_RECV | FI_READ | FI_NAMED_RX_CTX | \ - FI_WRITE | FI_SEND | FI_RECV | FI_REMOTE_READ | FI_REMOTE_WRITE) - -/* No overhead secondary capabilities. These can be silently enabled by the - * provider. */ -#define GNIX_EP_SEC_CAPS (FI_MULTI_RECV | FI_TRIGGER | FI_FENCE | FI_LOCAL_COMM | FI_REMOTE_COMM) - -/* Secondary capabilities that introduce overhead. Must be requested. */ -#define GNIX_EP_SEC_CAPS_OH (FI_SOURCE | FI_RMA_EVENT | FI_SOURCE_ERR) - -/* FULL set of capabilities for the provider. 
*/ -#define GNIX_EP_CAPS_FULL (GNIX_EP_PRIMARY_CAPS | \ - GNIX_EP_SEC_CAPS | \ - GNIX_EP_SEC_CAPS_OH) - -/* - * see Operations flags in fi_endpoint.3 - */ -#define GNIX_EP_OP_FLAGS (FI_INJECT | FI_MULTI_RECV | FI_COMPLETION | \ - FI_INJECT_COMPLETE | FI_TRANSMIT_COMPLETE | \ - FI_DELIVERY_COMPLETE) - -/* - * Valid msg transaction input flags. See fi_msg.3. - */ -#define GNIX_SENDMSG_FLAGS (FI_REMOTE_CQ_DATA | FI_COMPLETION | \ - FI_MORE | FI_INJECT | FI_INJECT_COMPLETE | \ - FI_TRANSMIT_COMPLETE | FI_FENCE | FI_TRIGGER) -#define GNIX_RECVMSG_FLAGS (FI_COMPLETION | FI_MORE | FI_MULTI_RECV) -#define GNIX_TRECVMSG_FLAGS \ - (GNIX_RECVMSG_FLAGS | FI_CLAIM | FI_PEEK | FI_DISCARD) - -/* - * Valid rma transaction input flags. See fi_rma.3. - */ -#define GNIX_WRITEMSG_FLAGS (FI_REMOTE_CQ_DATA | FI_COMPLETION | \ - FI_MORE | FI_INJECT | FI_INJECT_COMPLETE | \ - FI_TRANSMIT_COMPLETE | FI_FENCE | FI_TRIGGER) -#define GNIX_READMSG_FLAGS (FI_COMPLETION | FI_MORE | \ - FI_FENCE | FI_TRIGGER) -#define GNIX_ATOMICMSG_FLAGS (FI_COMPLETION | FI_MORE | FI_INJECT | \ - FI_FENCE | FI_TRIGGER) -#define GNIX_FATOMICMSG_FLAGS (FI_COMPLETION | FI_MORE | FI_FENCE | \ - FI_TRIGGER) -#define GNIX_CATOMICMSG_FLAGS (FI_COMPLETION | FI_MORE | FI_FENCE | \ - FI_TRIGGER) - -/* - * Valid completion event flags. See fi_cq.3. - */ -#define GNIX_RMA_COMPLETION_FLAGS (FI_RMA | FI_READ | FI_WRITE) -#define GNIX_AMO_COMPLETION_FLAGS (FI_ATOMIC | FI_READ | FI_WRITE) - -/* - * GNI provider fabric default values - */ -#define GNIX_TX_SIZE_DEFAULT 500 -#define GNIX_RX_SIZE_DEFAULT 500 -/* - * based on the max number of fma descriptors without fma sharing - */ -#define GNIX_RX_CTX_MAX_BITS 8 -#define GNIX_SEP_MAX_CNT (1 << (GNIX_RX_CTX_MAX_BITS - 1)) - -/* - * if this has to be changed, check gnix_getinfo, etc. 
- */ -#define GNIX_MAX_MSG_SIZE ((0x1ULL << 32) - 1) -#define GNIX_CACHELINE_SIZE (64) -#define GNIX_INJECT_SIZE GNIX_CACHELINE_SIZE - -/* - * Cray gni provider will require the following fabric interface modes (see - * fi_getinfo.3 man page) - */ -#define GNIX_FAB_MODES 0 - -/* - * fabric modes that GNI provider doesn't need - */ -#define GNIX_FAB_MODES_CLEAR (FI_MSG_PREFIX | FI_ASYNC_IOV) - -/** - * gnix_address struct - * - * @note - gnix address format - used for fi_send/fi_recv, etc. - * These values are passed to GNI_EpBind - * - * @var device_addr physical NIC address of the remote peer - * @var cdm_id user supplied id of the remote instance - */ -struct gnix_address { - uint32_t device_addr; - uint32_t cdm_id; -}; - -/* - * macro for testing whether a gnix_address value is FI_ADDR_UNSPEC - */ - -#define GNIX_ADDR_UNSPEC(var) (((var).device_addr == -1) && \ - ((var).cdm_id == -1)) -/* - * macro testing for gnix_address equality - */ - -#define GNIX_ADDR_EQUAL(a, b) (((a).device_addr == (b).device_addr) && \ - ((a).cdm_id == (b).cdm_id)) - -#define GNIX_CREATE_CDM_ID 0 - -#define GNIX_EPN_TYPE_UNBOUND (1 << 0) -#define GNIX_EPN_TYPE_BOUND (1 << 1) -#define GNIX_EPN_TYPE_SEP (1 << 2) - -/** - * gnix_ep_name struct - * - * @note - info returned by fi_getname/fi_getpeer - has enough - * side band info for RDM ep's to be able to connect, etc. 
- * - * @var gnix_addr address of remote peer - * @var name_type bound, unbound, scalable endpoint name types - * @var cm_nic_cdm_id id of the cm nic associated with this endpoint - * @var cookie communication domain identifier - * @var rx_ctx_cnt number of contexts associated with this endpoint - * @var unused1/2 for future use - * @var reserved for future use - */ -struct gnix_ep_name { - struct gnix_address gnix_addr; - struct { - uint32_t name_type : 8; - uint32_t cm_nic_cdm_id : 24; - uint32_t cookie; - }; - struct { - uint32_t rx_ctx_cnt : 8; - uint32_t key_offset : 12; - uint32_t unused1 : 12; - uint32_t unused2; - }; - uint64_t reserved[3]; -}; - -/* AV address string revision. */ -#define GNIX_AV_STR_ADDR_VERSION 1 - -/* - * 52 is the number of characters printed out in gnix_av_straddr. - * 1 is for the null terminator - */ -#define GNIX_AV_MAX_STR_ADDR_LEN (52 + 1) - -/* - * 15 is the number of characters for the device addr. - * 1 is for the null terminator - */ -#define GNIX_AV_MIN_STR_ADDR_LEN (15 + 1) - -/* - * 69 is the number of characters for the printable portion of the address - * 1 is for the null terminator - */ -#define GNIX_FI_ADDR_STR_LEN (69 + 1) - -/* - * enum for blocking/non-blocking progress - */ -enum gnix_progress_type { - GNIX_PRG_BLOCKING, - GNIX_PRG_NON_BLOCKING -}; - -/* - * simple struct for gnix fabric, may add more stuff here later - */ -struct gnix_fid_fabric { - struct fid_fabric fab_fid; - /* llist of domains's opened from fabric */ - struct dlist_entry domain_list; - /* number of bound datagrams for domains opened from - * this fabric object - used by cm nic*/ - int n_bnd_dgrams; - /* number of wildcard datagrams for domains opened from - * this fabric object - used by cm nic*/ - int n_wc_dgrams; - /* timeout datagram completion - see - * GNI_PostdataProbeWaitById in gni_pub.h */ - uint64_t datagram_timeout; - struct gnix_reference ref_cnt; - struct gnix_mr_notifier mr_notifier; -}; - -extern struct fi_ops_cm 
gnix_ep_msg_ops_cm; -extern struct fi_ops_cm gnix_ep_ops_cm; - -#define GNIX_GET_MR_CACHE_INFO(domain, auth_key) \ - ({ &(domain)->mr_cache_info[(auth_key)->ptag]; }) - -/* - * a gnix_fid_domain is associated with one or more gnix_nic's. - * the gni_nics are in turn associated with ep's opened off of the - * domain. The gni_nic's are use for data motion - sending/receivng - * messages, rma ops, etc. - */ -struct gnix_fid_domain { - struct fid_domain domain_fid; - /* used for fabric object dlist of domains*/ - struct dlist_entry list; - /* list nics this domain is attached to, TODO: thread safety */ - struct dlist_entry nic_list; - struct gnix_fid_fabric *fabric; - struct gnix_cm_nic *cm_nic; - ofi_spin_t cm_nic_lock; - uint32_t cdm_id_seed; - uint32_t addr_format; - /* user tunable parameters accessed via open_ops functions */ - struct gnix_ops_domain params; - /* additional gni cq modes to use for this domain */ - gni_cq_mode_t gni_cq_modes; - /* additional gni cq modes to use for this domain */ - enum fi_progress control_progress; - enum fi_progress data_progress; - enum fi_threading thread_model; - struct gnix_reference ref_cnt; - gnix_mr_cache_attr_t mr_cache_attr; - struct gnix_mr_cache_info *mr_cache_info; - struct gnix_mr_ops *mr_ops; - ofi_spin_t mr_cache_lock; - int mr_cache_type; - /* flag to indicate that memory registration is initialized and should not - * be changed at this point. 
- */ - int mr_is_init; - int mr_iov_limit; - int udreg_reg_limit; - struct gnix_auth_key *auth_key; - int using_vmdh; -#ifdef HAVE_UDREG - udreg_cache_handle_t udreg_cache; -#endif - uint32_t num_allocd_stxs; -}; - -/** - * gnix_fid_pep structure - GNIX passive endpoint - * - * @var pep_fid libfabric passive EP fid structure - * @var fabric Fabric associated with this endpoint - * @var eq Event queue bound to this endpoint - * @var src_addr Source address of this endpoint - * @var lock Lock protecting all endpoint fields - * @var listen_fd TCP socket used to listen for connections - * @var backlog Maximum number of pending connetions - * @var bound Flag indicating if the endpoint source address is set - * @var cm_data_size Maximum size of CM data - * @var ref_cnt Endpoint reference count - */ -struct gnix_fid_pep { - struct fid_pep pep_fid; - struct gnix_fid_fabric *fabric; - struct fi_info *info; - struct gnix_fid_eq *eq; - struct gnix_ep_name src_addr; - ofi_spin_t lock; - int listen_fd; - int backlog; - int bound; - size_t cm_data_size; - struct gnix_reference ref_cnt; -}; - -#define GNIX_CQS_PER_EP 8 - -struct gnix_fid_ep_ops_en { - uint32_t msg_recv_allowed: 1; - uint32_t msg_send_allowed: 1; - uint32_t rma_read_allowed: 1; - uint32_t rma_write_allowed: 1; - uint32_t tagged_recv_allowed: 1; - uint32_t tagged_send_allowed: 1; - uint32_t atomic_read_allowed: 1; - uint32_t atomic_write_allowed: 1; -}; - -#define GNIX_INT_TX_POOL_SIZE 128 -#define GNIX_INT_TX_POOL_COUNT 256 - -struct gnix_int_tx_buf { - struct slist_entry e; - uint8_t *buf; - struct gnix_fid_mem_desc *md; -}; - -struct gnix_int_tx_ptrs { - struct slist_entry e; - void *sl_ptr; - void *buf_ptr; - struct gnix_fid_mem_desc *md; -}; - -struct gnix_int_tx_pool { - bool enabled; - int nbufs; - ofi_spin_t lock; - struct slist sl; - struct slist bl; -}; - -struct gnix_addr_cache_entry { - fi_addr_t addr; - struct gnix_vc *vc; -}; - -enum gnix_conn_state { - GNIX_EP_UNCONNECTED, - GNIX_EP_CONNECTING, - 
GNIX_EP_CONNECTED, - GNIX_EP_SHUTDOWN -}; - -#define GNIX_EP_CONNECTED(ep) ((ep)->conn_state == GNIX_EP_CONNECTED) - -/* - * gnix endpoint structure - * - * A gnix_cm_nic is associated with an EP if it is of type FI_EP_RDM. - * The gnix_cm_nic is used for building internal connections between the - * endpoints at different addresses. - */ -struct gnix_fid_ep { - struct fid_ep ep_fid; - enum fi_ep_type type; - struct gnix_fid_domain *domain; - uint64_t op_flags; - uint64_t caps; - uint32_t use_tag_hlist; - struct gnix_fid_cq *send_cq; - struct gnix_fid_cq *recv_cq; - struct gnix_fid_cntr *send_cntr; - struct gnix_fid_cntr *recv_cntr; - struct gnix_fid_cntr *write_cntr; - struct gnix_fid_cntr *read_cntr; - struct gnix_fid_cntr *rwrite_cntr; - struct gnix_fid_cntr *rread_cntr; - struct gnix_fid_av *av; - struct gnix_fid_stx *stx_ctx; - struct gnix_cm_nic *cm_nic; - struct gnix_nic *nic; - ofi_spin_t vc_lock; - /* used for unexpected receives */ - struct gnix_tag_storage unexp_recv_queue; - /* used for posted receives */ - struct gnix_tag_storage posted_recv_queue; - - struct gnix_tag_storage tagged_unexp_recv_queue; - struct gnix_tag_storage tagged_posted_recv_queue; - - /* pointer to tag matching engine */ - int (*progress_fn)(struct gnix_fid_ep *, enum gnix_progress_type); - /* RX specific progress fn */ - int (*rx_progress_fn)(struct gnix_fid_ep *, gni_return_t *rc); - struct gnix_xpmem_handle *xpmem_hndl; - bool tx_enabled; - bool rx_enabled; - bool shared_tx; - bool requires_lock; - struct gnix_auth_key *auth_key; - int last_cached; - struct gnix_addr_cache_entry addr_cache[GNIX_ADDR_CACHE_SIZE]; - int send_selective_completion; - int recv_selective_completion; - int min_multi_recv; - /* note this free list will be initialized for thread safe */ - struct gnix_freelist fr_freelist; - struct gnix_int_tx_pool int_tx_pool; - struct gnix_reference ref_cnt; - struct gnix_fid_ep_ops_en ep_ops; - - struct fi_info *info; - struct fi_ep_attr ep_attr; - struct gnix_ep_name 
src_addr; - - /* FI_EP_MSG specific. */ - struct gnix_vc *vc; - int conn_fd; - int conn_state; - struct gnix_ep_name dest_addr; - struct gnix_fid_eq *eq; - - /* Unconnected EP specific. */ - union { - struct gnix_hashtable *vc_ht; /* FI_AV_MAP */ - struct gnix_vector *vc_table; /* FI_AV_TABLE */ - }; - struct dlist_entry unmapped_vcs; - - /* FI_MORE specific. */ - struct slist more_read; - struct slist more_write; -}; - -#define GNIX_EP_RDM(type) (type == FI_EP_RDM) - -#define GNIX_EP_DGM(type) (type == FI_EP_DGRAM) - -#define GNIX_EP_RDM_DGM(type) ((type == FI_EP_RDM) || \ - (type == FI_EP_DGRAM)) - -#define GNIX_EP_RDM_DGM_MSG(type) ((type == FI_EP_RDM) || \ - (type == FI_EP_DGRAM) || \ - (type == FI_EP_MSG)) - -/** - * gnix_fid_sep struct - * - * @var ep_fid embedded struct fid_ep field - * @var domain pointer to domain used to create the sep instance - * @var info pointer to dup of info struct supplied to fi_scalable_ep - * operation - * @var op_flags quick access for op_flags for tx/rx contexts - * instantiated using this sep - * @var caps quick access for caps for tx/rx contexts instantiated - * using this sep - * @var cdm_id_base base cdm id to use for tx/rx contexts instantiated - * using this sep - * @var ep_table array of pointers to EPs used by the rx/tx contexts - * instantiated using this sep - * @var tx_ep_table array of pointers to tx contexts instantiated using - * this sep - * @var rx_ep_table array of pointers to rx contexts instantiated using - * this sep - * @var enabled array of bool to track enabling of embedded eps - * @var cm_nic gnix cm nic associated with this SEP. 
- * @var av address vector bound to this SEP - * @var my_name ep name for this endpoint - * @var sep_lock lock protecting this sep object - * @var ref_cnt ref cnt on this object - * @var auth_key GNIX authorization key - */ -struct gnix_fid_sep { - struct fid_ep ep_fid; - enum fi_ep_type type; - struct fid_domain *domain; - struct fi_info *info; - uint64_t caps; - uint32_t cdm_id_base; - struct fid_ep **ep_table; - struct fid_ep **tx_ep_table; - struct fid_ep **rx_ep_table; - bool *enabled; - struct gnix_cm_nic *cm_nic; - struct gnix_fid_av *av; - struct gnix_ep_name my_name; - ofi_spin_t sep_lock; - struct gnix_reference ref_cnt; - struct gnix_auth_key *auth_key; -}; - -/** - * gnix_fid_trx struct - * - * @var ep_fid embedded struct fid_ep field - * @var ep pointer to gnix_fid_ep used by this tx/rx context - * @var sep pointer to associated gnix_fid_sep for this context - * @var op_flags op flags for this tx context - * @var caps caps for this tx context - * @var ref_cnt ref cnt on this object - */ -struct gnix_fid_trx { - struct fid_ep ep_fid; - struct gnix_fid_ep *ep; - struct gnix_fid_sep *sep; - uint64_t op_flags; - uint64_t caps; - int index; - struct gnix_reference ref_cnt; -}; - -/** - * gnix_fid_stx struct - * @note - another way to associated gnix_nic's with an ep - * - * @var stx_fid embedded struct fid_stx field - * @var domain pointer to domain used to create the stx instance - * @var nic pointer to gnix_nic associated with this stx - * @var ref_cnt ref cnt on this object - */ -struct gnix_fid_stx { - struct fid_stx stx_fid; - struct gnix_fid_domain *domain; - struct gnix_nic *nic; - struct gnix_auth_key *auth_key; - struct gnix_reference ref_cnt; -}; - -/** - * gnix_fid_av struct - * @TODO - Support shared named AVs - * - * @var fid_av embedded struct fid_stx field - * @var domain pointer to domain used to create the av - * @var type the type of the AV, FI_AV_{TABLE,MAP} - * @var table - * @var valid_entry_vec - * @var addrlen - * @var capacity 
current size of AV - * @var count number of address are currently stored in AV - * @var rx_ctx_bits address bits to identify an rx context - * @var mask mask of the fi_addr to resolve the base address - * @var map_ht Hash table for mapping FI_AV_MAP - * @var block_list linked list of blocks used for allocating entries - * for FI_AV_MAP - * @var ref_cnt ref cnt on this object - */ -struct gnix_fid_av { - struct fid_av av_fid; - struct gnix_fid_domain *domain; - enum fi_av_type type; - struct gnix_av_addr_entry* table; - int *valid_entry_vec; - size_t addrlen; - size_t capacity; - size_t count; - uint64_t rx_ctx_bits; - uint64_t mask; - struct gnix_hashtable *map_ht; - struct slist block_list; - struct gnix_reference ref_cnt; -}; - -enum gnix_fab_req_type { - GNIX_FAB_RQ_SEND, - GNIX_FAB_RQ_SENDV, - GNIX_FAB_RQ_TSEND, - GNIX_FAB_RQ_TSENDV, - GNIX_FAB_RQ_RDMA_WRITE, - GNIX_FAB_RQ_RDMA_READ, - GNIX_FAB_RQ_RECV, - GNIX_FAB_RQ_RECVV, - GNIX_FAB_RQ_TRECV, - GNIX_FAB_RQ_TRECVV, - GNIX_FAB_RQ_MRECV, - GNIX_FAB_RQ_AMO, - GNIX_FAB_RQ_FAMO, - GNIX_FAB_RQ_CAMO, - GNIX_FAB_RQ_END_NON_NATIVE, - GNIX_FAB_RQ_START_NATIVE = GNIX_NAMO_AX, - GNIX_FAB_RQ_NAMO_AX = GNIX_NAMO_AX, - GNIX_FAB_RQ_NAMO_AX_S = GNIX_NAMO_AX_S, - GNIX_FAB_RQ_NAMO_FAX = GNIX_NAMO_FAX, - GNIX_FAB_RQ_NAMO_FAX_S = GNIX_NAMO_FAX_S, - GNIX_FAB_RQ_MAX_TYPES, -}; - -struct gnix_fab_req_rma { - uint64_t loc_addr; - struct gnix_fid_mem_desc *loc_md; - size_t len; - uint64_t rem_addr; - uint64_t rem_mr_key; - uint64_t imm; - ofi_atomic32_t outstanding_txds; - gni_return_t status; - struct slist_entry sle; -}; - -struct gnix_fab_req_msg { - struct gnix_tag_list_element tle; - - struct send_info_t { - uint64_t send_addr; - size_t send_len; - gni_mem_handle_t mem_hndl; - uint32_t head; - uint32_t tail; - } send_info[GNIX_MAX_MSG_IOV_LIMIT]; - struct gnix_fid_mem_desc *send_md[GNIX_MAX_MSG_IOV_LIMIT]; - size_t send_iov_cnt; - uint64_t send_flags; - size_t cum_send_len; - struct gnix_fab_req *parent; - size_t mrecv_space_left; 
- uint64_t mrecv_buf_addr; - - struct recv_info_t { - uint64_t recv_addr; - size_t recv_len; - gni_mem_handle_t mem_hndl; - uint32_t tail_len : 2; /* If the send len is > the recv_len, we - * need to fetch the unaligned tail into - * the txd's int buf - */ - uint32_t head_len : 2; - } recv_info[GNIX_MAX_MSG_IOV_LIMIT]; - struct gnix_fid_mem_desc *recv_md[GNIX_MAX_MSG_IOV_LIMIT]; - size_t recv_iov_cnt; - uint64_t recv_flags; /* protocol, API info */ - size_t cum_recv_len; - - uint64_t tag; - uint64_t ignore; - uint64_t imm; - gni_mem_handle_t rma_mdh; - uint64_t rma_id; - ofi_atomic32_t outstanding_txds; - gni_return_t status; -}; - -struct gnix_fab_req_amo { - uint64_t loc_addr; - struct gnix_fid_mem_desc *loc_md; - size_t len; - uint64_t rem_addr; - uint64_t rem_mr_key; - uint64_t imm; - enum fi_datatype datatype; - enum fi_op op; - uint64_t first_operand; - uint64_t second_operand; -}; - -/* - * Check for remote peer capabilities. - * inputs: - * pc - peer capabilities - * ops_flags - current operation flags (FI_RMA, FI_READ, etc.) - * - * See capabilities section in fi_getinfo.3. 
- */ -static inline int gnix_rma_read_target_allowed(uint64_t pc, - uint64_t ops_flags) -{ - if (ops_flags & FI_RMA) { - if (ops_flags & FI_READ) { - if (pc & FI_RMA) { - if (pc & FI_REMOTE_READ) - return 1; - if (pc & (FI_READ | FI_WRITE | FI_REMOTE_WRITE)) - return 0; - return 1; - } - } - } - return 0; -} -static inline int gnix_rma_write_target_allowed(uint64_t pc, - uint64_t ops_flags) -{ - if (ops_flags & FI_RMA) { - if (ops_flags & FI_WRITE) { - if (pc & FI_RMA) { - if (pc & FI_REMOTE_WRITE) - return 1; - if (pc & (FI_READ | FI_WRITE | FI_REMOTE_READ)) - return 0; - return 1; - } - } - } - return 0; -} - -static inline int gnix_atomic_read_target_allowed(uint64_t pc, - uint64_t ops_flags) -{ - if (ops_flags & FI_ATOMICS) { - if (ops_flags & FI_READ) { - if (pc & FI_ATOMICS) { - if (pc & FI_REMOTE_READ) - return 1; - if (pc & (FI_READ | FI_WRITE | FI_REMOTE_WRITE)) - return 0; - return 1; - } - } - } - return 0; -} - -static inline int gnix_atomic_write_target_allowed(uint64_t pc, - uint64_t ops_flags) -{ - if (ops_flags & FI_ATOMICS) { - if (ops_flags & FI_WRITE) { - if (pc & FI_ATOMICS) { - if (pc & FI_REMOTE_WRITE) - return 1; - if (pc & (FI_READ | FI_WRITE | FI_REMOTE_READ)) - return 0; - return 1; - } - } - } - return 0; -} - -/* - * Test if this operation is permitted based on the type of transfer - * (encoded in the flags parameter), the endpoint capabilities and the - * remote endpoint (peer) capabilities. Set a flag to speed up future checks. 
- */ - -static inline int gnix_ops_allowed(struct gnix_fid_ep *ep, - uint64_t peer_caps, - uint64_t flags) -{ - uint64_t caps = ep->caps; - - GNIX_DEBUG(FI_LOG_EP_DATA, "flags:0x%llx, %s\n", flags, - fi_tostr(&flags, FI_TYPE_OP_FLAGS)); - GNIX_DEBUG(FI_LOG_EP_DATA, "peer_caps:0x%llx, %s\n", peer_caps, - fi_tostr(&peer_caps, FI_TYPE_OP_FLAGS)); - GNIX_DEBUG(FI_LOG_EP_DATA, "caps:0x%llx, %s\n", - ep->caps, fi_tostr(&ep->caps, FI_TYPE_CAPS)); - - if ((flags & FI_RMA) && (flags & FI_READ)) { - if (OFI_UNLIKELY(!ep->ep_ops.rma_read_allowed)) { - /* check if read initiate capabilities are allowed */ - if (caps & FI_RMA) { - if (caps & FI_READ) { - ; - } else if (caps & (FI_WRITE | - FI_REMOTE_WRITE | - FI_REMOTE_READ)) { - return 0; - } - } else { - return 0; - } - /* check if read remote capabilities are allowed */ - if (gnix_rma_read_target_allowed(peer_caps, flags)) { - ep->ep_ops.rma_read_allowed = 1; - return 1; - } - return 0; - } - return 1; - } else if ((flags & FI_RMA) && (flags & FI_WRITE)) { - if (OFI_UNLIKELY(!ep->ep_ops.rma_write_allowed)) { - /* check if write initiate capabilities are allowed */ - if (caps & FI_RMA) { - if (caps & FI_WRITE) { - ; - } else if (caps & (FI_READ | - FI_REMOTE_WRITE | - FI_REMOTE_READ)) { - return 0; - } - } else { - return 0; - } - /* check if write remote capabilities are allowed */ - if (gnix_rma_write_target_allowed(peer_caps, flags)) { - ep->ep_ops.rma_write_allowed = 1; - return 1; - } - return 0; - } - return 1; - } else if ((flags & FI_ATOMICS) && (flags & FI_READ)) { - if (OFI_UNLIKELY(!ep->ep_ops.atomic_read_allowed)) { - /* check if read initiate capabilities are allowed */ - if (caps & FI_ATOMICS) { - if (caps & FI_READ) { - ; - } else if (caps & (FI_WRITE | - FI_REMOTE_WRITE | - FI_REMOTE_READ)) { - return 0; - } - } else { - return 0; - } - /* check if read remote capabilities are allowed */ - if (gnix_atomic_read_target_allowed(peer_caps, flags)) { - ep->ep_ops.atomic_read_allowed = 1; - return 1; - } - return 0; 
- } - return 1; - } else if ((flags & FI_ATOMICS) && (flags & FI_WRITE)) { - if (OFI_UNLIKELY(!ep->ep_ops.atomic_write_allowed)) { - /* check if write initiate capabilities are allowed */ - if (caps & FI_ATOMICS) { - if (caps & FI_WRITE) { - ; - } else if (caps & (FI_READ | - FI_REMOTE_WRITE | - FI_REMOTE_READ)) { - return 0; - } - } else { - return 0; - } - /* check if write remote capabilities are allowed */ - if (gnix_atomic_write_target_allowed(peer_caps, - flags)) { - ep->ep_ops.atomic_write_allowed = 1; - return 1; - } - return 0; - } - return 1; - } - - GNIX_ERR(FI_LOG_EP_DATA, "flags do not make sense %llx\n", flags); - - return 0; -} - -/** - * Fabric request layout, there is a one to one - * correspondence between an application's invocation of fi_send, fi_recv - * and a gnix fab_req. - * - * @var dlist a doubly linked list entry used to queue a request in - * either the vc's tx_queue or work_queue. - * @var addr the peer's gnix_address associated with this request. - * @var type the fabric request type - * @var gnix_ep the gni endpoint associated with this request - * @var user_context the user context, typically the receive buffer address for - * a send or the send buffer address for a receive. - * @var vc the virtual channel or connection edge between the sender - * and receiver. - * @var work_fn the function called by the nic progress loop to initiate - * the fabric request. - * @var flags a set of bit patterns that apply to all message types - * @cb optional call back to be invoked when ref cnt on this - * object drops to zero - * @ref_cnt ref cnt for this object - * @var iov_txds A list of pending Rdma/CtFma GET txds. - * @var iov_txd_cnt The count of outstanding iov txds. - * @var tx_failures tx failure bits. 
- * @var rma GNI PostRdma request - * @var msg GNI SMSG request - * @var amo GNI Fma request - */ -struct gnix_fab_req { - struct dlist_entry dlist; - struct gnix_address addr; - enum gnix_fab_req_type type; - struct gnix_fid_ep *gnix_ep; - void *user_context; - struct gnix_vc *vc; - int (*work_fn)(void *); - uint64_t flags; - void (*cb)(void *); - struct gnix_reference ref_cnt; - - struct slist_entry *int_tx_buf_e; - uint8_t *int_tx_buf; - gni_mem_handle_t int_tx_mdh; - - struct gnix_tx_descriptor *iov_txds[GNIX_MAX_MSG_IOV_LIMIT]; - /* - * special value of UINT_MAX is used to indicate - * an unrecoverable (aka non-transient) error has occurred - * in one of the underlying GNI transactions - */ - uint32_t tx_failures; - - /* common to rma/amo/msg */ - union { - struct gnix_fab_req_rma rma; - struct gnix_fab_req_msg msg; - struct gnix_fab_req_amo amo; - }; - char inject_buf[GNIX_INJECT_SIZE]; -}; - -/* - * test whether a request is replayable - * or not based on the value of the tx_failures field - */ - -static inline bool _gnix_req_replayable(struct gnix_fab_req *req) -{ - bool ret = false; - uint32_t tx_failures, max_retrans; - - tx_failures = req->tx_failures; - max_retrans = req->gnix_ep->domain->params.max_retransmits; - if ((req->tx_failures != UINT_MAX) && - (++tx_failures < max_retrans)) - ret = true; - - return ret; -} -static inline int _gnix_req_inject_err(struct gnix_fab_req *req) -{ - int err_cnt = req->gnix_ep->domain->params.err_inject_count; - - if (OFI_LIKELY(!err_cnt)) { - return 0; - } else if (err_cnt > 0) { - return req->tx_failures < err_cnt; - } else { /* (err_cnt < 0) */ - return req->tx_failures < (rand() % (-err_cnt)); - } -} - -static inline int _gnix_req_inject_smsg_err(struct gnix_fab_req *req) -{ - int err_cnt = req->gnix_ep->domain->params.err_inject_count; - int retrans_cnt = req->gnix_ep->domain->params.max_retransmits; - - if (OFI_LIKELY(!err_cnt)) { - return 0; - } else if (retrans_cnt <= err_cnt) { - return 1; - } else { - return 
0; - } -} - -extern int gnix_default_user_registration_limit; -extern int gnix_default_prov_registration_limit; -extern int gnix_dealloc_aki_on_fabric_close; - -/* This is a per-node limitation of the GNI provider. Each process - should request only as many registrations as it intends to use - and no more than that. */ -#define GNIX_MAX_SCALABLE_REGISTRATIONS 4096 - -/* - * work queue struct, used for handling delay ops, etc. in a generic wat - */ - -struct gnix_work_req { - struct dlist_entry list; - /* function to be invoked to progress this work queue req. - first element is pointer to data needec by the func, second - is a pointer to an int which will be set to 1 if progress - function is complete */ - int (*progress_fn)(void *, int *); - /* data to be passed to the progress function */ - void *data; - /* function to be invoked if this work element has completed */ - int (*completer_fn)(void *); - /* data for completer function */ - void *completer_data; -}; - -/* - * globals - */ -extern const char gnix_fab_name[]; -extern const char gnix_dom_name[]; -extern uint32_t gnix_cdm_modes; -extern ofi_atomic32_t gnix_id_counter; - - -/* - * linked list helpers - */ - -static inline void gnix_slist_insert_tail(struct slist_entry *item, - struct slist *list) -{ - item->next = NULL; - slist_insert_tail(item, list); -} - -/* - * prototypes for fi ops methods - */ -int gnix_domain_open(struct fid_fabric *fabric, struct fi_info *info, - struct fid_domain **domain, void *context); - -int gnix_av_open(struct fid_domain *domain, struct fi_av_attr *attr, - struct fid_av **av, void *context); - -int gnix_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, - struct fid_cq **cq, void *context); - -int gnix_ep_open(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **ep, void *context); - -int gnix_pep_open(struct fid_fabric *fabric, - struct fi_info *info, struct fid_pep **pep, - void *context); - -int gnix_eq_open(struct fid_fabric *fabric, struct 
fi_eq_attr *attr, - struct fid_eq **eq, void *context); - -int gnix_mr_reg(struct fid *fid, const void *buf, size_t len, - uint64_t access, uint64_t offset, uint64_t requested_key, - uint64_t flags, struct fid_mr **mr_o, void *context); - -int gnix_mr_regv(struct fid *fid, const struct iovec *iov, - size_t count, uint64_t access, - uint64_t offset, uint64_t requested_key, - uint64_t flags, struct fid_mr **mr, void *context); - -int gnix_mr_regattr(struct fid *fid, const struct fi_mr_attr *attr, - uint64_t flags, struct fid_mr **mr); - -int gnix_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, - struct fid_cntr **cntr, void *context); - -int gnix_sep_open(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **sep, void *context); - -int gnix_ep_bind(fid_t fid, struct fid *bfid, uint64_t flags); - -int gnix_ep_close(fid_t fid); - -/* - * prototype for static data initialization method - */ -void _gnix_init(void); - -/* Prepend DIRECT_FN to provider specific API functions for global visibility - * when using fabric direct. If the API function is static use the STATIC - * macro to bind symbols globally when compiling with fabric direct. - */ -#ifdef FABRIC_DIRECT_ENABLED -#define DIRECT_FN __attribute__((visibility ("default"))) -#define STATIC -#else -#define DIRECT_FN -#define STATIC static -#endif - -#endif /* _GNIX_H_ */ diff --git a/prov/gni/include/gnix_atomic.h b/prov/gni/include/gnix_atomic.h deleted file mode 100644 index 56e3f29780b..00000000000 --- a/prov/gni/include/gnix_atomic.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _GNIX_ATOMIC_H_ -#define _GNIX_ATOMIC_H_ - -#define GNIX_ATOMIC_WRITE_FLAGS_DEF (FI_ATOMIC | FI_WRITE) -#define GNIX_ATOMIC_READ_FLAGS_DEF (FI_ATOMIC | FI_READ) - -int _gnix_atomic_cmd(enum fi_datatype dt, enum fi_op op, - enum gnix_fab_req_type fr_type); -ssize_t _gnix_atomic(struct gnix_fid_ep *ep, - enum gnix_fab_req_type fr_type, - const struct fi_msg_atomic *msg, - const struct fi_ioc *comparev, - void **compare_desc, - size_t compare_count, - struct fi_ioc *resultv, - void **result_desc, - size_t result_count, - uint64_t flags); - -/* SMSG callback for AMO remote counter control message. 
*/ -int __smsg_amo_cntr(void *data, void *msg); - -#endif /* _GNIX_ATOMIC_H_ */ - diff --git a/prov/gni/include/gnix_auth_key.h b/prov/gni/include/gnix_auth_key.h deleted file mode 100644 index 0e93a934f42..00000000000 --- a/prov/gni/include/gnix_auth_key.h +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Copyright (c) 2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef PROV_GNI_INCLUDE_GNIX_AUTH_KEY_H_ -#define PROV_GNI_INCLUDE_GNIX_AUTH_KEY_H_ - -#include - -#include "fi_ext_gni.h" -#include "gnix_bitmap.h" - -/* - * GNIX Authorization keys are directly associated with a specific GNI network - * key. There are some limitations to GNI network keys that should be noted. 
- * - * GNI network keys are directly associated with memory registrations, and - * can only support a single type of memory mode at a time. This means that - * the memory mode must be tracked with the authorization key, and must exist - * as globally known information. Since references to objects may still exist - * after the fabric is closed, this information must persist unless the - * application has promised not to open any more GNI fabric instances. - * See fi_gni man page for guidance on GNI_DEALLOC_AKI_ON_FABRIC_CLOSE. - */ - -/** - * GNIX authorization key construct - * - * @var lock lock for data structure - * @var attr authorization key attributes - * @var enabled Is this authorization key live? If so, refuse changes to limits - * @var using_vmdh Is this authorization key associated with a domain using - * VMDH? - * @var prov bitmap for detecting provider key usage - * @var user bitmap for detecting user key usage - */ -struct gnix_auth_key { - ofi_spin_t lock; - struct gnix_auth_key_attr attr; - int enabled; - uint8_t ptag; - uint32_t cookie; - int using_vmdh; - uint32_t key_partition_size; - uint32_t key_offset; - gnix_bitmap_t *prov; - gnix_bitmap_t *user; -}; - -/** - * Allocates an authorization key structure, if possible - * - * @return non-NULL pointer to initialized memory on success - * NULL on failure - */ -struct gnix_auth_key *_gnix_auth_key_alloc(); - -/** - * Frees an authorization key structure - * - * @param key A GNI authorization key structure to free - * @return 0 on success - * -FI_EINVAL, if invalid parameter passed as key - */ -int _gnix_auth_key_free(struct gnix_auth_key *key); - -/** - * Lookup an authorization key in global data storage - * - * @param auth_key authorization key - * @param auth_key_size length of authorization key in bytes - * @return non-NULL pointer on success - * NULL pointer if not found - */ -struct gnix_auth_key *_gnix_auth_key_lookup( - uint8_t *auth_key, - size_t auth_key_size); - -/** - * Enables and 
prevents further limit modifications for an authorization key - * - * @param key GNI authorization key - * @return FI_SUCCESS on success - * -FI_EINVAL, if bad parameters were passed - * -FI_EBUSY, if already enabled - */ - -int _gnix_auth_key_enable(struct gnix_auth_key *key); - -/** - * Retrieves the next available provider-reserved key for a given - * authorization key - * - * @param info A GNI authorization key - * @return FI_SUCCESS on success - * -FI_EINVAL, if bad parameters were passed - * -FI_EAGAIN, if no available key could be foundi - */ -int _gnix_get_next_reserved_key(struct gnix_auth_key *info); - -/** - * Releases a reserved key back to the bitset to be reused - * - * @param info A GNI authorization key - * @param reserved_key index of the reserved key - * @return FI_SUCCESS on success - * -FI_EINVAL, if invalid parameters were passed - * -FI_EBUSY, if reserved key was already released - */ -int _gnix_release_reserved_key(struct gnix_auth_key *info, int reserved_key); - -/** - * Creates an authorization key from default configuration - * - * @param auth_key authorization key - * @param auth_key_size length of authorization key in bytes - * @return non-NULL pointer on success - * NULL pointer on failure - */ -struct gnix_auth_key *_gnix_auth_key_create( - uint8_t *auth_key, - size_t auth_key_size); - -/** - * Inserts an authorization key into global data storage - * - * @param auth_key authorization key - * @param auth_key_size length of authorization key in bytes - * @param to_insert GNI authorization key structure to insert - * @return FI_SUCCESS on success - * -FI_EINVAL, if to_insert is NULL or global data - * storage is destroyed - * -FI_ENOSPC, if auth key exists in global data - * storage - */ -int _gnix_auth_key_insert( - uint8_t *auth_key, - size_t auth_key_size, - struct gnix_auth_key *to_insert); - -#define GNIX_GET_AUTH_KEY(auth_key, auth_key_size, requested_mode) \ - ({ \ - struct gnix_auth_key *_tmp; \ - _tmp = 
_gnix_auth_key_lookup((auth_key), (auth_key_size)); \ - int _tmp_ret; \ - if (!_tmp) { \ - GNIX_INFO(FI_LOG_FABRIC, \ - "failed to find authorization " \ - "key, creating new authorization key\n"); \ - _tmp = _gnix_auth_key_create( \ - (auth_key), (auth_key_size)); \ - if (!_tmp) { \ - GNIX_DEBUG(FI_LOG_FABRIC, \ - "failed to create new " \ - "authorization key, "\ - "another thread beat us to the insert " \ - "- searching again\n"); \ - _tmp = _gnix_auth_key_lookup((auth_key), \ - (auth_key_size)); \ - assert(_tmp); \ - } \ - _tmp->using_vmdh = (requested_mode); \ - _tmp_ret = _gnix_auth_key_enable(_tmp); \ - if (_tmp_ret) { \ - GNIX_WARN(FI_LOG_FABRIC, \ - "failed to enable new " \ - "authorization key\n"); \ - } \ - } \ - _tmp; \ - }) - -/* provider subsystem initialization and teardown functions */ -int _gnix_auth_key_subsys_init(void); -int _gnix_auth_key_subsys_fini(void); - -#endif /* PROV_GNI_INCLUDE_GNIX_AUTH_KEY_H_ */ diff --git a/prov/gni/include/gnix_av.h b/prov/gni/include/gnix_av.h deleted file mode 100644 index da09c95706b..00000000000 --- a/prov/gni/include/gnix_av.h +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _GNIX_AV_H_ -#define _GNIX_AV_H_ - -#include "gnix.h" - -/* - * this structure should ideally be as compact - * as possible, since its looked up in the critical - * path for FI_EP_RDM EPs that use FI_AV_MAP. It - * needs to hold sufficient content that the gnix_ep_name - * can be regnerated in full for fi_av_lookup. - */ - -/** - * Av addr entry struct - * - * @var gnix_addr gnix address for this entry - * @var name_type the endpoint type associated with this - * address (GNIX_EPN_TYPE_UNBOUND/BOUND) - * @var cm_nic_cdm_id for GNIX_EPN_TYPE_UNBOUND endpoint types - * the cdm id of the cm_nic with which the endpoint - * is associated - * @var cookie RDMA cookie credential for the endpoint - * this entry corresponds to - * @var rx_ctx_cnt number of contexts associated with this AV - */ -struct gnix_av_addr_entry { - struct gnix_address gnix_addr; - struct { - uint32_t name_type : 8; - uint32_t cm_nic_cdm_id : 24; - uint32_t cookie; - }; - struct { - uint32_t rx_ctx_cnt : 8; - uint32_t key_offset: 12; - uint32_t unused1 : 12; - }; -}; - -/* - * Prototypes for GNI AV helper functions for managing the AV system. 
- */ - -/** - * @brief Return pointer to an AV table internal gnix_av_addr_entry for - * a given fi_addr address - * - * @param[in] gnix_av pointer to a previously allocated gnix_fid_av - * @param[in] fi_addr address to be translated - * @param[out] addr pointer to address entry in AV table - * @return FI_SUCCESS on success, -FI_EINVAL on error - */ -int _gnix_av_lookup(struct gnix_fid_av *gnix_av, fi_addr_t fi_addr, - struct gnix_av_addr_entry *addr); - -/** - * @brief Return the FI address mapped to a given GNIX address. - * - * @param[in] gnix_av The AV to use for lookup. - * @param[in] gnix_addr The GNIX address to translate. - * @param[out] fi_addr The FI address mapped to gnix_addr. - * @return FI_SUCCESS on success, -FI_EINVAL or -FI_ENOENT on error. - */ -int _gnix_av_reverse_lookup(struct gnix_fid_av *gnix_av, - struct gnix_address gnix_addr, - fi_addr_t *fi_addr); - -/******************************************************************************* - * If the caller already knows the av type they can call the lookups directly - * using the following functions. - ******************************************************************************/ - -/** - * @brief (FI_AV_TABLE) Return fi_addr using its corresponding gnix address. - * - * @param[in] int_av The AV to use for the lookup. - * @param[in] gnix_addr The gnix address - * @param[in/out] fi_addr The pointer to the corresponding fi_addr. - * - * @return FI_SUCCESS on successfully looking up the entry in the entry table. - * @return -FI_EINVAL upon passing an invalid parameter. - */ -int _gnix_table_reverse_lookup(struct gnix_fid_av *int_av, - struct gnix_address gnix_addr, - fi_addr_t *fi_addr); - -/** - * @brief (FI_AV_MAP) Return fi_addr using its corresponding gnix address. - * - * @param[in] int_av The AV to use for the lookup. - * @param[in] gnix_addr The gnix address - * @param[in/out] fi_addr The pointer to the corresponding fi_addr. 
- * - * @return FI_SUCCESS on successfully looking up the entry in the entry table. - * @return -FI_EINVAL upon passing an invalid parameter. - */ -int _gnix_map_reverse_lookup(struct gnix_fid_av *int_av, - struct gnix_address gnix_addr, - fi_addr_t *fi_addr); - -/** - * @brief Return the string representation of the FI address. - * - * @param[in] av The AV to use. - * @param[in] addr The GNIX address to translate. - * @param[in/out] buf The buffer that contains the address string. - * @param[in/out] len The length of the address string. - * @return char The buffer that contains the address string. - */ -const char *gnix_av_straddr(struct fid_av *av, - const void *addr, - char *buf, - size_t *len); - -#endif /* _GNIX_AV_H_ */ diff --git a/prov/gni/include/gnix_bitmap.h b/prov/gni/include/gnix_bitmap.h deleted file mode 100644 index 1d1433b3e31..00000000000 --- a/prov/gni/include/gnix_bitmap.h +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright (c) 2015,2017 Cray Inc. All rights reserved. - * - * Created on: Apr 16, 2015 - * Author: jswaro - */ - -#ifndef BITMAP_H_ -#define BITMAP_H_ - -#include -#include -#include -#include - -#define GNIX_BITMAP_BUCKET_BITS 6 -#define GNIX_BITMAP_BUCKET_LENGTH (1ULL << GNIX_BITMAP_BUCKET_BITS) -#define GNIX_BUCKET_INDEX(index) ((index) >> GNIX_BITMAP_BUCKET_BITS) -#define GNIX_BIT_INDEX(index) ((index) % GNIX_BITMAP_BUCKET_LENGTH) -#define GNIX_BIT_VALUE(index) (1ULL << GNIX_BIT_INDEX(index)) - -#define __PARTIAL_BLOCKS(nbits) (((nbits) % GNIX_BITMAP_BUCKET_LENGTH) ? 
1 : 0) -#define __FULL_BLOCKS(nbits) ((nbits) >> GNIX_BITMAP_BUCKET_BITS) -#define GNIX_BITMAP_BLOCKS(nbits) \ - (__FULL_BLOCKS(nbits) + __PARTIAL_BLOCKS(nbits)) - -typedef uint64_t gnix_bitmap_value_t; - -#ifdef HAVE_ATOMICS -#include - -typedef atomic_uint_fast64_t gnix_bitmap_block_t; -#else -typedef struct atomic_uint64_t { - ofi_spin_t lock; - gnix_bitmap_value_t val; -} gnix_bitmap_block_t; -#endif - -typedef enum gnix_bitmap_state { - GNIX_BITMAP_STATE_UNINITIALIZED = 0, - GNIX_BITMAP_STATE_READY, - GNIX_BITMAP_STATE_FREE, -} gnix_bitmap_state_e; - -/** - * @brief gnix bitmap structure - * - * @var state state of the bitmap - * @var length length of bitmap in bits - * @var arr bitmap array - * @var internal_buffer_allocation flag to denote use of an externally - * allocated buffer - */ -typedef struct gnix_bitmap { - gnix_bitmap_state_e state; - uint32_t length; - gnix_bitmap_block_t *arr; - int internal_buffer_allocation; -} gnix_bitmap_t; - -/** - * Tests to see if a bit has been set in the bit. 
- * - * @param bitmap a gnix_bitmap pointer to the bitmap struct - * @param index index of the bit in the map to test - * @return 0 if the bit is not set, 1 if the bit is set - */ -int _gnix_test_bit(gnix_bitmap_t *bitmap, uint32_t index); - -/** - * Sets a bit in the bitmap - * - * @param bitmap a gnix_bitmap pointer to the bitmap struct - * @param index index of the bit in the map to set - */ -void _gnix_set_bit(gnix_bitmap_t *bitmap, uint32_t index); - -/** - * Clears a bit in the bitmap - * - * @param bitmap a gnix_bitmap pointer to the bitmap struct - * @param index index of the bit in the map to clear - */ -void _gnix_clear_bit(gnix_bitmap_t *bitmap, uint32_t index); - -/** - * Tests to see if a bit is set, then sets the bit in the bitmap - * - * @param bitmap a gnix_bitmap pointer to the bitmap struct - * @param index index of the bit in the map to test and set - * @return 0 if the bit was not set, 1 if the bit was already set - */ -int _gnix_test_and_set_bit(gnix_bitmap_t *bitmap, uint32_t index); - -/** - * Tests to see if a bit is set, the clears the bit in the bitmap - * - * @param bitmap a gnix_bitmap pointer to the bitmap struct - * @param index index of the bit in the map to test and set - * @return 0 if the bit was not set, 1 if the bit was already set - */ -int _gnix_test_and_clear_bit(gnix_bitmap_t *bitmap, uint32_t index); - -/** - * Takes a gnix_bitmap and allocates the internal structures and performs - * generic setup based on the number of bits requested - * - * @param bitmap a gnix_bitmap pointer to the bitmap struct - * @param nbits number of bits to request space for - * @param addr if provided, external memory allocation used for internal - array - * @return 0 on success - * @return -FI_EINVAL if bitmap is already initialized, or 0 is given - * as nbits - * @return -FI_ENOMEM if there isn't sufficient memory available to - * create bitmap - * @note If addr parameter is provided, realloc_bitmap will not work - */ -int 
_gnix_alloc_bitmap(gnix_bitmap_t *bitmap, uint32_t nbits, void *addr); - -/** - * Takes a gnix_bitmap and reallocates the internal structures to the requested - * size given in bits - * - * @note On return of a ENOMEM error code, the bitmap will not be - * resized and will still be a valid and operable bitmap. - * The ENOMEM error only serves to indication that resources - * are limited. - * - * @param bitmap a gnix_bitmap pointer to the bitmap struct - * @param nbits number of bits to resize the bitmap to - * @return 0 on success - * @return -FI_EINVAL if the bitmap hasn't been allocated yet or nbits == 0 - * @return -FI_ENOMEM if there wasn't sufficient memory to expand the bitmap. - */ -int _gnix_realloc_bitmap(gnix_bitmap_t *bitmap, uint32_t nbits); - -/** - * Frees the internal structures of gnix_bitmap - * - * @param bitmap a gnix_bitmap pointer to the bitmap struct - * @return 0 on success - * @return -FI_EINVAL if the internal resources are uninitialized or already free - */ -int _gnix_free_bitmap(gnix_bitmap_t *bitmap); - -/** - * Sets every bit in the bitmap with (value != 0) - * - * @param bitmap a gnix_bitmap pointer to the bitmap struct - * @param value an integer value to be compared with 0 to set bits to - */ -void _gnix_fill_bitmap(gnix_bitmap_t *bitmap, uint64_t value); - -/** - * Finds the bit index of the first zero bit in the bitmap - * - * @param bitmap a gnix_bitmap pointer to the bitmap struct - * @return index on success, returns an index s.t. - * 0 <= index < bitmap->length - * @return -FI_EAGAIN on failure to find a zero bit - */ -int _gnix_find_first_zero_bit(gnix_bitmap_t *bitmap); - -/** - * Finds the bit index of the first set bit in the bitmap - * - * @param bitmap a gnix_bitmap pointer to the bitmap struct - * @return index on success, returns a index s.t. 
- * 0 <= index < bitmap->length - * @return -FI_EAGAIN on failure to find a set bit - */ -int _gnix_find_first_set_bit(gnix_bitmap_t *bitmap); - -/** - * Tests to verify that the bitmap is full - * - * @param bitmap a gnix_bitmap pointer to the bitmap struct - * @return 0 if the bitmap has cleared bits, 1 if the bitmap is fully set - */ -int _gnix_bitmap_full(gnix_bitmap_t *bitmap); - -/** - * Tests to verify that the bitmap is empty - * - * @param bitmap a gnix_bitmap pointer to the bitmap struct - * @return 0 if the bitmap has set bits, 1 if the bitmap is fully cleared - */ -int _gnix_bitmap_empty(gnix_bitmap_t *bitmap); - -/** - * Helper function for determining the size of array needed to support - * 'x' number of bits for an externally provided buffer address - * @param nbits number of bits requested for the bitmap - */ -__attribute__((unused)) -static inline uint32_t _gnix_bitmap_get_buffer_size(int nbits) -{ - return GNIX_BITMAP_BLOCKS(nbits) * sizeof(gnix_bitmap_block_t); -} - -#endif /* BITMAP_H_ */ diff --git a/prov/gni/include/gnix_buddy_allocator.h b/prov/gni/include/gnix_buddy_allocator.h deleted file mode 100644 index 6b37531b1e8..00000000000 --- a/prov/gni/include/gnix_buddy_allocator.h +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All - * rights reserved. - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _GNIX_BUDDY_ALLOCATOR_H_ -#define _GNIX_BUDDY_ALLOCATOR_H_ - -#include -#include "ofi_list.h" -#include "gnix_bitmap.h" -#include "gnix_util.h" -#include "gnix.h" - -#define MIN_BLOCK_SIZE 256 - -/* The following table was taken from: - * http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn - */ -static const uint32_t MultiplyDeBruijnBitPosition[32] = { - 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, - 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 -}; - -/* The following log2 function was taken from: - * http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn. - * - * Note: this function always truncates the result. 
- */ -static inline uint32_t __gnix_buddy_log2(uint32_t v) -{ - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - - return MultiplyDeBruijnBitPosition[(uint32_t)(v * 0x07C4ACDDU) >> 27]; -} - -/* Find the bitmap index for block X of size X_LEN */ -static inline size_t __gnix_buddy_bitmap_index(void *_x, size_t x_len, - void *_base, size_t base_len, - size_t min_len) -{ - /* arithmetic on void * is not part of the C standard (yet?) */ - uint8_t *x = _x; - uint8_t *base = _base; - - return (size_t) ((x - base) / (size_t) x_len) + - base_len / (min_len / 2) - base_len / (x_len / 2); -} - -/* Find the address of X's buddy block: - * If the "index" of block X is even then the buddy must be to the right of X, - * otherwise the buddy is to the left of X. - */ -static inline void *__gnix_buddy_address(void *x, size_t len, void *base) -{ - return (void *) (((((size_t) base - (size_t) x) / len) % 2) ? - (size_t) x - len : (size_t) x + len); -} - -/* evaluates to zero if X is not a power of two, otherwise evaluates to X - 1 */ -#define IS_NOT_POW_TWO(X) (((X) & (~(X) + 1)) ^ (X)) - -/* Find the block size (in bytes) required for allocating LEN bytes */ -#define BLOCK_SIZE(LEN, MIN_LEN) ((LEN) <= (MIN_LEN) ? (MIN_LEN) :\ - (IS_NOT_POW_TWO(LEN)) ? (((LEN) << 1) & ~(LEN)) :\ - (LEN)) - -/* Calculate the offset of a free block, OFFSET = MIN_LEN * 2^MULT. */ -#define OFFSET(MIN_LEN, MULT) ((MIN_LEN) * (1 << (MULT))) - -/* Find the index into the free list with block size LEN. */ -#define LIST_INDEX(LEN, MIN_LEN) (__gnix_buddy_log2((LEN) / (MIN_LEN))) - -/** - * Structure representing a buddy allocator. - * - * @var base The base address of the buffer being managed. - * @var len The length of the buffer the buddy allocator is - * managing. - * @var max The largest chunk of memory that can be allocated. - * - * @var nlists The number of free lists. - * @var lists The array of free lists ordered from smallest block - * size. 
- * at index 0 to largest block size at index nlists - 1. - * - * @var bitmap Each bit is 1 if the block is allocated or split, - * otherwise the bit is 0. - * - * @var lock The buddy alloc handle lock. - */ -typedef struct gnix_buddy_alloc_handle { - void *base; - uint32_t len; - uint32_t max; - - uint32_t nlists; - struct dlist_entry *lists; - - gnix_bitmap_t bitmap; - - ofi_spin_t lock; -} gnix_buddy_alloc_handle_t; - -/** - * Creates a buddy allocator - * - * @param[in] base Base address of buffer to be managed by - * allocator. - * - * @param[in] len Size of the buffer to be managed by allocator - * (must be a multiple of max). - * - * @param[in] max Maximum amount of memory that can be allocated - * by a single call to _gnix_buddy_alloc (power 2). - * - * @param[in/out] alloc_handle Handle to be used for when allocating/freeing - * memory managed by the buddy allocator. - * - * @return FI_SUCCESS Upon successfully creating an allocator. - * - * @return -FI_EINVAL Upon an invalid parameter. - * - * @return -FI_ENOMEM Upon failure to allocate memory to create the - * buddy allocator. - */ -int _gnix_buddy_allocator_create(void *base, uint32_t len, uint32_t max, - gnix_buddy_alloc_handle_t **alloc_handle); - -/** - * Releases all resources associated with a buddy allocator handle. - * - * @param[in] alloc_handle Buddy alloc handle to destroy. - * - * @return FI_SUCCESS Upon successfully destroying an allocator. - * - * @return -FI_EINVAL Upon an invalid parameter. - */ -int _gnix_buddy_allocator_destroy(gnix_buddy_alloc_handle_t *alloc_handle); - -/** - * Allocate a buffer from the buddy allocator - * - * @param[in] alloc_handle Previously allocated GNI buddy_alloc_handle to - * use as allocator. - * - * @param[in/out] ptr Pointer to an address where the address of the - * allocated buffer will be returned. - * - * @param[in] len Size of buffer to allocate in bytes. - * - * @return FI_SUCCESS Upon successfully allocating a buffer. 
- * - * @return -FI_ENOMEM Upon not being able to allocate a buffer of the - * requested size. - * - * @return -FI_EINVAL Upon an invalid parameter. - */ -int _gnix_buddy_alloc(gnix_buddy_alloc_handle_t *alloc_handle, void **ptr, - uint32_t len); - -/** - * Free a previously allocated buffer - * - * @param[in] alloc_handle Previously allocated GNI buddy_alloc_handle to - * use as allocator. - * - * @param[in/out] ptr Pointer to the previously allocated block. - * - * @param[in] len Size of the previously allocated block. - * - * @return FI_SUCCESS Upon successfully freeing a block. - * - * @return -FI_EINVAL Upon an invalid parameter. - */ -int _gnix_buddy_free(gnix_buddy_alloc_handle_t *alloc_handle, void *ptr, - uint32_t len); -#endif /* _GNIX_BUDDY_ALLOCATOR_H_ */ diff --git a/prov/gni/include/gnix_cm.h b/prov/gni/include/gnix_cm.h deleted file mode 100644 index 81fdea134af..00000000000 --- a/prov/gni/include/gnix_cm.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2016 Cray Inc. All rights reserved. - * Copyright (c) 2017 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2019-2020 Triad National Security, LLC. - * All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _GNIX_CM_H_ -#define _GNIX_CM_H_ - -#include "gnix.h" - -#define GNIX_CM_DATA_MAX_SIZE 256 -#define GNIX_CM_EQE_BUF_SIZE (sizeof(struct fi_eq_cm_entry) + \ - GNIX_CM_DATA_MAX_SIZE) - -struct gnix_pep_sock_connreq { - struct fi_info info; - struct gnix_ep_name src_addr; - struct gnix_ep_name dest_addr; - struct fi_tx_attr tx_attr; - struct fi_rx_attr rx_attr; - struct fi_ep_attr ep_attr; - struct fi_domain_attr domain_attr; - struct fi_fabric_attr fabric_attr; - int vc_id; - gni_smsg_attr_t vc_mbox_attr; - gni_mem_handle_t cq_irq_mdh; - uint64_t peer_caps; - size_t cm_data_len; - char eqe_buf[GNIX_CM_EQE_BUF_SIZE]; - uint32_t key_offset; -}; - -enum gnix_pep_sock_resp_cmd { - GNIX_PEP_SOCK_RESP_ACCEPT, - GNIX_PEP_SOCK_RESP_REJECT -}; - -struct gnix_pep_sock_connresp { - enum gnix_pep_sock_resp_cmd cmd; - int vc_id; - gni_smsg_attr_t vc_mbox_attr; - gni_mem_handle_t cq_irq_mdh; - uint64_t peer_caps; - size_t cm_data_len; - char eqe_buf[GNIX_CM_EQE_BUF_SIZE]; - uint32_t key_offset; -}; - -struct gnix_pep_sock_conn { - struct fid fid; - struct dlist_entry list; - int sock_fd; - struct gnix_pep_sock_connreq req; - int bytes_read; - struct fi_info *info; -}; - -int _gnix_pep_progress(struct gnix_fid_pep *pep); -int _gnix_ep_progress(struct 
gnix_fid_ep *ep); - -/** - * Parse a given address (of format FI_ADDR_GNI) into FI_ADDR_STR. - * @param ep_name [IN] the FI_ADDR_GNI address. - * @param out_buf [IN/OUT] the FI_ADDR_STR address. - * @return either FI_SUCCESS or a negative integer on failure. - */ -int _gnix_ep_name_to_str(struct gnix_ep_name *ep_name, char **out_buf); - -/** - * Parse a given address (of format FI_ADDR_STR) into FI_ADDR_GNI. - * @param addr[IN] the FI_ADDR_STR address. - * @param resolved_addr[OUT] the FI_ADDR_GNI address. - * @return either FI_SUCCESS or a negative integer on failure. - */ -int _gnix_ep_name_from_str(const char *addr, - struct gnix_ep_name *resolved_addr); - -/** - * Find a FI_ADDR_GNI. - * @param ep_name[IN] the array of addresses. - * @param idx [IN] the index of the desired address. - * @param addr [OUT] the desired address. - */ -static inline int -_gnix_resolve_gni_ep_name(const char *ep_name, int idx, - struct gnix_ep_name *addr) -{ - int ret = FI_SUCCESS; - static size_t addr_size = sizeof(struct gnix_ep_name); - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - /*TODO (optimization): Just return offset into ep_name */ - memcpy(addr, &ep_name[addr_size * idx], addr_size); - return ret; -} - -/** - * Find and convert a FI_ADDR_STR to FI_ADDR_GNI. - * @param ep_name [IN] the FI_ADDR_STR address. - * @param idx [IN] the index of the desired address. - * @param addr [OUT] the desired address converted to FI_ADDR_GNI. - * @return either FI_SUCCESS or a negative integer on failure. - */ -static inline int -_gnix_resolve_str_ep_name(const char *ep_name, int idx, - struct gnix_ep_name *addr) -{ - int ret = FI_SUCCESS; - static size_t addr_size = GNIX_FI_ADDR_STR_LEN; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - ret = _gnix_ep_name_from_str(&ep_name[addr_size * idx], addr); - return ret; -} - -/** - * Find and resolve the given ep_name. - * - * @param ep_name [IN] the ep name to resolve. - * @param idx [IN] the index of the desired address. 
- * @param addr [OUT] the desired address. - * @param domain [IN] the given domain. - * @return either FI_SUCCESS or a negative integer on failure. - */ -static inline int -_gnix_get_ep_name(const char *ep_name, int idx, struct gnix_ep_name *addr, - struct gnix_fid_domain *domain) -{ - int ret = FI_SUCCESS; - /* Use a function pointer to resolve the address */ - if (domain->addr_format == FI_ADDR_STR) { - ret = _gnix_resolve_str_ep_name(ep_name, idx, addr); - } else { - ret = _gnix_resolve_gni_ep_name(ep_name, idx, addr); - } - return ret; -} -#endif - diff --git a/prov/gni/include/gnix_cm_nic.h b/prov/gni/include/gnix_cm_nic.h deleted file mode 100644 index 7b2b0deebfc..00000000000 --- a/prov/gni/include/gnix_cm_nic.h +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. - * All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _GNIX_CM_NIC_H_ -#define _GNIX_CM_NIC_H_ - -#include "gnix.h" - -#define GNIX_CM_NIC_MAX_MSG_SIZE (GNI_DATAGRAM_MAXSIZE - sizeof(uint8_t)) - -extern struct dlist_entry gnix_cm_nic_list; -extern pthread_mutex_t gnix_cm_nic_list_lock; - -typedef int gnix_cm_nic_rcv_cb_func(struct gnix_cm_nic *cm_nic, - char *rbuf, - struct gnix_address addr); - -/** - * @brief GNI provider connection management (cm) nic structure - * - * @var cm_nic_list global CM NIC list element - * @var nic pointer to gnix_nic associated with this cm nic - * @var dgram_hndl handle to dgram allocator associated with this nic - * @var fabric GNI provider fabric associated with this nic - * @var addr_to_ep_ht Hash table for looking up ep bound to this - * cm nic, key is ep's gnix_address - * @var wq_lock spin lock for cm nic's work queue - * @var cm_nic_wq workqueue associated with this nic - * @var ref_cnt used for internal reference counting - * @var ctl_progress control progress type for this cm nic - * @var my_name gnix ep name for this cm nic - * @var rcv_cb_fn pointer to callback function used to process - * incoming messages received by this cm nic - * @var ptag ptag of this nic. - * @var poll_cnt non-atomic counter to reduce datagram polling cnt - * when using FI_PROGRESS_MANUAL for control progress. - * @var device_id local Aries device id associated with this nic. 
- */ -struct gnix_cm_nic { - struct dlist_entry cm_nic_list; - struct gnix_nic *nic; - struct gnix_dgram_hndl *dgram_hndl; - struct gnix_fid_domain *domain; - struct gnix_hashtable *addr_to_ep_ht; - ofi_spin_t wq_lock; - struct dlist_entry cm_nic_wq; - struct gnix_reference ref_cnt; - enum fi_progress ctrl_progress; - struct gnix_ep_name my_name; - gnix_cm_nic_rcv_cb_func *rcv_cb_fn; - uint8_t ptag; - uint32_t poll_cnt; - uint32_t device_id; -}; - - -/** - * @brief send a message to a cm_nic - * - * @param[in] cm_nic pointer to a previously allocated gnix_cm_nic struct - * @param[in] sbuf pointer to the beginning of a message to send - * @param[in] len length of message to send. May not exceed GNI_DATAGRAM_MAXSIZE - * bytes. - * @param[in] taddr address of target cm_nic - * @return FI_SUCCESS on success, -FI_EINVAL on invalid argument, - * -FI_AGAIN unable to send message , -FI_ENOSPC - * message too large - * Upon return, sbuf may be reused. - */ -int _gnix_cm_nic_send(struct gnix_cm_nic *cm_nic, - char *sbuf, size_t len, - struct gnix_address target_addr); - -/** - * @brief register a callback function to invoke upon receiving message - * - * @param[in] cm_nic pointer to previously allocated gnix_cm_nic struct - * @param[in] recv_fn pointer to receive function to invoke upon - * receipt of a message - * @param[out] o_fn pointer to previously registered callback function - * message. Must be GNI_DATAGRAM_MAXSIZE bytes in size. - * @return FI_SUCCESS on success, -FI_EINVAL on invalid argument. - * - * This call is non-blocking. If FI_SUCCESS is returned, a message - * sent from peer cm_nic at src_addr will be present in rbuf. 
- */ -int _gnix_cm_nic_reg_recv_fn(struct gnix_cm_nic *cm_nic, - gnix_cm_nic_rcv_cb_func *recv_fn, - gnix_cm_nic_rcv_cb_func **o_fn); - -/** - * @brief Frees a previously allocated cm nic structure - * - * @param[in] cm_nic pointer to previously allocated gnix_cm_nic struct - * @return FI_SUCCESS on success, -EINVAL on invalid argument - */ -int _gnix_cm_nic_free(struct gnix_cm_nic *cm_nic); - -/** - * @brief allocates a cm nic structure - * - * @param[in] domain pointer to a previously allocated gnix_fid_domain struct - * @param[in] info pointer to fi_info struct returned from fi_getinfo (may - * be NULL) - * @param[in] cdm_id cdm id to be used for this cm nic - * @param[out] cm_nic pointer to address where address of the allocated - * cm nic structure should be returned - * @return FI_SUCCESS on success, -EINVAL on invalid argument, - * -FI_ENOMEM if insufficient memory to allocate - * the cm nic structure - */ -int _gnix_cm_nic_alloc(struct gnix_fid_domain *domain, - struct fi_info *info, - uint32_t cdm_id, - struct gnix_auth_key *auth_key, - struct gnix_cm_nic **cm_nic); - -/** - * @brief enable a cm_nic for receiving incoming connection requests - * - * @param[in] cm_nic pointer to previously allocated gnix_cm_nic struct - * @return FI_SUCCESS on success, -EINVAL on invalid argument. - */ -int _gnix_cm_nic_enable(struct gnix_cm_nic *cm_nic); - -/** - * @brief poke the cm nic's progress engine - * - * @param[in] arg pointer to previously allocated gnix_cm_nic struct - * @return FI_SUCCESS on success, -EINVAL on invalid argument. - * Other error codes may be returned depending on the - * error codes returned from callback function - * that had been added to the nic's work queue. 
- */ -int _gnix_cm_nic_progress(void *arg); - -/** - * @brief generate a cdm_id to be used in call to GNI_CdmCreate based on a seed - * value previously returned from _gnix_cm_nic_get_cdm_seed_set - * - * @param[in] domain pointer to previously allocated gnix_fid_domain struct - * @param[out] id pointer to address where the 32 bit ids will be returned - * @return FI_SUCCESS upon generation of 32 bit id. - */ -int _gnix_cm_nic_create_cdm_id(struct gnix_fid_domain *domain, uint32_t *id); - -/** - * @brief generate a set of contiguous, unique 32 bit cdm_ids for use with GNI_CdmCreate - * - * @param domain pointer to previously allocated gnix_fid_domain struct - * @param nids number of ids to be allocated - * @param id pointer to address where the 32 bit id will be returned - * @return FI_SUCCESS upon generate ion of 32 bit id. - */ -int _gnix_get_new_cdm_id_set(struct gnix_fid_domain *domain, int nids, - uint32_t *id); - -/** - * @brief helper function to quickly check whether progress is required on - * a cm_nic - * - * @param cm_nic pointer to previously allocated gnix_cm_nic struct - * @return true if progress is needed, otherwise false - */ -static inline bool _gnix_cm_nic_need_progress(struct gnix_cm_nic *cm_nic) -{ - bool ret; - - /* - * if control progress is manual, always need to progress - */ - if (cm_nic->domain->control_progress == FI_PROGRESS_MANUAL) - return true; - - /* - * otherwise we only need to see if the wq has stuff to - * progress - */ - ret = (dlist_empty(&cm_nic->cm_nic_wq)) ? false : true; - return ret; -} - -#endif /* _GNIX_CM_NIC_H_ */ diff --git a/prov/gni/include/gnix_cntr.h b/prov/gni/include/gnix_cntr.h deleted file mode 100644 index ea7f220a0af..00000000000 --- a/prov/gni/include/gnix_cntr.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _GNIX_CNTR_H_ -#define _GNIX_CNTR_H_ - -#include - -#include "gnix.h" -#include "gnix_progress.h" -#include "gnix_wait.h" -#include "gnix_util.h" - -struct gnix_fid_cntr { - struct fid_cntr cntr_fid; - struct gnix_fid_domain *domain; - struct fid_wait *wait; - struct fi_cntr_attr attr; - ofi_atomic32_t cnt; - ofi_atomic32_t cnt_err; - struct gnix_reference ref_cnt; - struct dlist_entry trigger_list; - ofi_spin_t trigger_lock; - struct gnix_prog_set pset; - bool requires_lock; -}; - -/** - * @brief Increment event counter associated with a gnix_fid counter - * object - * @param[in] cntr pointer to previously allocated gnix_fid_cntr structure - * @return FI_SUCCESS on success, -FI_EINVAL on invalid argument - */ -int _gnix_cntr_inc(struct gnix_fid_cntr *cntr); - -/** - * @brief Increment error event counter associated with a gnix_fid counter - * object - * @param[in] cntr pointer to previously allocated gnix_fid_cntr structure - * @return FI_SUCCESS on success, -FI_EINVAL on invalid argument - */ -int _gnix_cntr_inc_err(struct gnix_fid_cntr *cntr); - -/** - * @brief Add an object to the list progressed when fi_cntr_read - * and related functions are called. - * @param[in] cntr pointer to previously allocated gnix_fid_cntr structure - * @param[in] obj pointer to object to add to the progress list. - * @param[in] prog_fn object progress function - * @return FI_SUCCESS on success, -FI_EINVAL on invalid argument - */ -int _gnix_cntr_poll_obj_add(struct gnix_fid_cntr *cntr, void *obj, - int (*prog_fn)(void *data)); - -/** - * @brief Remove an object from the list progressed when - * fi_cntr_read and related functions are called. 
- * @param[in] cntr pointer to previously allocated gnix_fid_cntr structure - * @param[in] obj pointer to previously added object - * @param[in] prog_fn object progress function - * @return FI_SUCCESS on success, -FI_EINVAL on invalid argument - */ -int _gnix_cntr_poll_obj_rem(struct gnix_fid_cntr *cntr, void *obj, - int (*prog_fn)(void *data)); - -#endif diff --git a/prov/gni/include/gnix_cq.h b/prov/gni/include/gnix_cq.h deleted file mode 100644 index 23756953997..00000000000 --- a/prov/gni/include/gnix_cq.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _GNIX_CQ_H_ -#define _GNIX_CQ_H_ - -#include - -#include "gnix_progress.h" -#include "gnix_queue.h" -#include "gnix_wait.h" -#include "gnix_util.h" -#include -#include - -#define GNIX_CQ_DEFAULT_FORMAT struct fi_cq_entry -#define GNIX_CQ_DEFAULT_SIZE 256 -#define GNIX_CQ_MAX_ERR_DATA_SIZE 64 - -/* forward declaration */ -struct gnix_fid_ep; - -struct gnix_cq_entry { - void *the_entry; - fi_addr_t src_addr; - struct slist_entry item; -}; - -struct gnix_fid_cq { - struct fid_cq cq_fid; - struct gnix_fid_domain *domain; - - struct gnix_queue *events; - struct gnix_queue *errors; - - struct fi_cq_attr attr; - size_t entry_size; - - struct fid_wait *wait; - - ofi_spin_t lock; - struct gnix_reference ref_cnt; - - struct gnix_prog_set pset; - - bool requires_lock; - char err_data[GNIX_CQ_MAX_ERR_DATA_SIZE]; -}; - -ssize_t _gnix_cq_add_event(struct gnix_fid_cq *cq, struct gnix_fid_ep *ep, - void *op_context, uint64_t flags, size_t len, - void *buf, uint64_t data, uint64_t tag, - fi_addr_t src_addr); - -ssize_t _gnix_cq_add_error(struct gnix_fid_cq *cq, void *op_context, - uint64_t flags, size_t len, void *buf, - uint64_t data, uint64_t tag, size_t olen, - int err, int prov_errno, void *err_data, - size_t err_data_size); - -int _gnix_cq_poll_obj_add(struct gnix_fid_cq *cq, void *obj, - int (*prog_fn)(void *data)); -int _gnix_cq_poll_obj_rem(struct gnix_fid_cq *cq, void *obj, - int (*prog_fn)(void *data)); - -#endif diff --git a/prov/gni/include/gnix_datagram.h b/prov/gni/include/gnix_datagram.h deleted file mode 100644 index d46ef1c1616..00000000000 --- a/prov/gni/include/gnix_datagram.h +++ /dev/null @@ -1,374 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. 
- * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _GNIX_DATAGRAM_H_ -#define _GNIX_DATAGRAM_H_ - -#include "gnix.h" - -/* - * GNI datagram related structs and defines. - * The GNI_EpPostDataWId, etc. are used to manage - * connecting VC's for the FI_EP_RDM endpoint - * type. - * - * There are two types of datagrams used by the - * gni provider: bound (bnd) datagrams and wildcard (wc) - * datagrams. 
- * - * Bound datagrams are those that are bound to a particular - * target nic address by means of the GNI_EpBind function - * When a bound datagram is submitted to the datagram system via - * a GNI_EpPostDataWId, kgni forwards the datagram to - * the target node/cdm_id. Note that once a datagram exchange - * has been completed, the datagram can be unbound using - * the GNI_EpUnbind, and subsequently reused to target a different - * gni nic address/cdm_id. - * - * Wildcard datagrams have semantics similar to listening - * sockets. When a wildcard datagram is submitted to the - * datagram system, kgni adds the datagram to the list of - * datagrams to match for the given gni nic/cdm_id. When an - * incoming bound datagram matches the wildcard, the datagram - * exchange is completed. - */ - -/** - * Set of attributes that can be used as an argument to gnix_dgram_hndl_alloc - * - * @var timeout_needed pointer to a function which returns true - * if a timeout is needed in the call to - * GNI_EpPostdataWaitById to insure progress - * @var timeout_progress pointer to a function should be invoked - * by the datagram engine to progress - * the state of the consumer of the datagram - * functionality. 
- * @var timeout_data pointer to data supplied as the argument to - * the timeout_needed and timeout_progress methods - * @var timeout the timeout value in milliseconds to be - * supplied to GNI_EpPostdataWaitById if - * timeout_needed returns to true - */ -struct gnix_dgram_hndl_attr { - bool (*timeout_needed)(void *); - void (*timeout_progress)(void *); - void *timeout_data; - uint32_t timeout; -}; - -/** - * Datagram allocator struct - * - * @var cm_nic pointer to a previously allocated cm_nic with - * which this datagram is associated - * @var bnd_dgram_free_list head of free list for bound datagrams - * @var bnd_dgram_active_list head of active list for bound datagrams - * @var wc_dgram_free_list head of free list of wildcard datagrams - * @var wc_dgram_active_list head of active list of wildcard datagrams - * @var dgram_base starting address of memory block from - * which datagram structures are allocated - * @var timeout_needed In the case of FI_PROGRESS_AUTO, invoke this - * method prior to call to GNI_EpPostDataWaitById - * to check if we need to timeout in order to - * progress datagrams which had been stalled - * due to GNI_RC_ERROR_RESOURCE. - * @var lock lock to protect dgram lists - * @var progress_thread pthread id of progress thread for this - * datagram allocator - * @var n_dgrams number of bound datagrams managed by the - * datagram allocator - * @var n_wc_dgrams number of wildcard datagrams managed by - * the datagram allocator - * @var timeout time in milliseconds to wait for datagram to - * complete. By default set to -1 (infinite timeout), - * but can be set to handle cases where a timeout - * is required when using FI_PROGRESS_AUTO for - * control progress. 
- */ -struct gnix_dgram_hndl { - struct gnix_cm_nic *cm_nic; - struct dlist_entry bnd_dgram_free_list; - struct dlist_entry bnd_dgram_active_list; - struct dlist_entry wc_dgram_free_list; - struct dlist_entry wc_dgram_active_list; - struct gnix_datagram *dgram_base; - bool (*timeout_needed)(void *); - void (*timeout_progress)(void *); - void *timeout_data; - ofi_spin_t lock; - pthread_t progress_thread; - int n_dgrams; - int n_wc_dgrams; - uint32_t timeout; -}; - -enum gnix_dgram_type { - GNIX_DGRAM_WC = 100, - GNIX_DGRAM_BND -}; - -enum gnix_dgram_state { - GNIX_DGRAM_STATE_FREE, - GNIX_DGRAM_STATE_ACTIVE -}; - -enum gnix_dgram_buf { - GNIX_DGRAM_IN_BUF, - GNIX_DGRAM_OUT_BUF -}; - -enum gnix_dgram_poll_type { - GNIX_DGRAM_NOBLOCK, - GNIX_DGRAM_BLOCK -}; - -/** - * @brief GNI datagram structure - * - * @var list list element for managing datagrams in llists - * @var free_list_head pointer to free list head from which - * this datagram is allocated - * @var gni_ep GNI ep used for posting this datagram to GNI - * @var nic gnix connection management (cm) nic with which - * this datagram is associated - * @var target_addr target address to which this datagram is to be - * delivered which posted to GNI (applicable only - * for bound datagrams) - * @var state state of the datagram (see enum gnix_dgram_state) - * @var type datagram type (bound or wildcard) - * @var d_hndl pointer to datagram handle this datagram is - * associated - * @var pre_post_clbk_fn Call back function to be called prior to - * to the call to GNI_EpPostDataWId. This callback - * is invoked while the lock is held on the cm nic. - * @var post_post_clbk_fn Call back function to be called following - * a call to GNI_EpPostDataWId. This callback - * is invoked while the lock is held on the cm nic. 
- * @var callback_fn Call back function to be called following - * a call GNI_EpPostDataTestById and a datagram - * is returned in any of the following GNI - * post state states: GNI_POST_TIMEOUT, - * GNI_POST_TERMINATED, GNI_POST_ERROR, or - * GNI_POST_COMPLETED. The cm nic lock is - * not held when this callback is invoked. - * @var r_index_in_buf Internal index for tracking where to unstart - * a unpack request on the GNIX_DGRAM_IN_BUF buffer - * of the datagram. - * @var w_index_in_buf Internal index for tracking where to unstart - * a pack request on the GNIX_DGRAM_IN_BUF buffer - * of the datagram. - * @var r_index_out_buf Internal index for tracking where to unstart - * a unpack request on the GNIX_DGRAM_OUT_BUF buffer - * of the datagram. - * @var w_index_out_buf Internal index for tracking where to unstart - * a pack request on the GNIX_DGRAM_OUT_BUF buffer - * of the datagram. - * @var cache Pointer that can be used by datagram user to track - * data associated with the datagram transaction. - * @var dgram_in_buf Internal buffer used for the IN data to be - * posted to the GNI. - * @var dgram_out_buf Internal buffer used for the OUT data to be - * posted to the GNI. 
- */ -struct gnix_datagram { - struct dlist_entry list; - struct dlist_entry *free_list_head; - gni_ep_handle_t gni_ep; - struct gnix_cm_nic *cm_nic; - struct gnix_address target_addr; - enum gnix_dgram_state state; - enum gnix_dgram_type type; - struct gnix_dgram_hndl *d_hndl; - int (*pre_post_clbk_fn)(struct gnix_datagram *, - int *); - int (*post_post_clbk_fn)(struct gnix_datagram *, - gni_return_t); - int (*callback_fn)(struct gnix_datagram *, - struct gnix_address, - gni_post_state_t); - int r_index_in_buf; - int w_index_in_buf; - int r_index_out_buf; - int w_index_out_buf; - void *cache; - char dgram_in_buf[GNI_DATAGRAM_MAXSIZE]; - char dgram_out_buf[GNI_DATAGRAM_MAXSIZE]; -}; - -/* - * prototypes for gni datagram internal functions - */ - -/** - * @brief Allocates a handle to a datagram allocator instance - * - * @param[in] cm_nic pointer to previously allocated gnix_cm_nic object - * @param[in] attr optional pointer to a gnix_dgram_hndl_attr - * structure - * @param[in] progress progress model to be used for this cm_nic - * (see fi_domain man page) - * @param[out] handl_ptr location in which the address of the allocated - * datagram allocator handle is to be returned - * @return FI_SUCCESS Upon successfully creating a datagram allocator. - * @return -FI_ENOMEM Insufficient memory to create datagram allocator - * @return -FI_EINVAL Upon getting an invalid fabric or cm_nic handle - * @return -FI_EAGAIN In the case of FI_PROGRESS_AUTO, system lacked - * resources to spawn a progress thread. 
- */ -int _gnix_dgram_hndl_alloc(struct gnix_cm_nic *cm_nic, - enum fi_progress progress, - const struct gnix_dgram_hndl_attr *attr, - struct gnix_dgram_hndl **hndl_ptr); - -/** - * @brief Frees a handle to a datagram allocator and associated internal - * structures - * - * @param[in] hndl pointer to previously allocated datagram allocator - * instance - * @return FI_SUCCESS Upon successfully freeing the datagram allocator - * handle and associated internal structures - * @return -FI_EINVAL Invalid handle to a datagram allocator was supplied - * as input - */ -int _gnix_dgram_hndl_free(struct gnix_dgram_hndl *hndl); - -/** - * @brief Allocates a datagram - * - * @param[in] hndl pointer to previously allocated datagram allocator - * instance - * @param[in] type datagram type - wildcard or bound - * @param[out] d_ptr location in which the address of the allocated - * datagram is to be returned - * @return FI_SUCCESS Upon successfully allocating a datagram - * @return -FI_EAGAIN Temporarily insufficient resources to allocate - * a datagram. The associated cm_nic needs to be - * progressed. 
- */ -int _gnix_dgram_alloc(struct gnix_dgram_hndl *hndl, - enum gnix_dgram_type type, - struct gnix_datagram **d_ptr); - -/** - * @brief Frees a datagram - * - * @param[in] d pointer to previously allocated datagram - * datagram is to be returned - * @return FI_SUCCESS Upon successfully freeing a datagram - * @return -FI_EINVAL Invalid argument was supplied - * @return -FI_EOPBADSTAT Datagram is currently in an internal state where - * it cannot be freed - */ -int _gnix_dgram_free(struct gnix_datagram *d); - -/** - * @brief Post a wildcard datagram to the GNI datagram state engine - * - * @param[in] d pointer to previously allocated datagram - * @return FI_SUCCESS Upon successfully posting a wildcard datagram - * @return -FI_EINVAL Invalid argument was supplied - * @return -FI_ENOMEM Insufficient memory to post datagram - * @return -FI_EMSGSIZE Payload for datagram exceeds internally - * supported size (see GNI_DATAGRAM_MAXSIZE in - * gni_pub.h) - */ -int _gnix_dgram_wc_post(struct gnix_datagram *d); - -/** - * @brief Post a bound datagram to the GNI datagram state engine - * - * @param[in] d pointer to previously allocated datagram - * @return FI_SUCCESS Upon successfully posting a wildcard datagram - * @return -FI_EINVAL Invalid argument was supplied - * @return -FI_ENOMEM Insufficient memory to post datagram - * @return -FI_BUSY Only one outstanding datagram to a given - 8 target address is allowed - * @return -FI_EMSGSIZE Payload for datagram exceeds internally - * supported size (see GNI_DATAGRAM_MAXSIZE in - * gni_pub.h) - */ -int _gnix_dgram_bnd_post(struct gnix_datagram *d); - -/** - * @brief Pack the buffer of a previously allocated datagram - * with application data - * @param[in] d pointer to previously allocated datagram - * @param[in] gnix_dgram_buf which buffer into which to pack data - * @param[in] data pointer to data to be packed - * @param[in] nbytes number of bytes to pack - * @return (> 0) number of bytes packed - * @return -FI_EINVAL Invalid 
argument was supplied - * @return -FI_ENOSPC Insufficient space for data - */ -ssize_t _gnix_dgram_pack_buf(struct gnix_datagram *d, enum gnix_dgram_buf, - void *data, uint32_t nbytes); - -/** - * @brief Unpack the buffer of a previously allocated datagram - * with application data - * @param[in] d pointer to previously allocated datagram - * @param[in] gnix_dgram_buf which buffer from which to unpack data - * @param[in] data address into which the data is to be unpacked - * @param[in] nbytes number of bytes to unpacked - * @return (> 0) number of bytes unpacked - */ -ssize_t _gnix_dgram_unpack_buf(struct gnix_datagram *d, enum gnix_dgram_buf, - void *data, uint32_t nbytes); - -/** - * @brief rewind the internal pointers to datagram buffers to - * beginning of the internal buffers - * @param[in] d pointer to previously allocated datagram - * @param[in] gnix_dgram_buf which buffer to rewind - * @param[in] data address into which the data is to be unpacked - * @param[in] nbytes number of bytes to unpacked - * @return FI_SUCCESS Upon successfully rewinding internal buffer - * pointers - */ -int _gnix_dgram_rewind_buf(struct gnix_datagram *d, enum gnix_dgram_buf); - -/** - * @brief poll datagram handle to progress the underlying cm_nic's - * progress engine - * @param[in] hndl_ptr handle to a previously allocated datagram - * allocator - * @param[in] type progress type (blocking or non-blocking) - * @return FI_SUCCESS Upon successfully progressing the state - * engine - */ -int _gnix_dgram_poll(struct gnix_dgram_hndl *hndl_ptr, - enum gnix_dgram_poll_type type); - - -#endif /* _GNIX_DATAGRAM_H_ */ diff --git a/prov/gni/include/gnix_ep.h b/prov/gni/include/gnix_ep.h deleted file mode 100644 index 550e9cdd0f4..00000000000 --- a/prov/gni/include/gnix_ep.h +++ /dev/null @@ -1,466 +0,0 @@ -/* - * Copyright (c) 2015-2019 Cray Inc. All rights reserved. - * Copyright (c) 2015-2018 Los Alamos National Security, LLC. - * All rights reserved. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _GNIX_EP_H_ -#define _GNIX_EP_H_ - -#include "gnix.h" -#include "gnix_nic.h" - -/* Default minimum multi receive buffer size. 
*/ -#define GNIX_OPT_MIN_MULTI_RECV_DEFAULT 64 - -/* - * enum of tags used for GNI_SmsgSendWTag - * and callbacks at receive side to process - * these messages - */ - -enum { - GNIX_SMSG_T_EGR_W_DATA = 10, - GNIX_SMSG_T_EGR_W_DATA_ACK, - GNIX_SMSG_T_EGR_GET, - GNIX_SMSG_T_EGR_GET_ACK, - GNIX_SMSG_T_RNDZV_RTS, - GNIX_SMSG_T_RNDZV_RTR, - GNIX_SMSG_T_RNDZV_COOKIE, - GNIX_SMSG_T_RNDZV_SDONE, - GNIX_SMSG_T_RNDZV_RDONE, - GNIX_SMSG_T_RNDZV_START, - GNIX_SMSG_T_RNDZV_FIN, - GNIX_SMSG_T_RMA_DATA, - GNIX_SMSG_T_AMO_CNTR, - GNIX_SMSG_T_RNDZV_IOV_START -}; - -/** - * Set of attributes that can be passed to the _gnix_alloc_ep - * - * @var cm_ops pointer to connection management interface - * @var msg_ops pointer to message transfer interface - * @var rma_ops pointer to rma transfer interface - * @var tagged_ops pointer to tagged message transfer interface - * @var atomic_ops pointer to atomic interface - * @var cm_nic cm_nic associated with this EP - * @var nic gnix nic associated with this EP - * @var gni_cdm_modes The mode bits gni_cdm_hndl was created with. - * @var use_cdm_id true if the cdm_id field should be used for - * initializing underlying gni cdm, etc. - * @var cdm_id user supplied cmd_id to use for this endpoint - */ -struct gnix_ep_attr { - struct fi_ops_cm *cm_ops; - struct fi_ops_msg *msg_ops; - struct fi_ops_rma *rma_ops; - struct fi_ops_tagged *tagged_ops; - struct fi_ops_atomic *atomic_ops; - struct gnix_cm_nic *cm_nic; - struct gnix_nic *nic; - bool use_cdm_id; - uint32_t cdm_id; -}; - -extern smsg_completer_fn_t gnix_ep_smsg_completers[]; -extern smsg_callback_fn_t gnix_ep_smsg_callbacks[]; - -/* - * typedefs for function vectors used to steer send/receive/rma/amo requests, - * i.e. fi_send, fi_recv, etc. 
to ep type specific methods - */ - -typedef ssize_t (*send_func_t)(struct fid_ep *ep, const void *buf, - size_t len, void *desc, - fi_addr_t dest_addr, void *context); - -typedef ssize_t (*sendv_func_t)(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, - fi_addr_t dest_addr, void *context); - -typedef ssize_t (*sendmsg_func_t)(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags); - -typedef ssize_t (*msg_inject_func_t)(struct fid_ep *ep, const void *buf, - size_t len, fi_addr_t dest_addr); - -typedef ssize_t (*recv_func_t)(struct fid_ep *ep, const void *buf, - size_t len, void *desc, - fi_addr_t dest_addr, void *context); - -typedef ssize_t (*recvv_func_t)(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, - fi_addr_t dest_addr, void *context); - -typedef ssize_t (*recvmsg_func_t)(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags); - -typedef ssize_t (*tsend_func_t)(struct fid_ep *ep, const void *buf, - size_t len, void *desc, - fi_addr_t dest_addr, uint64_t tag, - void *context); - -typedef ssize_t (*tsendv_func_t)(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, - fi_addr_t dest_addr, uint64_t tag, - void *context); - -typedef ssize_t (*tsendmsg_func_t)(struct fid_ep *ep, - const struct fi_msg_tagged *msg, - uint64_t flags); - -typedef ssize_t (*tinject_func_t)(struct fid_ep *ep, - const void *buf, - size_t len, - fi_addr_t dest_addr, - uint64_t flags); - -typedef ssize_t (*trecv_func_t)(struct fid_ep *ep, - void *buf, - size_t len, - void *desc, - fi_addr_t src_addr, - uint64_t tag, - uint64_t ignore, - void *context); - -typedef ssize_t (*trecvv_func_t)(struct fid_ep *ep, - const struct iovec *iov, - void **desc, - size_t count, - fi_addr_t src_addr, - uint64_t tag, - uint64_t ignore, - void *context); - -typedef ssize_t (*trecvmsg_func_t)(struct fid_ep *ep, - const struct fi_msg_tagged *msg, - uint64_t flags); - -/** - * Internal function for growing tx buffer pool - * - 
* @param[in] ep pointer to a EP - */ -int _gnix_ep_int_tx_pool_grow(struct gnix_fid_ep *ep); - -/** - * Internal function for initializing tx buffer pool - * - * @param[in] ep pointer to a EP - */ -int _gnix_ep_int_tx_pool_init(struct gnix_fid_ep *ep); - - -/* - * inline functions - */ - -static inline struct slist_entry -*_gnix_ep_get_int_tx_buf(struct gnix_fid_ep *ep) -{ - struct slist_entry *e; - - ofi_spin_lock(&ep->int_tx_pool.lock); - - e = slist_remove_head(&ep->int_tx_pool.sl); - - ofi_spin_unlock(&ep->int_tx_pool.lock); - - if (e == NULL) { - int ret; - - ret = _gnix_ep_int_tx_pool_grow(ep); - if (ret != FI_SUCCESS) - return NULL; - - ofi_spin_lock(&ep->int_tx_pool.lock); - e = slist_remove_head(&ep->int_tx_pool.sl); - ofi_spin_unlock(&ep->int_tx_pool.lock); - } - - return e; -} - -static inline gni_mem_handle_t _gnix_ep_get_int_tx_mdh(void *e) -{ - return ((struct gnix_int_tx_buf *)e)->md->mem_hndl; -} - -static inline void _gnix_ep_release_int_tx_buf(struct gnix_fid_ep *ep, - struct slist_entry *e) -{ - ofi_spin_lock(&ep->int_tx_pool.lock); - - GNIX_DEBUG(FI_LOG_EP_DATA, "sl.head = %p, sl.tail = %p\n", - ep->int_tx_pool.sl.head, ep->int_tx_pool.sl.tail); - - slist_insert_head(e, &ep->int_tx_pool.sl); - - ofi_spin_unlock(&ep->int_tx_pool.lock); -} - -static inline struct gnix_fab_req * -_gnix_fr_alloc(struct gnix_fid_ep *ep) -{ - struct dlist_entry *de = NULL; - struct gnix_fab_req *fr = NULL; - int ret = _gnix_fl_alloc(&de, &ep->fr_freelist); - - while (OFI_UNLIKELY(ret == -FI_EAGAIN)) - ret = _gnix_fl_alloc(&de, &ep->fr_freelist); - - if (ret == FI_SUCCESS) { - fr = container_of(de, struct gnix_fab_req, dlist); - fr->gnix_ep = ep; - dlist_init(&fr->dlist); - dlist_init(&fr->msg.tle.free); - - /* reset common fields */ - fr->tx_failures = 0; - _gnix_ref_get(ep); - } - - return fr; -} - -static inline struct gnix_fab_req * -_gnix_fr_alloc_w_cb(struct gnix_fid_ep *ep, void (*cb)(void *)) -{ - struct dlist_entry *de = NULL; - struct gnix_fab_req *fr = NULL; 
- int ret = _gnix_fl_alloc(&de, &ep->fr_freelist); - - while (OFI_UNLIKELY(ret == -FI_EAGAIN)) - ret = _gnix_fl_alloc(&de, &ep->fr_freelist); - - if (ret == FI_SUCCESS) { - fr = container_of(de, struct gnix_fab_req, dlist); - fr->gnix_ep = ep; - fr->cb = cb; - _gnix_ref_init(&fr->ref_cnt, 1, cb); - dlist_init(&fr->dlist); - dlist_init(&fr->msg.tle.free); - - /* reset common fields */ - fr->tx_failures = 0; - _gnix_ref_get(ep); - } - - return fr; -} - -static inline void -_gnix_fr_free(struct gnix_fid_ep *ep, struct gnix_fab_req *fr) -{ - assert(fr->gnix_ep == ep); - - assert((fr->flags & FI_LOCAL_MR) == 0); - - if (fr->int_tx_buf_e != NULL) { - _gnix_ep_release_int_tx_buf(ep, fr->int_tx_buf_e); - fr->int_tx_buf_e = NULL; - fr->int_tx_buf = NULL; - } - - _gnix_fl_free(&fr->dlist, &ep->fr_freelist); - _gnix_ref_put(ep); -} - -static inline int -__msg_match_fab_req(struct dlist_entry *item, const void *arg) -{ - struct gnix_fab_req *req; - const struct gnix_address *addr_ptr = arg; - - req = container_of(item, struct gnix_fab_req, dlist); - - return ((GNIX_ADDR_UNSPEC(*addr_ptr)) || - (GNIX_ADDR_EQUAL(req->addr, *addr_ptr))); -} - -/* - * EP related internal helper functions - */ - -ssize_t _ep_recv(struct fid_ep *ep, void *buf, size_t len, - void *desc, fi_addr_t src_addr, void *context, - uint64_t flags, uint64_t tag, uint64_t ignore); -ssize_t _ep_recvv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t src_addr, - void *context, uint64_t flags, uint64_t tag, - uint64_t ignore); -ssize_t _ep_recvmsg(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags, uint64_t tag, - uint64_t ignore); -ssize_t _ep_send(struct fid_ep *ep, const void *buf, size_t len, - void *desc, fi_addr_t dest_addr, void *context, - uint64_t flags, uint64_t tag); -ssize_t _ep_sendv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t dest_addr, - void *context, uint64_t flags, uint64_t tag); -ssize_t _ep_sendmsg(struct fid_ep 
*ep, const struct fi_msg *msg, - uint64_t flags, uint64_t tag); -ssize_t _ep_inject(struct fid_ep *ep, const void *buf, - size_t len, uint64_t data, fi_addr_t dest_addr, - uint64_t flags, uint64_t tag); -ssize_t _ep_senddata(struct fid_ep *ep, const void *buf, - size_t len, void *desc, uint64_t data, - fi_addr_t dest_addr, void *context, - uint64_t flags, uint64_t tag); - -/** - * Allocate a gnix ep struct - * - * @param[in] domain the domain from which this EP is being created - * @param[in] info details about the domain endpoint to be opened - * @param[in] attr attributes to be used for allocating the EP - * @param[out] ep the endpoint to open - * @param[in] context the context associated with the endpoint - * - * @return FI_SUCCESS upon successfully opening a passive endpoint - * @return -FI_EINVAL invalid input arguments supplied - * @return -FI_ENOMEM no memory to allocate EP struct - */ -int _gnix_ep_alloc(struct fid_domain *domain, struct fi_info *info, - struct gnix_ep_attr *attr, - struct fid_ep **ep, void *context); - -int _gnix_ep_init_vc(struct gnix_fid_ep *ep_priv); - -/** - * Internal function for enabling ep tx resources - * - * @param[in] ep_priv pointer to a previously allocated EP - */ -int _gnix_ep_tx_enable(struct gnix_fid_ep *ep_priv); - -/** - * Internal function for enabling ep rx resources - * - * @param[in] ep_priv pointer to a previously allocated EP - */ -int _gnix_ep_rx_enable(struct gnix_fid_ep *ep_priv); - -/******************************************************************************* - * API Functions - ******************************************************************************/ -/** - * Allocates a new passive endpoint. 
- * - * @param[in] fabric the fabric - * @param[in] info details about the fabric endpoint to be opened - * @param[in/out] pep the passive endpoint to open - * @param[in] context the context associated with the endpoint - * - * @return FI_SUCCESS upon successfully opening a passive endpoint - * @return -FI_ERRNO upon an error - * @return -FI_ENOSYS if this operation is not supported - */ -int gnix_pep_open(struct fid_fabric *fabric, - struct fi_info *info, struct fid_pep **pep, - void *context); - -int gnix_scalable_ep_bind(fid_t fid, struct fid *bfid, uint64_t flags); - -/** - * Associates the passive endpoint with an event queue. - * - * @param[in] fid the fabric or access domain - * @param[in] bfid the fabric identifier for the endpoint - * @param[in] flags flags to apply to the binding - * - * @return FI_SUCCESS upon successfully binding a passive endpoint to an EQ - * @return -FI_ERRNO upon an error - * @return -FI_ENOSYS if this operation is not supported - */ -int gnix_pep_bind(fid_t fid, struct fid *bfid, uint64_t flags); - -/** - * Cancels a transaction posted to an endpoint, if possible. 
- * - * @param[in] fid the endpoint - * @param[in] context context of the transaction to be canceled - * - * @return FI_SUCCESS upon successfully canceling transaction - * @return -FI_ENONT no entry to cancel - */ -ssize_t gnix_cancel(fid_t fid, void *context); - -/** - * Get an endpoint option - * - * @param[in] fid the endpoint - * @param[in] level the option level, must be FI_OPT_ENDPOINT - * @param[in] optname the option name - * @param[out] optval the value of the named option - * @param[out] optlen the width of the value in bytes - * - * @return FI_SUCCESS upon successfully returning the value of the option - * @return -FI_EINVAL missing or invalid fid, optval, and/or optlen - * @return -FI_ENOENT unsupported context type - * @return -FI_ENOPROTOOPT unknown option level or name - */ - -int gnix_getopt(fid_t fid, int level, int optname, - void *optval, size_t *optlen); - -/** - * Set an endpoint option - * - * @param[in] fid the endpoint - * @param[in] level the option level, must be FI_OPT_ENDPOINT - * @param[in] optname the option name - * @param[out] optval the value to set in the named option - * @param[out] optlen the width of the value in bytes - * - * @return FI_SUCCESS upon successfully setting the value of the option - * @return -FI_EINVAL missing or invalid fid, optval, and/or optlen - * @return -FI_ENOENT unsupported context type - * @return -FI_ENOPROTOOPT unknown option level or name - */ - -int gnix_setopt(fid_t fid, int level, int optname, - const void *optval, size_t optlen); - -DIRECT_FN int gnix_ep_atomic_valid(struct fid_ep *ep, - enum fi_datatype datatype, - enum fi_op op, size_t *count); - -DIRECT_FN int gnix_ep_fetch_atomic_valid(struct fid_ep *ep, - enum fi_datatype datatype, - enum fi_op op, size_t *count); - -DIRECT_FN int gnix_ep_cmp_atomic_valid(struct fid_ep *ep, - enum fi_datatype datatype, - enum fi_op op, size_t *count); -#endif /* _GN IX_EP_H_ */ diff --git a/prov/gni/include/gnix_eq.h b/prov/gni/include/gnix_eq.h deleted file 
mode 100644 index c43b9e4515d..00000000000 --- a/prov/gni/include/gnix_eq.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _GNIX_EQ_H_ -#define _GNIX_EQ_H_ - -#include -#include - -#include "gnix_queue.h" -#include "gnix_wait.h" -#include "gnix_util.h" - -#define GNIX_EQ_DEFAULT_SIZE 256 - -extern struct dlist_entry gnix_eq_list; -extern pthread_mutex_t gnix_eq_list_lock; - -/* - * Stores events inside of the event queue. 
- * - * type: EQ event type defined in fi_eq.h - * len: length of the event - * flags: control flags - * buf: event - * item: list entry, contains next pointer - */ -struct gnix_eq_entry { - uint64_t flags; - uint32_t type; - size_t len; - void *the_entry; - - struct slist_entry item; -}; - -struct gnix_eq_poll_obj { - struct dlist_entry list; - struct fid *obj_fid; -}; - -struct gnix_eq_err_buf { - struct dlist_entry dlist; - int do_free; - char buf[]; -}; - -/* - * EQ structure. Contains error and event queue. - */ -struct gnix_fid_eq { - struct fid_eq eq_fid; - struct gnix_fid_fabric *fabric; - - bool requires_lock; - - struct gnix_queue *events; - struct gnix_queue *errors; - - struct fi_eq_attr attr; - - struct fid_wait *wait; - - ofi_spin_t lock; - struct gnix_reference ref_cnt; - - rwlock_t poll_obj_lock; - struct dlist_entry poll_objs; - struct dlist_entry gnix_fid_eq_list; - - struct dlist_entry err_bufs; -}; - -ssize_t _gnix_eq_write_error(struct gnix_fid_eq *eq, fid_t fid, - void *context, uint64_t index, int err, - int prov_errno, void *err_data, - size_t err_size); - -int _gnix_eq_progress(struct gnix_fid_eq *eq); - -int _gnix_eq_poll_obj_add(struct gnix_fid_eq *eq, struct fid *obj_fid); -int _gnix_eq_poll_obj_rem(struct gnix_fid_eq *eq, struct fid *obj_fid); - -#endif /* _GNIX_EQ_H_ */ diff --git a/prov/gni/include/gnix_freelist.h b/prov/gni/include/gnix_freelist.h deleted file mode 100644 index 942bc5b5e14..00000000000 --- a/prov/gni/include/gnix_freelist.h +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2020 Triad National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _GNIX_FREELIST_H_ -#define _GNIX_FREELIST_H_ - -#include -#include -#include "include/gnix_util.h" - -/* Number of elements to seed the freelist with */ -#define GNIX_FL_INIT_SIZE 100 -/* Refill growth factor */ -#define GNIX_FL_GROWTH_FACTOR 2 - -/** Free list implementation - * - * @var freelist The free list itself - * @var chunks Memory chunks (must be saved for freeing) - * @var refill_size Number of elements for the next refill - * @var growth_factor Factor for increasing refill size - * @var max_refill_size; Max refill size - * @var elem_size Size of element (in bytes) - * @var offset Offset of dlist_entry field (in bytes) - */ -struct gnix_freelist { - struct dlist_entry freelist; - struct slist chunks; - int refill_size; - int growth_factor; - int max_refill_size; - int elem_size; - int offset; - int ts; - ofi_spin_t lock; -}; - -/** Initializes a gnix_freelist - * - * @param elem_size Size of element - * @param offset Offset of dlist_entry field - * @param init_size Initial freelist size - * @param refill_size Number of elements for next refill - * @param growth_factor Factor for increasing refill size - * @param max_refill_size Max refill size - * @param fl gnix_freelist - * @return FI_SUCCESS on success, -FI_ENOMEM on failure - * @note - If the refill_size is zero, then the freelist is not growable. - */ -int _gnix_fl_init(int elem_size, int offset, int init_size, - int refill_size, int growth_factor, - int max_refill_size, struct gnix_freelist *fl); - -/** Initializes a thread safe gnix_freelist - * - * @param elem_size Size of element - * @param offset Offset of dlist_entry field - * @param init_size Initial freelist size - * @param refill_size Number of elements for next refill - * @param growth_factor Factor for increasing refill size - * @param max_refill_size Max refill size - * @param fl gnix_freelist - * @return FI_SUCCESS on success, -FI_ENOMEM on failure - * @note - If the refill_size is zero, then the freelist is not growable. 
- */ -int _gnix_fl_init_ts(int elem_size, int offset, int init_size, - int refill_size, int growth_factor, - int max_refill_size, struct gnix_freelist *fl); - -/** Clean up a gnix_freelist, including deleting memory chunks - * - * @param fl Freelist - */ -void _gnix_fl_destroy(struct gnix_freelist *fl); - -extern int __gnix_fl_refill(struct gnix_freelist *fl, int n); - -/** Return an item from the freelist - * - * @param e item - * @param fl gnix_freelist - * @return FI_SUCCESS on success, -FI_ENOMEM or -FI_EAGAIN on failure, - * or -FI_ECANCELED if the refill size is zero. - */ -__attribute__((unused)) -static inline int _gnix_fl_alloc(struct dlist_entry **e, struct gnix_freelist *fl) -{ - int ret = FI_SUCCESS; - struct dlist_entry *de = NULL; - - assert(fl); - - if (fl->ts) - ofi_spin_lock(&fl->lock); - - if (dlist_empty(&fl->freelist)) { - - if (fl->refill_size == 0) { - ret = -FI_ECANCELED; - - GNIX_DEBUG(FI_LOG_EP_CTRL, "Freelist not growable (refill " - "size is 0\n"); - - goto err; - } - - ret = __gnix_fl_refill(fl, fl->refill_size); - if (ret != FI_SUCCESS) - goto err; - if (fl->refill_size < fl->max_refill_size) { - int ns = fl->refill_size *= fl->growth_factor; - - fl->refill_size = (ns >= fl->max_refill_size ? 
- fl->max_refill_size : ns); - } - - if (dlist_empty(&fl->freelist)) { - /* Can't happen unless multithreaded */ - ret = -FI_EAGAIN; - goto err; - } - } - - de = fl->freelist.next; - dlist_remove_init(de); - - *e = de; -err: - if (fl->ts) - ofi_spin_unlock(&fl->lock); - return ret; -} - -/** Return an item to the free list - * - * @param e item - * @param fl gnix_freelist - */ -__attribute__((unused)) -static inline void _gnix_fl_free(struct dlist_entry *e, struct gnix_freelist *fl) -{ - assert(e); - assert(fl); - - e->next = NULL; /* keep slist implementation happy */ - - if (fl->ts) - ofi_spin_lock(&fl->lock); - dlist_init(e); - dlist_insert_head(e, &fl->freelist); - if (fl->ts) - ofi_spin_unlock(&fl->lock); -} - - -/** Is freelist empty (primarily used for testing - * - * @param fl gnix_freelist - * @return True if list is currently empty, false otherwise - */ -static inline int _gnix_fl_empty(struct gnix_freelist *fl) -{ - return dlist_empty(&fl->freelist); -} - -#endif /* _GNIX_FREELIST_H_ */ diff --git a/prov/gni/include/gnix_hashtable.h b/prov/gni/include/gnix_hashtable.h deleted file mode 100644 index 6676fc71bb0..00000000000 --- a/prov/gni/include/gnix_hashtable.h +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Copyright (c) 2015 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef GNIX_HASHTABLE_H_ -#define GNIX_HASHTABLE_H_ - -#include -#include - -#include -#include - -#include "gnix_util.h" - -typedef uint64_t gnix_ht_key_t; - -typedef enum gnix_ht_state { - GNIX_HT_STATE_UNINITIALIZED = 0, - GNIX_HT_STATE_READY, - GNIX_HT_STATE_DEAD, -} gnix_ht_state_e; - -typedef struct gnix_ht_entry { - struct dlist_entry entry; - gnix_ht_key_t key; - void *value; -} gnix_ht_entry_t; - -typedef struct gnix_ht_lk_lh { - rwlock_t lh_lock; - struct dlist_entry head; -} gnix_ht_lk_lh_t; - -typedef struct gnix_ht_lf_lh { - struct dlist_entry head; -} gnix_ht_lf_lh_t; - -enum gnix_ht_increase { - GNIX_HT_INCREASE_ADD = 0, - GNIX_HT_INCREASE_MULT -}; - -/** - * Set of attributes that can be passed to the gnix_ht_init. - * - * @var ht_initial_size initial number of buckets allocated - * @var ht_maximum_size maximum number of buckets to allocate on resize - * @var ht_increase_step additive or multiplicative factor to increase by. 
- * If additive, the new_size = (old_size + increase) - * If multiplicative, the new size = (old_size * - * increase) - * @var ht_increase_type based on the gnix_ht_increase enum, this - * influences whether the increase of the bucket - * count is additive or multiplicative - * @var ht_collision_thresh threshold for resizing based on insertion - * collisions. The threshold is based on the - * average number of collisions per insertion, - * multiplied by 100. If you want an average bucket - * depth of 4, you would want to see 3-4 collisions - * on average, so the appropriate threshold would be - * ~400. - * @var ht_hash_seed seed value that affects how items are hashed - * internally. Using the same seed value and the same - * insertion pattern will allow for repeatable - * results. - * @var ht_internal_locking if non-zero, uses a version of the hash table with - * internal locking implemented - * - * @var destructor if non-NULL, will be called with value when - * destroying the hash table - */ -typedef struct gnix_hashtable_attr { - int ht_initial_size; - int ht_maximum_size; - int ht_increase_step; - int ht_increase_type; - int ht_collision_thresh; - uint64_t ht_hash_seed; - int ht_internal_locking; - void (*destructor)(void *); -} gnix_hashtable_attr_t; - -struct gnix_hashtable; -struct gnix_hashtable_iter; - -typedef struct gnix_hashtable_ops { - int (*init)(struct gnix_hashtable *); - int (*destroy)(struct gnix_hashtable *); - int (*insert)(struct gnix_hashtable *, gnix_ht_entry_t *, uint64_t *); - int (*remove)(struct gnix_hashtable *, gnix_ht_key_t); - void *(*lookup)(struct gnix_hashtable *, gnix_ht_key_t); - int (*resize)(struct gnix_hashtable *, int, int); - struct dlist_entry *(*retrieve_list)(struct gnix_hashtable *, int bucket); - void *(*iter_next)(struct gnix_hashtable_iter *); -} gnix_hashtable_ops_t; - -/** - * Hashtable structure - * - * @var ht_lock reader/writer lock for protecting internal structures - * during a resize - * @var ht_state 
internal state mechanism for detecting valid state - * transitions - * @var ht_attr attributes for the hash map to follow after init - * @var ht_ops function table for internal hash map calls - * @var ht_elements number of items in the hash map - * @var ht_collisions number of insertion collisions since the last resize - * @var ht_insertions number of insertions since the last resize - * @var ht_size number of hash buckets - * @var ht_tbl array of hash buckets - */ -typedef struct gnix_hashtable { - pthread_rwlock_t ht_lock; - gnix_ht_state_e ht_state; - gnix_hashtable_attr_t ht_attr; - gnix_hashtable_ops_t *ht_ops; - ofi_atomic32_t ht_elements; - ofi_atomic32_t ht_collisions; - ofi_atomic32_t ht_insertions; - int ht_size; - union { - gnix_ht_lf_lh_t *ht_lf_tbl; - gnix_ht_lk_lh_t *ht_lk_tbl; - }; -} gnix_hashtable_t; - -struct gnix_hashtable_iter { - struct gnix_hashtable *ht; - int cur_idx; - gnix_ht_entry_t *cur_entry; -}; - -#define GNIX_HASHTABLE_ITERATOR(_ht, _iter) \ - struct gnix_hashtable_iter _iter = { \ - .ht = (_ht), \ - .cur_idx = 0, \ - .cur_entry = NULL \ - } -#define GNIX_HASHTABLE_ITERATOR_KEY(_iter) ((_iter).cur_entry->key) - -/** - * Initializes the hash table with provided attributes, if any - * - * @param ht pointer to the hash table structure - * @param attr pointer to the hash table attributes to initialize with - * @return 0 on success, -FI_EINVAL on initialization error, or - * -FI_ENOMEM if allocation of the bucket array fails - */ -int _gnix_ht_init(gnix_hashtable_t *ht, gnix_hashtable_attr_t *attr); - -/** - * Destroys the hash table - * - * @param ht pointer to the hash table structure - * @return 0 on success, -FI_EINVAL upon passing an uninitialized - * or dead structure - */ -int _gnix_ht_destroy(gnix_hashtable_t *ht); - -/** - * Inserts an entry into the map with the provided key - * - * @param ht pointer to the hash table structure - * @param key key used to hash the entry - * @param entry entry to be stored - * @return 0 on 
success, -FI_ENOSPC when another entry with the same key - * exists in the hashtable, or -FI_EINVAL when called on a - * dead or uninitialized hash table - */ -int _gnix_ht_insert(gnix_hashtable_t *ht, gnix_ht_key_t key, void *entry); - -/** - * Removes an entry from the map with the provided key - * - * @param ht pointer to the hash table structure - * @param key key used to hash the entry - * @return 0 on success, -FI_ENOENT when the key doesn't exist in - * the hash table, or -FI_EINVAL when called on a dead or - * uninitialized hash table - */ -int _gnix_ht_remove(gnix_hashtable_t *ht, gnix_ht_key_t key); - -/** - * Looks up an entry in the hash table using key - * - * @param ht pointer to the hash table structure - * @param key key used to hash the entry - * @return NULL if the key did not exist in the hash table, or the - * entry if the key exists in the hash table - */ -void *_gnix_ht_lookup(gnix_hashtable_t *ht, gnix_ht_key_t key); - -/** - * Tests to see if the hash table is empty - * - * @param ht pointer to the hash table structure - * @return true if the hash table is empty, false if not - */ -int _gnix_ht_empty(gnix_hashtable_t *ht); - -/** - * Return next element in the hashtable - * - * @param iter pointer to the hashtable iterator - * @return pointer to next element in the hashtable - */ -void *_gnix_ht_iterator_next(struct gnix_hashtable_iter *iter); - -/* Hastable iteration macros */ -#define ht_lf_for_each(ht, ht_entry) \ - dlist_for_each(ht->ht_lf_tbl->head, ht_entry, entry) \ - -#define ht_lk_for_each(ht, ht_entry) \ - dlist_for_each(ht.ht_lk_tbl->head, ht_entry, entry) - -#define ht_entry_value(ht_entry) \ - ht_entry->value - -#endif /* GNIX_HASHTABLE_H_ */ diff --git a/prov/gni/include/gnix_mbox_allocator.h b/prov/gni/include/gnix_mbox_allocator.h deleted file mode 100644 index 23182f939f4..00000000000 --- a/prov/gni/include/gnix_mbox_allocator.h +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (c) 2015 Los Alamos National Security, LLC. 
All rights reserved. - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _GNIX_MBOX_ALLOCATOR_ -#define _GNIX_MBOX_ALLOCATOR_ - -#include - -#include "gni_pub.h" - -#include "gnix.h" -#include "gnix_bitmap.h" - -enum gnix_page_size { - GNIX_PAGE_2MB = 2, - GNIX_PAGE_4MB = 4, - GNIX_PAGE_8MB = 8, - GNIX_PAGE_16MB = 16, - GNIX_PAGE_32MB = 32, - GNIX_PAGE_64MB = 64, - GNIX_PAGE_128MB = 128, - GNIX_PAGE_256MB = 256, - GNIX_PAGE_512MB = 512 -}; - -/** - * Structure representing mailbox allocated by mbox_allocator. 
- * - * @var memory_handle Memory handle returned by GNI_MemRegister. - * @var slab Slab from which mbox was allocated. - * @var base Pointer to the start of the memory returned by mmap. - * @var offset Offset from the base pointer where mailbox is located. - */ -struct gnix_mbox { - gni_mem_handle_t *memory_handle; - struct gnix_slab *slab; - - void *base; - ptrdiff_t offset; -}; - -/** - * Structure representing a slab of memory allocated by the mbox_allocator. - * - * @var base The pointer to the start of memory returned by mmap. - * @var allocator Pointer to the parent allocator. - * @var used Bitmap of slab usage. - * @var memory_handle The handle returned by GNI_MemRegister. - * @var list_entry Entry for storing structure in slist structs. - */ -struct gnix_slab { - void *base; - struct gnix_mbox_alloc_handle *allocator; - - gnix_bitmap_t *used; - gni_mem_handle_t memory_handle; - - struct slist_entry list_entry; -}; - -/** - * Structure representing an mbox_allocator. - * - * @var nic_handle Gnix_nic that the memory is associated with. - * @var cq_handle CQ handle that the memory is associated with. - * @var last_offset Last offset within file with name filename. - * @var filename File name created in the HUGETLBFS. - * @var fd Opened descriptor for file with name filename. - * @var page_size The page size used for HUGETLBFS and mmap. - * @var mbox_size Size per each mailbox. - * @var mpmmap Mailboxes per mmap slab. - * @var slab_list List of slab objects. - * - * @note If HUGETLBFS is not available, memory is allocated via ANON mmap. - */ -struct gnix_mbox_alloc_handle { - struct gnix_nic *nic_handle; - ofi_spin_t lock; - gni_cq_handle_t cq_handle; - - size_t last_offset; - char *filename; - int fd; - - size_t page_size; - size_t mbox_size; - size_t mpmmap; - - struct slist slab_list; -}; - -/** - * Creates an allocator that can be used with mbox_alloc to allocate mailboxes. - * - * @param nic IN Gnix_nic memory is associated with. 
- * @param cq_handle IN CQ handle memory is associated with. - * @param page_size IN Page size used for HUGETLBFS and mmap. - * @param mbox_size IN Size per each mailbox. - * @param mpmmap IN Mailboxes per mmap slab. - * @param alloc_handle IN/OUT Handle needed for use with mbox_alloc. - * - * @return FI_SUCCESS Upon successfully creating an allocator. - * @return -FI_EINVAL Upon getting an invalid nic, mbox_size, mpmmap, or - * alloc_handle. - * @return -FI_ENOMEM Upon failure to allocate a handle using calloc. - * @return -FI_EIO Upon failure to open a huge page. - * @return [Unspec] If failure in GNI_MemRegister. Converts gni_return_t - * status code to FI_ERRNO value. - */ -int _gnix_mbox_allocator_create(struct gnix_nic *nic, - gni_cq_handle_t cq_handle, - enum gnix_page_size page_size, - size_t mbox_size, - size_t mpmmap, - struct gnix_mbox_alloc_handle **alloc_handle); - -/** - * Releases all resources associated with an allocator handle. - * - * @param alloc_handle IN Alloc handle to destroy. - * - * @return FI_SUCCESS Upon successfully destroying an allocator. - * @return -FI_EINVAL Upon receiving an invalid alloc handle. - * @return -FI_EBUSY Upon finding that there are still mailboxes allocated - * that haven't been freed using gnix_mbox_free. - */ -int _gnix_mbox_allocator_destroy(struct gnix_mbox_alloc_handle *alloc_handle); - -/** - * Allocate a new mailbox. - * - * @param alloc_handle IN Gnix_mbox_alloc_handle to use as allocator. - * @param ptr IN/OUT Pointer to gnix_mbox to be allocated. - * - * @return FI_SUCCESS Upon successfully allocating a mailbox. - * @return -FI_ENOMEM Upon not being able to allocate memory for a slab or - * bitmap. - * @return -FI_EINVAL Upon finding that input generates invalid location for - * mbox. - * @return [Unspec] Upon failure in alloc_bitmap. Returns error code from - * alloc_bitmap. - * @return [Unspec] Upon failure in GNI_MemRegister. Converts gni_return_t - * to FI_ERRNO value. 
- */ -int _gnix_mbox_alloc(struct gnix_mbox_alloc_handle *alloc_handle, - struct gnix_mbox **ptr); - -/** - * Mark mailbox as free. - * - * @param ptr IN Pointer to allocated gnix_mbox to free. - * - * @return FI_SUCCESS Upon successful free. - * @return -FI_EINVAL Upon an invalid parameter, or finding that the bitmap - * is in a corrupted state. - */ -int _gnix_mbox_free(struct gnix_mbox *ptr); - -/* - * Initialized in gnix_init.c, used for updating filename when creating - * hugepages. - */ -extern ofi_atomic32_t file_id_counter; - -/* - * safety valve for disabling mbox allocator fallback to base pages - */ - -extern bool gnix_mbox_alloc_allow_fallback; -#endif /* _GNIX_MBOX_ALLOCATOR_ */ diff --git a/prov/gni/include/gnix_mr.h b/prov/gni/include/gnix_mr.h deleted file mode 100644 index 947236f3fab..00000000000 --- a/prov/gni/include/gnix_mr.h +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef GNIX_MR_H_ -#define GNIX_MR_H_ - -#ifdef HAVE_UDREG -#include -#endif - -/* global includes */ -#include "rdma/fi_domain.h" - -/* provider includes */ -#include "gnix_priv.h" -#include "gnix_mr_cache.h" - -#define GNIX_USER_REG 0 -#define GNIX_PROV_REG 1 - -#define GNIX_MR_PAGE_SHIFT 12 -#define GNIX_MR_PFN_BITS 37 -#define GNIX_MR_MDD_BITS 12 -#define GNIX_MR_FMT_BITS 1 -#define GNIX_MR_FLAG_BITS 2 -#define GNIX_MR_VA_BITS (GNIX_MR_PFN_BITS + GNIX_MR_PAGE_SHIFT) -#define GNIX_MR_KEY_BITS (GNIX_MR_PFN_BITS + GNIX_MR_MDD_BITS) -#define GNIX_MR_RESERVED_BITS \ - (GNIX_MR_KEY_BITS + GNIX_MR_FLAG_BITS + GNIX_MR_FMT_BITS) -#define GNIX_MR_PADDING_LENGTH (64 - GNIX_MR_RESERVED_BITS) - -/* TODO: optimize to save space by using a union to combine the two - * independent sets of data - */ -struct gnix_mr_cache_info { - /* used only with internal mr cache */ - gnix_mr_cache_t *mr_cache_rw; - gnix_mr_cache_t *mr_cache_ro; - - /* used only with udreg */ - struct udreg_cache *udreg_cache; - struct gnix_fid_domain *domain; - struct gnix_auth_key *auth_key; - - ofi_spin_t mr_cache_lock; - int inuse; -}; - -enum { - GNIX_MR_FLAG_READONLY = 1 << 0, - GNIX_MR_FLAG_BASIC_REG = 1 << 1, -}; - -enum { - GNIX_MR_TYPE_INTERNAL = 0, - GNIX_MR_TYPE_UDREG, - GNIX_MR_TYPE_NONE, - GNIX_MR_MAX_TYPE, -}; - -#define GNIX_DEFAULT_CACHE_TYPE GNIX_MR_TYPE_INTERNAL - -/* forward declarations */ -struct gnix_fid_domain; -struct gnix_nic; - -/** - * @brief gnix memory descriptor object for use with 
fi_mr_reg - * - * @var mr_fid libfabric memory region descriptor - * @var domain gnix domain associated with this memory region - * @var mem_hndl gni memory handle for the memory region - * @var nic gnix nic associated with this memory region - * @var key gnix memory cache key associated with this memory region - */ -struct gnix_fid_mem_desc { - struct fid_mr mr_fid; - struct gnix_fid_domain *domain; - gni_mem_handle_t mem_hndl; - struct gnix_nic *nic; - struct gnix_auth_key *auth_key; -#ifdef HAVE_UDREG - udreg_entry_t *entry; -#endif -}; - -/** - * @brief gnix memory region key - * - * @var pfn prefix of the virtual address - * @var mdd index for the mdd - * @var format flag for determining whether new mdd format is used - * @var flags set of bits for passing flags such as read-only - * @var padding reserved bits, unused for now - */ -typedef struct gnix_mr_key { - union { - struct { - uint64_t pfn: GNIX_MR_PFN_BITS; - uint64_t mdd: GNIX_MR_MDD_BITS; - uint64_t format : GNIX_MR_FMT_BITS; - uint64_t flags : GNIX_MR_FLAG_BITS; - uint64_t padding: GNIX_MR_PADDING_LENGTH; - }; - uint64_t value; - }; -} gnix_mr_key_t; - -/** - * - */ -struct gnix_mr_ops { - int (*init)(struct gnix_fid_domain *domain, - struct gnix_auth_key *auth_key); - int (*is_init)(struct gnix_fid_domain *domain, - struct gnix_auth_key *auth_key); - int (*reg_mr)(struct gnix_fid_domain *domain, uint64_t address, - uint64_t length, struct _gnix_fi_reg_context *fi_reg_context, - void **handle); - int (*dereg_mr)(struct gnix_fid_domain *domain, - struct gnix_fid_mem_desc *md); - int (*destroy_cache)(struct gnix_fid_domain *domain, - struct gnix_mr_cache_info *info); - int (*flush_cache)(struct gnix_fid_domain *domain); -}; - - -/** - * @brief Converts a libfabric key to a gni memory handle, skipping memory - * handle CRC generation. 
- * - * @param[in] key libfabric memory region key - * @param[in,out] mhdl gni memory handle - */ -void _gnix_convert_key_to_mhdl_no_crc( - gnix_mr_key_t *key, - gni_mem_handle_t *mhdl); - -/** - * @brief Converts a libfabric key to a gni memory handle - * - * @param[in] key libfabric memory region key - * @param[in,out] mhdl gni memory handle - */ -void _gnix_convert_key_to_mhdl( - gnix_mr_key_t *key, - gni_mem_handle_t *mhdl); - -#define _GNIX_CONVERT_MR_KEY(scalable, offset, convert_func, key, mhdl) \ - do { \ - if (scalable) { \ - gnix_mr_key_t _gnix_mr_key = { \ - .value = ((gnix_mr_key_t *) (key))->value + (offset), \ - }; \ - convert_func(&_gnix_mr_key, (mhdl)); \ - } else { \ - convert_func((gnix_mr_key_t *) (key), (mhdl)); \ - } \ - } while (0) - -/** - * @brief Converts a gni memory handle to a libfabric key - * - * @param[in] mhdl gni memory handle - * @return fi_mr_key to be used by remote EPs. - */ -uint64_t _gnix_convert_mhdl_to_key(gni_mem_handle_t *mhdl); - -/* initializes mr cache for a given domain */ -int _gnix_open_cache(struct gnix_fid_domain *domain, int type); - -/* destroys mr cache for a given domain */ -int _gnix_close_cache(struct gnix_fid_domain *domain, - struct gnix_mr_cache_info *info); - -/* flushes the memory registration cache for a given domain */ -int _gnix_flush_registration_cache(struct gnix_fid_domain *domain); - - -/** - * used for internal registrations, - * - * @param fid endpoint fid - * @param buf buffer to register - * @param len length of buffer to register - * @param access access permissions - * @param offset registration offset - * @param requested_key key requested for new registration - * @param flags registration flags - * @param mr_o pointer to returned registration - * @param context context to associate with registration - * @param auth_key authorization key to associate with registration - * @param reserved 1 if provider registration, 0 otherwise - * - * @note Set reserved to 0 for a user registration - * 
@note Set reserved to 1 for a provider registration - */ -int _gnix_mr_reg(struct fid *fid, const void *buf, size_t len, - uint64_t access, uint64_t offset, - uint64_t requested_key, uint64_t flags, - struct fid_mr **mr_o, void *context, - struct gnix_auth_key *auth_key, - int reserved); - -extern gnix_mr_cache_attr_t _gnix_default_mr_cache_attr; - -#endif /* GNIX_MR_H_ */ diff --git a/prov/gni/include/gnix_mr_cache.h b/prov/gni/include/gnix_mr_cache.h deleted file mode 100644 index e29cac1093a..00000000000 --- a/prov/gni/include/gnix_mr_cache.h +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** - * @note The GNIX memory registration cache has the following properties: - * - Not thread safe - * - The hard registration limit includes the number of stale entries. - * Stale entries will be evicted to make room for new entries as - * the registration cache becomes full. - * - Allows multiplexing of libfabric memory registrations onto a - * single GNIX memory registration so long as the libfabric - * registration can be contained wholly within the GNIX memory - * registration. - * - Uses a LRU cache eviction scheme. This should reduce the overall - * calls to reg/dereg in the underlying layers when the user - * application consistently sends messages from the same buffers - * but continually registers and deregisters those regions. The - * LRU is implemented as a queue using a doubly linked list for - * fast removal/insertion. Note that this is an - * approximate LRU scheme, because the find function may - * return a larger entry in the stale tree. - * - By default, there is no limit to the number of 'inuse' - * registrations in the cache. This can be changed by passing - * in a set of attributes during _gnix_mr_cache_init. - * - By default, there is a limit of 128 stale entries in the cache. - * This is done to limit the amount of unused entries to retain. - * Some traffic patterns may burst traffic across a network, - * potentially leaving stale entries unused for long periods of - * time. Some stale entries may never be reused by an application. - * This value may also be changed by passing in a set of attributes - * during _gnix_mr_cache_init. - * - * The memory registration framework is based on the design of a two tree - * system for fast lookups. 
The first tree is a red-black tree for O(lg n) - * search times. The intent of the design is to minimize the - * number of occurrences where memory must be registered with the NIC. - * - * Registering a new region of memory with the NIC is computationally - * expensive. This can be avoided by caching registrations and reusing existing - * registrations. The caching portion is easy, since we can store the - * registrations in any form we choose, so long as there is a data structure - * that supports it. The minimization of registrations is actually difficult. - * - * The fastpath utilizes the red-black tree for O(lg n) search times where - * the user is attempting to register a memory region where there is already - * a pre-existing registration at the same base address. By searching for the - * base address, we can check the length of the registration to see if it can - * satisfy the address. If it can satisfy the request, we are done. - * - * The slowpath utilizes the same red-black tree for O(lg n) search times. The - * slowpath uses the result from the first search (fastpath) to decide whether - * a new registration must be made. If the entry couldn't subsume the - * registration request, then a new registration must be made. If a new - * registration has to be made, then it will be some portion of the found entry - * and potentially some other entries in the tree. Traverse the tree in a - * linear fashion until a non-overlapping entry is found, then remove all - * matching elements from the tree and mark them as retired. A new registration - * is made that covers the original request and all of the requests that were - * pruned from the tree. The result is a larger memory registration that covers - * the initial request and adjacent/overlapping registrations with the request. - * - * Pruning the elements from the tree allows us to maintain a smaller search - * space and fewer elements in the red-black tree, which in turn gives us fewer - * LRU evictions. 
- * - * A number of assumptions are being made: - * - When a lookup is being performed, no one else can modify the cache. - * - Since no one can modify the cache while a lookup is occurring, certain - * search criteria can be bypassed since we know a subsumable entry could - * not have existed if a later search method is called. - * - Since earlier methods could have found a registration but did not, then - * some insertion criteria can be assumed to decrease the amount of - * instructions necessary to create a new registration. - */ -#ifndef PROV_GNI_INCLUDE_GNIX_CACHE_H_ -#define PROV_GNI_INCLUDE_GNIX_CACHE_H_ - -/* global includes */ -#include "rbtree.h" - -/* provider includes */ -#include "gnix_util.h" -#include "gnix_smrn.h" - -/* struct declarations */ -struct _gnix_fi_reg_context { - uint64_t access; - uint64_t offset; - uint64_t requested_key; - uint64_t flags; - void *context; - struct gnix_auth_key *auth_key; - int reserved; -}; - -/** - * @brief gnix memory registration cache attributes - * - * @var soft_reg_limit unused currently, imposes a soft limit for which - * a flush can be called during register to - * drain any stale registrations - * @var hard_reg_limit limit to the number of memory registrations - * in the cache - * @var hard_stale_limit limit to the number of stale memory - * registrations in the cache. If the number is - * exceeded during deregistration, - * gnix_mr_cache_flush will be called to flush - * the stale entries. - * @var lazy_deregistration if non-zero, allows registrations to linger - * until the hard_stale_limit is exceeded. This - * prevents unnecessary re-registration of memory - * regions that may be reused frequently. Larger - * values for hard_stale_limit may reduce the - * frequency of flushes. 
- */ -typedef struct gnix_mr_cache_attr { - int soft_reg_limit; - int hard_reg_limit; - int hard_stale_limit; - int lazy_deregistration; - void *reg_context; - void *dereg_context; - void *destruct_context; - struct gnix_smrn *smrn; - void *(*reg_callback)(void *handle, void *address, size_t length, - struct _gnix_fi_reg_context *fi_reg_context, - void *context); - int (*dereg_callback)(void *handle, void *context); - int (*destruct_callback)(void *context); - int elem_size; -} gnix_mr_cache_attr_t; - -extern gnix_mr_cache_attr_t __default_mr_cache_attr; - -typedef enum { - GNIX_MRC_STATE_UNINITIALIZED = 0, - GNIX_MRC_STATE_READY, - GNIX_MRC_STATE_DEAD, -} gnix_mrc_state_e; - -/** - * @brief gnix memory registration cache entry storage - */ -struct gnix_mrce_storage { - ofi_atomic32_t elements; - RbtHandle rb_tree; -}; - -/** - * @brief gnix memory registration cache object - * - * @var state state of the cache - * @var attr cache attributes, @see gnix_mr_cache_attr_t - * @var lru_head head of LRU eviction list - * @var inuse cache entry storage struct - * @var stale cache entry storage struct - * @var hits cache hits - * @var misses cache misses - */ -typedef struct gnix_mr_cache { - gnix_mrc_state_e state; - gnix_mr_cache_attr_t attr; - struct gnix_smrn_rq rq; - struct dlist_entry lru_head; - struct gnix_mrce_storage inuse; - struct gnix_mrce_storage stale; - uint64_t hits; - uint64_t misses; -} gnix_mr_cache_t; - -/** - * @brief Destroys a gnix memory registration cache. 
Flushes stale memory - * registrations if the hard limit for stale registrations has been - * exceeded - * - * @param[in] cache a gnix memory registration cache - * - * @return FI_SUCCESS on success - * -FI_EINVAL if an invalid cache pointer has been passed - * into the function - * -FI_EAGAIN if the cache still contains memory - * registrations that have not yet been deregistered - */ -int _gnix_mr_cache_destroy(gnix_mr_cache_t *cache); - -/** - * @brief Flushes stale memory registrations from a memory registration cache. - * - * @param[in] cache a gnix memory registration cache - * - * @return FI_SUCCESS on success - * -FI_EINVAL if an invalid cache pointer has been passed - * into the function - */ -int _gnix_mr_cache_flush(gnix_mr_cache_t *cache); - -/** - * @brief Initializes the MR cache state - * - * @param[in,out] cache a gnix memory registration cache - * @param[in] attr cache attributes, @see gnix_mr_cache_attr_t - * - * @return FI_SUCCESS on success - * -FI_ENOMEM otherwise - */ -int _gnix_mr_cache_init( - gnix_mr_cache_t **cache, - gnix_mr_cache_attr_t *attr); - -/** - * Function to register memory with the cache - * - * @param[in] cache gnix memory registration cache pointer - * @param[in] address base address of the memory region to be registered - * @param[in] length length of the memory region to be registered - * @param[in,out] handle memory handle pointer to written to and returned - */ -int _gnix_mr_cache_register( - gnix_mr_cache_t *cache, - uint64_t address, - uint64_t length, - struct _gnix_fi_reg_context *fi_reg_context, - void **handle); - -/** - * Function to deregister memory in the cache - * - * @param[in] cache gnix memory registration cache pointer - * @param[in] mr gnix memory registration descriptor pointer - * - * @return FI_SUCCESS on success - * -FI_ENOENT if there isn't an active memory registration - * associated with the mr - * return codes for potential calls to callbacks - */ -int _gnix_mr_cache_deregister( - gnix_mr_cache_t 
*cache, - void *handle); - -#endif /* PROV_GNI_INCLUDE_GNIX_CACHE_H_ */ diff --git a/prov/gni/include/gnix_mr_notifier.h b/prov/gni/include/gnix_mr_notifier.h deleted file mode 100644 index 5f38f9b962d..00000000000 --- a/prov/gni/include/gnix_mr_notifier.h +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Copyright (c) 2016 Cray Inc. All rights reserved. - * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _GNIX_MR_NOTIFIER_H_ -#define _GNIX_MR_NOTIFIER_H_ - -#include -#include -#include "rdma/fi_errno.h" -#include "config.h" - -#if HAVE_KDREG - -#include -#include -#include -#include -#include -#include -#include - -#include "rdma/providers/fi_log.h" -#include "gnix_util.h" - -#define KDREG_DEV "/dev/kdreg" - -typedef volatile uint64_t kdreg_user_delta_t; - -/** - * @brief memory registration notifier - * - * @var fd File descriptor for KDREG_DEV - * @var cntr Kernel managed counter for monitored events - * @var lock Only used for set up and tear down (no guarantees - * if reading or writing while setting up or tearing down) - */ -struct gnix_mr_notifier { - int fd; - kdreg_user_delta_t *cntr; - ofi_spin_t lock; - int ref_cnt; -}; - -/** - * @brief initialize the process for use of the notifier - * - * @return FI_SUCESSS on success - */ -int _gnix_notifier_init(void); - -/** - * @brief open the kdreg device and prepare for notifications - * - * @param[in,out] k Empty and initialized gnix_mr_notifier struct - * @return FI_SUCESSS on success - * -FI_EBUSY if device already open - * -FI_ENODATA if user delta unavailable - * -fi_errno or -errno on other failures - */ -int _gnix_notifier_open(struct gnix_mr_notifier **mrn); - -/** - * @brief close the kdreg device and zero the notifier - * - * @param[in] k gnix_mr_notifier struct - * @return FI_SUCESSS on success - * -fi_errno or -errno on other failures - */ -int _gnix_notifier_close(struct gnix_mr_notifier *mrn); - -/** - * @brief monitor a memory region - * - * @param[in] k gnix_mr_notifier struct - * @param[in] addr address of memory region to monitor - * @param[in] len length of memory region - * @param[in] cookie user identifier associated with the region - * @return FI_SUCESSS on success - * -fi_errno or -errno on failure - */ -int _gnix_notifier_monitor(struct gnix_mr_notifier *mrn, void *addr, - uint64_t len, uint64_t cookie); - -/** - * @brief stop monitoring a memory region - * - * @param[in] k 
gnix_mr_notifier struct - * @param[out] cookie user identifier for notification event - * @return FI_SUCESSS on success - * -fi_errno or -errno on failure - */ -int _gnix_notifier_unmonitor(struct gnix_mr_notifier *mrn, uint64_t cookie); - -/** - * @brief get a monitoring event - * - * @param[in] k gnix_mr_notifier struct - * @param[out] buf buffer to write event data - * @param[in] len length of buffer - * @return Number of bytes read on success - * -FI_EINVAL if invalid arguments - * -FI_EAGAIN if nothing to read - * -fi_errno or -errno on failure - */ -int _gnix_notifier_get_event(struct gnix_mr_notifier *mrn, - void* buf, size_t len); - -#else - -struct gnix_mr_notifier { - int dummy[0]; -}; - -static inline int -_gnix_notifier_init(void) -{ - return FI_SUCCESS; -} - -static inline int -_gnix_notifier_open(struct gnix_mr_notifier **mrn) -{ - return FI_SUCCESS; -} - -static inline int -_gnix_notifier_close(struct gnix_mr_notifier *mrn) -{ - return FI_SUCCESS; -} - -static inline int -_gnix_notifier_monitor(struct gnix_mr_notifier *mrn, void *addr, - uint64_t len, uint64_t cookie) -{ - return FI_SUCCESS; -} - -static inline int -_gnix_notifier_unmonitor(struct gnix_mr_notifier *mrn, uint64_t cookie) -{ - return FI_SUCCESS; -} - -static inline int -_gnix_notifier_get_event(struct gnix_mr_notifier *mrn, void* buf, size_t len) -{ - return -FI_EAGAIN; -} - -#endif /* HAVE_KDREG */ - -#endif /* _GNIX_MR_NOTIFIER_H_ */ diff --git a/prov/gni/include/gnix_msg.h b/prov/gni/include/gnix_msg.h deleted file mode 100644 index 3632a0da1b0..00000000000 --- a/prov/gni/include/gnix_msg.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _GNIX_MSG_H_ -#define _GNIX_MSG_H_ - -ssize_t _gnix_recv(struct gnix_fid_ep *ep, uint64_t buf, size_t len, void *desc, - uint64_t src_addr, void *context, uint64_t flags, - uint64_t tag, uint64_t ignore, struct gnix_fab_req *req); - -ssize_t _gnix_recv_mr(struct gnix_fid_ep *ep, uint64_t buf, size_t len, void *desc, - uint64_t src_addr, void *context, uint64_t flags, - uint64_t tag, uint64_t ignore); - -ssize_t _gnix_send(struct gnix_fid_ep *ep, uint64_t loc_addr, size_t len, - void *mdesc, uint64_t dest_addr, void *context, - uint64_t flags, uint64_t data, uint64_t tag); - -ssize_t _gnix_recvv(struct gnix_fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, uint64_t src_addr, void *context, - uint64_t flags, uint64_t ignore, uint64_t tag); - -ssize_t _gnix_sendv(struct gnix_fid_ep *ep, const struct iovec *iov, - void **mdesc, size_t count, uint64_t dest_addr, - void *context, uint64_t flags, uint64_t tag); - -#endif /* _GNIX_MSG_H_ */ diff --git a/prov/gni/include/gnix_nameserver.h b/prov/gni/include/gnix_nameserver.h deleted file mode 100644 index df4abf5fa8d..00000000000 --- a/prov/gni/include/gnix_nameserver.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _GNIX_NAMESERVER_H_ -#define _GNIX_NAMESERVER_H_ - -#include "gnix.h" - -/* - * defines, data structs, and prototypes for gnix nameserver - */ - -/* - * prototypes - */ - -int _gnix_local_ipaddr(struct sockaddr_in *sin); - -int _gnix_pe_to_ip(const struct gnix_ep_name *ep_name, - struct sockaddr_in *saddr); - -int _gnix_resolve_name(IN const char *node, IN const char *service, - IN uint64_t flags, INOUT struct gnix_ep_name - *resolved_addr); - -int _gnix_src_addr(struct gnix_ep_name *resolved_addr); - -#endif /* _GNIX_NAMESERVER_H_ */ diff --git a/prov/gni/include/gnix_nic.h b/prov/gni/include/gnix_nic.h deleted file mode 100644 index 3d1eb434f7e..00000000000 --- a/prov/gni/include/gnix_nic.h +++ /dev/null @@ -1,518 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _GNIX_NIC_H_ -#define _GNIX_NIC_H_ - -#if HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ -#include -#include - -#include "gnix.h" -#include "gnix_bitmap.h" -#include "gnix_mbox_allocator.h" -#include "gnix_util.h" - -#define GNIX_DEF_MAX_NICS_PER_PTAG 4 - -/* - * globals - */ - -extern uint32_t gnix_max_nics_per_ptag; -extern struct dlist_entry gnix_nic_list_ptag[]; -extern struct dlist_entry gnix_nic_list; -extern pthread_mutex_t gnix_nic_list_lock; - -/* - * allocation flags for cleaning up GNI resources - * when closing a gnix_nic - needed since these - * can be dup'd from another structure. 
- */ - -#define GNIX_NIC_CDM_ALLOCD (1ULL << 1) -#define GNIX_NIC_TX_CQ_ALLOCD (1ULL << 2) -#define GNIX_NIC_RX_CQ_ALLOCD (1ULL << 3) - -/* - * typedefs for callbacks for handling - * receipt of SMSG messages at the target - */ -typedef int (*smsg_callback_fn_t)(void *ptr, void *msg); - -extern smsg_callback_fn_t gnix_ep_smsg_callbacks[]; - -/* - * typedef for completer functions invoked - * at initiator when local CQE (tx) is processed - */ -typedef int (*smsg_completer_fn_t)(void *desc, gni_return_t); - -/** - * Set of attributes that can be passed to the gnix_alloc_nic. - * - * @var gni_cdm_hndl optional previously allocated gni_cdm_hndl to - * use for allocating GNI resources (GNI CQs) for - * this nic. - * @var gni_nic_hndl optional previously allocated gni_nic_hndl to - * use for allocating GNI resources (GNI CQs) for - * this nic - * - * @var gni_cdm_modes The mode bits gni_cdm_hndl was created with. - */ -struct gnix_nic_attr { - gni_cdm_handle_t gni_cdm_hndl; - uint32_t gni_cdm_modes; - gni_nic_handle_t gni_nic_hndl; - bool use_cdm_id; - uint32_t cdm_id; - bool must_alloc; - struct gnix_auth_key *auth_key; -}; - -/** - * GNIX nic struct - * - * @var gnix_nic_list list element used for global NIC list - * @var ptag_nic_list list element used for NIC linked list associated - * with a given PTAG. - * @var lock lock used for serializing access to - * gni_nic_hndl, rx_cq, and tx_cq - * @var gni_cdm_hndl handle for the GNI communication domain (CDM) - * this nic is bound to. - * @var gni_cdm_modes The mode bits gni_cdm_hndl was created with. 
- * @var gni_nic_hndl handle for the GNI nic to which this GNIX nic is bound - * @var rx_cq GNI rx cq (non-blocking) bound to this nic - * @var rx_cq_blk GNI rx cq (blocking) bound to this nic - * @var tx_cq GNI tx cq (non-blocking) bound to this nic - * @var tx_cq_blk GNI tx cq (blocking) bound to this nic - * @var progress_thread thread id of progress thread for this nic - * @var tx_desc_active_list linked list of active tx descriptors associated - * with this nic - * @var tx_desc_free_list linked list of free tx descriptors associated - * with this nic - * @var tx_desc_base base address for the block of memory from which - * tx descriptors were allocated - * @var prog_vcs_lock lock for prog_vcs - * @var prog_vcs list of VCs needing progress - * @var wq_lock lock for serializing access to the nic's work queue - * @var nic_wq head of linked list of work queue elements - * associated with this nic - * @var ptag ptag of the GNI CDM this nic is bound to - * @var cookie cookie of the GNI CDM this nic is bound to - * of the VC when using GNI_CQ_GET_INST_ID to get - * @var device_id device id of the GNI nic this nic is bound to (always 0, - * unless ever need to support multiple GNI nics/node) - * @var device_addr address (L2) of the GNI nic to which this nic is bound - * @var max_tx_desc_id max tx descriptor id for this nic - * @var vc_id_lock lock for serializing access to the vc_id_table for - * this nic - * @var vc_id_table base address of the nic's vc_id_table - * @var vc_id_table_capacity current capacity of the nic's vc_id_table - * @var vc_id_table_count current location of the next unoccupied entry in the - * vc_id_table - note there may be unused entries - * below this entry. 
- * @var vc_id_bitmap bitmap indicating which entries in the vc_id_table are - * currently in use (1 - used, 0 - unused) - * @var mem_per_mbox number of bytes consumed per GNI SMSG mailbox associated - * with this nic's vd_id_table - * @var mbox_hndl handle for the mailbox allocator bound to this nic - * @var s_rdma_buf_hndl handle for send side rdma buffer allocator bound to this nic - * @var r_rdma_buf_hndl handle for recv side rdma buffer allocator bound to this nic - * @var ref_cnt ref cnt for this nid - * @var smsg_callbacks pointer to table of GNI SMSG callback functions used - * by this nic for processing incoming GNI SMS - * messages - * @var err_txds slist of error'd tx descriptors - * @var tx_cq_blk_post_cnt count of outstanding tx desc's posted using tx_cq_blk - * GNI CQ. - * @var irq_mem_hndl gni_mem_handle_t for mmap region registered with - * gni hw cq handle used for GNI_PostCqWrite - * @var irq_mmap_addr base address of mmap associated with irq_dma_hndl - * @var irq_mmap_len length of the mmap in bytes - * @var using_vmdh denotes whether nic is associated with a domain - * that is utilizing VMDH - * @var mdd_resources_set flag to indicate whether GNI_SetMDDResources has - * called yet to reserve MDD resources - */ -struct gnix_nic { - struct dlist_entry gnix_nic_list; /* global NIC list */ - struct dlist_entry ptag_nic_list; /* global PTAG NIC list */ - struct dlist_entry gnix_nic_prog_list; /* temporary list for nic progression */ - ofi_spin_t lock; - uint32_t allocd_gni_res; - gni_cdm_handle_t gni_cdm_hndl; - uint32_t gni_cdm_modes; - gni_nic_handle_t gni_nic_hndl; - gni_cq_handle_t rx_cq; - gni_cq_handle_t rx_cq_blk; - gni_cq_handle_t tx_cq; - gni_cq_handle_t tx_cq_blk; - pthread_t progress_thread; - ofi_spin_t tx_desc_lock; - struct dlist_entry tx_desc_active_list; - struct dlist_entry tx_desc_free_list; - struct gnix_tx_descriptor *tx_desc_base; - ofi_spin_t prog_vcs_lock; - struct dlist_entry prog_vcs; - /* note this free list will be 
initialized for thread safe */ - struct gnix_freelist vc_freelist; - uint8_t ptag; - uint32_t cookie; - uint32_t device_id; - uint32_t device_addr; - int max_tx_desc_id; - ofi_spin_t vc_id_lock; - void **vc_id_table; - int vc_id_table_capacity; - int vc_id_table_count; - gnix_bitmap_t vc_id_bitmap; - uint32_t mem_per_mbox; - struct gnix_mbox_alloc_handle *mbox_hndl; - /* TODO: gnix_buddy_alloc_handle_t *alloc_handle */ - struct gnix_mbox_alloc_handle *s_rdma_buf_hndl; - struct gnix_mbox_alloc_handle *r_rdma_buf_hndl; - struct gnix_reference ref_cnt; - smsg_callback_fn_t const *smsg_callbacks; - struct slist err_txds; - gni_mem_handle_t irq_mem_hndl; - void *irq_mmap_addr; - size_t irq_mmap_len; - int requires_lock; - int mdd_resources_set; - int using_vmdh; -}; - - -/** - * gnix_smsg_eager_hdr - first part of an eager send SMSG message - * - * @var flags flag bits from send side that are needed at - * rcv side (e.g. FI_REMOTE_CQ_DATA) - * @var imm immediate data associated with this message - * @var msg_tag libfabric tag associated with this message - * @var len length in bytes of the incoming message - */ -struct gnix_smsg_eager_hdr { - uint64_t flags; - uint64_t imm; - uint64_t msg_tag; - size_t len; -}; - -/** - * gnix_smsg_rndzv_start_hdr - first part of a rendezvous send start SMSG - * message - * - * @var flags flag bits from send side that are needed at - * rcv side (e.g. 
FI_REMOTE_CQ_DATA) - * @var imm immediate data associated with this message - * @var msg_tag libfabric tag associated with this message - * @var mdh MDH for the rendezvous send buffer - * @var addr address of the rendezvous send buffer - * @var len length in bytes of the send buffer - * @var req_addr local request address - * @var head unaligned data at the head of a rendezvous send - * @var tail unaligned data at the tail of a rendezvous send - */ -struct gnix_smsg_rndzv_start_hdr { - uint64_t flags; - uint64_t imm; - uint64_t msg_tag; - gni_mem_handle_t mdh; - uint64_t addr; - size_t len; - uint64_t req_addr; - uint32_t head; - uint32_t tail; -}; - -/** - * gnix_smsg_rndzv_iov_start_hdr - * - * @var flags the sender's flags needed on the receive side. - * @var imm the immediate data associated with this message. - * @var msg_tag the tag associated with this message. - * @var mdh the memory handle associated with the iov buffer. - * @var iov_cnt the length of the scatter/gather vector. - * @var req_addr the sender's fabric request address. - * @var send_len the cumulative size (in bytes) of the client's - * iov base buffers. - * - * @note the actual iov base addresses and lengths are placed in the - * data section of the start message. 
- */ -struct gnix_smsg_rndzv_iov_start_hdr { - uint64_t flags; - uint64_t imm; - uint64_t msg_tag; - uint64_t req_addr; - size_t iov_cnt; - uint64_t send_len; -}; - -/** - * gnix_smsg_rndzv_fin_hdr - first part of a rendezvous send fin SMSG message - * - * @var req_addr returned local request address - */ -struct gnix_smsg_rndzv_fin_hdr { - uint64_t req_addr; - int status; -}; - -/** - * gnix_smsg_rma_data_hdr - RMA remote data message - * - * @var flags control flags - * @var user_flags remote CQ user flags - * @var user_data remote CQ user immediate data - */ -struct gnix_smsg_rma_data_hdr { - uint64_t flags; - uint64_t user_flags; - uint64_t user_data; -}; - -/** - * gnix_smsg_amo_cntr_hdr - RMA remote counter message - * - * @var user_flags control flags - */ -struct gnix_smsg_amo_cntr_hdr { - uint64_t flags; -}; - -/** - * gni_tx_descriptor - full tx descriptor used to to track GNI SMSG - * and Post operations - * - * @var list list element - * @var gni_desc embedded GNI post descriptor - * @var gnix_ct_descs embedded GNI post descriptors for concatenated gets - * used for unaligned gets - * @var gni_more_ct_descs embedded GNI post descriptors for concatenated puts - or gets for FI_MORE. - * @var gnix_smsg_eager_hdr embedded header for SMSG eager protocol - * @var gnix_smsg_rndzv_start_hdr embedded header for rendezvous protocol - * @var gnix_smsg_rndzv_iov_start_hdr embedded header for iovec rndzv protocol - * @var gnix_smsg_rndzv_fin_hdr embedded header for rendezvous protocol - * @var gnix_smsg_rndzv_rma_data_hdr embedded header for remote notification for - * rma operations - * @var gnix_smsg_amo_cntr_hdr embedded header for AMO remote counter events. - * @var req pointer to fab request associated with this descriptor - * @var completer_fn call back to invoke when associated GNI CQE's are - * returned. 
- * @var id the id of this descriptor - the value returned - * from GNI_CQ_MSG_ID - * @var err_list Error TXD list entry - * @var tx_failures Number of times this transmission descriptor failed. - */ -struct gnix_tx_descriptor { - struct dlist_entry list; - union { - struct { - gni_post_descriptor_t gni_desc; - gni_ct_get_post_descriptor_t gni_ct_descs[2]; - void *gni_more_ct_descs; - }; - struct gnix_smsg_eager_hdr eager_hdr; - struct gnix_smsg_rndzv_start_hdr rndzv_start_hdr; - struct gnix_smsg_rndzv_iov_start_hdr rndzv_iov_start_hdr; - struct gnix_smsg_rndzv_fin_hdr rndzv_fin_hdr; - struct gnix_smsg_rma_data_hdr rma_data_hdr; - struct gnix_smsg_amo_cntr_hdr amo_cntr_hdr; - }; - struct gnix_fab_req *req; - int (*completer_fn)(void *, gni_return_t); - int id; - struct slist_entry err_list; -}; - -/* - * prototypes - */ - -/** - * @brief allocate a tx descriptor to use for GNI Post, SMSG ops - * - * @param[in] nic pointer to previously allocated gnix_nic struct - * @param[in] tdesc pointer to address where allocated tx descriptor - * is to be stored - * @return FI_SUCCESS on success, -FI_ENOSPC no free tx descriptors - */ -static inline int _gnix_nic_tx_alloc(struct gnix_nic *nic, - struct gnix_tx_descriptor **desc) -{ - struct dlist_entry *entry; - - COND_ACQUIRE(nic->requires_lock, &nic->tx_desc_lock); - if (dlist_empty(&nic->tx_desc_free_list)) { - COND_RELEASE(nic->requires_lock, &nic->tx_desc_lock); - return -FI_ENOSPC; - } - - entry = nic->tx_desc_free_list.next; - dlist_remove_init(entry); - dlist_insert_head(entry, &nic->tx_desc_active_list); - *desc = dlist_entry(entry, struct gnix_tx_descriptor, list); - COND_RELEASE(nic->requires_lock, &nic->tx_desc_lock); - - return FI_SUCCESS; -} - - -/** - * @brief frees a previously allocated tx descriptor - * - * @param[in] nic pointer to previously allocated gnix_nic struct used - * when allocating the tx descriptor to be freed - * @param[in] tdesc pointer to previously allocated tx descriptor - * @return 
FI_SUCCESS on success - */ -static inline int _gnix_nic_tx_free(struct gnix_nic *nic, - struct gnix_tx_descriptor *desc) -{ - COND_ACQUIRE(nic->requires_lock, &nic->tx_desc_lock); - dlist_remove_init(&desc->list); - dlist_insert_head(&desc->list, &nic->tx_desc_free_list); - COND_RELEASE(nic->requires_lock, &nic->tx_desc_lock); - - return FI_SUCCESS; -} - - -/** - * @brief allocate a gnix_nic struct - * - * @param[in] domain pointer to previously allocated gnix_fid_domain struct - * @param[in] attrs optional pointer to an attributes argument. NULL - * can be supplied if no attributes are required - * @param[out] nic_ptr pointer to address where address of allocated nic is - * to be returned - * @return FI_SUCCESS on success, -FI_ENOMEM if insufficient memory - * to allocate nic struct, -FI_EINVAL if an invalid domain - * struct was supplied, -FI_EBUSY if insufficient resources - * are available to allocate the nic struct, -FI_EACCESS - * if a permissions access error occurred while allocating - * the nic struct, -FI_EAGAIN if an invalid state - * prevents creation or an interrupt was received while - * allocating kernel related resources for the nic. - */ -int gnix_nic_alloc(struct gnix_fid_domain *domain, - struct gnix_nic_attr *attrs, - struct gnix_nic **nic_ptr); - -/** - * @brief frees a previously allocated gnix_nic struct - * - * @param[in] nic pointer to previously allocated gnix_nic struct - * @return FI_SUCCESS on success, -FI_ENOSPC no free tx descriptors - */ -int _gnix_nic_free(struct gnix_nic *nic); - -/** - * @brief progresses control/data operations associated with the nic - * - * @param[in] arg pointer to previously allocated gnix_nic struct - * @return FI_SUCCESS on success, -FI_EINVAL if an invalid - * nic struct was supplied. TODO: a lot more error - * values can be returned. 
- */ -int _gnix_nic_progress(void *arg); - -/** - * @brief allocate a remote id for an object, used for looking up an object - * in O(1) based on returned value of GNI_CQ_INST_ID applied to a GNI - * CQE - * - * @param[in] nic pointer to previously allocated gnix_nic struct - * @param[out] remote_id address where allocate remote_id is returned - * @param[in] entry pointer to object to be associated with the - * remote id - - * @return FI_SUCCESS on success, -FI_ENOMEM if insufficient - * memory to allocate remote_id - */ -int _gnix_nic_get_rem_id(struct gnix_nic *nic, int *remote_id, void *entry); - -/** - * @brief release a previously allocated remote_id - * - * @param[in] nic pointer to previously allocated gnix_nic struct - * @param[in] remote_id previously allocated remote_id - - * @return FI_SUCCESS on success, -FI_EINVAL if an invalid - * argument was provided. - */ -int _gnix_nic_free_rem_id(struct gnix_nic *nic, int remote_id); - -/** - * @brief Look up an element by id - * - * @param[in] nic pointer to gni nic with which the vc is associated - * @param[in] rem_id rem_id of the object being looked up - * rem_id comes from GNI_CQ_GET_INST_ID on a GNI CQE - * - * @return pointer to vc with the given vc_id - * - * This function is only here because its used for criterion tests, - * otherwise it would be a static function within gnix_nic.c - */ -static inline void *__gnix_nic_elem_by_rem_id(struct gnix_nic *nic, int rem_id) -{ - void *elem; - - assert(nic); - - COND_ACQUIRE(nic->requires_lock, &nic->vc_id_lock); - - assert(rem_id <= nic->vc_id_table_count); - elem = nic->vc_id_table[rem_id]; - - COND_RELEASE(nic->requires_lock, &nic->vc_id_lock); - - return elem; -} - -void _gnix_nic_txd_err_inject(struct gnix_nic *nic, - struct gnix_tx_descriptor *txd); - -/** - * @brief Initialize global NIC data. 
- */ -void _gnix_nic_init(void); - -#endif /* _GNIX_NIC_H_ */ diff --git a/prov/gni/include/gnix_poll.h b/prov/gni/include/gnix_poll.h deleted file mode 100644 index 5c9c2b49c80..00000000000 --- a/prov/gni/include/gnix_poll.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All - * Copyright (c) 2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _GNIX_POLL_H_ -#define _GNIX_POLL_H_ - -/******************************************************************************* - * API Functions - ******************************************************************************/ -/** - * Poll progress and events across multiple completion queues and counters. - * - * @param[in] pollset the pollset - * @param[in/out] context user context values associated with completion - * queues or counters - * @param[in] count number of entries in context - * - * @return FI_SUCCESS upon successfully polling progress - * @return -FI_ERRNO upon an error - * @return -FI_ENOSYS if this operation is not supported - */ -int gnix_poll_poll(struct fid_poll *pollset, void **context, int count); - -/** - * Associates a completions queue or counter with a poll set - * - * @param[in] pollset the pollset - * @param[in] event_fid the queue or counter - * @param[in] flags flags for the requests - * - * @return FI_SUCCESS upon adding the completion queue or counter - * @return -FI_ERRNO upon an error - * @return -FI_ENOSYS if this operation is not supported - */ -int gnix_poll_add(struct fid_poll *pollset, struct fid *event_fid, - uint64_t flags); - -/** - * Removes a completion queue or counter from a poll set. - * - * @param[in] pollset the pollset - * @param[in] event_fid the queue or counter - * @param[in] flags flags for the requests - * - * @return FI_SUCCESS upon removing the completion queue or counter - * @return -FI_ERRNO upon an error - * @return -FI_ENOSYS if this operation is not supported - */ -int gnix_poll_del(struct fid_poll *pollset, struct fid *event_fid, - uint64_t flags); - -/******************************************************************************* - * Exposed internal functions. 
- ******************************************************************************/ - -#endif /* _GNIX_POLL_H_ */ diff --git a/prov/gni/include/gnix_priv.h b/prov/gni/include/gnix_priv.h deleted file mode 100644 index 89466f7297f..00000000000 --- a/prov/gni/include/gnix_priv.h +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright (c) 2015 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef GNIX_PRIV_H_ -#define GNIX_PRIV_H_ - -#include "gnix.h" - -extern uint8_t precomputed_crc_results[256]; - -/* - * Start of code pulled from gni_priv.h - */ -#define gni_crc_bits(data) precomputed_crc_results[(data)] - -inline static uint8_t gni_memhndl_calc_crc(gni_mem_handle_t *memhndl) -{ - uint64_t qw1 = memhndl->qword1; - uint64_t qw2 = memhndl->qword2; - uint8_t crc = 0; - crc = gni_crc_bits((qw1 ^ crc)&0xff); - crc = gni_crc_bits(((qw1 >> 8) ^ crc)&0xff); - crc = gni_crc_bits(((qw1 >> 16) ^ crc)&0xff); - crc = gni_crc_bits(((qw1 >> 24) ^ crc)&0xff); - crc = gni_crc_bits(((qw1 >> 32) ^ crc)&0xff); - crc = gni_crc_bits(((qw1 >> 40) ^ crc)&0xff); - crc = gni_crc_bits(((qw1 >> 48) ^ crc)&0xff); - crc = gni_crc_bits(((qw1 >> 56) ^ crc)&0xff); - crc = gni_crc_bits((qw2 ^ crc)&0xff); - crc = gni_crc_bits(((qw2 >> 8) ^ crc)&0xff); - crc = gni_crc_bits(((qw2 >> 16) ^ crc)&0xff); - crc = gni_crc_bits(((qw2 >> 24) ^ crc)&0xff); - crc = gni_crc_bits(((qw2 >> 32) ^ crc)&0xff); - crc = gni_crc_bits(((qw2 >> 40) ^ crc)&0xff); - crc = gni_crc_bits(((qw2 >> 48) ^ crc)&0xff); - - return crc; -} - -typedef struct gni_mem_hndl_v1 { - struct { - uint64_t va: 52; - uint64_t mdh: 12; - }; - struct { - uint64_t npages: 28; - uint64_t pgsize: 6; - uint64_t flags: 8; - uint64_t unused: 14; - uint64_t crc: 8; - }; -} gni_mem_hndl_v1_t; -typedef struct gni_mem_hndl_v2 { - union { - struct { - uint64_t va: 52; - uint64_t entropy: 12; - }; - uint64_t id; - }; - struct { - uint64_t npages: 28; - uint64_t pgsize: 6; - uint64_t flags: 8; - uint64_t mdh: 12; - uint64_t unused: 2; - uint64_t crc: 8; - }; -} gni_mem_hndl_v2_t; - -/*************** Memory Handle ****************/ -/* Flags (8 bits)*/ -#define GNI_MEMHNDL_FLAG_READONLY 0x01UL /* Memory is not writable */ -#define GNI_MEMHNDL_FLAG_VMDH 0x02UL /* Mapped via virtual MDH table */ -#define GNI_MEMHNDL_FLAG_MRT 0x04UL /* MRT was used for mapping */ -#define GNI_MEMHNDL_FLAG_GART 0x08UL /* GART was used for mapping */ 
-#define GNI_MEMHNDL_FLAG_IOMMU 0x10UL /* IOMMU was used for mapping */ -#define GNI_MEMHNDL_FLAG_PCI_IOMMU 0x20UL /* PCI IOMMU was used for mapping */ -#define GNI_MEMHNDL_FLAG_CLONE 0x40UL /* Registration cloned from a master MDD */ -#define GNI_MEMHNDL_FLAG_NEW_FRMT 0x80UL /* Used to support MDD sharing */ -/* Memory Handle manipulations */ -#define GNI_MEMHNDL_INIT(memhndl) do {memhndl.qword1 = 0; memhndl.qword2 = 0;} while(0) -/* Support macros, 34 is the offset of the flags value */ -#define GNI_MEMHNDL_NEW_FRMT(memhndl) ((memhndl.qword2 >> 34) & GNI_MEMHNDL_FLAG_NEW_FRMT) -#define GNI_MEMHNDL_FRMT_SET(memhndl, val, value) \ - if (GNI_MEMHNDL_NEW_FRMT(memhndl)) { \ - uint64_t tmp = value; \ - ((gni_mem_hndl_v2_t *)&memhndl)->val = tmp; \ - } else { \ - uint64_t tmp = value; \ - ((gni_mem_hndl_v1_t *)&memhndl)->val = tmp; \ - } - -#define GNI_MEMHNDL_FRMT_GET(memhndl, val) \ - ((uint64_t)(GNI_MEMHNDL_NEW_FRMT(memhndl) ? ((gni_mem_hndl_v2_t *)&memhndl)->val : ((gni_mem_hndl_v1_t *)&memhndl)->val)) - -/* Differing locations for V1 and V2 mem handles */ -#define GNI_MEMHNDL_SET_VA(memhndl, value) GNI_MEMHNDL_FRMT_SET(memhndl, va, (value) >> 12) -#define GNI_MEMHNDL_GET_VA(memhndl) (GNI_MEMHNDL_FRMT_GET(memhndl, va) << 12) -#define GNI_MEMHNDL_SET_MDH(memhndl, value) GNI_MEMHNDL_FRMT_SET(memhndl, mdh, value) -#define GNI_MEMHNDL_GET_MDH(memhndl) GNI_MEMHNDL_FRMT_GET(memhndl, mdh) - - -/* The MDH field size is the same, and there is no other define to - * limit max MDHs in uGNI. */ - -#define GNI_MEMHNDL_MDH_MASK 0xFFFUL - -/* From this point forward, there is no difference. 
We don't need the - * inlined conditionals */ - -/* Number of Registered pages (1TB for 4kB pages): QWORD2[27:0] */ -#define GNI_MEMHNDL_NPGS_MASK 0xFFFFFFFUL -#define GNI_MEMHNDL_SET_NPAGES(memhndl, value) memhndl.qword2 |= (value & GNI_MEMHNDL_NPGS_MASK) -/* Page size that was used to calculate the total number of pages : QWORD2[33:28] */ -#define GNI_MEMHNDL_PSIZE_MASK 0x3FUL -#define GNI_MEMHNDL_SET_PAGESIZE(memhndl, value) memhndl.qword2 |= (((uint64_t)value & GNI_MEMHNDL_PSIZE_MASK) << 28) -/* Flags: QWORD2[41:34] */ -#define GNI_MEMHNDL_FLAGS_MASK 0xFFUL -#define GNI_MEMHNDL_SET_FLAGS(memhndl, value) memhndl.qword2 |= ((value & GNI_MEMHNDL_FLAGS_MASK) << 34) -#define GNI_MEMHNDL_GET_FLAGS(memhndl) ((memhndl.qword2 >> 34) & GNI_MEMHNDL_FLAGS_MASK) -/* QWORD2[55:54] left blank */ -/* CRC to verify integrity of the handle: QWORD2[63:56] ( Call this only after all other field are set!)*/ -#define GNI_MEMHNDL_CRC_MASK 0xFFUL -#define GNI_MEMHNDL_SET_CRC(memhndl) (memhndl.qword2 |= ((uint64_t)gni_memhndl_calc_crc(&memhndl)<<56)) - -/* - * End of code pulled from gni_priv.h - */ - -#endif /* GNIX_PRIV_H_ */ diff --git a/prov/gni/include/gnix_progress.h b/prov/gni/include/gnix_progress.h deleted file mode 100644 index b6941eea799..00000000000 --- a/prov/gni/include/gnix_progress.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _GNIX_PROGRESS_H_ -#define _GNIX_PROGRESS_H_ - -#include "gnix_util.h" - -/* - * Progress common code - */ - -struct gnix_prog_set { - struct dlist_entry prog_objs; - rwlock_t lock; - int requires_lock; -}; - -int _gnix_prog_progress(struct gnix_prog_set *set); -int _gnix_prog_obj_add(struct gnix_prog_set *set, void *obj, - int (*prog_fn)(void *data)); -int _gnix_prog_obj_rem(struct gnix_prog_set *set, void *obj, - int (*prog_fn)(void *data)); -int _gnix_prog_init(struct gnix_prog_set *set); -int _gnix_prog_fini(struct gnix_prog_set *set); - -#endif /* _GNIX_PROGRESS_H_ */ - diff --git a/prov/gni/include/gnix_queue.h b/prov/gni/include/gnix_queue.h deleted file mode 100644 index f89efef347d..00000000000 --- a/prov/gni/include/gnix_queue.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _GNIX_QUEUE_H -#define _GNIX_QUEUE_H - -#include - -typedef struct slist_entry *(*alloc_func)(size_t entry_size); -typedef void (*free_func)(struct slist_entry *item); - -struct gnix_queue { - struct slist item_list; - struct slist free_list; - - alloc_func alloc_item; - free_func free_item; - - size_t entry_size; -}; - -int _gnix_queue_create(struct gnix_queue **queue, alloc_func alloc_item, - free_func free_item, size_t entry_size, - size_t entry_count); -void _gnix_queue_destroy(struct gnix_queue *queue); - -struct slist_entry *_gnix_queue_peek(struct gnix_queue *queue); - -struct slist_entry *_gnix_queue_get_free(struct gnix_queue *queue); -struct slist_entry *_gnix_queue_dequeue(struct gnix_queue *queue); -struct slist_entry *_gnix_queue_dequeue_free(struct gnix_queue *queue); - -void _gnix_queue_enqueue(struct gnix_queue *queue, struct slist_entry *item); -void _gnix_queue_enqueue_free(struct gnix_queue *queue, - struct slist_entry *item); - -#endif /* #define _GNIX_QUEUE_H */ diff --git a/prov/gni/include/gnix_rma.h b/prov/gni/include/gnix_rma.h deleted file mode 100644 index 34ecaea415b..00000000000 --- a/prov/gni/include/gnix_rma.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _GNIX_RMA_H_ -#define _GNIX_RMA_H_ - -#define GNIX_RMA_READ_FLAGS_DEF (FI_RMA | FI_READ) -#define GNIX_RMA_WRITE_FLAGS_DEF (FI_RMA | FI_WRITE) - -ssize_t _gnix_rma(struct gnix_fid_ep *ep, enum gnix_fab_req_type fr_type, - uint64_t loc_addr, size_t len, void *mdesc, - uint64_t dest_addr, uint64_t rem_addr, uint64_t mkey, - void *context, uint64_t flags, uint64_t data); - -/** - * @brief try to deliver an IRQ to peer - * - * This routine can be used to deliver an IRQ to the remote peer - * via a GNI_PostCqWrite. - * - * @param[in] vc pointer to previously allocated gnix_vc struct which - * is in connected state - * @return FI_SUCCESS GNI_PostCqWrite successfully posted. - * @return -FI_INVALID vc in invalid state or incorrect memory handle used - * @return -FI_ENOSPC no free tx descriptors - */ -int _gnix_rma_post_irq(struct gnix_vc *vc); - -/* SMSG callback for RMA data control message. */ -int __smsg_rma_data(void *data, void *msg); - -#endif /* _GNIX_RMA_H_ */ - diff --git a/prov/gni/include/gnix_shmem.h b/prov/gni/include/gnix_shmem.h deleted file mode 100644 index 77b8c754e59..00000000000 --- a/prov/gni/include/gnix_shmem.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2017 Cray Inc. All rights reserved. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _GNIX_SHMEM_H -#define _GNIX_SHMEM_H - -#include - - -struct gnix_shared_memory { - void *addr; - uint32_t size; -}; - -/** - * @brief Creates a shared memory region - * - * @param path path to shared memory region - * @param size size of shared memory region in bytes - * @param init_func initialization function for region, if created - * @param region pointer to returned memory region - */ -int _gnix_shmem_create( - const char *path, - uint32_t size, - int (*init_func)(const char *path, uint32_t size, void *region), - struct gnix_shared_memory *region); - -/** - * @brief Destroys a shared memory region - * - * @param region memory region to destroy - */ -int _gnix_shmem_destroy(struct gnix_shared_memory *region); - -#endif diff --git a/prov/gni/include/gnix_smrn.h b/prov/gni/include/gnix_smrn.h deleted file mode 100644 index d49f444f335..00000000000 --- a/prov/gni/include/gnix_smrn.h +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef PROV_GNI_INCLUDE_GNIX_SMRN_H -#define PROV_GNI_INCLUDE_GNIX_SMRN_H - -#include "include/ofi_list.h" -#include "include/ofi_lock.h" -#include "config.h" - -#include "gnix_mr_notifier.h" - -/** - * @brief shared memory registration notifier - * - * @var lock Only used for set up and tear down (no guarantees - * if reading or writing while setting up or tearing down) - */ -struct gnix_smrn { - ofi_spin_t lock; - struct gnix_mr_notifier *notifier; - struct dlist_entry rq_head; - int references; -}; - -struct gnix_smrn_rq { - ofi_spin_t lock; - struct dlist_entry list; - struct dlist_entry entry; -}; - -struct gnix_smrn_context { - struct gnix_smrn_rq *rq; - uint64_t cookie; - struct dlist_entry entry; -}; - -int _gnix_smrn_init(void); - -/** - * @brief open the prepare for notifications - * - * @param[in,out] k Empty and initialized gnix_smrn struct - * @return FI_SUCESSS on success - * -FI_EBUSY if device already open - * -FI_ENODATA if user delta unavailable - * -fi_errno or -errno on other failures - */ -int _gnix_smrn_open(struct gnix_smrn **smrn); - -/** - * @brief close the kdreg device and zero the notifier - * - * @param[in] k gnix_smrn struct - * @return FI_SUCESSS on success - * -fi_errno or -errno on other failures - */ -int _gnix_smrn_close(struct gnix_smrn *mrn); - -/** - * @brief monitor a memory region - * - * @param[in] k gnix_smrn struct - * @param[in] addr address of memory region to monitor - * @param[in] len length of memory region - * @param[in] 
cookie user identifier associated with the region - * @return FI_SUCESSS on success - * -fi_errno or -errno on failure - */ -int _gnix_smrn_monitor(struct gnix_smrn *smrn, - struct gnix_smrn_rq *rq, - void *addr, - uint64_t len, - uint64_t cookie, - struct gnix_smrn_context *context); - -/** - * @brief stop monitoring a memory region - * - * @param[in] k gnix_smrn struct - * @param[out] cookie user identifier for notification event - * @return FI_SUCESSS on success - * -fi_errno or -errno on failure - */ -int _gnix_smrn_unmonitor(struct gnix_smrn *smrn, - uint64_t cookie, - struct gnix_smrn_context *context); - -/** - * @brief get a monitoring event - * - * @param[in] k gnix_smrn struct - * @param[out] buf buffer to write event data - * @param[in] len length of buffer - * @return Number of bytes read on success - * -FI_EINVAL if invalid arguments - * -FI_EAGAIN if nothing to read - * -fi_errno or -errno on failure - */ -int _gnix_smrn_get_event(struct gnix_smrn *smrn, - struct gnix_smrn_rq *rq, - struct gnix_smrn_context **context); - -#endif diff --git a/prov/gni/include/gnix_tags.h b/prov/gni/include/gnix_tags.h deleted file mode 100644 index c989217ccc3..00000000000 --- a/prov/gni/include/gnix_tags.h +++ /dev/null @@ -1,494 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/** - * Notes: - * - * This tag matching system currently implements a linked-list version of - * a tag matcher. - * - * A hash list implementation was planned but will be not pursued due to the - * constraints of the problem. - * - * As understood at the time of this writing, matching a tag involves matching - * the bits of the tag against the bits of the tag to find, less the ignored - * bits. When no bits are ignored, there is no subset of tags to match other - * than the exact match. When some bits are ignored, there is a subset of tags - * that are not exact matches, but are considered matches according to the - * ignored bits. This problem represents a search over a k-space, where k is - * the number of distinct ignore fields given by the tag format for the - * provider. Each ignore field in the tag format distinct separates tags into - * different 'bins' within the same field. - * - * A hash list implementation is not impossible, but can be computationally - * impractical. This is due to the way a search must occur if bits are ignored. - * The problem would be trivial if no bits were ignored, as the implementation - * could simply go to the bucket where the tag was hashed and search there. 
- * However, because ignored bits could hash to multiple, if not all, buckets, - * the tag matcher must search all buckets. It takes more time to compute all - * permutations of affected tags, than to just search each tag, so the default - * behavior is to search all buckets if a non-zero ignore field is provided. - * - * In the event that the ignore field is always zero, a hash list - * implementation is strictly better than a list implementation. However, - * once ignore bits are considered, the problem becomes more complex. - * - * Consider a hash list implementation where tags are split into K buckets, - * and tags are evenly distributed. If N tags are placed into the hash list in - * an evenly distributed manner, we can compute a best, average and worst - * case expectation for the hash list implementation. - * - * In the best case, the tag we are searching for is at the front of the list - * and only one tag in the system could match the search parameters, then - * we get the following best case analysis: - * - * When tags are evenly distributed, there should be ~(N/K) tags per bucket. - * - * Best(hash-list, middle list): ((N/K) * (K/ 2)) + 1 == (N/2) + 1 - * Best(hash-list, first list): 1 - * - * We can assume that on average, we'll have to search half the buckets in - * the best case, since the tags are evenly distributed. The very best case - * is that the tag would be in the first bucket, giving us O(1) instead. - * However, since tag location is determined on the hash, we have to assume - * that we will have to search half the buckets first (N/K) * (K/2). Since - * the element we are searching for is at the front of the list, it only - * takes one search on that list. - * - * Average(hash-list): (N/K) * (K/2) + (N/(2K)) == (1/2) * (N + N/K) - * - * Similar to the best case, we can assume we'll have to search through half - * of the bucket when the ignore bits are set. 
Also, we can assume that the - * entry we are searching for lies in the middle of one of the hash lists. - * In this case, the average is not much worse than the best case. - * - * Worst(hash-list, last list): ((N/K) * K - 1) + (N/K) == N - * - * The worst case is much more simple than the rest. It would be the last - * element in the last list. This would be no worse than a linear search - * through the list. - * - * All things considered, the hash list seems like it would be relatively - * reasonable with a more likely chance of finding entries faster than - * a standard linked list. However, a simple sanalysis shows that - * that the best case is likely to be more expensive than the estimate. - * - * There is a bias in the algorithm. Since we always prefer the oldest, and - * entries are ordered from oldest to newest, searching should - * complete in less than N/2 operations on average. However, with the - * hash-list, the bias has no influence due to the way that tags are - * distributed. This causes the hash-list to perform much more slowly than - * the list implementation. - * - * For the above listed reasons, a hash-list version of the tag matcher is - * available, but should only be used under certain expectations. If the - * frequency of searching with ignored bits is low, then a hash list may be - * faster. If the ignore bits are used frequently and should match with - * several requests, then that may also be preferred. However, in the event that - * the user searches with few ignored bits that would match against very - * few requests, then they would likely encounter average case behavior more - * frequently than expected and thus would spend more time in the tag matcher. 
- */ - -/* - * Examples: - * - * Init: - * - * _gnix_posted_tag_storage_init(&ep->posted_recvs, NULL); - * _gnix_unexpected_tag_storage_init(&ep->unexpected_recvs, NULL); - * - * On receipt of a message: - * - * ofi_spin_lock(&ep->tag_lock); - * req = _gnix_remove_by_tag(&ep->posted_recvs, msg->tag, 0); - * if (!req) - * _gnix_insert_by_tag(&ep->unexpected_recvs, msg->tag, msg->req); - * ofi_spin_unlock(&ep->tag_lock); - * - * On post of receive: - * - * ofi_spin_lock(&ep->tag_lock); - * tag_req = _gnix_remove_by_tag(&ep->unexpected_recvs, - * req->tag, req->ignore); - * if (!tag_req) - * _gnix_insert_by_tag(&ep->posted_recvs, tag, req); - * ofi_spin_unlock(&ep->tag_lock); - * - */ - -#ifndef PROV_GNI_SRC_GNIX_TAGS_H_ -#define PROV_GNI_SRC_GNIX_TAGS_H_ - -#include -#include -#include - -#include "gnix_util.h" - -/* enumerations */ - -/** - * Enumeration for determining the underlying data structure for a - * tag storage. - * - * Using auto will choose one of list, hlist or kdtree based on mem_tag_format. - */ -enum { - GNIX_TAG_AUTOSELECT = 0,//!< GNIX_TAG_AUTOSELECT - GNIX_TAG_LIST, //!< GNIX_TAG_LIST - GNIX_TAG_HLIST, //!< GNIX_TAG_HLIST - GNIX_TAG_KDTREE, //!< GNIX_TAG_KDTREE - GNIX_TAG_MAXTYPES, //!< GNIX_TAG_MAXTYPES -}; - -/** - * Enumeration for the tag storage states - */ -enum { - GNIX_TS_STATE_UNINITIALIZED = 0,//!< GNIX_TS_STATE_UNINITIALIZED - GNIX_TS_STATE_INITIALIZED, //!< GNIX_TS_STATE_INITIALIZED - GNIX_TS_STATE_DESTROYED, //!< GNIX_TS_STATE_DESTROYED -}; - -/* forward declarations */ -struct gnix_tag_storage; -struct gnix_fab_req; -struct gnix_address; - -/* structure declarations */ -/** - * @brief Function dispatch table for the different types of underlying structures - * used in the tag storage. 
- * - * @var insert_tag insert a request into the tag storage - * @var remove_tag remove a request from the tag storage - * @var peek_tag probe tag storage for a specific tag - * @var init performs specific initialization based on underlying - * data structure - * @var fini performs specific finalization based on underlying - * data structure - */ -struct gnix_tag_storage_ops { - int (*insert_tag)(struct gnix_tag_storage *ts, uint64_t tag, - struct gnix_fab_req *req); - struct gnix_fab_req *(*remove_tag)(struct gnix_tag_storage *ts, - uint64_t tag, uint64_t ignore, - uint64_t flags, void *context, - struct gnix_address *addr); - struct gnix_fab_req *(*peek_tag)(struct gnix_tag_storage *ts, - uint64_t tag, uint64_t ignore, - uint64_t flags, void *context, - struct gnix_address *addr); - void (*remove_tag_by_req)(struct gnix_tag_storage *ts, - struct gnix_fab_req *req); - int (*init)(struct gnix_tag_storage *ts); - int (*fini)(struct gnix_tag_storage *ts); - struct gnix_fab_req *(*remove_req_by_context)(struct gnix_tag_storage *ts, - void *context); -}; - -/** - * @note The sequence and generation numbers will be used in the future for - * optimizing the search with branch and bound. - */ -struct gnix_tag_list_element { - /* entry to the next element in the list */ - struct dlist_entry free; - /* has element been claimed with FI_CLAIM? */ - int claimed; - /* associated fi_context with claimed element */ - void *context; - /* sequence number */ - uint32_t seq; - /* generation number */ - uint32_t gen; -}; - -/** - * @note The type field is based on the GNIX_TAG_* enumerations listed above - */ -struct gnix_tag_storage_attr { - /* one of 'auto', 'list', 'hlist' or 'kdtree' */ - int type; - /* should the tag storage check addresses? */ - int use_src_addr_matching; -}; - -/** - * @note Unused. This will be used in the future for the init heuristic when - * performing auto detection based on the mem_tag_format. 
- */ -struct gnix_tag_field { - uint64_t mask; - uint64_t len; -}; - -/** - * @note Unused. This will be used in the future for the init heuristic when - * performing auto detection based on the mem_tag_format. - */ -struct gnix_tag_format { - int field_cnt; - struct gnix_tag_field *fields; -}; - -struct gnix_tag_list { - struct dlist_entry list; -}; - -struct gnix_hlist_head { - struct dlist_entry head; - uint64_t oldest_tag_id; - uint64_t oldest_gen; -}; - - -struct gnix_tag_hlist { - struct gnix_hlist_head *array; - int elements; - uint64_t last_inserted_id; - uint64_t oldest_tag_id; - uint64_t current_gen; -}; - -struct gnix_tag_kdtree { - -}; - -/** - * @brief gnix tag storage structure - * - * Used to store gnix_fab_requests by tag, and optionally, by address. - * - * @var seq sequence counter for elements - * @var state state of the tag storage structure - * @var gen generation counter for elements - * @var match_func matching function for the tag storage, either posted or - * unexpected - * @var attr tag storage attributes - * @var ops function dispatch table for underlying data structures - * @var tag_format unused. used during init for determining what type of - * data structure to use for storing data - */ -struct gnix_tag_storage { - ofi_atomic32_t seq; - int state; - int gen; - int (*match_func)(struct dlist_entry *entry, const void *arg); - struct gnix_tag_storage_attr attr; - struct gnix_tag_storage_ops *ops; - struct gnix_tag_format tag_format; - union { - struct gnix_tag_list list; - struct gnix_tag_hlist hlist; - struct gnix_tag_kdtree kdtree; - }; -}; - -/* function declarations */ -/** - * @brief generic matching function for posted and unexpected tag storages - * - * @param req gnix fabric request to match - * @param tag tag to match - * @param ignore bits to ignore in the tags - * @param flags fi_tagged flags - * @param context fi_context to match in request - * @param uses_src_addr_matching should we check addresses? 
- * @param addr gnix address to match - * @param matching_posted is matching on a posted tag storage? - * @return 1 if this request matches the parameters, 0 otherwise - */ -int _gnix_req_matches_params( - struct gnix_fab_req *req, - uint64_t tag, - uint64_t ignore, - uint64_t flags, - void *context, - int use_src_addr_matching, - struct gnix_address *addr, - int matching_posted); - -/** - * @brief matching function for unexpected tag storages - * - * @param entry dlist entry pointing to the request to search - * @param arg search parameters as a gnix_tag_search_element - * @return 1 if this request matches the parameters, 0 otherwise - */ -int _gnix_match_unexpected_tag(struct dlist_entry *entry, const void *arg); - -/** - * @brief matching function for posted tag storages - * - * @param entry dlist entry pointing to the request to search - * @param arg search parameters as a gnix_tag_search_element - * @return 1 if this request matches the parameters, 0 otherwise - */ -int _gnix_match_posted_tag(struct dlist_entry *entry, const void *arg); - -/** - * @brief base initialization function for tag storages - * @note This function should never be called directly. 
It is exposed for the - * purpose of allowing the test suite to reinitialize tag storages - * without knowing what type of tag storage is being reinitialized - * - * @param ts tag storage pointer - * @param attr tag storage attributes - * @param match_func match function to be used on individual list elements - * @return -FI_EINVAL, if any invalid parameters were given - * FI_SUCCESS, otherwise - */ -int _gnix_tag_storage_init( - struct gnix_tag_storage *ts, - struct gnix_tag_storage_attr *attr, - int (*match_func)(struct dlist_entry *, const void *)); - -/** - * @brief initialization function for posted tag storages - * - * @param ts tag storage pointer - * @param attr tag storage attributes - * @param match_func match function to be used on individual list elements - * @return -FI_EINVAL, if any invalid parameters were given - * FI_SUCCESS, otherwise - */ -static inline int _gnix_posted_tag_storage_init( - struct gnix_tag_storage *ts, - struct gnix_tag_storage_attr *attr) -{ - return _gnix_tag_storage_init(ts, attr, _gnix_match_posted_tag); -} - -/** - * @brief initialization function for unexpected tag storages - * - * @param ts tag storage pointer - * @param attr tag storage attributes - * @param match_func match function to be used on individual list elements - * @return -FI_EINVAL, if any invalid parameters were given - * FI_SUCCESS, otherwise - */ -static inline int _gnix_unexpected_tag_storage_init( - struct gnix_tag_storage *ts, - struct gnix_tag_storage_attr *attr) -{ - return _gnix_tag_storage_init(ts, attr, _gnix_match_unexpected_tag); -} - -/** - * @brief destroys a tag storage and releases any held memory - * - * @param ts - * @return -FI_EINVAL, if the tag storage is in a bad state - * -FI_EAGAIN, if there are tags remaining in the tag storage - * FI_SUCCESS, otherwise - */ -int _gnix_tag_storage_destroy(struct gnix_tag_storage *ts); - -/** - * @brief inserts a gnix_fab_req into the tag storage - * - * @param ts pointer to the tag storage - * @param 
tag tag associated with fab request - * @param req gnix fabric request - * @param ignore bits to ignore in tag (only applies to posted) - * @param addr_ignore bits to ignore in addr (only applies to posted) - * @return - * - * @note if ts is a posted tag storage, 'req->ignore_bits' will be set to - * the value of 'ignore'. - * - * @note if ts is a posted tag storage and ts->attr.use_src_addr_matching - * is enabled, 'req->addr_ignore_bits' will be set to the value - * of 'addr_ignore'. - */ -int _gnix_insert_tag( - struct gnix_tag_storage *ts, - uint64_t tag, - struct gnix_fab_req *req, - uint64_t ignore); - - -/** - * @brief matches at a request from the tag storage by tag and address - * - * @param ts pointer to the tag storage - * @param tag tag to remove - * @param ignore bits to ignore in tag - * @param flags fi_tagged flags - * @param context fi_context associated with tag - * @param addr gnix_address associated with tag - * @param addr_ignore bits to ignore in address - * @return NULL, if no entry found that matches parameters - * otherwise, a non-null value pointing to a gnix_fab_req - * - * @note ignore parameter is not used for posted tag storages - * @note addr_ignore parameter is not used for posted tag storages - * @note if FI_CLAIM is not provided in flags, the call is an implicit removal - * of the tag - * @note When the FI_PEEK flag is not set, the request will be removed - * from the tag storage - */ -struct gnix_fab_req *_gnix_match_tag( - struct gnix_tag_storage *ts, - uint64_t tag, - uint64_t ignore, - uint64_t flags, - void *context, - struct gnix_address *addr); - -struct gnix_fab_req *_gnix_remove_req_by_context( - struct gnix_tag_storage *ts, - void *context); - -/** - * @brief removes a gnix_fab_req from the tag storage list element - * - * @param ts pointer to the tag storage - * @param req gnix fabric request - * @param ignore bits to ignore in tag (only applies to posted) - * @return none - * - * @note This is similar to _gnix_match_tag 
with the FI_PEEK flag not set - * but it does not need to search the list to remove the request - */ -void _gnix_remove_tag( - struct gnix_tag_storage *ts, - struct gnix_fab_req *req); - -/* external symbols */ - - - -#endif /* PROV_GNI_SRC_GNIX_TAGS_H_ */ diff --git a/prov/gni/include/gnix_trigger.h b/prov/gni/include/gnix_trigger.h deleted file mode 100644 index 9adb83dcb4f..00000000000 --- a/prov/gni/include/gnix_trigger.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2016 Cray Inc. All rights reserved. - * Copyright (c) 2016 Los Alamos National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* - * Triggered operations handling. 
- */ - -#ifndef GNIX_TRIGGER_H_ -#define GNIX_TRIGGER_H_ - -#include "gnix.h" -#include "gnix_cntr.h" -#include "gnix_vc.h" - -int _gnix_trigger_queue_req(struct gnix_fab_req *req); -void _gnix_trigger_check_cntr(struct gnix_fid_cntr *cntr); - -static inline int _gnix_trigger_pending(struct gnix_fid_cntr *cntr) -{ - return dlist_empty(&cntr->trigger_list) ? 0 : 1; -} - -#endif /* GNIX_TRIGGER_H */ diff --git a/prov/gni/include/gnix_util.h b/prov/gni/include/gnix_util.h deleted file mode 100644 index 83873f7a8f8..00000000000 --- a/prov/gni/include/gnix_util.h +++ /dev/null @@ -1,288 +0,0 @@ -/* - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#if HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#ifndef _GNIX_UTIL_H_ -#define _GNIX_UTIL_H_ - -#include -#include - -extern struct fi_provider gnix_prov; -#if HAVE_CRITERION -extern int gnix_first_pe_on_node; /* globally visible for criterion */ -#endif - -/* - * For debug logging (ENABLE_DEBUG) - * Q: should this just always be available? - */ -#ifndef ENABLE_DEBUG - -#define GNIX_LOG_INTERNAL(FI_LOG_FN, LEVEL, subsystem, fmt, ...) \ - FI_LOG_FN(&gnix_prov, subsystem, fmt, ##__VA_ARGS__) - -#define GNIX_FI_PRINT(prov, subsystem, ...) - -#else - -/* defined in gnix_init.c */ -extern __thread pid_t gnix_debug_pid; -extern __thread uint32_t gnix_debug_tid; -extern ofi_atomic32_t gnix_debug_next_tid; - -#define GNIX_FI_PRINT(prov, subsystem, ...) \ - do { \ - fi_log(prov, FI_LOG_WARN, subsystem, \ - __func__, __LINE__, __VA_ARGS__); \ - } while (0) - - -/* These macros are used to prepend the log message with the pid and - * unique thread id. Do not use them directly. Rather use the normal - * GNIX_* macros. - */ -#define GNIX_LOG_INTERNAL(FI_LOG_FN, LEVEL, subsystem, fmt, ...) \ - do { \ - if (fi_log_enabled(&gnix_prov, LEVEL, subsystem)) { \ - const int fmt_len = 256; \ - char new_fmt[fmt_len]; \ - if (gnix_debug_tid == ~(uint32_t) 0) { \ - gnix_debug_tid = ofi_atomic_inc32(&gnix_debug_next_tid); \ - } \ - if (gnix_debug_pid == ~(uint32_t) 0) { \ - gnix_debug_pid = getpid(); \ - } \ - snprintf(new_fmt, fmt_len, "[%%d:%%d] %s", fmt); \ - FI_LOG_FN(&gnix_prov, subsystem, new_fmt, \ - gnix_debug_pid, gnix_debug_tid, ##__VA_ARGS__); \ - } \ - } while (0) - -#endif - -#define GNIX_WARN(subsystem, ...) 
\ - GNIX_LOG_INTERNAL(FI_WARN, FI_LOG_WARN, subsystem, __VA_ARGS__) -#define GNIX_TRACE(subsystem, ...) \ - GNIX_LOG_INTERNAL(FI_TRACE, FI_LOG_TRACE, subsystem, __VA_ARGS__) -#define GNIX_INFO(subsystem, ...) \ - GNIX_LOG_INTERNAL(FI_INFO, FI_LOG_INFO, subsystem, __VA_ARGS__) -#if ENABLE_DEBUG -#define GNIX_DEBUG(subsystem, ...) \ - GNIX_LOG_INTERNAL(FI_DBG, FI_LOG_DEBUG, subsystem, __VA_ARGS__) -#define GNIX_DBG_TRACE(subsystem, ...) \ - GNIX_LOG_INTERNAL(FI_TRACE, FI_LOG_TRACE, subsystem, __VA_ARGS__) -#else -#define GNIX_DEBUG(subsystem, ...) \ - do {} while (0) -#define GNIX_DBG_TRACE(subsystem, ...) \ - do {} while (0) -#endif - -#define GNIX_ERR(subsystem, ...) \ - GNIX_LOG_INTERNAL(GNIX_FI_PRINT, FI_LOG_WARN, subsystem, __VA_ARGS__) -#define GNIX_FATAL(subsystem, ...) \ - do { \ - GNIX_LOG_INTERNAL(GNIX_FI_PRINT, FI_LOG_WARN, subsystem, __VA_ARGS__); \ - abort(); \ - } while (0) - -#if 1 -#define GNIX_LOG_DUMP_TXD(txd) -#else -#define GNIX_LOG_DUMP_TXD(txd) \ - do { \ - gni_mem_handle_t *tl_mdh = &(txd)->gni_desc.local_mem_hndl; \ - gni_mem_handle_t *tr_mdh = &(txd)->gni_desc.remote_mem_hndl; \ - GNIX_INFO(FI_LOG_EP_DATA, "la: %llx ra: %llx len: %d\n", \ - (txd)->gni_desc.local_addr, \ - (txd)->gni_desc.remote_addr, \ - (txd)->gni_desc.length); \ - GNIX_INFO(FI_LOG_EP_DATA, \ - "lmdh: %llx:%llx rmdh: %llx:%llx key: %llx\n", \ - *(uint64_t *)tl_mdh, *(((uint64_t *)tl_mdh) + 1), \ - *(uint64_t *)tr_mdh, *(((uint64_t *)tr_mdh) + 1), \ - fab_req->amo.rem_mr_key); \ - } while (0) -#endif - -/* slist and dlist utilities */ -#include "ofi_list.h" - -static inline void dlist_node_init(struct dlist_entry *e) -{ - e->prev = e->next = NULL; -} - -#define DLIST_IN_LIST(e) e.prev != e.next - -#define DLIST_HEAD(dlist) struct dlist_entry dlist = { &(dlist), &(dlist) } - -#define dlist_entry(e, type, member) container_of(e, type, member) - -#define dlist_first_entry(h, type, member) \ - (dlist_empty(h) ? 
NULL : dlist_entry((h)->next, type, member)) - -/* Iterate over the entries in the list */ -#define dlist_for_each(h, e, member) \ - for (e = dlist_first_entry(h, typeof(*e), member); \ - e && (&e->member != h); \ - e = dlist_entry((&e->member)->next, typeof(*e), member)) - -/* Iterate over the entries in the list, possibly deleting elements */ -#define dlist_for_each_safe(h, e, n, member) \ - for (e = dlist_first_entry(h, typeof(*e), member), \ - n = e ? dlist_entry((&e->member)->next, \ - typeof(*e), member) : NULL; \ - e && (&e->member != h); \ - e = n, n = dlist_entry((&e->member)->next, typeof(*e), member)) - -#define rwlock_t pthread_rwlock_t -#define rwlock_init(lock) pthread_rwlock_init(lock, NULL) -#define rwlock_destroy(lock) pthread_rwlock_destroy(lock) -#define rwlock_wrlock(lock) pthread_rwlock_wrlock(lock) -#define rwlock_rdlock(lock) pthread_rwlock_rdlock(lock) -#define rwlock_unlock(lock) pthread_rwlock_unlock(lock) - -/* - * prototypes - */ -int _gnix_get_cq_limit(void); -int gnixu_get_rdma_credentials(void *addr, uint8_t *ptag, uint32_t *cookie); -int gnixu_to_fi_errno(int err); - -int _gnix_task_is_not_app(void); -int _gnix_job_enable_unassigned_cpus(void); -int _gnix_job_disable_unassigned_cpus(void); -int _gnix_job_enable_affinity_apply(void); -int _gnix_job_disable_affinity_apply(void); - -void _gnix_app_cleanup(void); -int _gnix_job_fma_limit(uint32_t dev_id, uint8_t ptag, uint32_t *limit); -int _gnix_job_cq_limit(uint32_t dev_id, uint8_t ptag, uint32_t *limit); -int _gnix_pes_on_node(uint32_t *num_pes); -int _gnix_pe_node_rank(int *pe_node_rank); -int _gnix_nics_per_rank(uint32_t *nics_per_rank); -void _gnix_dump_gni_res(uint8_t ptag); -int _gnix_get_num_corespec_cpus(uint32_t *num_core_spec_cpus); - -struct gnix_reference { - ofi_atomic32_t references; - void (*destruct)(void *obj); -}; - -/* Should not be used unless the reference counting variable has a - * non-standard name - */ -#define __ref_get(ptr, var) \ - ({ \ - struct 
gnix_reference *ref = &(ptr)->var; \ - int references_held = ofi_atomic_inc32(&ref->references); \ - GNIX_DEBUG(FI_LOG_CORE, "%p refs %d\n", \ - ref, references_held); \ - assert(references_held > 0); \ - references_held; }) - -#define __ref_put(ptr, var) \ - ({ \ - struct gnix_reference *ref = &(ptr)->var; \ - int references_held = ofi_atomic_dec32(&ref->references); \ - GNIX_DEBUG(FI_LOG_CORE, "%p refs %d\n", \ - ref, references_held); \ - assert(references_held >= 0); \ - if (references_held == 0) \ - ref->destruct((void *) (ptr)); \ - references_held; }) - -/* by default, all of the gnix reference counting variables are - * named 'ref_cnt'. The macros provided below will autofill the var arg. - */ -#define _gnix_ref_get(ptr) __ref_get(ptr, ref_cnt) -#define _gnix_ref_put(ptr) __ref_put(ptr, ref_cnt) - -/** - * Only allow FI_REMOTE_CQ_DATA when the EP cap, FI_RMA_EVENT, is also set. - * - * @return zero if FI_REMOTE_CQ_DATA is not permitted; otherwise one. - */ -#define GNIX_ALLOW_FI_REMOTE_CQ_DATA(_flags, _ep_caps) \ - (((_flags) & FI_REMOTE_CQ_DATA) && \ - ((_ep_caps) & FI_RMA_EVENT)) - -static inline void _gnix_ref_init( - struct gnix_reference *ref, - int initial_value, - void (*destruct)(void *)) -{ - ofi_atomic_initialize32(&ref->references, initial_value); - GNIX_DEBUG(FI_LOG_CORE, "%p refs %d\n", - ref, initial_value); - ref->destruct = destruct; -} - -#define __STRINGIFY(expr) #expr -#define STRINGIFY(expr) __STRINGIFY(expr) - -#define __COND_FUNC(cond, lock, func) \ - do { \ - if ((cond)) { \ - func(lock); \ - } \ - } while (0) - -#define COND_ACQUIRE(cond, lock) \ - __COND_FUNC((cond), (lock), ofi_spin_lock) -#define COND_READ_ACQUIRE(cond, lock) \ - __COND_FUNC((cond), (lock), rwlock_rdlock) -#define COND_WRITE_ACQUIRE(cond, lock) \ - __COND_FUNC((cond), (lock), rwlock_wrlock) - -#define COND_RELEASE(cond, lock) \ - __COND_FUNC((cond), (lock), ofi_spin_unlock) -#define COND_RW_RELEASE(cond, lock) \ - __COND_FUNC((cond), (lock), rwlock_unlock) -#ifdef 
__GNUC__ -#define __PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality) -#else -#define __PREFETCH(addr, rw, locality) ((void *) 0) -#endif - -#define READ_PREFETCH(addr) __PREFETCH(addr, 0, 3) -#define WRITE_PREFETCH(addr) __PREFETCH(addr, 1, 3) - -#endif diff --git a/prov/gni/include/gnix_vc.h b/prov/gni/include/gnix_vc.h deleted file mode 100644 index ae9055d93c2..00000000000 --- a/prov/gni/include/gnix_vc.h +++ /dev/null @@ -1,328 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. - * All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _GNIX_VC_H_ -#define _GNIX_VC_H_ - -#if HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "gnix.h" -#include "gnix_bitmap.h" -#include "gnix_av.h" -#include "gnix_xpmem.h" -#include "gnix_cm_nic.h" - -/* - * mode bits - */ -#define GNIX_VC_MODE_IN_WQ (1U) -#define GNIX_VC_MODE_IN_HT (1U << 1) -#define GNIX_VC_MODE_DG_POSTED (1U << 2) -#define GNIX_VC_MODE_PENDING_MSGS (1U << 3) -#define GNIX_VC_MODE_PEER_CONNECTED (1U << 4) -#define GNIX_VC_MODE_IN_TABLE (1U << 5) -#define GNIX_VC_MODE_XPMEM (1U << 6) - -/* VC flags */ -#define GNIX_VC_FLAG_RX_SCHEDULED 0 -#define GNIX_VC_FLAG_WORK_SCHEDULED 1 -#define GNIX_VC_FLAG_TX_SCHEDULED 2 -#define GNIX_VC_FLAG_SCHEDULED 4 - -/* - * defines for connection state for gnix VC - */ -enum gnix_vc_conn_state { - GNIX_VC_CONN_NONE = 1, - GNIX_VC_CONNECTING, - GNIX_VC_CONNECTED, - GNIX_VC_CONN_TERMINATING, - GNIX_VC_CONN_TERMINATED, - GNIX_VC_CONN_ERROR -}; - -enum gnix_vc_conn_req_type { - GNIX_VC_CONN_REQ = 1, - GNIX_VC_CONN_RESP -}; - -#define LOCAL_MBOX_SENT (1UL) -#define REMOTE_MBOX_RCVD (1UL << 1) - -/** - * Virtual Connection (VC) struct - * - * @var prog_list NIC VC progress list - * @var work_queue Deferred work request queue - * @var tx_queue TX request queue - * @var list used for unmapped vc list - * @var fr_list used for vc free list - * @var entry used internally for managing linked lists - * of vc structs that require O(1) insertion/removal - * @var peer_fi_addr FI address of peer with which this VC is connected - * @var peer_addr address of peer with which this VC is connected - * @var peer_cm_nic_addr address of the cm_nic being used by peer, this - * is the address to which GNI datagrams must be - * posted - * @var ep libfabric endpoint with which this VC is - * associated - * @var smsg_mbox pointer to GNI SMSG mailbox used by this VC - * to exchange SMSG messages with its peer - * @var gnix_ep_name cache for storing remote endpoint name - * @var gni_ep GNI endpoint for this VC - * 
@var outstanding_fab_reqs Count of outstanding libfabric level requests - * associated with this endpoint. - * @var conn_state Connection state of this VC - * @var vc_id ID of this vc. Allows for rapid O(1) lookup - * of the VC when using GNI_CQ_GET_INST_ID to get - * the inst_id of a GNI CQE. - * @var peer_id vc_id of peer. - * @var modes Used internally to track current state of - * the VC not pertaining to the connection state. - * @var flags Bitmap used to hold vc schedule state - * @var peer_irq_mem_hndl peer GNI memhndl used for delivering - * GNI_PostCqWrite requests to remote peer - * @var peer_caps peer capability flags - */ -struct gnix_vc { - struct dlist_entry prog_list; /* NIC VC progress list entry */ - struct dlist_entry work_queue; /* Work reqs */ - struct dlist_entry tx_queue; /* TX reqs */ - - struct dlist_entry list; /* General purpose list */ - struct dlist_entry fr_list; /* fr list */ - fi_addr_t peer_fi_addr; - struct gnix_address peer_addr; - struct gnix_address peer_cm_nic_addr; - struct gnix_fid_ep *ep; - void *smsg_mbox; - void *gnix_ep_name; - gni_ep_handle_t gni_ep; - ofi_atomic32_t outstanding_tx_reqs; - enum gnix_vc_conn_state conn_state; - uint32_t post_state; - int vc_id; - int peer_id; - int modes; - gnix_bitmap_t flags; /* We're missing regular bit ops */ - gni_mem_handle_t peer_irq_mem_hndl; - xpmem_apid_t peer_apid; - uint64_t peer_caps; - uint32_t peer_key_offset; -}; - -/* - * prototypes - */ - -/** - * @brief Allocates a virtual channel(vc) struct - * - * @param[in] ep_priv pointer to previously allocated gnix_fid_ep object - * @param[in] entry av entry for remote peer for this VC. Can be NULL - * for accepting VCs. - * @param[out] vc location in which the address of the allocated vc - * struct is to be returned. 
- * @return FI_SUCCESS on success, -FI_ENOMEM if allocation of vc struct fails, - */ -int _gnix_vc_alloc(struct gnix_fid_ep *ep_priv, - struct gnix_av_addr_entry *entry, struct gnix_vc **vc); - -/** - * @brief Initiates non-blocking connect of a vc with its peer - * - * @param[in] vc pointer to previously allocated vc struct - * - * @return FI_SUCCESS on success, -FI_EINVAL if an invalid field in the vc - * struct is encountered, -ENOMEM if insufficient memory to initiate - * connection request. - */ -int _gnix_vc_connect(struct gnix_vc *vc); - -/** - * @brief Destroys a previously allocated vc and cleans up resources - * associated with the vc - * - * @param[in] vc pointer to previously allocated vc struct - * - * @return FI_SUCCESS on success, -FI_EINVAL if an invalid field in the vc - * struct is encountered. - */ -int _gnix_vc_destroy(struct gnix_vc *vc); - -/** - * @brief Add a vc to the work queue of its associated nic - * - * @param[in] vc pointer to previously allocated vc struct - * - * @return FI_SUCCESS on success, -ENOMEM if insufficient memory - * allocate memory to enqueue work request - */ -int _gnix_vc_add_to_wq(struct gnix_vc *vc); - -/** - * @brief Progress a VC's SMSG mailbox. - * - * Messages are dequeued from the VCs SMSG mailbox until cleared or a failure - * is encountered. - * - * @param[in] req The GNIX VC to progress. - */ -int _gnix_vc_dequeue_smsg(struct gnix_vc *vc); - -/** - * @brief Schedule a VC for RX progress. - * - * The VC will have it's SMSG mailbox progressed while the NIC is being - * progressed in the near future. - * - * @param[in] vc The GNIX VC to schedule. - */ -int _gnix_vc_rx_schedule(struct gnix_vc *vc); - -/** - * @brief Queue a request with deferred work. - * - * @param[in] req The GNIX fabric request to queue. - */ -int _gnix_vc_queue_work_req(struct gnix_fab_req *req); - -/** - * @brief Requeue a request with deferred work. Used only in TX completers - * where the VC lock is not yet held. 
- * - * @param[in] req The GNIX fabric request to requeue. - */ -int _gnix_vc_requeue_work_req(struct gnix_fab_req *req); - -/** - * @brief Schedule a VC for TX progress. - * - * The VC will have it's tx_queue progressed while the NIC is being progressed - * in the near future. - * - * @param[in] vc The GNIX VC to schedule. - */ -int _gnix_vc_tx_schedule(struct gnix_vc *vc); - -/** - * @brief Queue a new TX request. - * - * @param[in] req The GNIX fabric request to queue. - */ -int _gnix_vc_queue_tx_req(struct gnix_fab_req *req); - -/** - * @brief Progress NIC VCs. - * - * There are three facets of VC progress: RX, deferred work and TX. The NIC - * maintains one queue of VCs for each type of progress. When a VC requires - * progress, the associated _gnix_vc__schedule() function is used to - * schedule processing within _gnix_vc_nic_progress(). The queues are - * independent to prevent a stall in TX processing from delaying RX processing, - * and so forth. - * - * RX progress involves dequeueing SMSG messages and progressing the state of - * associated requests. If receipt of a message during RX progress will - * trigger a new network operation (or similarly heavy or lock dependent - * operation), that work should be queued in the deferred work queue, which - * will be progressed once VC RX work is complete. Examples of this deferred - * work include the start of rendezvous data transfer or freeing an automatic - * memory registration after an RX completion. - * - * The deferred work queue is processed after RX progress, where most deferred - * work will be originated, and before TX processing, giving network resource - * priority (specifically TXDs) to TX requests which have already been - * initiated. - * - * New TX requests belong in a VCs TX queue. Ordering of the VC TX queue is - * enforced. A request using the FI_FENCE flag will cause a VCs TX queue to be - * stalled until that request is completed. - * - * @param[in] nic The GNIX NIC to progress. 
- */ -int _gnix_vc_nic_progress(struct gnix_nic *nic); - -/** - * @brief return vc associated with a given ep/dest address, or the ep in the - * case of FI_EP_MSG endpoint type. For FI_EP_RDM type, a vc may be - * allocated and a connection initiated if no vc is associated with - * ep/dest_addr. - * - * @param[in] ep pointer to a previously allocated endpoint - * @param[in] dest_addr for FI_EP_RDM endpoints, used to look up vc associated - * with this target address - * @param[out] vc_ptr address in which to store pointer to returned vc - * @return FI_SUCCESS on success, -FI_ENOMEM insufficient - * memory to allocate vc, -FI_EINVAL if an invalid - * argument was supplied - */ -int _gnix_vc_ep_get_vc(struct gnix_fid_ep *ep, fi_addr_t dest_addr, - struct gnix_vc **vc_ptr); - -/** - * @brief Return the FI address of a VC. - * - * @param vc The VC for to use for lookup. - * @return The FI address of the input VC. FI_ADDR_NOTAVAIL on error or - * if the VC is of incompatible type. - */ -fi_addr_t _gnix_vc_peer_fi_addr(struct gnix_vc *vc); - -int _gnix_vc_cm_init(struct gnix_cm_nic *cm_nic); -int _gnix_vc_schedule(struct gnix_vc *vc); -int _gnix_vc_smsg_init(struct gnix_vc *vc, - int peer_id, - gni_smsg_attr_t *peer_smsg_attr, - gni_mem_handle_t *peer_irq_mem_hndl); - -/* - * inline functions - */ - -/** - * @brief Return connection state of a vc - * - * @param[in] vc pointer to previously allocated vc struct - * @return connection state of vc - */ -static inline enum gnix_vc_conn_state _gnix_vc_state(struct gnix_vc *vc) -{ - assert(vc); - return vc->conn_state; -} - -#endif /* _GNIX_VC_H_ */ diff --git a/prov/gni/include/gnix_vector.h b/prov/gni/include/gnix_vector.h deleted file mode 100644 index 0f2d20c9636..00000000000 --- a/prov/gni/include/gnix_vector.h +++ /dev/null @@ -1,427 +0,0 @@ -/* - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2016 Cray Inc. All rights reserved. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef GNIX_VECTOR_H_ -#define GNIX_VECTOR_H_ - -#include "gnix.h" -#include "gnix_util.h" - -#include "stdlib.h" -#include "string.h" - -/******************************************************************************* - * DATASTRUCTS - ******************************************************************************/ -typedef enum gnix_vec_state { - GNIX_VEC_STATE_READY = 0xdeadbeef, - GNIX_VEC_STATE_DEAD, -} gnix_vec_state_e; - -typedef enum gnix_vec_increase { - GNIX_VEC_INCREASE_ADD, - GNIX_VEC_INCREASE_MULT, -} gnix_vec_increase_e; - -typedef enum gnix_vec_lock { - GNIX_VEC_UNLOCKED, - GNIX_VEC_LOCKED, -} gnix_vec_lock_e; - -typedef uint32_t gnix_vec_index_t; -typedef void * gnix_vec_entry_t; - -/** - * Set of attributes that MUST be initialized and passed to _gnix_vec_init. - * - * @var vec_initial_size Initial size of the vector - * @var vec_maximum_size Maximum size of the vector - * @var vec_increase_step Type of step to increase vector by, ADD or MULT - * @var vec_internal_locking GNIX_VEC_UNLOCKED for unlocked, otherwise locked - * @var creator fn required to properly alloc the vector element - */ -typedef struct gnix_vec_attr { - uint32_t vec_initial_size; - uint32_t cur_size; - uint32_t vec_maximum_size; - - uint32_t vec_increase_step; - - gnix_vec_increase_e vec_increase_type; - - gnix_vec_lock_e vec_internal_locking; -} gnix_vec_attr_t; - -struct gnix_vector; - -struct gnix_vector_iter { - struct gnix_vector *vec; - uint32_t cur_idx; -}; - -#define GNIX_VECTOR_ITERATOR(_vec, _iter) \ - struct gnix_vector_iter _iter = { \ - .vec = (_vec), \ - .cur_idx = 0, \ - } - -/* Returns the current index of the iterator */ -#define GNIX_VECTOR_ITERATOR_IDX(_iter) ((_iter).cur_idx - 1) - -/** - * Vector operations - * - * @var insert_last Insert an entry into the last index of the vector. - * @var insert_at Insert an entry into the vector at the given index. - * - * @var remove_last Removes the last element from the vector. 
- * @var remove_at Removes the element at index from the vector. - * - * @var last Return the last element of the vector. - * @var at Return the element at the specified index. - * - * @var iter_next Return the element at the current index and move them - * index to the next element. - */ -typedef struct gnix_vector_ops { - int (*resize)(struct gnix_vector *, uint32_t); - - int (*insert_last)(struct gnix_vector *, gnix_vec_entry_t *); - int (*insert_at)(struct gnix_vector *, gnix_vec_entry_t *, - gnix_vec_index_t); - - int (*remove_last)(struct gnix_vector *); - int (*remove_at)(struct gnix_vector *, gnix_vec_index_t); - - int (*last)(struct gnix_vector *, void **); - int (*at)(struct gnix_vector *, void **, gnix_vec_index_t); - - gnix_vec_entry_t *(*iter_next)(struct gnix_vector_iter *); -} gnix_vector_ops_t; - -/** - * Vector handle - * - * @var state The current state of the vector instance. - * @var attr The attributes of this vector. - * @var ops The supported operations on this vector. - * @var vector The begging address of the vector. - * @var size The current size of the vector. - * @var lock A read/write lock for the vector. - */ -typedef struct gnix_vector { - gnix_vec_state_e state; - gnix_vec_attr_t attr; - gnix_vector_ops_t *ops; - gnix_vec_entry_t *vector; - rwlock_t lock; -} gnix_vector_t; - - -/******************************************************************************* - * API Prototypes - ******************************************************************************/ -/** - * Create the initial vector. The user is responsible for initializing the - * "attr" parameter prior to calling this function. 
- * - * @param[in] vec the vector to initialize - * @param[in] attr the vector attributes - * - * @return FI_SUCCESS Upon successfully creating the vector - * @return -FI_EINVAL Upon receiving an invalid parameter - * @return -FI_ENOMEM Upon insufficient memory to create the vector - */ -int _gnix_vec_init(struct gnix_vector *vec, gnix_vec_attr_t *attr); - -/** - * Close the vector elements and then the vector. - * - * @param[in] vec the vector to close - * - * @return FI_SUCCESS Upon successfully closing the vector - * @return -FI_EINVAL Upon a uninitialized or dead vector - */ -int _gnix_vec_close(gnix_vector_t *vec); - -/******************************************************************************* - * INLINE OPS FNS - ******************************************************************************/ -/** - * Resize the vector to size. - * - * @param[in] vec the vector to resize - * @param[in] size the new size of the vector - * - * @return FI_SUCCESS Upon successfully resizing the vector - * @return -FI_EINVAL Upon passing a uninitialized or dead vector, a size - * less than the minimum vector size, or a size greater - * than the maximum vector size - * @return -FI_ENOMEM Upon running out of memory - */ -static inline int _gnix_vec_resize(gnix_vector_t *vec, uint32_t size) -{ - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (OFI_UNLIKELY(!vec)) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid parameter to " - "_gnix_vec_resize.\n"); - return -FI_EINVAL; - } else { - return vec->ops->resize(vec, size); - } -} - -/** - * Get the element at index in the vector. 
- * - * @param[in] vec The vector to return an element from - * @param[in/out] element The element at the specified index in the vector - * @param[in] index The index of the desired element - * - * @return FI_SUCCESS Upon successfully returning the element - * @return -FI_EINVAL Upon passing a NULL or dead vector - * @return -FI_ECANCLED Upon attempting to get an empty element - */ -static inline int _gnix_vec_at(gnix_vector_t *vec, void **element, - gnix_vec_index_t index) -{ - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (OFI_UNLIKELY(!vec || !element)) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid parameter to " - "_gnix_vec_at\n"); - return -FI_EINVAL; - } else { - return vec->ops->at(vec, element, index); - } -} - -/** - * Get the first element in the vector. - * - * @param[in] vec The vector to return an element from - * @param[in/out] element the first element in the vector - * - * @return FI_SUCCESS Upon successfully returning the element - * @return -FI_EINVAL Upon passing a NULL or dead vector - * @return -FI_ECANCLED Upon attempting to get an empty element - */ -static inline int _gnix_vec_last(gnix_vector_t *vec, void **element) -{ - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (OFI_UNLIKELY(!vec || !element)) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid parameter to " - "_gnix_vec_last\n"); - return -FI_EINVAL; - } else { - return vec->ops->last(vec, element); - } -} - -/** - * Get the first element in the vector. - * - * @param[in] vec The vector to return an element from - * @param[in/out] element the first element in the vector - * - * @return FI_SUCCESS Upon successfully returning the element - * @return -FI_EINVAL Upon passing a NULL or dead vector - * @return -FI_ECANCLED Upon attempting to get an empty element - */ -static inline int _gnix_vec_first(gnix_vector_t *vec, void **element) -{ - return _gnix_vec_at(vec, element, 0); -} - -/** - * Removes the element at index from the vector. 
Note that - * the user is responsible for properly disconnecting and/or destroying - * this vector element. - * - * @param[in] vec the vector to remove an entry from - * @param[in] index the index of the entry to remove - * - * @return FI_SUCCESS Upon successfully removing the entry - * @return -FI_EINVAL Upon passing a dead vector - * @return -FI_ECANCELED Upon attempting to remove an empty entry - */ -static inline int _gnix_vec_remove_at(gnix_vector_t *vec, - gnix_vec_index_t index) -{ - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (OFI_UNLIKELY(!vec)) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid parameter to " - "_gnix_vec_remove_at\n"); - return -FI_EINVAL; - } else { - return vec->ops->remove_at(vec, index); - } -} - -/** - * Removes the last element from the vector. Note that - * the user is responsible for properly disconnecting and/or destroying - * this vector element. - * - * @param[in] vec the vector to remove an entry from - * - * @return FI_SUCCESS Upon successfully removing and destroying the entry - * @return -FI_EINVAL Upon passing a dead entry - * @return -FI_ECANCELED Upon attempting to remove an empty entry - */ -static inline int _gnix_vec_remove_last(gnix_vector_t *vec) -{ - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (OFI_UNLIKELY(!vec)) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid parameter to " - "_gnix_vec_remove_at\n"); - return -FI_EINVAL; - } else { - return vec->ops->remove_last(vec); - } -} - -/** - * Removes the first element from the vector. Note that - * the user is responsible for properly disconnecting and/or destroying - * this vector element. 
- * - * @param[in] vec the vector to remove an entry from - * - * @return FI_SUCCESS Upon successfully removing and destroying the entry - * @return -FI_EINVAL Upon passing a dead entry - * @return -FI_ECANCELED Upon attempting to remove an empty entry - */ -static inline int _gnix_vec_remove_first(gnix_vector_t *vec) -{ - return _gnix_vec_remove_at(vec, 0); -} - -/** - * Inserts an entry into the vector at the given index. If the current size - * of the vector is not large enough to satisfy the insertion then the vector - * will be grown up to the maximum size. If the entry at index is not empty - * the insertion will be canceled. - * - * @param[in] vec the vector to insert entry into - * @param[in] entry the item to insert into the vector - * @param[in] index the index to insert the item at - * - * @return FI_SUCCESS Upon successfully inserting the entry into the vector - * @return -FI_ENOMEM Upon exceeding the available memory - * @return -FI_EINVAL Upon passing a dead or null vector, or an index passed - * the maximum size. - * @return -FI_ECANCELED Upon an existing non-empty entry being found at index - * or reaching the maximum vector size. - */ -static inline int _gnix_vec_insert_at(gnix_vector_t *vec, - gnix_vec_entry_t *entry, - gnix_vec_index_t index) -{ - if (OFI_UNLIKELY(!vec || !entry)) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid parameter to " - "_gnix_vec_insert_at\n"); - return -FI_EINVAL; - } else { - return vec->ops->insert_at(vec, entry, index); - } -} - -/** - * Inserts an entry into the last index of the vector. If the entry at the - * last index is not empty the insertion will be canceled. 
- * - * @param[in] vec the vector to insert entry into - * @param[in] entry the item to insert into the vector - * - * @return FI_SUCCESS Upon successfully inserting the entry into the vector - * @return -FI_EINVAL Upon passing a dead vector, or a null - * entry - * @return -FI_ECANCELED Upon an existing non-empty entry being found at the - * last index - */ -static inline int _gnix_vec_insert_last(gnix_vector_t *vec, - gnix_vec_entry_t *entry) -{ - if (OFI_UNLIKELY(!vec || !entry)) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid parameter to " - "_gnix_vec_insert_last\n"); - return -FI_EINVAL; - } else { - return vec->ops->insert_last(vec, entry); - } -} - -/** - * Inserts an entry into the first index of the vector. If the entry at the - * first index is not empty the insertion will be canceled. - * - * @param[in] vec the vector to insert entry into - * @param[in] entry the item to insert into the vector - * - * @return FI_SUCCESS Upon successfully inserting the entry into the vector - * @return -FI_EINVAL Upon passing a dead vector, or a null - * entry - * @return -FI_ECANCELED Upon an existing non-empty entry being found at index 0 - */ -static inline int _gnix_vec_insert_first(gnix_vector_t *vec, - gnix_vec_entry_t *entry) -{ - return _gnix_vec_insert_at(vec, entry, 0); -} - -/** - * Return the current element in the vector iterator and move - * the iterator to the next element. 
- * - * @param iter pointer to the vector iterator - * @return pointer to current element in the vector - */ -static inline -gnix_vec_entry_t *_gnix_vec_iterator_next(struct gnix_vector_iter *iter) -{ - if (iter == NULL) { - GNIX_WARN(FI_LOG_EP_DATA, "Invalid parameter to" - "_gnix_vec_iterator_next\n"); - return NULL; - } else { - return iter->vec->ops->iter_next(iter); - } -} - -#endif /* GNIX_VECTOR_H_ */ diff --git a/prov/gni/include/gnix_wait.h b/prov/gni/include/gnix_wait.h deleted file mode 100644 index f31742ae3ce..00000000000 --- a/prov/gni/include/gnix_wait.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _GNIX_WAIT_H_ -#define _GNIX_WAIT_H_ - -#include -#include -#include - -#define WAIT_SUB (FI_LOG_CQ | FI_LOG_EQ) - -enum { - WAIT_READ, - WAIT_WRITE -}; - -struct gnix_wait_entry { - struct fid *wait_obj; - struct slist_entry entry; -}; - -struct gnix_fid_wait { - struct fid_wait wait; - struct gnix_fid_fabric *fabric; - - enum fi_cq_wait_cond cond_type; - enum fi_wait_obj type; - - union { - int fd[2]; - struct { - pthread_mutex_t mutex; - pthread_cond_t cond; - }; - }; - - struct slist set; -}; - -extern uint32_t gnix_wait_thread_sleep_time; - -/* - * API Functions - */ -int gnix_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr, - struct fid_wait **waitset); -int gnix_wait_close(struct fid *wait); -int gnix_wait_wait(struct fid_wait *wait, int timeout); - -/* - * Exposed internal functions. - */ -int _gnix_wait_set_add(struct fid_wait *wait, struct fid *wait_obj); -int _gnix_wait_set_remove(struct fid_wait *wait, struct fid *wait_obj); -int _gnix_get_wait_obj(struct fid_wait *wait, void *arg); -void _gnix_signal_wait_obj(struct fid_wait *wait); - -#endif diff --git a/prov/gni/include/gnix_xpmem.h b/prov/gni/include/gnix_xpmem.h deleted file mode 100644 index c928adb27e7..00000000000 --- a/prov/gni/include/gnix_xpmem.h +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright (c) 2016 Los Alamos National Security, LLC. - * All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef GNIX_XPMEM_H_ -#define GNIX_XPMEM_H_ - -#include "gnix.h" -#if HAVE_XPMEM -#include -#else -typedef int64_t xpmem_apid_t; -typedef int64_t xpmem_segid_t; -#endif - -struct gnix_xpmem_handle { - struct gnix_reference ref_cnt; - struct gnix_hashtable *apid_ht; - ofi_spin_t lock; -}; - -struct gnix_xpmem_access_handle { - struct gnix_xpmem_handle *xp_hndl; - struct gnix_xpmem_ht_entry *entry; - void *attach_addr; - void *remote_base_addr; - size_t access_len; -}; - -extern bool gnix_xpmem_disabled; - -/******************************************************************************* - * API Prototypes - ******************************************************************************/ - -/** - * @brief create an xpmem handle to use for subsequent - * xpmem operations - * - * @param [in] dom pointer to a previously allocated - * gnix_fid_domain struct - * @param [out] handle pointer to a memory location where - * a pointer to an xpmem_handle will be - * returned - * - * @return FI_SUCCESS xpmem handle successfully allocated - * @return -FI_EINVAL Upon receiving an invalid parameter - */ -int _gnix_xpmem_handle_create(struct gnix_fid_domain *dom, - struct gnix_xpmem_handle **handle); - -/** - * @brief destroy an xpmem handle - * - * @param [in] handle pointer to a previously allocated - * xpmem_handle - * @return FI_SUCCESS xpmem handle successfully destroyed - * @return -FI_EINVAL Upon receiving an invalid parameter - */ -int _gnix_xpmem_handle_destroy(struct gnix_xpmem_handle *hndl); - -/** - * @brief get an access handle to a address range a peer's - * address space - * - * @param[in] xp_handle pointer to previously created - * xpmem handle - * @param[in] peer_apid xpmem apid for peer - * @param[in] remote_vaddr virtual address in process associated - * with the target EP - * @param[in] len length in bytes of the region to - * to be accessed in the target process - * @param[out] access_hndl access handle to be used to copy data - * from the peer process in 
to the local - * address space - * - * @return FI_SUCCESS Upon xpmem successfully initialized - * @return -FI_EINVAL Upon receiving an invalid parameter - * @return -FI_ENOSYS Target EP can't be attached to local process - * address space - */ -int _gnix_xpmem_access_hndl_get(struct gnix_xpmem_handle *xp_hndl, - xpmem_apid_t peer_apid, - uint64_t remote_vaddr, - size_t len, - struct gnix_xpmem_access_handle **access_hndl); - - -/** - * @brief release an access handle - * - * @param[in] access_handle pointer to previously created - * access handle - * - * @return FI_SUCCESS Upon xpmem successfully initialized - * @return -FI_EINVAL Upon receiving an invalid parameter - */ -int _gnix_xpmem_access_hndl_put(struct gnix_xpmem_access_handle *access_hndl); - -/** - * @brief memcpy from previously accessed memory in peer's - * virtual address space - * - * @param[in] access_hndl pointer to previously created - * xpmem access handle - * @param[in] dst_addr starting virtual address in the calling - * process address space where data - * will be copied - * @param[in] remote_start_addr starting virtual address in the target - * address space from which data will be copied - * @param[in] len copy length in bytes - * - * @return FI_SUCCESS Upon successful copy - * @return -FI_EINVAL Invalid argument - */ -int _gnix_xpmem_copy(struct gnix_xpmem_access_handle *access_hndl, - void *dst_addr, - void *remote_start_addr, - size_t len); - -/** - * @brief get the xpmem segid associated with an xpmem_handle - * - * @param[in] xp_handle pointer to previously created - * will be copied - * @param[out] seg_id pointer to memory location where - * the segid value will be returned - * - * @return FI_SUCCESS Upon success - * @return -FI_EINVAL Invalid argument - */ -int _gnix_xpmem_get_my_segid(struct gnix_xpmem_handle *xp_hndl, - xpmem_segid_t *seg_id); - -/** - * @brief get the xpmem apid associated with an xpmem_handle - * and input segid - * - * @param[in] xp_handle pointer to previously 
created - * will be copied - * @param[in] seg_id seg_id obtained from process - * whose memory is to be accessed - * via xpmem. - * @param[out] peer_apid pointer to memory location where - * the apid value to use for accessing - * the address space of the peer - * process. - * - * @return FI_SUCCESS Upon success - * @return -FI_EINVAL Invalid argument - */ -int _gnix_xpmem_get_apid(struct gnix_xpmem_handle *xp_hndl, - xpmem_segid_t segid, - xpmem_apid_t *peer_apid); - -/** - * @brief determine if a process at a given gnix_address can - * be accessed using xpmem - * - * @param[in] ep pointer to a previously allocated - * gnix_fid_ep structure - * @param[in] addr address used by an endpoint of the - * peer process - * @param[out] accessible set to true if endpoint with - * gnix_address addr can be accessed - * using xpmem, otherwise false - * - * @return FI_SUCCESS Upon success - * @return -FI_EINVAL Invalid argument - */ -int _gnix_xpmem_accessible(struct gnix_fid_ep *ep, - struct gnix_address addr, - bool *accessible); - - - - -#endif /* GNIX_XPMEM_H_ */ diff --git a/prov/gni/include/rdma/fi_direct.h b/prov/gni/include/rdma/fi_direct.h deleted file mode 100644 index be977ea2396..00000000000 --- a/prov/gni/include/rdma/fi_direct.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2013-2014 Intel Corporation. All rights reserved. - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All - * rights reserved. - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _FI_DIRECT_H_ -#define _FI_DIRECT_H_ - -#define FABRIC_DIRECT_ 1 - -struct fi_context { - void *internal[4]; -}; - -#endif /* _FI_DIRECT_H_ */ diff --git a/prov/gni/include/rdma/fi_direct_atomic.h b/prov/gni/include/rdma/fi_direct_atomic.h deleted file mode 100644 index 99d7353c238..00000000000 --- a/prov/gni/include/rdma/fi_direct_atomic.h +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All - * rights reserved. - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _FI_DIRECT_ATOMIC_H_ -#define _FI_DIRECT_ATOMIC_H_ - -#define FABRIC_DIRECT_ATOMIC 1 - -/******************************************************************************* - * GNI API Functions - ******************************************************************************/ -extern ssize_t gnix_ep_atomic_write(struct fid_ep *ep, const void *buf, - size_t count, void *desc, - fi_addr_t dest_addr, uint64_t addr, - uint64_t key, enum fi_datatype datatype, - enum fi_op op, void *context); - -extern ssize_t gnix_ep_atomic_writev(struct fid_ep *ep, - const struct fi_ioc *iov, void **desc, - size_t count, fi_addr_t dest_addr, - uint64_t addr, uint64_t key, - enum fi_datatype datatype, enum fi_op op, - void *context); - -extern ssize_t gnix_ep_atomic_writemsg(struct fid_ep *ep, - const struct fi_msg_atomic *msg, - uint64_t flags); - -extern ssize_t gnix_ep_atomic_inject(struct fid_ep *ep, const void *buf, - size_t count, fi_addr_t dest_addr, - uint64_t addr, uint64_t key, - enum fi_datatype datatype, enum fi_op op); - -extern ssize_t gnix_ep_atomic_readwrite(struct fid_ep *ep, const void *buf, - size_t count, void *desc, void *result, - void *result_desc, fi_addr_t dest_addr, - uint64_t addr, uint64_t key, - enum fi_datatype datatype, - enum fi_op op, void *context); - -extern ssize_t gnix_ep_atomic_readwritev(struct fid_ep *ep, - const struct fi_ioc *iov, - void **desc, size_t count, - struct fi_ioc *resultv, - void **result_desc, - size_t result_count, - fi_addr_t dest_addr, uint64_t addr, - uint64_t key, - enum fi_datatype datatype, - enum fi_op op, void *context); - -extern ssize_t gnix_ep_atomic_readwritemsg(struct fid_ep *ep, - const struct fi_msg_atomic *msg, - struct fi_ioc *resultv, - void **result_desc, - size_t result_count, uint64_t flags); - -extern ssize_t gnix_ep_atomic_compwrite(struct fid_ep *ep, const void *buf, - size_t count, void *desc, - const void *compare, void *compare_desc, - void *result, void *result_desc, - fi_addr_t dest_addr, 
uint64_t addr, - uint64_t key, enum fi_datatype datatype, - enum fi_op op, void *context); - -extern ssize_t gnix_ep_atomic_compwritev(struct fid_ep *ep, - const struct fi_ioc *iov, void **desc, - size_t count, - const struct fi_ioc *comparev, - void **compare_desc, - size_t compare_count, - struct fi_ioc *resultv, - void **result_desc, - size_t result_count, - fi_addr_t dest_addr, uint64_t addr, - uint64_t key, - enum fi_datatype datatype, - enum fi_op op, void *context); - -extern ssize_t -gnix_ep_atomic_compwritemsg(struct fid_ep *ep, const struct fi_msg_atomic *msg, - const struct fi_ioc *comparev, void **compare_desc, - size_t compare_count, struct fi_ioc *resultv, - void **result_desc, size_t result_count, - uint64_t flags); - -extern int gnix_ep_atomic_valid(struct fid_ep *ep, - enum fi_datatype datatype, enum fi_op op, - size_t *count); - -extern int gnix_ep_fetch_atomic_valid(struct fid_ep *ep, - enum fi_datatype datatype, enum fi_op op, - size_t *count); - -extern int gnix_ep_cmp_atomic_valid(struct fid_ep *ep, - enum fi_datatype datatype, - enum fi_op op, size_t *count); - -/******************************************************************************* - * Libfabric API Functions - ******************************************************************************/ -static inline ssize_t fi_atomic(struct fid_ep *ep, const void *buf, - size_t count, void *desc, fi_addr_t dest_addr, - uint64_t addr, uint64_t key, - enum fi_datatype datatype, enum fi_op op, - void *context) -{ - return gnix_ep_atomic_write(ep, buf, count, desc, dest_addr, addr, key, - datatype, op, context); -} - -static inline ssize_t fi_atomicv(struct fid_ep *ep, const struct fi_ioc *iov, - void **desc, size_t count, fi_addr_t dest_addr, - uint64_t addr, uint64_t key, - enum fi_datatype datatype, enum fi_op op, - void *context) -{ - return gnix_ep_atomic_writev(ep, iov, desc, count, dest_addr, addr, key, - datatype, op, context); -} - -static inline ssize_t -fi_atomicmsg(struct fid_ep *ep, 
const struct fi_msg_atomic *msg, uint64_t flags) -{ - return gnix_ep_atomic_writemsg(ep, msg, flags); -} - -static inline ssize_t fi_inject_atomic(struct fid_ep *ep, const void *buf, - size_t count, fi_addr_t dest_addr, - uint64_t addr, uint64_t key, - enum fi_datatype datatype, enum fi_op op) -{ - return gnix_ep_atomic_inject(ep, buf, count, dest_addr, addr, key, - datatype, op); -} - -static inline ssize_t fi_fetch_atomic(struct fid_ep *ep, const void *buf, - size_t count, void *desc, void *result, - void *result_desc, fi_addr_t dest_addr, - uint64_t addr, uint64_t key, - enum fi_datatype datatype, enum fi_op op, - void *context) -{ - return gnix_ep_atomic_readwrite(ep, buf, count, desc, result, - result_desc, dest_addr, addr, key, - datatype, op, context); -} - -static inline ssize_t fi_fetch_atomicv(struct fid_ep *ep, - const struct fi_ioc *iov, void **desc, - size_t count, struct fi_ioc *resultv, - void **result_desc, size_t result_count, - fi_addr_t dest_addr, uint64_t addr, - uint64_t key, enum fi_datatype datatype, - enum fi_op op, void *context) -{ - return gnix_ep_atomic_readwritev(ep, iov, desc, count, resultv, - result_desc, result_count, dest_addr, - addr, key, datatype, op, context); -} - -static inline ssize_t fi_fetch_atomicmsg(struct fid_ep *ep, - const struct fi_msg_atomic *msg, - struct fi_ioc *resultv, - void **result_desc, - size_t result_count, uint64_t flags) -{ - return gnix_ep_atomic_readwritemsg(ep, msg, resultv, result_desc, - result_count, flags); -} - -static inline ssize_t fi_compare_atomic(struct fid_ep *ep, const void *buf, - size_t count, void *desc, - const void *compare, void *compare_desc, - void *result, void *result_desc, - fi_addr_t dest_addr, uint64_t addr, - uint64_t key, enum fi_datatype datatype, - enum fi_op op, void *context) -{ - return gnix_ep_atomic_compwrite( - ep, buf, count, desc, compare, compare_desc, result, result_desc, - dest_addr, addr, key, datatype, op, context); -} - -static inline ssize_t 
fi_compare_atomicv(struct fid_ep *ep, - const struct fi_ioc *iov, - void **desc, size_t count, - const struct fi_ioc *comparev, - void **compare_desc, - size_t compare_count, - struct fi_ioc *resultv, - void **result_desc, - size_t result_count, - fi_addr_t dest_addr, uint64_t addr, - uint64_t key, - enum fi_datatype datatype, - enum fi_op op, void *context) -{ - return gnix_ep_atomic_compwritev(ep, iov, desc, count, comparev, - compare_desc, compare_count, resultv, - result_desc, result_count, dest_addr, - addr, key, datatype, op, context); -} - -static inline ssize_t -fi_compare_atomicmsg(struct fid_ep *ep, const struct fi_msg_atomic *msg, - const struct fi_ioc *comparev, void **compare_desc, - size_t compare_count, struct fi_ioc *resultv, - void **result_desc, size_t result_count, uint64_t flags) -{ - return gnix_ep_atomic_compwritemsg(ep, msg, comparev, compare_desc, - compare_count, resultv, result_desc, - result_count, flags); -} - -static inline int fi_atomicvalid(struct fid_ep *ep, enum fi_datatype datatype, - enum fi_op op, size_t *count) -{ - return gnix_ep_atomic_valid(ep, datatype, op, count); -} - -static inline int fi_fetch_atomicvalid(struct fid_ep *ep, - enum fi_datatype datatype, enum fi_op op, - size_t *count) -{ - return gnix_ep_fetch_atomic_valid(ep, datatype, op, count); -} - -static inline int fi_compare_atomicvalid(struct fid_ep *ep, - enum fi_datatype datatype, - enum fi_op op, size_t *count) -{ - return gnix_ep_cmp_atomic_valid(ep, datatype, op, count); -} - -#endif /* _FI_DIRECT_ATOMIC_H_ */ diff --git a/prov/gni/include/rdma/fi_direct_atomic_def.h b/prov/gni/include/rdma/fi_direct_atomic_def.h deleted file mode 100644 index 1bf521f0e94..00000000000 --- a/prov/gni/include/rdma/fi_direct_atomic_def.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. - * All rights reserved. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _FI_DIRECT_ATOMIC_DEF_H_ -#define _FI_DIRECT_ATOMIC_DEF_H_ - -#define FABRIC_DIRECT_ATOMIC_DEF 1 - -enum fi_datatype { - FI_INT8, - FI_UINT8, - FI_INT16, - FI_UINT16, - FI_INT32, - FI_UINT32, - FI_INT64, - FI_UINT64, - FI_FLOAT, - FI_DOUBLE, - FI_FLOAT_COMPLEX, - FI_DOUBLE_COMPLEX, - FI_LONG_DOUBLE, - FI_LONG_DOUBLE_COMPLEX, - FI_DATATYPE_LAST -}; - -enum fi_op { - FI_MIN, - FI_MAX, - FI_SUM, - FI_PROD, - FI_LOR, - FI_LAND, - FI_BOR, - FI_BAND, - FI_LXOR, - FI_BXOR, - FI_ATOMIC_READ, - FI_ATOMIC_WRITE, - FI_CSWAP, - FI_CSWAP_NE, - FI_CSWAP_LE, - FI_CSWAP_LT, - FI_CSWAP_GE, - FI_CSWAP_GT, - FI_MSWAP, - FI_ATOMIC_OP_LAST -}; - -#endif /* _FI_DIRECT_ATOMIC_DEF_H_ */ diff --git a/prov/gni/include/rdma/fi_direct_cm.h b/prov/gni/include/rdma/fi_direct_cm.h deleted file mode 100644 index 301a1848489..00000000000 --- a/prov/gni/include/rdma/fi_direct_cm.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All - * rights reserved. - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _FI_DIRECT_CM_H_ -#define _FI_DIRECT_CM_H_ - -#define FABRIC_DIRECT_CM 1 - -/******************************************************************************* - * GNI API Functions - ******************************************************************************/ -extern int gnix_setname(fid_t fid, void *addr, size_t addrlen); - -extern int gnix_getname(fid_t fid, void *addr, size_t *addrlen); - -extern int gnix_getpeer(struct fid_ep *ep, void *addr, size_t *addrlen); - -extern int gnix_listen(struct fid_pep *pep); - -extern int gnix_connect(struct fid_ep *ep, const void *addr, const void *param, - size_t paramlen); - -extern int gnix_accept(struct fid_ep *ep, const void *param, size_t paramlen); - -extern int gnix_reject(struct fid_pep *pep, fid_t handle, const void *param, - size_t paramlen); - -extern int gnix_shutdown(struct fid_ep *ep, uint64_t flags); - -/******************************************************************************* - * Libfabric API Functions - ******************************************************************************/ -static inline int fi_setname(fid_t fid, void *addr, size_t addrlen) -{ - return gnix_setname(fid, addr, addrlen); -} - -static inline int fi_getname(fid_t fid, void *addr, size_t *addrlen) -{ - return gnix_getname(fid, addr, addrlen); -} - -static inline int fi_getpeer(struct fid_ep *ep, void *addr, size_t *addrlen) -{ - return gnix_getpeer(ep, addr, addrlen); -} - -static inline int fi_listen(struct fid_pep *pep) -{ - 
return gnix_listen(pep); -} - -static inline int fi_connect(struct fid_ep *ep, const void *addr, - const void *param, size_t paramlen) -{ - return gnix_connect(ep, addr, param, paramlen); -} - -static inline int fi_accept(struct fid_ep *ep, const void *param, - size_t paramlen) -{ - return gnix_accept(ep, param, paramlen); -} - -static inline int fi_reject(struct fid_pep *pep, fid_t handle, - const void *param, size_t paramlen) -{ - return gnix_reject(pep, handle, param, paramlen); -} - -static inline int fi_shutdown(struct fid_ep *ep, uint64_t flags) -{ - return gnix_shutdown(ep, flags); -} - -#endif /* _FI_DIRECT_CM_H_ */ diff --git a/prov/gni/include/rdma/fi_direct_domain.h b/prov/gni/include/rdma/fi_direct_domain.h deleted file mode 100644 index 9ad85073863..00000000000 --- a/prov/gni/include/rdma/fi_direct_domain.h +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All - * rights reserved. - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _FI_DIRECT_DOMAIN_H_ -#define _FI_DIRECT_DOMAIN_H_ - -#define FABRIC_DIRECT_DOMAIN 1 - -/******************************************************************************* - * GNI API Functions - ******************************************************************************/ -extern int gnix_domain_open(struct fid_fabric *fabric, struct fi_info *info, - struct fid_domain **dom, void *context); - -extern int gnix_domain_bind(struct fid_domain *domain, struct fid *fid, - uint64_t flags); - -extern int gnix_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, - struct fid_cq **cq, void *context); - -extern int gnix_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, - struct fid_cntr **cntr, void *context); - -extern int gnix_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr, - struct fid_wait **waitset); - -extern int gnix_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr, - struct fid_poll **pollset); - -extern int gnix_mr_reg(struct fid *fid, const void *buf, size_t len, - uint64_t access, uint64_t offset, uint64_t requested_key, - uint64_t flags, struct fid_mr **mr_o, void *context); - -extern int gnix_mr_bind(fid_t fid, struct fid *bfid, uint64_t flags); - -extern int gnix_av_open(struct fid_domain *domain, struct fi_av_attr *attr, - struct fid_av **av, void *context); - -extern int gnix_av_bind(struct fid_av *av, struct fid *fid, uint64_t flags); - -extern int gnix_av_insert(struct fid_av *av, const void *addr, size_t 
count, - fi_addr_t *fi_addr, uint64_t flags, void *context); - -extern int gnix_av_insertsvc(struct fid_av *av, const char *node, - const char *service, fi_addr_t *fi_addr, - uint64_t flags, void *context); - -extern int gnix_av_insertsym(struct fid_av *av, const char *node, - size_t nodecnt, const char *service, size_t svccnt, - fi_addr_t *fi_addr, uint64_t flags, void *context); - -extern int gnix_av_remove(struct fid_av *av, fi_addr_t *fi_addr, size_t count, - uint64_t flags); - -extern int gnix_av_lookup(struct fid_av *av, fi_addr_t fi_addr, void *addr, - size_t *addrlen); - -extern const char *gnix_av_straddr(struct fid_av *av, const void *addr, - char *buf, size_t *len); - -/******************************************************************************* - * Libfabric API Functions - ******************************************************************************/ -static inline int fi_domain(struct fid_fabric *fabric, struct fi_info *info, - struct fid_domain **domain, void *context) -{ - return gnix_domain_open(fabric, info, domain, context); -} - -static inline int fi_domain_bind(struct fid_domain *domain, struct fid *fid, - uint64_t flags) -{ - return gnix_domain_bind(domain, fid, flags); -} - -static inline int fi_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, - struct fid_cq **cq, void *context) -{ - return gnix_cq_open(domain, attr, cq, context); -} - -static inline int fi_cntr_open(struct fid_domain *domain, - struct fi_cntr_attr *attr, - struct fid_cntr **cntr, void *context) -{ - return gnix_cntr_open(domain, attr, cntr, context); -} - -static inline int fi_wait_open(struct fid_fabric *fabric, - struct fi_wait_attr *attr, - struct fid_wait **waitset) -{ - return gnix_wait_open(fabric, attr, waitset); -} - -static inline int fi_poll_open(struct fid_domain *domain, - struct fi_poll_attr *attr, - struct fid_poll **pollset) -{ - return gnix_poll_open(domain, attr, pollset); -} - -static inline int fi_mr_reg(struct fid_domain *domain, const 
void *buf, - size_t len, uint64_t access, uint64_t offset, - uint64_t requested_key, uint64_t flags, - struct fid_mr **mr, void *context) -{ - return gnix_mr_reg(&domain->fid, buf, len, access, offset, - requested_key, flags, mr, context); -} - -static inline void *fi_mr_desc(struct fid_mr *mr) -{ - return mr->mem_desc; -} - -static inline uint64_t fi_mr_key(struct fid_mr *mr) -{ - return mr->key; -} - -static inline int fi_mr_bind(struct fid_mr *mr, struct fid *bfid, - uint64_t flags) -{ - return gnix_mr_bind(&mr->fid, bfid, flags); -} - -static inline int fi_av_open(struct fid_domain *domain, struct fi_av_attr *attr, - struct fid_av **av, void *context) -{ - return gnix_av_open(domain, attr, av, context); -} - -static inline int fi_av_bind(struct fid_av *av, struct fid *fid, uint64_t flags) -{ - return gnix_av_bind(av, fid, flags); -} - -static inline int fi_av_insert(struct fid_av *av, const void *addr, - size_t count, fi_addr_t *fi_addr, uint64_t flags, - void *context) -{ - return gnix_av_insert(av, addr, count, fi_addr, flags, context); -} - -static inline int fi_av_insertsvc(struct fid_av *av, const char *node, - const char *service, fi_addr_t *fi_addr, - uint64_t flags, void *context) -{ - return gnix_av_insertsvc(av, node, service, fi_addr, flags, context); -} - -static inline int fi_av_insertsym(struct fid_av *av, const char *node, - size_t nodecnt, const char *service, - size_t svccnt, fi_addr_t *fi_addr, - uint64_t flags, void *context) -{ - return gnix_av_insertsym(av, node, nodecnt, service, svccnt, fi_addr, - flags, context); -} - -static inline int fi_av_remove(struct fid_av *av, fi_addr_t *fi_addr, - size_t count, uint64_t flags) -{ - return gnix_av_remove(av, fi_addr, count, flags); -} - -static inline int fi_av_lookup(struct fid_av *av, fi_addr_t fi_addr, void *addr, - size_t *addrlen) -{ - return gnix_av_lookup(av, fi_addr, addr, addrlen); -} - -static inline const char *fi_av_straddr(struct fid_av *av, const void *addr, - char *buf, size_t 
*len) -{ - return gnix_av_straddr(av, addr, buf, len); -} - -static inline fi_addr_t fi_rx_addr(fi_addr_t fi_addr, int rx_index, - int rx_ctx_bits) -{ - return (fi_addr_t)(((uint64_t)rx_index << (64 - rx_ctx_bits)) | - fi_addr); -} - -#endif /* _FI_DIRECT_DOMAIN_H_ */ diff --git a/prov/gni/include/rdma/fi_direct_endpoint.h b/prov/gni/include/rdma/fi_direct_endpoint.h deleted file mode 100644 index be3b01a445d..00000000000 --- a/prov/gni/include/rdma/fi_direct_endpoint.h +++ /dev/null @@ -1,275 +0,0 @@ -/* - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All - * rights reserved. - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _FI_DIRECT_ENDPOINT_H_ -#define _FI_DIRECT_ENDPOINT_H_ - -#define FABRIC_DIRECT_ENDPOINT 1 - -/******************************************************************************* - * GNI API Functions - ******************************************************************************/ -extern int gnix_pep_open(struct fid_fabric *fabric, struct fi_info *info, - struct fid_pep **pep, void *context); - -extern int gnix_ep_open(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **ep, void *context); - -extern int gnix_sep_open(struct fid_domain *domain, - struct fi_info *info, struct fid_ep **ep, - void *context); - -extern int gnix_ep_bind(fid_t fid, fid_t bfid, uint64_t flags); - -extern int gnix_pep_bind(fid_t pep, struct fid *bfid, uint64_t flags); - -extern int gnix_sep_bind(fid_t sep, fid_t bfid, uint64_t flags); - -extern int gnix_ep_control(fid_t fid, int command, void *arg); - -extern ssize_t gnix_ep_cancel(fid_t fid, void *context); - -extern int gnix_ep_setopt(fid_t fid, int level, int optname, const void *optval, - size_t optlen); - -extern int gnix_getopt(fid_t fid, int level, int optname, void *optval, - size_t *optlen); - -extern int gnix_tx_context(struct fid_ep *ep, int index, - struct fi_tx_attr *attr, struct fid_ep **tx_ep, - void *context); - -extern int gnix_rx_context(struct fid_ep *ep, int index, - struct fi_rx_attr *attr, struct fid_ep **rx_ep, - void *context); - -extern ssize_t gnix_ep_tx_size_left(struct fid_ep *ep); - -extern ssize_t gnix_ep_rx_size_left(struct fid_ep *ep); - -extern int gnix_stx_open(struct fid_domain *dom, struct fi_tx_attr *tx_attr, - struct fid_stx **stx, void *context); - -extern int gnix_srx_context(struct fid_domain *domain, struct fi_rx_attr *attr, - struct fid_ep **rx_ep, void *context); - -extern ssize_t gnix_ep_recv(struct fid_ep *ep, void *buf, size_t len, - void *desc, fi_addr_t src_addr, void *context); - -extern ssize_t gnix_ep_recvv(struct fid_ep *ep, const struct iovec *iov, - void 
**desc, size_t count, fi_addr_t src_addr, - void *context); - -extern ssize_t gnix_ep_recvmsg(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags); - -extern ssize_t gnix_ep_send(struct fid_ep *ep, const void *buf, size_t len, - void *desc, fi_addr_t dest_addr, void *context); - -extern ssize_t gnix_ep_sendv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t dest_addr, - void *context); - -extern ssize_t gnix_ep_sendmsg(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags); - -extern ssize_t gnix_ep_msg_inject(struct fid_ep *ep, const void *buf, - size_t len, fi_addr_t dest_addr); - -extern ssize_t gnix_ep_senddata(struct fid_ep *ep, const void *buf, size_t len, - void *desc, uint64_t data, fi_addr_t dest_addr, - void *context); - -extern ssize_t gnix_ep_msg_injectdata(struct fid_ep *ep, const void *buf, - size_t len, uint64_t data, - fi_addr_t dest_addr); - -/******************************************************************************* - * Libfabric API Functions - ******************************************************************************/ -static inline int fi_passive_ep(struct fid_fabric *fabric, struct fi_info *info, - struct fid_pep **pep, void *context) -{ - return gnix_pep_open(fabric, info, pep, context); -} - -static inline int fi_endpoint(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **ep, void *context) -{ - return gnix_ep_open(domain, info, ep, context); -} - -static inline int fi_scalable_ep(struct fid_domain *domain, - struct fi_info *info, struct fid_ep **sep, - void *context) -{ - return gnix_sep_open(domain, info, sep, context); -} - -static inline int fi_ep_bind(struct fid_ep *ep, fid_t bfid, uint64_t flags) -{ - return gnix_ep_bind(&ep->fid, bfid, flags); -} - -static inline int fi_pep_bind(struct fid_pep *pep, fid_t bfid, uint64_t flags) -{ - return gnix_pep_bind(&pep->fid, bfid, flags); -} - -static inline int fi_scalable_ep_bind(struct fid_ep *sep, fid_t bfid, - 
uint64_t flags) -{ - return gnix_sep_bind(&sep->fid, bfid, flags); -} - -static inline int fi_enable(struct fid_ep *ep) -{ - return gnix_ep_control(&ep->fid, FI_ENABLE, NULL); -} - -static inline ssize_t fi_cancel(fid_t fid, void *context) -{ - struct fid_ep *ep = container_of(fid, struct fid_ep, fid); - return gnix_ep_cancel(&ep->fid, context); -} - -static inline int fi_setopt(fid_t fid, int level, int optname, - const void *optval, size_t optlen) -{ - return gnix_ep_setopt(fid, level, optname, optval, optlen); -} - -static inline int fi_getopt(fid_t fid, int level, int optname, void *optval, - size_t *optlen) -{ - return gnix_getopt(fid, level, optname, optval, optlen); -} - -static inline int fi_tx_context(struct fid_ep *ep, int index, - struct fi_tx_attr *attr, struct fid_ep **tx_ep, - void *context) -{ - return gnix_tx_context(ep, index, attr, tx_ep, context); -} - -static inline int fi_rx_context(struct fid_ep *ep, int index, - struct fi_rx_attr *attr, struct fid_ep **rx_ep, - void *context) -{ - return gnix_rx_context(ep, index, attr, rx_ep, context); -} - -static inline ssize_t fi_rx_size_left(struct fid_ep *ep) -{ - return gnix_ep_rx_size_left(ep); -} - -static inline ssize_t fi_tx_size_left(struct fid_ep *ep) -{ - return gnix_ep_tx_size_left(ep); -} - -static inline int fi_stx_context(struct fid_domain *domain, - struct fi_tx_attr *attr, struct fid_stx **stx, - void *context) -{ - return gnix_stx_open(domain, attr, stx, context); -} - -static inline int fi_srx_context(struct fid_domain *domain, - struct fi_rx_attr *attr, struct fid_ep **rx_ep, - void *context) -{ - return gnix_srx_context(domain, attr, rx_ep, context); -} - -static inline ssize_t fi_recv(struct fid_ep *ep, void *buf, size_t len, - void *desc, fi_addr_t src_addr, void *context) -{ - return gnix_ep_recv(ep, buf, len, desc, src_addr, context); -} - -static inline ssize_t fi_recvv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t src_addr, - void *context) -{ 
- return gnix_ep_recvv(ep, iov, desc, count, src_addr, context); -} - -static inline ssize_t fi_recvmsg(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags) -{ - return gnix_ep_recvmsg(ep, msg, flags); -} - -static inline ssize_t fi_send(struct fid_ep *ep, const void *buf, size_t len, - void *desc, fi_addr_t dest_addr, void *context) -{ - return gnix_ep_send(ep, buf, len, desc, dest_addr, context); -} - -static inline ssize_t fi_sendv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t dest_addr, - void *context) -{ - return gnix_ep_sendv(ep, iov, desc, count, dest_addr, context); -} - -static inline ssize_t fi_sendmsg(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags) -{ - return gnix_ep_sendmsg(ep, msg, flags); -} - -static inline ssize_t fi_inject(struct fid_ep *ep, const void *buf, size_t len, - fi_addr_t dest_addr) -{ - return gnix_ep_msg_inject(ep, buf, len, dest_addr); -} - -static inline ssize_t fi_senddata(struct fid_ep *ep, const void *buf, - size_t len, void *desc, uint64_t data, - fi_addr_t dest_addr, void *context) -{ - return gnix_ep_senddata(ep, buf, len, desc, data, dest_addr, context); -} - -static inline ssize_t fi_injectdata(struct fid_ep *ep, const void *buf, - size_t len, uint64_t data, - fi_addr_t dest_addr) -{ - return gnix_ep_msg_injectdata(ep, buf, len, data, dest_addr); -} - -#endif /* _FI_DIRECT_ENDPOINT_H_ */ diff --git a/prov/gni/include/rdma/fi_direct_eq.h b/prov/gni/include/rdma/fi_direct_eq.h deleted file mode 100644 index bb5f6485df9..00000000000 --- a/prov/gni/include/rdma/fi_direct_eq.h +++ /dev/null @@ -1,257 +0,0 @@ -/* - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All - * rights reserved. - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _FI_DIRECT_EQ_H_ -#define _FI_DIRECT_EQ_H_ - -#define FABRIC_DIRECT_EQ 1 - -/******************************************************************************* - * GNI API Functions - ******************************************************************************/ -extern int gnix_fabric_trywait(struct fid_fabric *fabric, struct fid **fids, - int count); - -extern int gnix_wait_wait(struct fid_wait *wait, int timeout); - -extern int gnix_poll_poll(struct fid_poll *pollset, void **context, int count); - -extern int gnix_poll_add(struct fid_poll *pollset, struct fid *event_fid, - uint64_t flags); - -extern int gnix_poll_del(struct fid_poll *pollset, struct fid *event_fid, - uint64_t flags); - -extern int gnix_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr, - struct fid_eq **eq, void *context); - -extern ssize_t gnix_eq_read(struct fid_eq *eq, uint32_t *event, void *buf, - size_t len, uint64_t flags); - -extern ssize_t gnix_eq_readerr(struct fid_eq *eq, struct fi_eq_err_entry *buf, - uint64_t flags); - -extern ssize_t gnix_eq_write(struct fid_eq *eq, uint32_t event, const void *buf, - size_t len, uint64_t flags); - -extern ssize_t gnix_eq_sread(struct fid_eq *eq, uint32_t *event, void *buf, - size_t len, int timeout, uint64_t flags); - -extern const char *gnix_eq_strerror(struct fid_eq *eq, int prov_errno, - const void *err_data, char *buf, - size_t len); - -extern ssize_t gnix_cq_read(struct fid_cq *cq, void *buf, size_t count); - -extern ssize_t gnix_cq_readfrom(struct fid_cq *cq, void *buf, size_t count, - fi_addr_t *src_addr); - -extern ssize_t gnix_cq_readerr(struct fid_cq *cq, struct fi_cq_err_entry *buf, - uint64_t flags); - -extern ssize_t gnix_cq_sread(struct fid_cq *cq, void *buf, size_t count, - const void *cond, int timeout); - -extern ssize_t gnix_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count, - fi_addr_t *src_addr, const void *cond, - int timeout); - -extern int gnix_cq_signal(struct fid_cq *cq); - -extern const char 
*gnix_cq_strerror(struct fid_cq *cq, int prov_errno, - const void *err_data, char *buf, - size_t len); - -extern uint64_t gnix_cntr_read(struct fid_cntr *cntr); - -extern uint64_t gnix_cntr_readerr(struct fid_cntr *cntr); - -extern int gnix_cntr_add(struct fid_cntr *cntr, uint64_t value); - -extern int gnix_cntr_set(struct fid_cntr *cntr, uint64_t value); - -extern int gnix_cntr_wait(struct fid_cntr *cntr, uint64_t threshold, - int timeout); - -extern int gnix_cntr_adderr(struct fid_cntr *cntr, uint64_t value); - -extern int gnix_cntr_seterr(struct fid_cntr *cntr, uint64_t value); - -/******************************************************************************* - * Libfabric API Functions - ******************************************************************************/ -static inline int -fi_trywait(struct fid_fabric *fabric, struct fid **fids, int count) -{ - return gnix_fabric_trywait(fabric, fids, count); -} - -static inline int fi_wait(struct fid_wait *waitset, int timeout) -{ - return gnix_wait_wait(waitset, timeout); -} - -static inline int fi_poll(struct fid_poll *pollset, void **context, int count) -{ - return gnix_poll_poll(pollset, context, count); -} - -static inline int fi_poll_add(struct fid_poll *pollset, struct fid *event_fid, - uint64_t flags) -{ - return gnix_poll_add(pollset, event_fid, flags); -} - -static inline int fi_poll_del(struct fid_poll *pollset, struct fid *event_fid, - uint64_t flags) -{ - return gnix_poll_del(pollset, event_fid, flags); -} - -static inline int fi_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr, - struct fid_eq **eq, void *context) -{ - return gnix_eq_open(fabric, attr, eq, context); -} - -static inline ssize_t fi_eq_read(struct fid_eq *eq, uint32_t *event, void *buf, - size_t len, uint64_t flags) -{ - return gnix_eq_read(eq, event, buf, len, flags); -} - -static inline ssize_t fi_eq_readerr(struct fid_eq *eq, - struct fi_eq_err_entry *buf, uint64_t flags) -{ - return gnix_eq_readerr(eq, buf, flags); -} - 
-static inline ssize_t fi_eq_write(struct fid_eq *eq, uint32_t event, - const void *buf, size_t len, uint64_t flags) -{ - return gnix_eq_write(eq, event, buf, len, flags); -} - -static inline ssize_t fi_eq_sread(struct fid_eq *eq, uint32_t *event, void *buf, - size_t len, int timeout, uint64_t flags) -{ - return gnix_eq_sread(eq, event, buf, len, timeout, flags); -} - -static inline const char *fi_eq_strerror(struct fid_eq *eq, int prov_errno, - const void *err_data, char *buf, - size_t len) -{ - return gnix_eq_strerror(eq, prov_errno, err_data, buf, len); -} - -static inline ssize_t fi_cq_read(struct fid_cq *cq, void *buf, size_t count) -{ - return gnix_cq_read(cq, buf, count); -} - -static inline ssize_t fi_cq_readfrom(struct fid_cq *cq, void *buf, size_t count, - fi_addr_t *src_addr) -{ - return gnix_cq_readfrom(cq, buf, count, src_addr); -} - -static inline ssize_t fi_cq_readerr(struct fid_cq *cq, - struct fi_cq_err_entry *buf, uint64_t flags) -{ - return gnix_cq_readerr(cq, buf, flags); -} - -static inline ssize_t fi_cq_sread(struct fid_cq *cq, void *buf, size_t count, - const void *cond, int timeout) -{ - return gnix_cq_sread(cq, buf, count, cond, timeout); -} - -static inline ssize_t fi_cq_sreadfrom(struct fid_cq *cq, void *buf, - size_t count, fi_addr_t *src_addr, - const void *cond, int timeout) -{ - return gnix_cq_sreadfrom(cq, buf, count, src_addr, cond, timeout); -} - -static inline int fi_cq_signal(struct fid_cq *cq) -{ - return gnix_cq_signal(cq); -} - -static inline const char *fi_cq_strerror(struct fid_cq *cq, int prov_errno, - const void *err_data, char *buf, - size_t len) -{ - return gnix_cq_strerror(cq, prov_errno, err_data, buf, len); -} - -static inline uint64_t fi_cntr_read(struct fid_cntr *cntr) -{ - return gnix_cntr_read(cntr); -} - -static inline uint64_t fi_cntr_readerr(struct fid_cntr *cntr) -{ - return gnix_cntr_readerr(cntr); -} - -static inline int fi_cntr_add(struct fid_cntr *cntr, uint64_t value) -{ - return gnix_cntr_add(cntr, 
value); -} - -static inline int fi_cntr_set(struct fid_cntr *cntr, uint64_t value) -{ - return gnix_cntr_set(cntr, value); -} - -static inline int fi_cntr_wait(struct fid_cntr *cntr, uint64_t threshold, - int timeout) -{ - return gnix_cntr_wait(cntr, threshold, timeout); -} - -static inline int fi_cntr_adderr(struct fid_cntr *cntr, uint64_t value) -{ - return gnix_cntr_adderr(cntr, value); -} - -static inline int fi_cntr_seterr(struct fid_cntr *cntr, uint64_t value) -{ - return gnix_cntr_seterr(cntr, value); -} - -#endif /* _FI_DIRECT_EQ_H_ */ diff --git a/prov/gni/include/rdma/fi_direct_rma.h b/prov/gni/include/rdma/fi_direct_rma.h deleted file mode 100644 index 9c6265e2774..00000000000 --- a/prov/gni/include/rdma/fi_direct_rma.h +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All - * rights reserved. - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _FI_DIRECT_RMA_H_ -#define _FI_DIRECT_RMA_H_ - -#define FABRIC_DIRECT_RMA 1 - -/******************************************************************************* - * GNI API Functions - ******************************************************************************/ -extern ssize_t gnix_ep_read(struct fid_ep *ep, void *buf, size_t len, - void *desc, fi_addr_t src_addr, uint64_t addr, - uint64_t key, void *context); - -extern ssize_t gnix_ep_readv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t src_addr, - uint64_t addr, uint64_t key, void *context); - -extern ssize_t gnix_ep_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg, - uint64_t flags); - -extern ssize_t gnix_ep_write(struct fid_ep *ep, const void *buf, size_t len, - void *desc, fi_addr_t dest_addr, uint64_t addr, - uint64_t key, void *context); - -extern ssize_t gnix_ep_writev(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t dest_addr, - uint64_t addr, uint64_t key, void *context); - -extern ssize_t gnix_ep_writemsg(struct fid_ep *ep, const struct fi_msg_rma *msg, - uint64_t flags); - -extern ssize_t gnix_ep_rma_inject(struct fid_ep *ep, const void *buf, - size_t len, fi_addr_t dest_addr, - uint64_t addr, uint64_t key); - -extern ssize_t gnix_ep_writedata(struct fid_ep *ep, const void *buf, size_t len, - void *desc, uint64_t data, fi_addr_t dest_addr, - uint64_t addr, uint64_t key, void *context); - -extern ssize_t gnix_ep_rma_injectdata(struct fid_ep *ep, const void *buf, - size_t len, uint64_t data, - fi_addr_t dest_addr, uint64_t addr, - uint64_t key); - -/******************************************************************************* - * Libfabric 
API Functions - ******************************************************************************/ -static inline ssize_t fi_read(struct fid_ep *ep, void *buf, size_t len, - void *desc, fi_addr_t src_addr, uint64_t addr, - uint64_t key, void *context) -{ - return gnix_ep_read(ep, buf, len, desc, src_addr, addr, key, context); -} - -static inline ssize_t fi_readv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t src_addr, - uint64_t addr, uint64_t key, void *context) -{ - return gnix_ep_readv(ep, iov, desc, count, src_addr, addr, key, - context); -} - -static inline ssize_t fi_readmsg(struct fid_ep *ep, - const struct fi_msg_rma *msg, uint64_t flags) -{ - return gnix_ep_readmsg(ep, msg, flags); -} - -static inline ssize_t fi_write(struct fid_ep *ep, const void *buf, size_t len, - void *desc, fi_addr_t dest_addr, uint64_t addr, - uint64_t key, void *context) -{ - return gnix_ep_write(ep, buf, len, desc, dest_addr, addr, key, context); -} - -static inline ssize_t fi_writev(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t dest_addr, - uint64_t addr, uint64_t key, void *context) -{ - return gnix_ep_writev(ep, iov, desc, count, dest_addr, addr, key, - context); -} - -static inline ssize_t fi_writemsg(struct fid_ep *ep, - const struct fi_msg_rma *msg, uint64_t flags) -{ - return gnix_ep_writemsg(ep, msg, flags); -} - -static inline ssize_t fi_inject_write(struct fid_ep *ep, const void *buf, - size_t len, fi_addr_t dest_addr, - uint64_t addr, uint64_t key) -{ - return gnix_ep_rma_inject(ep, buf, len, dest_addr, addr, key); -} - -static inline ssize_t fi_writedata(struct fid_ep *ep, const void *buf, - size_t len, void *desc, uint64_t data, - fi_addr_t dest_addr, uint64_t addr, - uint64_t key, void *context) -{ - return gnix_ep_writedata(ep, buf, len, desc, data, dest_addr, addr, key, - context); -} - -static inline ssize_t fi_inject_writedata(struct fid_ep *ep, const void *buf, - size_t len, uint64_t data, - 
fi_addr_t dest_addr, uint64_t addr, - uint64_t key) -{ - return gnix_ep_rma_injectdata(ep, buf, len, data, dest_addr, addr, key); -} - -#endif /* _FI_DIRECT_RMA_H_ */ diff --git a/prov/gni/include/rdma/fi_direct_tagged.h b/prov/gni/include/rdma/fi_direct_tagged.h deleted file mode 100644 index b355a470d15..00000000000 --- a/prov/gni/include/rdma/fi_direct_tagged.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All - * rights reserved. - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _FI_DIRECT_TAGGED_H_ -#define _FI_DIRECT_TAGGED_H_ - -#define FABRIC_DIRECT_TAGGED 1 - -/******************************************************************************* - * GNI API Functions - ******************************************************************************/ -extern ssize_t gnix_ep_trecv(struct fid_ep *ep, void *buf, size_t len, - void *desc, fi_addr_t src_addr, uint64_t tag, - uint64_t ignore, void *context); - -extern ssize_t gnix_ep_trecvv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t src_addr, - uint64_t tag, uint64_t ignore, void *context); - -extern ssize_t gnix_ep_trecvmsg(struct fid_ep *ep, - const struct fi_msg_tagged *msg, - uint64_t flags); - -extern ssize_t gnix_ep_tsend(struct fid_ep *ep, const void *buf, size_t len, - void *desc, fi_addr_t dest_addr, uint64_t tag, - void *context); - -extern ssize_t gnix_ep_tsendv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t dest_addr, - uint64_t tag, void *context); - -extern ssize_t gnix_ep_tsendmsg(struct fid_ep *ep, - const struct fi_msg_tagged *msg, - uint64_t flags); - -extern ssize_t gnix_ep_tinject(struct fid_ep *ep, const void *buf, size_t len, - fi_addr_t dest_addr, uint64_t tag); - -extern ssize_t gnix_ep_tsenddata(struct fid_ep *ep, const void *buf, size_t len, - void *desc, uint64_t data, fi_addr_t dest_addr, - uint64_t tag, void *context); - -extern ssize_t gnix_ep_tinjectdata(struct fid_ep *ep, const void *buf, - size_t len, uint64_t data, - fi_addr_t dest_addr, uint64_t tag); - -/******************************************************************************* - * Libfabric API Functions - ******************************************************************************/ -static inline ssize_t fi_trecv(struct fid_ep *ep, void *buf, size_t len, - void *desc, fi_addr_t src_addr, uint64_t tag, - uint64_t ignore, void *context) -{ - return gnix_ep_trecv(ep, buf, len, desc, src_addr, tag, ignore, - context); 
-} - -static inline ssize_t fi_trecvv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t src_addr, - uint64_t tag, uint64_t ignore, void *context) -{ - return gnix_ep_trecvv(ep, iov, desc, count, src_addr, tag, ignore, - context); -} - -static inline ssize_t -fi_trecvmsg(struct fid_ep *ep, const struct fi_msg_tagged *msg, uint64_t flags) -{ - return gnix_ep_trecvmsg(ep, msg, flags); -} - -static inline ssize_t fi_tsend(struct fid_ep *ep, const void *buf, size_t len, - void *desc, fi_addr_t dest_addr, uint64_t tag, - void *context) -{ - return gnix_ep_tsend(ep, buf, len, desc, dest_addr, tag, context); -} - -static inline ssize_t fi_tsendv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t dest_addr, - uint64_t tag, void *context) -{ - return gnix_ep_tsendv(ep, iov, desc, count, dest_addr, tag, context); -} - -static inline ssize_t -fi_tsendmsg(struct fid_ep *ep, const struct fi_msg_tagged *msg, uint64_t flags) -{ - return gnix_ep_tsendmsg(ep, msg, flags); -} - -static inline ssize_t fi_tinject(struct fid_ep *ep, const void *buf, size_t len, - fi_addr_t dest_addr, uint64_t tag) -{ - return gnix_ep_tinject(ep, buf, len, dest_addr, tag); -} - -static inline ssize_t fi_tsenddata(struct fid_ep *ep, const void *buf, - size_t len, void *desc, uint64_t data, - fi_addr_t dest_addr, uint64_t tag, - void *context) -{ - return gnix_ep_tsenddata(ep, buf, len, desc, data, dest_addr, tag, - context); -} - -static inline ssize_t fi_tinjectdata(struct fid_ep *ep, const void *buf, - size_t len, uint64_t data, - fi_addr_t dest_addr, uint64_t tag) -{ - return gnix_ep_tinjectdata(ep, buf, len, data, dest_addr, tag); -} - -#endif /* _FI_DIRECT_TAGGED_H_ */ diff --git a/prov/gni/include/rdma/fi_direct_trigger.h b/prov/gni/include/rdma/fi_direct_trigger.h deleted file mode 100644 index 678cabe91f6..00000000000 --- a/prov/gni/include/rdma/fi_direct_trigger.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2015-2016 Los 
Alamos National Security, LLC. All - * rights reserved. - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _FI_DIRECT_TRIGGER_H_ -#define _FI_DIRECT_TRIGGER_H_ - -#define FABRIC_DIRECT_TRIGGER 1 - -/* Size must match struct fi_context */ -struct fi_triggered_context { - enum fi_trigger_event event_type; - union { - struct fi_trigger_threshold threshold; - void *internal[3]; - } trigger; -}; - -#endif /* _FI_DIRECT_TRIGGER_H_ */ diff --git a/prov/gni/provider_FABRIC_1.0.map b/prov/gni/provider_FABRIC_1.0.map deleted file mode 100644 index 950f393f8e6..00000000000 --- a/prov/gni/provider_FABRIC_1.0.map +++ /dev/null @@ -1,113 +0,0 @@ -/* - * used for exporting GNI provider - * symbols when building to support FI_DIRECT - */ - gnix_av_straddr; - gnix_cq_strerror; - gnix_eq_strerror; - gnix_accept; - gnix_av_bind; - gnix_av_insertsvc; - gnix_av_insertsym; - gnix_av_open; - gnix_cntr_open; - gnix_cntr_wait; - gnix_connect; - gnix_cq_open; - gnix_domain_bind; - gnix_domain_open; - gnix_ep_atomic_compwrite; - gnix_ep_atomic_compwritemsg; - gnix_ep_atomic_compwritev; - gnix_ep_atomic_inject; - gnix_ep_atomic_readwrite; - gnix_ep_atomic_readwritemsg; - gnix_ep_atomic_readwritev; - gnix_ep_atomic_write; - gnix_ep_atomic_writemsg; - gnix_ep_atomic_writev; - gnix_ep_msg_injectdata; - gnix_ep_open; - gnix_ep_send; - gnix_ep_senddata; - gnix_ep_tinjectdata; - gnix_ep_tsenddata; - gnix_eq_open; - gnix_eq_read; - gnix_eq_sread; - gnix_fabric_trywait; - gnix_getpeer; - gnix_listen; - gnix_mr_bind; - gnix_mr_reg; - gnix_mr_regv; - gnix_mr_regattr; - gnix_pep_open; - gnix_pep_bind; - gnix_poll_add; - gnix_poll_del; - gnix_poll_open; - gnix_poll_poll; - gnix_reject; - gnix_scalable_ep_bind; - gnix_scalable_ep_open; - gnix_setname; - gnix_shutdown; - gnix_srx_context; - gnix_stx_open; - gnix_wait_open; - gnix_wait_wait; - gnix_av_insert; - gnix_av_lookup; - gnix_av_remove; - gnix_cntr_add; - gnix_cntr_set; - gnix_cq_signal; - gnix_ep_write; - gnix_ep_rma_inject; - gnix_ep_rma_injectdata; - gnix_ep_writedata; - gnix_ep_readmsg; - gnix_ep_readv; - gnix_ep_writedata; - 
gnix_ep_writemsg; - gnix_ep_writev; - gnix_ep_atomic_valid; - gnix_ep_bind; - gnix_ep_cmp_atomic_valid; - gnix_ep_control; - gnix_ep_fetch_atomic_valid; - gnix_ep_getopt; - gnix_ep_setopt; - gnix_eq_close; - gnix_eq_control; - gnix_getname; - gnix_rx_context; - gnix_tx_context; - gnix_cq_read; - gnix_cq_readerr; - gnix_cq_readfrom; - gnix_cq_sread; - gnix_cq_sreadfrom; - gnix_ep_cancel; - gnix_ep_msg_inject; - gnix_ep_read; - gnix_ep_readv; - gnix_ep_recv; - gnix_ep_recvmsg; - gnix_ep_recvv; - gnix_ep_rx_size_left; - gnix_ep_sendmsg; - gnix_ep_sendv; - gnix_ep_tinject; - gnix_ep_trecv; - gnix_ep_trecvmsg; - gnix_ep_trecvv; - gnix_ep_tsend; - gnix_ep_tsendmsg; - gnix_ep_tsendv; - gnix_ep_tx_size_left; - gnix_eq_readerr; - gnix_eq_write; - gnix_cntr_read; - gnix_cntr_readerr; diff --git a/prov/gni/src/gnix_atomic.c b/prov/gni/src/gnix_atomic.c deleted file mode 100644 index af95620af1b..00000000000 --- a/prov/gni/src/gnix_atomic.c +++ /dev/null @@ -1,703 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include - -#include "gnix.h" -#include "gnix_nic.h" -#include "gnix_vc.h" -#include "gnix_ep.h" -#include "gnix_mr.h" -#include "gnix_cntr.h" - -static int __gnix_amo_send_err(struct gnix_fid_ep *ep, - struct gnix_fab_req *req, - int error) -{ - struct gnix_fid_cntr *cntr = NULL; - int rc = FI_SUCCESS; - uint64_t flags = req->flags & GNIX_AMO_COMPLETION_FLAGS; - - if (ep->send_cq) { - rc = _gnix_cq_add_error(ep->send_cq, req->user_context, - flags, 0, 0, 0, 0, 0, error, - gnixu_to_fi_errno(GNI_RC_TRANSACTION_ERROR), - NULL, 0); - if (rc) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cq_add_error() failed: %d\n", rc); - } - } - - if (((req->type == GNIX_FAB_RQ_AMO) || - (req->type == GNIX_FAB_RQ_NAMO_AX) || - (req->type == GNIX_FAB_RQ_NAMO_AX_S)) && - ep->write_cntr) { - cntr = ep->write_cntr; - } else if ((req->type == GNIX_FAB_RQ_FAMO || - req->type == GNIX_FAB_RQ_CAMO || - req->type == GNIX_FAB_RQ_NAMO_FAX || - req->type == GNIX_FAB_RQ_NAMO_FAX_S) && - ep->read_cntr) { - cntr = ep->read_cntr; - } - - if (cntr) { - rc = _gnix_cntr_inc_err(cntr); - if (rc) - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cntr_inc_err() failed: %d\n", rc); - } - - return rc; -} - -static int __gnix_amo_send_completion(struct gnix_fid_ep *ep, - struct gnix_fab_req 
*req) -{ - struct gnix_fid_cntr *cntr = NULL; - int rc = FI_SUCCESS; - uint64_t flags = req->flags & GNIX_AMO_COMPLETION_FLAGS; - - if ((req->flags & FI_COMPLETION) && ep->send_cq) { - rc = _gnix_cq_add_event(ep->send_cq, ep, req->user_context, - flags, 0, 0, 0, 0, FI_ADDR_NOTAVAIL); - if (rc) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cq_add_event() failed: %d\n", rc); - } - } - - if ((req->type == GNIX_FAB_RQ_AMO || - req->type == GNIX_FAB_RQ_NAMO_AX || - req->type == GNIX_FAB_RQ_NAMO_AX_S) && - ep->write_cntr) { - cntr = ep->write_cntr; - } else if ((req->type == GNIX_FAB_RQ_FAMO || - req->type == GNIX_FAB_RQ_CAMO || - req->type == GNIX_FAB_RQ_NAMO_FAX || - req->type == GNIX_FAB_RQ_NAMO_FAX_S) && - ep->read_cntr) { - cntr = ep->read_cntr; - } - - if (cntr) { - rc = _gnix_cntr_inc(cntr); - if (rc) - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cntr_inc() failed: %d\n", rc); - } - - return FI_SUCCESS; -} - -static void __gnix_amo_fr_complete(struct gnix_fab_req *req) -{ - int rc; - - if (req->flags & FI_LOCAL_MR) { - GNIX_INFO(FI_LOG_EP_DATA, "freeing auto-reg MR: %p\n", - req->amo.loc_md); - rc = fi_close(&req->amo.loc_md->mr_fid.fid); - if (rc != FI_SUCCESS) { - GNIX_ERR(FI_LOG_DOMAIN, - "failed to deregister auto-registered region, " - "rc=%d\n", rc); - } - - req->flags &= ~FI_LOCAL_MR; - } - - ofi_atomic_dec32(&req->vc->outstanding_tx_reqs); - - /* Schedule VC TX queue in case the VC is 'fenced'. */ - _gnix_vc_tx_schedule(req->vc); - - _gnix_fr_free(req->vc->ep, req); -} - -static int __gnix_amo_post_err(struct gnix_fab_req *req, int error) -{ - int rc; - - rc = __gnix_amo_send_err(req->vc->ep, req, error); - if (rc != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_amo_send_err() failed: %d\n", - rc); - - __gnix_amo_fr_complete(req); - return FI_SUCCESS; -} - -/* SMSG callback for AMO remote counter control message. 
*/ -int __smsg_amo_cntr(void *data, void *msg) -{ - int ret = FI_SUCCESS; - struct gnix_vc *vc = (struct gnix_vc *)data; - struct gnix_smsg_amo_cntr_hdr *hdr = - (struct gnix_smsg_amo_cntr_hdr *)msg; - struct gnix_fid_ep *ep = vc->ep; - gni_return_t status; - - if (hdr->flags & FI_REMOTE_WRITE && ep->rwrite_cntr) { - ret = _gnix_cntr_inc(ep->rwrite_cntr); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cntr_inc() failed: %d\n", - ret); - } - - if (hdr->flags & FI_REMOTE_READ && ep->rread_cntr) { - ret = _gnix_cntr_inc(ep->rread_cntr); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cntr_inc() failed: %d\n", - ret); - } - - status = GNI_SmsgRelease(vc->gni_ep); - if (OFI_UNLIKELY(status != GNI_RC_SUCCESS)) { - GNIX_WARN(FI_LOG_EP_DATA, - "GNI_SmsgRelease returned %s\n", - gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - } - - return ret; -} - -static int __gnix_amo_txd_cntr_complete(void *arg, gni_return_t tx_status) -{ - struct gnix_tx_descriptor *txd = (struct gnix_tx_descriptor *)arg; - struct gnix_fab_req *req = txd->req; - int rc; - - _gnix_nic_tx_free(req->gnix_ep->nic, txd); - - if (tx_status != GNI_RC_SUCCESS) - return __gnix_amo_post_err(req, FI_ECANCELED); - - /* Successful data delivery. Generate local completion. 
*/ - rc = __gnix_amo_send_completion(req->vc->ep, req); - if (rc != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_amo_send_completion() failed: %d\n", - rc); - - __gnix_amo_fr_complete(req); - - return FI_SUCCESS; -} - -static int __gnix_amo_send_cntr_req(void *arg) -{ - struct gnix_fab_req *req = (struct gnix_fab_req *)arg; - struct gnix_fid_ep *ep = req->gnix_ep; - struct gnix_nic *nic = ep->nic; - struct gnix_tx_descriptor *txd; - gni_return_t status; - int rc; - int inject_err = _gnix_req_inject_err(req); - - rc = _gnix_nic_tx_alloc(nic, &txd); - if (rc) { - GNIX_INFO(FI_LOG_EP_DATA, - "_gnix_nic_tx_alloc() failed: %d\n", - rc); - return -FI_ENOSPC; - } - - txd->req = req; - txd->completer_fn = __gnix_amo_txd_cntr_complete; - - if (req->type == GNIX_FAB_RQ_AMO) { - txd->amo_cntr_hdr.flags = FI_REMOTE_WRITE; - } else { - txd->amo_cntr_hdr.flags = FI_REMOTE_READ; - } - - COND_ACQUIRE(nic->requires_lock, &nic->lock); - if (inject_err) { - _gnix_nic_txd_err_inject(nic, txd); - status = GNI_RC_SUCCESS; - } else { - status = GNI_SmsgSendWTag(req->vc->gni_ep, - &txd->amo_cntr_hdr, - sizeof(txd->amo_cntr_hdr), - NULL, 0, txd->id, - GNIX_SMSG_T_AMO_CNTR); - } - COND_RELEASE(nic->requires_lock, &nic->lock); - - if (status == GNI_RC_NOT_DONE) { - _gnix_nic_tx_free(nic, txd); - GNIX_INFO(FI_LOG_EP_DATA, - "GNI_SmsgSendWTag returned %s\n", - gni_err_str[status]); - } else if (status != GNI_RC_SUCCESS) { - _gnix_nic_tx_free(nic, txd); - GNIX_WARN(FI_LOG_EP_DATA, - "GNI_SmsgSendWTag returned %s\n", - gni_err_str[status]); - } else { - GNIX_INFO(FI_LOG_EP_DATA, "Sent RMA CQ data, req: %p\n", req); - } - - return gnixu_to_fi_errno(status); -} - -static int __gnix_amo_txd_complete(void *arg, gni_return_t tx_status) -{ - struct gnix_tx_descriptor *txd = (struct gnix_tx_descriptor *)arg; - struct gnix_fab_req *req = txd->req; - int rc = FI_SUCCESS; - - _gnix_nic_tx_free(req->vc->ep->nic, txd); - - if (tx_status != GNI_RC_SUCCESS) { - return __gnix_amo_post_err(req, 
FI_ECANCELED); - } - - if (req->vc->peer_caps & FI_RMA_EVENT) { - /* control message needed for a counter event. */ - req->work_fn = __gnix_amo_send_cntr_req; - _gnix_vc_queue_work_req(req); - } else { - /* complete request */ - rc = __gnix_amo_send_completion(req->vc->ep, req); - if (rc != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_amo_send_completion() failed: %d\n", - rc); - - __gnix_amo_fr_complete(req); - } - - return FI_SUCCESS; -} - -/* - * Datatypes: - * - * FI_INT8, FI_UINT8, FI_INT16, FI_UINT16, - * FI_INT32, FI_UINT32, - * FI_INT64, FI_UINT64, - * FI_FLOAT, FI_DOUBLE, - * FI_FLOAT_COMPLEX, FI_DOUBLE_COMPLEX, - * FI_LONG_DOUBLE, FI_LONG_DOUBLE_COMPLEX - */ - -static int __gnix_amo_cmds[FI_ATOMIC_OP_LAST][FI_DATATYPE_LAST] = { - /* - * Basic AMO types: - * FI_MIN, FI_MAX, FI_SUM, FI_PROD, FI_LOR, FI_LAND, FI_BOR, FI_BAND, - * FI_LXOR, FI_BXOR, and FI_ATOMIC_WRITE. - */ - [FI_MIN] = { 0,0,0,0, GNI_FMA_ATOMIC2_IMIN_S, 0, GNI_FMA_ATOMIC2_IMIN, 0, GNI_FMA_ATOMIC2_FPMIN_S, GNI_FMA_ATOMIC2_FPMIN }, - [FI_MAX] = { 0,0,0,0, GNI_FMA_ATOMIC2_IMAX_S, 0, GNI_FMA_ATOMIC2_IMAX, 0, GNI_FMA_ATOMIC2_FPMAX_S, GNI_FMA_ATOMIC2_FPMAX }, - [FI_SUM] = { 0,0,0,0, GNI_FMA_ATOMIC2_IADD_S, GNI_FMA_ATOMIC2_IADD_S, GNI_FMA_ATOMIC2_IADD, GNI_FMA_ATOMIC2_IADD, GNI_FMA_ATOMIC2_FPADD_S, 0 /* DP addition is broken */ }, - [FI_BOR] = { 0,0,0,0, GNI_FMA_ATOMIC2_OR_S, GNI_FMA_ATOMIC2_OR_S, GNI_FMA_ATOMIC2_OR, GNI_FMA_ATOMIC2_OR, 0, 0 }, - [FI_BAND] = { 0,0,0,0, GNI_FMA_ATOMIC2_AND_S, GNI_FMA_ATOMIC2_AND_S, GNI_FMA_ATOMIC2_AND, GNI_FMA_ATOMIC2_AND, 0, 0 }, - [FI_BXOR] = { 0,0,0,0, GNI_FMA_ATOMIC2_XOR_S, GNI_FMA_ATOMIC2_XOR_S, GNI_FMA_ATOMIC2_XOR, GNI_FMA_ATOMIC2_XOR, 0, 0 }, - [FI_ATOMIC_WRITE] = { 0,0,0,0, GNI_FMA_ATOMIC2_SWAP_S, GNI_FMA_ATOMIC2_SWAP_S, GNI_FMA_ATOMIC2_SWAP, GNI_FMA_ATOMIC2_SWAP, GNI_FMA_ATOMIC2_SWAP_S, GNI_FMA_ATOMIC2_SWAP }, -}; - -static int __gnix_fetch_amo_cmds[FI_ATOMIC_OP_LAST][FI_DATATYPE_LAST] = { - /* - * Fetch AMO types: - * FI_MIN, FI_MAX, FI_SUM, FI_PROD, 
FI_LOR, FI_LAND, FI_BOR, FI_BAND, - * FI_LXOR, FI_BXOR, FI_ATOMIC_READ, and FI_ATOMIC_WRITE. - */ - [FI_MIN] = { 0,0,0,0, GNI_FMA_ATOMIC2_FIMIN_S, 0, GNI_FMA_ATOMIC2_FIMIN, 0, GNI_FMA_ATOMIC2_FFPMIN_S, GNI_FMA_ATOMIC2_FFPMIN }, - [FI_MAX] = { 0,0,0,0, GNI_FMA_ATOMIC2_FIMAX_S, 0, GNI_FMA_ATOMIC2_FIMAX, 0, GNI_FMA_ATOMIC2_FFPMAX_S, GNI_FMA_ATOMIC2_FFPMAX }, - [FI_SUM] = { 0,0,0,0, GNI_FMA_ATOMIC2_FIADD_S, GNI_FMA_ATOMIC2_FIADD_S, GNI_FMA_ATOMIC2_FIADD, GNI_FMA_ATOMIC2_FIADD, GNI_FMA_ATOMIC2_FFPADD_S, 0 /* DP addition is broken */ }, - [FI_BOR] = { 0,0,0,0, GNI_FMA_ATOMIC2_FOR_S, GNI_FMA_ATOMIC2_FOR_S, GNI_FMA_ATOMIC2_FOR, GNI_FMA_ATOMIC2_FOR, 0, 0 }, - [FI_BAND] = { 0,0,0,0, GNI_FMA_ATOMIC2_FAND_S, GNI_FMA_ATOMIC2_FAND_S, GNI_FMA_ATOMIC2_FAND, GNI_FMA_ATOMIC2_FAND, 0, 0 }, - [FI_BXOR] = { 0,0,0,0, GNI_FMA_ATOMIC2_FXOR_S, GNI_FMA_ATOMIC2_FXOR_S, GNI_FMA_ATOMIC2_FXOR, GNI_FMA_ATOMIC2_FXOR, 0, 0 }, - [FI_ATOMIC_READ] = { 0,0,0,0, GNI_FMA_ATOMIC2_FAND_S, GNI_FMA_ATOMIC2_FAND_S, GNI_FMA_ATOMIC2_FAND, GNI_FMA_ATOMIC2_FAND, GNI_FMA_ATOMIC2_FAND_S, GNI_FMA_ATOMIC2_FAND }, - [FI_ATOMIC_WRITE] = { 0,0,0,0, GNI_FMA_ATOMIC2_FSWAP_S, GNI_FMA_ATOMIC2_FSWAP_S, GNI_FMA_ATOMIC2_FSWAP, GNI_FMA_ATOMIC2_FSWAP, GNI_FMA_ATOMIC2_FSWAP_S, GNI_FMA_ATOMIC2_FSWAP }, -}; - -static int __gnix_cmp_amo_cmds[FI_ATOMIC_OP_LAST][FI_DATATYPE_LAST] = { - /* - * Compare AMO types: - * FI_CSWAP, FI_CSWAP_NE, FI_CSWAP_LE, - * FI_CSWAP_LT, FI_CSWAP_GE, FI_CSWAP_GT, and FI_MSWAP. - */ - [FI_CSWAP] = { 0,0,0,0, GNI_FMA_ATOMIC2_FCSWAP_S, GNI_FMA_ATOMIC2_FCSWAP_S, GNI_FMA_ATOMIC2_FCSWAP, GNI_FMA_ATOMIC2_FCSWAP, GNI_FMA_ATOMIC2_FCSWAP_S, GNI_FMA_ATOMIC2_FCSWAP }, - [FI_MSWAP] = { 0,0,0,0, GNI_FMA_ATOMIC2_FAX_S, GNI_FMA_ATOMIC2_FAX_S, GNI_FMA_ATOMIC2_FAX, GNI_FMA_ATOMIC2_FAX, GNI_FMA_ATOMIC2_FAX_S, GNI_FMA_ATOMIC2_FAX }, -}; - -/* Return a GNI AMO command for a LF operation, datatype, AMO type. 
*/ -int _gnix_atomic_cmd(enum fi_datatype dt, enum fi_op op, - enum gnix_fab_req_type fr_type) -{ - if (!((fr_type == GNIX_FAB_RQ_NAMO_AX) || - (fr_type == GNIX_FAB_RQ_NAMO_FAX) || - (fr_type == GNIX_FAB_RQ_NAMO_AX_S) || - (fr_type == GNIX_FAB_RQ_NAMO_FAX_S)) && - (dt >= FI_DATATYPE_LAST || op >= FI_ATOMIC_OP_LAST)) { - return -FI_EOPNOTSUPP; - } - - switch(fr_type) { - case GNIX_FAB_RQ_AMO: - return __gnix_amo_cmds[op][dt] ?: -FI_EOPNOTSUPP; - case GNIX_FAB_RQ_FAMO: - return __gnix_fetch_amo_cmds[op][dt] ?: -FI_EOPNOTSUPP; - case GNIX_FAB_RQ_CAMO: - return __gnix_cmp_amo_cmds[op][dt] ?: -FI_EOPNOTSUPP; - case GNIX_FAB_RQ_NAMO_AX: - return GNI_FMA_ATOMIC2_AX; - case GNIX_FAB_RQ_NAMO_AX_S: - return GNI_FMA_ATOMIC2_AX_S; - case GNIX_FAB_RQ_NAMO_FAX: - return GNI_FMA_ATOMIC2_FAX; - case GNIX_FAB_RQ_NAMO_FAX_S: - return GNI_FMA_ATOMIC2_FAX_S; - default: - break; - } - - return -FI_EOPNOTSUPP; -} - -int _gnix_amo_post_req(void *data) -{ - struct gnix_fab_req *fab_req = (struct gnix_fab_req *)data; - struct gnix_fid_ep *ep = fab_req->gnix_ep; - struct gnix_nic *nic = ep->nic; - struct gnix_fid_mem_desc *loc_md; - struct gnix_tx_descriptor *txd; - gni_mem_handle_t mdh; - gni_return_t status; - int rc; - int inject_err = _gnix_req_inject_err(fab_req); - - if (!gnix_ops_allowed(ep, fab_req->vc->peer_caps, fab_req->flags)) { - GNIX_DEBUG(FI_LOG_EP_DATA, "flags:0x%llx, %s\n", fab_req->flags, - fi_tostr(&fab_req->flags, FI_TYPE_OP_FLAGS)); - GNIX_DEBUG(FI_LOG_EP_DATA, "caps:0x%llx, %s\n", - ep->caps, fi_tostr(&ep->caps, FI_TYPE_CAPS)); - GNIX_DEBUG(FI_LOG_EP_DATA, "peer_caps:0x%llx, %s\n", - fab_req->vc->peer_caps, - fi_tostr(&fab_req->vc->peer_caps, FI_TYPE_OP_FLAGS)); - - rc = __gnix_amo_post_err(fab_req, FI_EOPNOTSUPP); - if (rc != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_amo_post_err() failed: %d\n", rc); - return -FI_ECANCELED; - } - - rc = _gnix_nic_tx_alloc(nic, &txd); - if (rc) { - GNIX_INFO(FI_LOG_EP_DATA, "_gnix_nic_tx_alloc() failed: %d\n", - rc); - return 
-FI_ENOSPC; - } - - txd->completer_fn = __gnix_amo_txd_complete; - txd->req = fab_req; - - /* Mem handle CRC is not validated during FMA operations. Skip this - * costly calculation. */ - _GNIX_CONVERT_MR_KEY(ep->auth_key->using_vmdh, - fab_req->vc->peer_key_offset, - _gnix_convert_key_to_mhdl_no_crc, - &fab_req->amo.rem_mr_key, &mdh); - - loc_md = (struct gnix_fid_mem_desc *)fab_req->amo.loc_md; - - txd->gni_desc.type = GNI_POST_AMO; - txd->gni_desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT; /* check flags */ - txd->gni_desc.dlvr_mode = GNI_DLVMODE_PERFORMANCE; /* check flags */ - txd->gni_desc.local_addr = (uint64_t)fab_req->amo.loc_addr; - if (loc_md) { - txd->gni_desc.local_mem_hndl = loc_md->mem_hndl; - } - txd->gni_desc.remote_addr = (uint64_t)fab_req->amo.rem_addr; - txd->gni_desc.remote_mem_hndl = mdh; - txd->gni_desc.length = fab_req->amo.len; - txd->gni_desc.rdma_mode = 0; /* check flags */ - txd->gni_desc.src_cq_hndl = nic->tx_cq; /* check flags */ - - txd->gni_desc.amo_cmd = _gnix_atomic_cmd(fab_req->amo.datatype, - fab_req->amo.op, - fab_req->type); - txd->gni_desc.first_operand = fab_req->amo.first_operand; - txd->gni_desc.second_operand = fab_req->amo.second_operand; - - GNIX_DEBUG(FI_LOG_EP_DATA, "fo:%016lx so:%016lx\n", - txd->gni_desc.first_operand, txd->gni_desc.second_operand); - GNIX_DEBUG(FI_LOG_EP_DATA, "amo_cmd:%x\n", - txd->gni_desc.amo_cmd); - GNIX_LOG_DUMP_TXD(txd); - - COND_ACQUIRE(nic->requires_lock, &nic->lock); - - if (OFI_UNLIKELY(inject_err)) { - _gnix_nic_txd_err_inject(nic, txd); - status = GNI_RC_SUCCESS; - } else { - status = GNI_PostFma(fab_req->vc->gni_ep, &txd->gni_desc); - } - - COND_RELEASE(nic->requires_lock, &nic->lock); - - if (status != GNI_RC_SUCCESS) { - _gnix_nic_tx_free(nic, txd); - GNIX_INFO(FI_LOG_EP_DATA, "GNI_Post*() failed: %s\n", - gni_err_str[status]); - } - - return gnixu_to_fi_errno(status); -} - -ssize_t _gnix_atomic(struct gnix_fid_ep *ep, - enum gnix_fab_req_type fr_type, - const struct fi_msg_atomic *msg, - 
const struct fi_ioc *comparev, - void **compare_desc, - size_t compare_count, - struct fi_ioc *resultv, - void **result_desc, - size_t result_count, - uint64_t flags) -{ - struct gnix_vc *vc; - struct gnix_fab_req *req; - struct gnix_fid_mem_desc *md = NULL; - int rc, len; - struct fid_mr *auto_mr = NULL; - void *mdesc = NULL; - uint64_t compare_operand = 0; - void *loc_addr = NULL; - int dt_len, dt_align; - int connected; - - if (!(flags & FI_INJECT) && !ep->send_cq && - (((fr_type == GNIX_FAB_RQ_AMO || - fr_type == GNIX_FAB_RQ_NAMO_AX || - fr_type == GNIX_FAB_RQ_NAMO_AX_S) && - !ep->write_cntr) || - ((fr_type == GNIX_FAB_RQ_FAMO || - fr_type == GNIX_FAB_RQ_CAMO || - fr_type == GNIX_FAB_RQ_NAMO_FAX || - fr_type == GNIX_FAB_RQ_NAMO_FAX_S) && - !ep->read_cntr))) { - return -FI_ENOCQ; - } - - - if (!ep || !msg || !msg->msg_iov || - msg->msg_iov[0].count != 1 || - msg->iov_count != GNIX_MAX_ATOMIC_IOV_LIMIT || - !msg->rma_iov) - return -FI_EINVAL; - - /* - * see fi_atomic man page - */ - - if ((msg->op != FI_ATOMIC_READ) && - !msg->msg_iov[0].addr) - return -FI_EINVAL; - - if (flags & FI_TRIGGER) { - struct fi_triggered_context *trigger_context = - (struct fi_triggered_context *)msg->context; - if ((trigger_context->event_type != FI_TRIGGER_THRESHOLD) || - (flags & FI_INJECT)) { - return -FI_EINVAL; - } - } - - if (fr_type == GNIX_FAB_RQ_CAMO) { - if (!comparev || !comparev[0].addr || compare_count != 1) - return -FI_EINVAL; - - compare_operand = *(uint64_t *)comparev[0].addr; - } - - dt_len = ofi_datatype_size(msg->datatype); - dt_align = dt_len - 1; - len = dt_len * msg->msg_iov->count; - - if (msg->rma_iov->addr & dt_align) { - GNIX_INFO(FI_LOG_EP_DATA, - "Invalid target alignment: %d (mask 0x%x)\n", - msg->rma_iov->addr, dt_align); - return -FI_EINVAL; - } - - /* need a memory descriptor for all fetching and comparison AMOs */ - if (fr_type == GNIX_FAB_RQ_FAMO || - fr_type == GNIX_FAB_RQ_CAMO || - fr_type == GNIX_FAB_RQ_NAMO_FAX || - fr_type == 
GNIX_FAB_RQ_NAMO_FAX_S) { - if (!resultv || !resultv[0].addr || result_count != 1) - return -FI_EINVAL; - - loc_addr = resultv[0].addr; - - if ((uint64_t)loc_addr & dt_align) { - GNIX_INFO(FI_LOG_EP_DATA, - "Invalid source alignment: %d (mask 0x%x)\n", - loc_addr, dt_align); - return -FI_EINVAL; - } - - if (!result_desc || !result_desc[0]) { - rc = _gnix_mr_reg(&ep->domain->domain_fid.fid, - loc_addr, len, FI_READ | FI_WRITE, - 0, 0, 0, &auto_mr, - NULL, ep->auth_key, GNIX_PROV_REG); - if (rc != FI_SUCCESS) { - GNIX_INFO(FI_LOG_EP_DATA, - "Failed to auto-register local buffer: %d\n", - rc); - return rc; - } - flags |= FI_LOCAL_MR; - mdesc = (void *)auto_mr; - GNIX_INFO(FI_LOG_EP_DATA, "auto-reg MR: %p\n", - auto_mr); - } else { - mdesc = result_desc[0]; - } - } - - /* setup fabric request */ - req = _gnix_fr_alloc(ep); - if (!req) { - GNIX_INFO(FI_LOG_EP_DATA, "_gnix_fr_alloc() failed\n"); - rc = -FI_ENOSPC; - goto err_fr_alloc; - } - - req->type = fr_type; - req->gnix_ep = ep; - req->user_context = msg->context; - req->work_fn = _gnix_amo_post_req; - - if (mdesc) { - md = container_of(mdesc, struct gnix_fid_mem_desc, mr_fid); - } - req->amo.loc_md = (void *)md; - req->amo.loc_addr = (uint64_t)loc_addr; - - if ((fr_type == GNIX_FAB_RQ_NAMO_AX) || - (fr_type == GNIX_FAB_RQ_NAMO_FAX) || - (fr_type == GNIX_FAB_RQ_NAMO_AX_S) || - (fr_type == GNIX_FAB_RQ_NAMO_FAX_S)) { - req->amo.first_operand = - *(uint64_t *)msg->msg_iov[0].addr; - req->amo.second_operand = - *((uint64_t *)(msg->msg_iov[0].addr) + 1); - } else if (msg->op == FI_ATOMIC_READ) { - req->amo.first_operand = 0xFFFFFFFFFFFFFFFF; /* operand to FAND */ - } else if (msg->op == FI_CSWAP) { - req->amo.first_operand = compare_operand; - req->amo.second_operand = *(uint64_t *)msg->msg_iov[0].addr; - } else if (msg->op == FI_MSWAP) { - req->amo.first_operand = ~compare_operand; - req->amo.second_operand = *(uint64_t *)msg->msg_iov[0].addr; - req->amo.second_operand &= compare_operand; - } else { - 
req->amo.first_operand = *(uint64_t *)msg->msg_iov[0].addr; - } - - req->amo.rem_addr = msg->rma_iov->addr; - req->amo.rem_mr_key = msg->rma_iov->key; - req->amo.len = len; - req->amo.imm = msg->data; - req->amo.datatype = msg->datatype; - req->amo.op = msg->op; - req->flags = flags; - - /* Inject interfaces always suppress completions. If - * SELECTIVE_COMPLETION is set, honor any setting. Otherwise, always - * deliver a completion. */ - if ((flags & GNIX_SUPPRESS_COMPLETION) || - (ep->send_selective_completion && !(flags & FI_COMPLETION))) { - req->flags &= ~FI_COMPLETION; - } else { - req->flags |= FI_COMPLETION; - } - - COND_ACQUIRE(ep->requires_lock, &ep->vc_lock); - - /* find VC for target */ - rc = _gnix_vc_ep_get_vc(ep, msg->addr, &vc); - if (rc) { - GNIX_INFO(FI_LOG_EP_DATA, - "_gnix_vc_ep_get_vc() failed, addr: %lx, rc:\n", - msg->addr, rc); - goto err_get_vc; - } - - req->vc = vc; - - rc = _gnix_vc_queue_tx_req(req); - connected = (vc->conn_state == GNIX_VC_CONNECTED); - - COND_RELEASE(ep->requires_lock, &ep->vc_lock); - - /* - *If a new VC was allocated, progress CM before returning. - * If the VC is connected and there's a backlog, poke - * the nic progress engine befure returning. - */ - if (!connected) { - _gnix_cm_nic_progress(ep->cm_nic); - } else if (!dlist_empty(&vc->tx_queue)) { - _gnix_nic_progress(vc->ep->nic); - } - - return rc; - -err_get_vc: - COND_RELEASE(ep->requires_lock, &ep->vc_lock); -err_fr_alloc: - if (auto_mr) { - fi_close(&auto_mr->fid); - } - return rc; -} - diff --git a/prov/gni/src/gnix_auth_key.c b/prov/gni/src/gnix_auth_key.c deleted file mode 100644 index 6ec46197130..00000000000 --- a/prov/gni/src/gnix_auth_key.c +++ /dev/null @@ -1,555 +0,0 @@ -/* - * Copyright (c) 2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "rdma/fabric.h" -#include "rdma/fi_errno.h" -#include "fi_ext_gni.h" - -#include "gnix_auth_key.h" -#include "gnix_hashtable.h" -#include "gnix_shmem.h" -#include "gnix_bitmap.h" -#include "gnix.h" - -#define GNIX_AUTH_KEY_HASHSEED 0xdeadbeef - -#define GAI_VERSION(major, minor) (((major) << 16) + (minor)) -typedef enum gnix_global_auth_info_version { - GNIX_GAI_VERSION_1 = GAI_VERSION(1, 0), /* initial version */ -} gnix_global_auth_info_version_t; - -#define MAX_VMDH_TAGS 4 -#define MAX_VMDH_REGS 4096 -#define BITMAP_ARR_SIZE(nbits) \ - (GNIX_BITMAP_BLOCKS(nbits) * sizeof(gnix_bitmap_block_t)) - -#define GNIX_DEFAULT_AK_DIR "/tmp" -#define GNIX_DEFAULT_AK_FILENAME "gnix_vmdh_info" -#define GNIX_DEFAULT_AK_PATH \ - GNIX_DEFAULT_AK_DIR "/" GNIX_DEFAULT_AK_FILENAME -#define GNIX_AK_PATH_LEN 256 - -static char *gnix_default_ak_path = GNIX_DEFAULT_AK_PATH; - -uint8_t* gnix_default_auth_key = NULL; - -struct gnix_global_ptag_info { - gnix_bitmap_t prov; - gnix_bitmap_t user; - uint8_t arr[BITMAP_ARR_SIZE(MAX_VMDH_REGS)]; -}; - -struct gnix_global_vmdh_info { - uint32_t version; - uint32_t size; - uint32_t table_size; - ofi_spin_t lock; - int ptag_index[MAX_VMDH_TAGS]; - struct gnix_global_ptag_info info[MAX_VMDH_TAGS]; -}; - -/* Global data storage for authorization key information */ -/* gnix_vmdh_info is the shared memory synchronization area for ptag info */ -static struct gnix_shared_memory __gnix_shmem_region; -static struct gnix_global_vmdh_info *__gnix_vmdh_info; -gnix_hashtable_t __gnix_auth_key_ht; - -static int __gnix_global_vmdh_info_init(const char *path, - uint32_t size, void *buffer) -{ - struct gnix_global_vmdh_info *info = - (struct gnix_global_vmdh_info *) buffer; - int i; - - memset(info, 0, sizeof(struct gnix_global_vmdh_info)); - - info->version = GNIX_GAI_VERSION_1; - info->size = size; - info->table_size = _gnix_bitmap_get_buffer_size(MAX_VMDH_REGS); - for (i = 0; i < MAX_VMDH_TAGS; i++) - info->ptag_index[i] = -1; - 
ofi_spin_init(&info->lock); - - return 0; -} - -static int _gnix_open_vmdh_info_file(const char *path) -{ - int ret; - - if (!__gnix_vmdh_info) { - ret = _gnix_shmem_create(path, - sizeof(struct gnix_global_vmdh_info), - __gnix_global_vmdh_info_init, - &__gnix_shmem_region); - if (ret) - return ret; - - __gnix_vmdh_info = (struct gnix_global_vmdh_info *) - __gnix_shmem_region.addr; - } - - if (__gnix_vmdh_info->version != GNIX_GAI_VERSION_1) - GNIX_FATAL(FI_LOG_FABRIC, - "failed to find compatible version of " - "vmdh information file, expected=%x actual=%x\n", - GNIX_GAI_VERSION_1, __gnix_vmdh_info->version); - - return 0; -} -int _gnix_get_next_reserved_key(struct gnix_auth_key *info) -{ - int reserved_key; - int offset = info->attr.user_key_limit; - int retry_limit = 10; /* randomly picked */ - int ret; - - if (!info) { - GNIX_WARN(FI_LOG_MR, "bad authorization key, key=%p\n", - info); - return -FI_EINVAL; - } - - do { - reserved_key = _gnix_find_first_zero_bit(info->prov); - if (reserved_key >= 0) { - ret = _gnix_test_and_set_bit(info->prov, reserved_key); - if (ret) - reserved_key = -FI_EAGAIN; - } - retry_limit--; - } while (reserved_key < 0 && retry_limit > 0); - - ret = (reserved_key < 0) ? reserved_key : (offset + reserved_key); - - GNIX_INFO(FI_LOG_DOMAIN, "returning key=%d offset=%d\n", ret, offset); - - return ret; -} - -int _gnix_release_reserved_key(struct gnix_auth_key *info, int reserved_key) -{ - int offset = info->attr.user_key_limit; - int ret; - - if (!info || reserved_key < 0) { - GNIX_WARN(FI_LOG_MR, "bad authorization key or reserved key," - " auth_key=%p requested_key=%d\n", - info, reserved_key); - return -FI_EINVAL; - } - - ret = _gnix_test_and_clear_bit(info->prov, reserved_key - offset); - assert(ret == 1); - - return (ret == 1) ? 
FI_SUCCESS : -FI_EBUSY; -} - -static inline int __gnix_auth_key_enable_vmdh(struct gnix_auth_key *info) -{ - int i, ret; - void *buffer; - - ofi_spin_lock(&__gnix_vmdh_info->lock); - /* Find ptag in node-local info structure */ - for (i = 0; i < MAX_VMDH_TAGS; i++) - if (__gnix_vmdh_info->ptag_index[i] == info->ptag) - break; - - if (i == MAX_VMDH_TAGS) { /* didn't find it */ - /* find first empty region */ - for (i = 0; i < MAX_VMDH_TAGS; i++) - if (__gnix_vmdh_info->ptag_index[i] == -1) - break; - - /* if no space ... */ - if (i == MAX_VMDH_TAGS) { - ofi_spin_unlock(&__gnix_vmdh_info->lock); - GNIX_WARN(FI_LOG_FABRIC, - "application is attempting to use too many keys " - "with scalable memory registration, " - "ret=-FI_ENOSPC\n"); - return -FI_ENOSPC; - } - - /* set index entry to ptag ID */ - __gnix_vmdh_info->ptag_index[i] = info->ptag; - - /* setup provider key space */ - buffer = (void *) __gnix_vmdh_info->info[i].arr; - ret = _gnix_alloc_bitmap(&__gnix_vmdh_info->info[i].prov, - info->attr.prov_key_limit, buffer); - if (ret) { - ofi_spin_unlock(&__gnix_vmdh_info->lock); - GNIX_WARN(FI_LOG_FABRIC, - "failed to allocate bitmap on mmap backed page, ret=%d\n", - ret); - return ret; - } - - /* advance buffer and setup user key space */ - buffer = (void *) ((uint64_t) (buffer) + - _gnix_bitmap_get_buffer_size(info->attr.prov_key_limit)); - - ret = _gnix_alloc_bitmap(&__gnix_vmdh_info->info[i].user, - info->attr.user_key_limit, buffer); - if (ret) { - ofi_spin_unlock(&__gnix_vmdh_info->lock); - GNIX_WARN(FI_LOG_FABRIC, - "failed to allocate bitmap on mmap backed page, ret=%d\n", - ret); - return ret; - } - - GNIX_INFO(FI_LOG_FABRIC, - "set resource limits: pkey=%08x ptag=%d " - "reserved=%d registration_limit=%d " - "reserved_keys=%d-%d\n", - info->cookie, - info->ptag, - info->attr.prov_key_limit, - info->attr.user_key_limit, - info->attr.user_key_limit, - (info->attr.prov_key_limit + - info->attr.user_key_limit - 1)); - } - info->prov = 
&__gnix_vmdh_info->info[i].prov; - info->user = &__gnix_vmdh_info->info[i].user; - ofi_spin_unlock(&__gnix_vmdh_info->lock); - - return FI_SUCCESS; -} - -int _gnix_auth_key_enable(struct gnix_auth_key *info) -{ - int ret = -FI_EBUSY; - uint32_t pes_on_node; - int logical_rank; - - if (!info) { - GNIX_WARN(FI_LOG_MR, "bad authorization key, key=%p\n", - info); - return -FI_EINVAL; - } - - ofi_spin_lock(&info->lock); - if (!info->enabled) { - info->enabled = 1; - - if (info->using_vmdh) { - if (!__gnix_vmdh_info) { - char *dir = getenv("TMPDIR"); - char *filename = getenv("GNIX_AK_FILENAME"); - char path[GNIX_AK_PATH_LEN]; - int sz, i; - - if (dir || filename) { - GNIX_DEBUG(FI_LOG_FABRIC, - "using non-default directory or file name, " - "errors may occur\n"); - if (!dir) - sz = snprintf(path, GNIX_AK_PATH_LEN, - "%s/", GNIX_DEFAULT_AK_DIR); - else - sz = snprintf(path, GNIX_AK_PATH_LEN, - "%s/", dir); - - if (sz < 0) - return -FI_EINVAL; - - i = sz; - if (!filename) - sz = snprintf(&path[i], GNIX_AK_PATH_LEN - i, - "%s", GNIX_DEFAULT_AK_FILENAME); - else - sz = snprintf(&path[i], GNIX_AK_PATH_LEN - i, - "%s", filename); - - if (sz < 0) - return -FI_EINVAL; - sz += i; - } else { - sz = snprintf(path, GNIX_AK_PATH_LEN, "%s", - gnix_default_ak_path); - } - - path[(sz == GNIX_AK_PATH_LEN) ? 
- GNIX_AK_PATH_LEN : sz + 1] = '\0'; - if (sz == GNIX_AK_PATH_LEN) { - GNIX_WARN(FI_LOG_FABRIC, - "file path maximum length exceeded, " - "truncating name to 256 characters path=%s\n", - path); - } - - GNIX_INFO(FI_LOG_FABRIC, - "opening auth key file at %s\n", path); - - ret = _gnix_open_vmdh_info_file(path); - if (ret) { - info->enabled = 0; - ofi_spin_unlock(&info->lock); - return ret; - } - } - - ret = __gnix_auth_key_enable_vmdh(info); - if (ret) { - info->enabled = 0; - ofi_spin_unlock(&info->lock); - return ret; - } - - ret = _gnix_pes_on_node(&pes_on_node); - if (ret) - GNIX_FATAL(FI_LOG_DOMAIN, - "failed to get count of pes on node, rc=%d\n", ret); - - ret = _gnix_pe_node_rank(&logical_rank); - if (ret) - GNIX_FATAL(FI_LOG_DOMAIN, - "failed to get logical node rank, rc=%d\n", ret); - - info->key_partition_size = info->attr.user_key_limit / pes_on_node; - info->key_offset = logical_rank * info->key_partition_size; - } - GNIX_INFO(FI_LOG_DOMAIN, - "pkey=%08x ptag=%d key_partition_size=%d key_offset=%d enabled\n", - info->cookie, info->ptag, info->key_partition_size, info->key_offset); - ret = FI_SUCCESS; - } - ofi_spin_unlock(&info->lock); - - if (ret == -FI_EBUSY) { - GNIX_DEBUG(FI_LOG_MR, "authorization key already enabled, " - "auth_key=%p\n", info); - } - - return ret; -} - -struct gnix_auth_key *_gnix_auth_key_alloc() -{ - struct gnix_auth_key *auth_key = NULL; - - auth_key = calloc(1, sizeof(*auth_key)); - if (auth_key) { - ofi_spin_init(&auth_key->lock); - } else { - GNIX_WARN(FI_LOG_MR, "failed to allocate memory for " - "authorization key\n"); - } - - return auth_key; -} - -int _gnix_auth_key_insert( - uint8_t *auth_key, - size_t auth_key_size, - struct gnix_auth_key *to_insert) -{ - int ret; - gnix_ht_key_t key; - struct fi_gni_auth_key *gni_auth_key = - (struct fi_gni_auth_key *) auth_key; - - if (!to_insert) { - GNIX_WARN(FI_LOG_MR, "bad parameters, to_insert=%p\n", - to_insert); - return -FI_EINVAL; - } - - if (!auth_key) { - 
GNIX_INFO(FI_LOG_FABRIC, "auth key is null\n"); - return -FI_EINVAL; - } - - switch (gni_auth_key->type) { - case GNIX_AKT_RAW: - key = (gnix_ht_key_t) gni_auth_key->raw.protection_key; - break; - default: - GNIX_INFO(FI_LOG_FABRIC, "unrecognized auth key " - "type, type=%d\n", - gni_auth_key->type); - return -FI_EINVAL; - } - - ret = _gnix_ht_insert(&__gnix_auth_key_ht, key, to_insert); - if (ret) { - GNIX_WARN(FI_LOG_MR, "failed to insert entry, ret=%d\n", - ret); - } - - return ret; -} - -int _gnix_auth_key_free(struct gnix_auth_key *key) -{ - if (!key) { - GNIX_WARN(FI_LOG_MR, "bad parameters, key=%p\n", key); - return -FI_EINVAL; - } - - ofi_spin_destroy(&key->lock); - - key->enabled = 0; - - free(key); - - return FI_SUCCESS; -} - -struct gnix_auth_key * -_gnix_auth_key_lookup(uint8_t *auth_key, size_t auth_key_size) -{ - gnix_ht_key_t key; - struct gnix_auth_key *ptr = NULL; - struct fi_gni_auth_key *gni_auth_key = NULL; - - if (auth_key == NULL || auth_key_size == 0) { - auth_key = gnix_default_auth_key; - } - - gni_auth_key = (struct fi_gni_auth_key *) auth_key; - switch (gni_auth_key->type) { - case GNIX_AKT_RAW: - key = (gnix_ht_key_t) gni_auth_key->raw.protection_key; - break; - default: - GNIX_INFO(FI_LOG_FABRIC, "unrecognized auth key type, " - "type=%d\n", gni_auth_key->type); - return NULL; - } - - ptr = (struct gnix_auth_key *) _gnix_ht_lookup( - &__gnix_auth_key_ht, key); - - return ptr; -} - -int _gnix_auth_key_subsys_init(void) -{ - int ret = FI_SUCCESS; - - gnix_hashtable_attr_t attr = { - .ht_initial_size = 8, - .ht_maximum_size = 256, - .ht_increase_step = 2, - .ht_increase_type = GNIX_HT_INCREASE_MULT, - .ht_collision_thresh = 400, - .ht_hash_seed = 0xcafed00d, - .ht_internal_locking = 1, - .destructor = NULL - }; - - ret = _gnix_ht_init(&__gnix_auth_key_ht, &attr); - assert(ret == FI_SUCCESS); - - struct fi_gni_auth_key *gni_auth_key = calloc(1, sizeof(*gni_auth_key)); - gni_auth_key->type = GNIX_AKT_RAW; - gni_auth_key->raw.protection_key = 
0; - gnix_default_auth_key = (uint8_t *) gni_auth_key; - - return ret; -} - -int _gnix_auth_key_subsys_fini(void) -{ - free(gnix_default_auth_key); - - return FI_SUCCESS; -} - -struct gnix_auth_key *_gnix_auth_key_create( - uint8_t *auth_key, - size_t auth_key_size) -{ - struct gnix_auth_key *to_insert; - struct fi_gni_auth_key *gni_auth_key; - int ret; - gni_return_t grc; - uint8_t ptag; - uint32_t cookie; - - if (auth_key == NULL || auth_key_size == 0) { - auth_key = gnix_default_auth_key; - } - - gni_auth_key = (struct fi_gni_auth_key *) auth_key; - if (auth_key == gnix_default_auth_key) { - gnixu_get_rdma_credentials(NULL, &ptag, &cookie); - gni_auth_key->raw.protection_key = cookie; - } else { - switch (gni_auth_key->type) { - case GNIX_AKT_RAW: - cookie = gni_auth_key->raw.protection_key; - break; - default: - GNIX_WARN(FI_LOG_FABRIC, - "unrecognized auth key type, type=%d\n", - gni_auth_key->type); - return NULL; - } - - grc = GNI_GetPtag(0, cookie, &ptag); - if (grc) { - GNIX_WARN(FI_LOG_FABRIC, - "could not retrieve ptag, " - "cookie=%d ret=%d\n", cookie, grc); - return NULL; - } - } - - to_insert = _gnix_auth_key_alloc(); - if (!to_insert) { - GNIX_WARN(FI_LOG_MR, "failed to allocate memory for " - "auth key\n"); - return NULL; - } - - to_insert->attr.prov_key_limit = gnix_default_prov_registration_limit; - to_insert->attr.user_key_limit = gnix_default_user_registration_limit; - to_insert->ptag = ptag; - to_insert->cookie = cookie; - - ret = _gnix_auth_key_insert(auth_key, auth_key_size, to_insert); - if (ret) { - GNIX_INFO(FI_LOG_MR, "failed to insert authorization key, " - "key=%p len=%d to_insert=%p ret=%d\n", - auth_key, auth_key_size, to_insert, ret); - _gnix_auth_key_free(to_insert); - to_insert = NULL; - } - - return to_insert; -} diff --git a/prov/gni/src/gnix_av.c b/prov/gni/src/gnix_av.c deleted file mode 100644 index 3aeca6624c2..00000000000 --- a/prov/gni/src/gnix_av.c +++ /dev/null @@ -1,996 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. 
All rights reserved. - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2019-2020 Triad National Security, LLC. - * All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -// -// Address vector common code -// -#include -#include -#include - -#include "gnix.h" -#include "gnix_util.h" -#include "gnix_hashtable.h" -#include "gnix_av.h" -#include "gnix_cm.h" - -/* - * local variables and structs - */ - -#define GNIX_AV_ENTRY_VALID (1ULL) -#define GNIX_AV_ENTRY_CM_NIC_ID (1ULL << 2) - -struct gnix_av_block { - struct slist_entry slist; - struct gnix_av_addr_entry *base; -}; - -/******************************************************************************* - * Forward declarations of ops structures. - ******************************************************************************/ -static struct fi_ops_av gnix_av_ops; -static struct fi_ops gnix_fi_av_ops; - -/******************************************************************************* - * Helper functions. - ******************************************************************************/ -static int gnix_verify_av_attr(struct fi_av_attr *attr) -{ - int ret = FI_SUCCESS; - - if (attr->rx_ctx_bits > GNIX_RX_CTX_MAX_BITS) { - GNIX_WARN(FI_LOG_AV, "rx_ctx_bits too big\n"); - return -FI_EINVAL; - } - - switch (attr->type) { - case FI_AV_TABLE: - case FI_AV_MAP: - case FI_AV_UNSPEC: - break; - default: - ret = -FI_EINVAL; - break; - } - - if (attr->name != NULL) { - ret = -FI_ENOSYS; - } - - return ret; -} - -/* - * Check the capacity of the internal table used to represent FI_AV_TABLE type - * address vectors. Initially the table starts with a capacity and count of 0 - * and the capacity increases by roughly double each time a resize is necessary. - */ -static int gnix_check_capacity(struct gnix_fid_av *av, size_t count) -{ - struct gnix_av_addr_entry *addrs = NULL; - int *valid_entry_vec = NULL; - size_t capacity = av->capacity; - size_t prev_capacity; - - /* - * av->count + count is the amount of used indices after adding the - * count items. - */ - prev_capacity = capacity; - while (capacity < av->count + count) { - /* - * Handle initial capacity of 0, by adding 1. 
- */ - capacity = capacity * 2 + 1; - } - - /* - * Don't need to grow the table. - */ - if (capacity == av->capacity) { - return FI_SUCCESS; - } - - addrs = realloc(av->table, capacity * sizeof(*addrs)); - if (!addrs) { - return -FI_ENOMEM; - } - - memset(&addrs[prev_capacity], 0, (capacity - prev_capacity) * - sizeof(*addrs)); - - valid_entry_vec = realloc(av->valid_entry_vec, capacity * sizeof(int)); - if (!valid_entry_vec) { - return -FI_ENOMEM; - } - - memset(&valid_entry_vec[prev_capacity], 0, (capacity - prev_capacity) * - sizeof(int)); - - /* - * Update table and capacity to reflect new values. - */ - av->table = addrs; - av->valid_entry_vec = valid_entry_vec; - av->capacity = capacity; - - return FI_SUCCESS; -} - -/******************************************************************************* - * FI_AV_TABLE specific implementations. - ******************************************************************************/ -static int table_insert(struct gnix_fid_av *av_priv, const void *addr, - size_t count, fi_addr_t *fi_addr, uint64_t flags, - void *context) -{ - struct gnix_ep_name ep_name; - int ret = FI_SUCCESS, success_cnt = 0; - size_t index, i; - int *entry_err = context; - - if (gnix_check_capacity(av_priv, count)) { - return -FI_ENOMEM; - } - - assert(av_priv->table); - - for (index = av_priv->count, i = 0; i < count; index++, i++) { - ret = _gnix_get_ep_name(addr, i, &ep_name, av_priv->domain); - - /* check if this ep_name fits in the av context bits */ - if ((ret == FI_SUCCESS) && - (ep_name.name_type & GNIX_EPN_TYPE_SEP)) { - if ((1 << av_priv->rx_ctx_bits) < ep_name.rx_ctx_cnt) { - fprintf(stderr, "rx_ctx_bits %d ep.name.rx_ctx_cnt = %d\n", (1 << av_priv->rx_ctx_bits), ep_name.rx_ctx_cnt); - ret = -FI_EINVAL; - GNIX_DEBUG(FI_LOG_AV, "ep_name doesn't fit " - "into the av context bits\n"); - } - } - - if (ret != FI_SUCCESS) { - if (flags & FI_SYNC_ERR) { - entry_err[i] = ret; - if (fi_addr) - fi_addr[i] = FI_ADDR_NOTAVAIL; - continue; - } else { - 
return -FI_EINVAL; - } - } else { - if (flags & FI_SYNC_ERR) - entry_err[i] = FI_SUCCESS; - av_priv->table[index].gnix_addr = ep_name.gnix_addr; - av_priv->valid_entry_vec[index] = 1; - av_priv->table[index].name_type = ep_name.name_type; - av_priv->table[index].cookie = ep_name.cookie; - av_priv->table[index].rx_ctx_cnt = ep_name.rx_ctx_cnt; - av_priv->table[index].cm_nic_cdm_id = - ep_name.cm_nic_cdm_id; - av_priv->table[index].key_offset = ep_name.key_offset; - if (fi_addr) - fi_addr[i] = index; - success_cnt++; - } - - } - - av_priv->count += success_cnt; - ret = success_cnt; - - return ret; -} - -/* - * Currently only marks as 'not valid'. Should actually free memory. - * If any of the given address fail to be removed (are already marked removed) - * then the return value will be -FI_EINVAL. - */ -static int table_remove(struct gnix_fid_av *av_priv, fi_addr_t *fi_addr, - size_t count, uint64_t flags) -{ - int ret = FI_SUCCESS; - size_t index; - size_t i; - - for (i = 0; i < count; i++) { - index = (size_t) fi_addr[i]; - if (index < av_priv->count) { - if (av_priv->valid_entry_vec[index] == 0) { - ret = -FI_EINVAL; - break; - } else { - av_priv->valid_entry_vec[index] = 0; - } - } else { - ret = -FI_EINVAL; - break; - } - } - - return ret; -} - -/* - * table_lookup(): Translate fi_addr_t to struct gnix_address. 
- */ -static int table_lookup(struct gnix_fid_av *av_priv, fi_addr_t fi_addr, - struct gnix_av_addr_entry *entry_ptr) -{ - size_t index; - struct gnix_av_addr_entry *entry = NULL; - - index = (size_t)fi_addr; - if (index >= av_priv->count) - return -FI_EINVAL; - - assert(av_priv->table); - entry = &av_priv->table[index]; - - if (entry == NULL) - return -FI_EINVAL; - - if (av_priv->valid_entry_vec[index] == 0) - return -FI_EINVAL; - - memcpy(entry_ptr, entry, sizeof(*entry)); - - return FI_SUCCESS; -} - -static int table_reverse_lookup(struct gnix_fid_av *av_priv, - struct gnix_address gnix_addr, - fi_addr_t *fi_addr) -{ - struct gnix_av_addr_entry *entry; - int i; - - for (i = 0; i < av_priv->count; i++) { - entry = &av_priv->table[i]; - /* - * for SEP endpoint entry we may have a delta in the cdm_id - * component of the address to process - */ - if ((entry->name_type & GNIX_EPN_TYPE_SEP) && - (entry->gnix_addr.device_addr == gnix_addr.device_addr)) { - int index = gnix_addr.cdm_id - entry->gnix_addr.cdm_id; - - if ((index >= 0) && (index < entry->rx_ctx_cnt)) { - /* we have a match */ - *fi_addr = fi_rx_addr(i, index, - av_priv->rx_ctx_bits); - return FI_SUCCESS; - } - } else if (GNIX_ADDR_EQUAL(entry->gnix_addr, gnix_addr)) { - *fi_addr = i; - return FI_SUCCESS; - } - } - - return -FI_ENOENT; -} - -/******************************************************************************* - * FI_AV_MAP specific implementations. 
- ******************************************************************************/ - -static int map_insert(struct gnix_fid_av *av_priv, const void *addr, - size_t count, fi_addr_t *fi_addr, uint64_t flags, - void *context) -{ - int ret; - struct gnix_ep_name ep_name; - struct gnix_av_addr_entry *the_entry; - gnix_ht_key_t key; - size_t i; - struct gnix_av_block *blk = NULL; - int ret_cnt = count; - int *entry_err = context; - - assert(av_priv->map_ht != NULL); - - if (count == 0) - return 0; - - blk = calloc(1, sizeof(struct gnix_av_block)); - if (blk == NULL) - return -FI_ENOMEM; - - blk->base = calloc(count, sizeof(struct gnix_av_addr_entry)); - if (blk->base == NULL) { - free(blk); - return -FI_ENOMEM; - } - - slist_insert_tail(&blk->slist, &av_priv->block_list); - - for (i = 0; i < count; i++) { - ret = _gnix_get_ep_name(addr, i, &ep_name, av_priv->domain); - if (ret != FI_SUCCESS) { - if (flags & FI_SYNC_ERR) { - entry_err[i] = -FI_EINVAL; - fi_addr[i] = FI_ADDR_NOTAVAIL; - ret_cnt = -FI_EINVAL; - continue; - } else { - return ret; - } - } - - /* check if this ep_name fits in the av context bits */ - if (ep_name.name_type & GNIX_EPN_TYPE_SEP) { - if ((1 << av_priv->rx_ctx_bits) < ep_name.rx_ctx_cnt) { - if (flags & FI_SYNC_ERR) { - entry_err[i] = -FI_EINVAL; - fi_addr[i] = FI_ADDR_NOTAVAIL; - ret_cnt = -FI_EINVAL; - continue; - } - GNIX_DEBUG(FI_LOG_AV, "ep_name doesn't fit " - "into the av context bits\n"); - return -FI_EINVAL; /* TODO: should try to do - cleanup */ - } - } - - ((struct gnix_address *)fi_addr)[i] = ep_name.gnix_addr; - the_entry = &blk->base[i]; - memcpy(&the_entry->gnix_addr, &ep_name.gnix_addr, - sizeof(struct gnix_address)); - the_entry->name_type = ep_name.name_type; - the_entry->cm_nic_cdm_id = ep_name.cm_nic_cdm_id; - the_entry->cookie = ep_name.cookie; - the_entry->rx_ctx_cnt = ep_name.rx_ctx_cnt; - memcpy(&key, &ep_name.gnix_addr, sizeof(gnix_ht_key_t)); - ret = _gnix_ht_insert(av_priv->map_ht, - key, - the_entry); - - if (flags & 
FI_SYNC_ERR) { - entry_err[i] = FI_SUCCESS; - } - - /* - * we are okay with user trying to add more - * entries with same key. - */ - if ((ret != FI_SUCCESS) && (ret != -FI_ENOSPC)) { - GNIX_WARN(FI_LOG_AV, - "_gnix_ht_insert failed %d\n", - ret); - if (flags & FI_SYNC_ERR) { - entry_err[i] = ret; - fi_addr[i] = FI_ADDR_NOTAVAIL; - ret_cnt = ret; - continue; - } - return ret; - } - } - - return ret_cnt; -} - -/* - * TODO: slab should be freed when entries in the slab drop to zero, - * or as an alternative, have a free list for slabs so they can be - * reused if new fi_av_insert operations are performed. - */ -static int map_remove(struct gnix_fid_av *av_priv, fi_addr_t *fi_addr, - size_t count, uint64_t flags) -{ - int i,ret = FI_SUCCESS; - struct gnix_av_addr_entry *the_entry = NULL; - gnix_ht_key_t key; - - for (i = 0; i < count; i++) { - - key = *(gnix_ht_key_t *)&fi_addr[i]; - - /* - * first see if we have this entry in the hash - * TODO: is there a race condition here for multi-threaded? 
- */ - - the_entry = _gnix_ht_lookup(av_priv->map_ht, key); - if (the_entry == NULL) - return -FI_ENOENT; - - ret = _gnix_ht_remove(av_priv->map_ht, key); - - } - - return ret; -} - -static int map_lookup(struct gnix_fid_av *av_priv, fi_addr_t fi_addr, - struct gnix_av_addr_entry *entry_ptr) -{ - gnix_ht_key_t *key = (gnix_ht_key_t *)&fi_addr; - struct gnix_av_addr_entry *entry; - - entry = _gnix_ht_lookup(av_priv->map_ht, *key & av_priv->mask); - if (entry == NULL) - return -FI_ENOENT; - - memcpy(entry_ptr, entry, sizeof(*entry)); - - return FI_SUCCESS; -} - -static int map_reverse_lookup(struct gnix_fid_av *av_priv, - struct gnix_address gnix_addr, - fi_addr_t *fi_addr) -{ - GNIX_HASHTABLE_ITERATOR(av_priv->map_ht, iter); - struct gnix_av_addr_entry *entry; - fi_addr_t rx_addr; - - while ((entry = _gnix_ht_iterator_next(&iter))) { - /* - * for SEP endpoint entry we may have a delta in the cdm_id - * component of the address to process - */ - if ((entry->name_type & GNIX_EPN_TYPE_SEP) && - (entry->gnix_addr.device_addr == gnix_addr.device_addr)) { - int index = gnix_addr.cdm_id - entry->gnix_addr.cdm_id; - - if ((index >= 0) && (index < entry->rx_ctx_cnt)) { - /* we have a match */ - memcpy(&rx_addr, &entry->gnix_addr, - sizeof(fi_addr_t)); - *fi_addr = fi_rx_addr(rx_addr, - index, - av_priv->rx_ctx_bits); - return FI_SUCCESS; - } - } else { - if (GNIX_ADDR_EQUAL(entry->gnix_addr, gnix_addr)) { - *fi_addr = GNIX_HASHTABLE_ITERATOR_KEY(iter); - return FI_SUCCESS; - } - } - } - - return -FI_ENOENT; -} - -/******************************************************************************* - * FI_AV API implementations. 
- ******************************************************************************/ -int _gnix_table_reverse_lookup(struct gnix_fid_av *av_priv, - struct gnix_address gnix_addr, - fi_addr_t *fi_addr) -{ - return table_reverse_lookup(av_priv, gnix_addr, fi_addr); -} - -int _gnix_map_reverse_lookup(struct gnix_fid_av *av_priv, - struct gnix_address gnix_addr, - fi_addr_t *fi_addr) -{ - return map_reverse_lookup(av_priv, gnix_addr, fi_addr); -} - -int _gnix_av_lookup(struct gnix_fid_av *gnix_av, fi_addr_t fi_addr, - struct gnix_av_addr_entry *entry_ptr) -{ - int ret = FI_SUCCESS; - fi_addr_t addr = fi_addr & gnix_av->mask; - - GNIX_TRACE(FI_LOG_AV, "\n"); - - if (!gnix_av) { - ret = -FI_EINVAL; - goto err; - } - - switch (gnix_av->type) { - case FI_AV_TABLE: - ret = table_lookup(gnix_av, addr, entry_ptr); - break; - case FI_AV_MAP: - ret = map_lookup(gnix_av, addr, entry_ptr); - break; - default: - ret = -FI_EINVAL; - break; - } - - if (fi_addr & ~gnix_av->mask) { - entry_ptr->gnix_addr.cdm_id += - fi_addr >> (64 - gnix_av->rx_ctx_bits); - } - -err: - return ret; -} - -int _gnix_av_reverse_lookup(struct gnix_fid_av *gnix_av, - struct gnix_address gnix_addr, - fi_addr_t *fi_addr) -{ - int ret = FI_SUCCESS; - - GNIX_TRACE(FI_LOG_AV, "\n"); - - if (!gnix_av) { - ret = -FI_EINVAL; - goto err; - } - - switch (gnix_av->type) { - case FI_AV_TABLE: - ret = table_reverse_lookup(gnix_av, gnix_addr, fi_addr); - break; - case FI_AV_MAP: - ret = map_reverse_lookup(gnix_av, gnix_addr, fi_addr); - break; - default: - ret = -FI_EINVAL; - break; - } - -err: - return ret; -} - -/* - * Note: this function (according to WG), is not intended to - * typically be used in the critical path for messaging/rma/amo - * requests - */ -DIRECT_FN STATIC int gnix_av_lookup(struct fid_av *av, fi_addr_t fi_addr, - void *addr, size_t *addrlen) -{ - struct gnix_fid_av *gnix_av; - struct gnix_ep_name ep_name = { {0} }; - struct gnix_av_addr_entry entry; - int ret; - - GNIX_TRACE(FI_LOG_AV, "\n"); - - if 
(!av || !addrlen) - return -FI_EINVAL; - - gnix_av = container_of(av, struct gnix_fid_av, av_fid); - - if (gnix_av->domain->addr_format == FI_ADDR_STR) { - if (*addrlen < GNIX_FI_ADDR_STR_LEN) { - *addrlen = GNIX_FI_ADDR_STR_LEN; - return -FI_ETOOSMALL; - } - } else { - if (*addrlen < sizeof(ep_name)) { - *addrlen = sizeof(ep_name); - return -FI_ETOOSMALL; - } - } - - /* - * user better have provided a buffer since the - * value stored in addrlen is big enough to return ep_name - */ - - if (!addr) - return -FI_EINVAL; - - ret = _gnix_av_lookup(gnix_av, fi_addr, &entry); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_AV, "_gnix_av_lookup failed: %d\n", ret); - return ret; - } - - ep_name.gnix_addr = entry.gnix_addr; - ep_name.name_type = entry.name_type; - ep_name.cm_nic_cdm_id = entry.cm_nic_cdm_id; - ep_name.cookie = entry.cookie; - - if (gnix_av->domain->addr_format == FI_ADDR_STR) { - ret = _gnix_ep_name_to_str(&ep_name, (char **)&addr); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_AV, "_gnix_resolve_str_ep_name failed: %d %s\n", - ret, fi_strerror(-ret)); - return ret; - } - *addrlen = GNIX_FI_ADDR_STR_LEN; - } else { - memcpy(addr, (void *)&ep_name, MIN(*addrlen, sizeof(ep_name))); - *addrlen = sizeof(ep_name); - } - - return FI_SUCCESS; -} - -DIRECT_FN STATIC int gnix_av_insert(struct fid_av *av, const void *addr, - size_t count, fi_addr_t *fi_addr, - uint64_t flags, void *context) -{ - struct gnix_fid_av *av_priv = NULL; - int ret = FI_SUCCESS; - - GNIX_TRACE(FI_LOG_AV, "\n"); - - if (!av) - return -FI_EINVAL; - - av_priv = container_of(av, struct gnix_fid_av, av_fid); - - if (!av_priv) - return -FI_EINVAL; - - if ((av_priv->type == FI_AV_MAP) && (fi_addr == NULL)) - return -FI_EINVAL; - - if ((flags & FI_SYNC_ERR) && (context == NULL)) { - GNIX_WARN(FI_LOG_AV, "FI_SYNC_ERR requires context\n"); - return -FI_EINVAL; - } - - switch (av_priv->type) { - case FI_AV_TABLE: - ret = - table_insert(av_priv, addr, count, fi_addr, flags, context); - break; - case 
FI_AV_MAP: - ret = map_insert(av_priv, addr, count, fi_addr, flags, context); - break; - default: - ret = -FI_EINVAL; - break; - } - - return ret; -} - -DIRECT_FN STATIC int gnix_av_insertsvc(struct fid_av *av, const char *node, - const char *service, fi_addr_t *fi_addr, - uint64_t flags, void *context) -{ - return -FI_ENOSYS; -} - -DIRECT_FN STATIC int gnix_av_insertsym(struct fid_av *av, const char *node, - size_t nodecnt, const char *service, - size_t svccnt, fi_addr_t *fi_addr, - uint64_t flags, void *context) -{ - return -FI_ENOSYS; -} - -DIRECT_FN STATIC int gnix_av_remove(struct fid_av *av, fi_addr_t *fi_addr, - size_t count, uint64_t flags) -{ - struct gnix_fid_av *av_priv = NULL; - int ret = FI_SUCCESS; - - GNIX_TRACE(FI_LOG_AV, "\n"); - - if (!av) { - ret = -FI_EINVAL; - goto err; - } - - av_priv = container_of(av, struct gnix_fid_av, av_fid); - - if (!av_priv) { - ret = -FI_EINVAL; - goto err; - } - - switch (av_priv->type) { - case FI_AV_TABLE: - ret = table_remove(av_priv, fi_addr, count, flags); - break; - case FI_AV_MAP: - ret = map_remove(av_priv, fi_addr, count, flags); - break; - default: - ret = -FI_EINVAL; - break; - } - -err: - return ret; -} - -/* - * Given an address pointed to by addr, stuff a string into buf representing: - * device_addr:cdm_id:name_type:cm_nic_cdm_id:cookie - * where device_addr, cdm_id, cm_nic_cdm_id and cookie are represented in - * hexadecimal. And name_type is represented as an integer. 
- */ -DIRECT_FN const char *gnix_av_straddr(struct fid_av *av, - const void *addr, char *buf, - size_t *len) -{ - char int_buf[GNIX_AV_MAX_STR_ADDR_LEN]; - int size; - struct gnix_ep_name ep_name; - struct gnix_fid_av *av_priv; - - if (!av || !addr || !buf || !len) { - GNIX_DEBUG(FI_LOG_AV, "NULL parameter in gnix_av_straddr\n"); - return NULL; - } - - av_priv = container_of(av, struct gnix_fid_av, av_fid); - - if (av_priv->domain->addr_format == FI_ADDR_STR) - _gnix_resolve_str_ep_name(addr, 0, &ep_name); - else - ep_name = ((struct gnix_ep_name *) addr)[0]; - - /* - * if additional information is added to this string, then - * you will need to update in gnix.h: - * GNIX_AV_STR_ADDR_VERSION, increment this value - * GNIX_AV_MAX_STR_ADDR_LEN, to be the number of characters printed - */ - size = snprintf(int_buf, sizeof(int_buf), "%04i:0x%08" PRIx32 ":0x%08" - PRIx32 ":%02i:0x%06" PRIx32 ":0x%08" PRIx32 - ":%02i", GNIX_AV_STR_ADDR_VERSION, - ep_name.gnix_addr.device_addr, - ep_name.gnix_addr.cdm_id, - ep_name.name_type, - ep_name.cm_nic_cdm_id, - ep_name.cookie, - ep_name.rx_ctx_cnt); - - /* - * snprintf returns the number of character written - * without the terminating character. - */ - if ((size + 1) < *len) { - /* - * size needs to be all the characters plus the terminating - * character. Otherwise, we could lose information. - */ - size = size + 1; - } else { - /* Do not overwrite the buffer. 
*/ - size = *len; - } - - snprintf(buf, size, "%s", int_buf); - *len = size; - - return buf; -} - -static void __av_destruct(void *obj) -{ - int ret; - struct gnix_fid_av *av = (struct gnix_fid_av *) obj; - struct slist_entry *blk_entry; - struct gnix_av_block *temp; - - - if (av->type == FI_AV_TABLE) { - if (av->table) { - free(av->table); - } - } else if (av->type == FI_AV_MAP) { - - while (!slist_empty(&av->block_list)) { - blk_entry = slist_remove_head(&av->block_list); - temp = container_of(blk_entry, - struct gnix_av_block, slist); - free(temp->base); - free(temp); - } - - ret = _gnix_ht_destroy(av->map_ht); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_AV, - "_gnix_ht_destroy failed %d\n", - ret); - free(av->map_ht); - } - if (av->valid_entry_vec) { - free(av->valid_entry_vec); - } else { - GNIX_WARN(FI_LOG_AV, "valid_entry_vec is NULL\n"); - } - - free(av); -} - -static int gnix_av_close(fid_t fid) -{ - struct gnix_fid_av *av = NULL; - int ret = FI_SUCCESS; - int references_held; - - GNIX_TRACE(FI_LOG_AV, "\n"); - - if (!fid) { - ret = -FI_EINVAL; - goto err; - } - av = container_of(fid, struct gnix_fid_av, av_fid.fid); - - references_held = _gnix_ref_put(av); - if (references_held) { - GNIX_INFO(FI_LOG_AV, "failed to fully close av due to lingering " - "references. references=%i av=%p\n", - references_held, av); - } - -err: - return ret; -} - -DIRECT_FN int gnix_av_bind(struct fid_av *av, struct fid *fid, uint64_t flags) -{ - return -FI_ENOSYS; -} - -/* - * TODO: Support shared named AVs. 
- */ -DIRECT_FN int gnix_av_open(struct fid_domain *domain, struct fi_av_attr *attr, - struct fid_av **av, void *context) -{ - struct gnix_fid_domain *int_dom = NULL; - struct gnix_fid_av *av_priv = NULL; - struct gnix_hashtable_attr ht_attr; - - enum fi_av_type type = FI_AV_TABLE; - size_t count = 128; - int rx_ctx_bits = 0; - int ret = FI_SUCCESS; - - GNIX_TRACE(FI_LOG_AV, "\n"); - - if (!domain) { - ret = -FI_EINVAL; - goto err; - } - - int_dom = container_of(domain, struct gnix_fid_domain, domain_fid); - if (!int_dom) { - ret = -FI_EINVAL; - goto err; - } - - av_priv = calloc(1, sizeof(*av_priv)); - if (!av_priv) { - ret = -FI_ENOMEM; - goto err; - } - - if (attr) { - ret = gnix_verify_av_attr(attr); - if (ret) { - goto cleanup; - } - - if (attr->type != FI_AV_UNSPEC) { - type = attr->type; - } - count = attr->count; - rx_ctx_bits = attr->rx_ctx_bits; - } - - av_priv->domain = int_dom; - av_priv->type = type; - av_priv->addrlen = sizeof(struct gnix_address); - av_priv->rx_ctx_bits = rx_ctx_bits; - av_priv->mask = rx_ctx_bits ? 
- ((uint64_t)1 << (64 - attr->rx_ctx_bits)) - 1 : ~0; - - av_priv->capacity = count; - if (type == FI_AV_TABLE) { - av_priv->table = calloc(count, - sizeof(struct gnix_av_addr_entry)); - if (!av_priv->table) { - ret = -FI_ENOMEM; - goto cleanup; - } - } - - av_priv->valid_entry_vec = calloc(count, sizeof(int)); - if (!av_priv->valid_entry_vec) { - ret = -FI_ENOMEM; - goto cleanup; - } - - av_priv->av_fid.fid.fclass = FI_CLASS_AV; - av_priv->av_fid.fid.context = context; - av_priv->av_fid.fid.ops = &gnix_fi_av_ops; - av_priv->av_fid.ops = &gnix_av_ops; - - if (type == FI_AV_MAP) { - av_priv->map_ht = calloc(1, sizeof(struct gnix_hashtable)); - if (av_priv->map_ht == NULL) - goto cleanup; - - /* - * use same parameters as used for ep vc hash - */ - - ht_attr.ht_initial_size = int_dom->params.ct_init_size; - ht_attr.ht_maximum_size = int_dom->params.ct_max_size; - ht_attr.ht_increase_step = int_dom->params.ct_step; - ht_attr.ht_increase_type = GNIX_HT_INCREASE_MULT; - ht_attr.ht_collision_thresh = 500; - ht_attr.ht_hash_seed = 0xdeadbeefbeefdead; - ht_attr.ht_internal_locking = 1; - ht_attr.destructor = NULL; - - ret = _gnix_ht_init(av_priv->map_ht, - &ht_attr); - slist_init(&av_priv->block_list); - } - _gnix_ref_init(&av_priv->ref_cnt, 1, __av_destruct); - - *av = &av_priv->av_fid; - - return ret; - -cleanup: - if (av_priv->table != NULL) - free(av_priv->table); - if (av_priv->valid_entry_vec != NULL) - free(av_priv->valid_entry_vec); - free(av_priv); -err: - return ret; -} - -/******************************************************************************* - * FI_OPS_* data structures. 
- ******************************************************************************/ -static struct fi_ops_av gnix_av_ops = { - .size = sizeof(struct fi_ops_av), - .insert = gnix_av_insert, - .insertsvc = gnix_av_insertsvc, - .insertsym = gnix_av_insertsym, - .remove = gnix_av_remove, - .lookup = gnix_av_lookup, - .straddr = gnix_av_straddr -}; - -static struct fi_ops gnix_fi_av_ops = { - .size = sizeof(struct fi_ops), - .close = gnix_av_close, - .bind = fi_no_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open -}; diff --git a/prov/gni/src/gnix_bitmap.c b/prov/gni/src/gnix_bitmap.c deleted file mode 100644 index dda27507c76..00000000000 --- a/prov/gni/src/gnix_bitmap.c +++ /dev/null @@ -1,277 +0,0 @@ -/* - * Copyright (c) 2015,2017 Cray Inc. All rights reserved. - * - * Created on: Apr 16, 2015 - * Author: jswaro - */ - -#include -#include - -#include "gnix_bitmap.h" - -#ifdef HAVE_ATOMICS - -#define __gnix_init_block(block) atomic_init(block, 0) -#define __gnix_set_block(bitmap, index, value) \ - atomic_store(&(bitmap)->arr[(index)], (value)) -#define __gnix_load_block(bitmap, index) atomic_load(&(bitmap->arr[(index)])) -#define __gnix_set_bit(bitmap, bit) \ - atomic_fetch_or(&(bitmap)->arr[GNIX_BUCKET_INDEX(bit)], \ - GNIX_BIT_VALUE(bit)) -#define __gnix_clear_bit(bitmap, bit) \ - atomic_fetch_and(&(bitmap)->arr[GNIX_BUCKET_INDEX(bit)], \ - ~GNIX_BIT_VALUE(bit)) -#define __gnix_test_bit(bitmap, bit) \ - ((atomic_load(&(bitmap)->arr[GNIX_BUCKET_INDEX(bit)]) \ - & GNIX_BIT_VALUE(bit)) != 0) -#else - -static inline void __gnix_init_block(gnix_bitmap_block_t *block) -{ - ofi_spin_init(&block->lock); - block->val = 0llu; -} - -static inline void __gnix_set_block(gnix_bitmap_t *bitmap, int index, - uint64_t value) -{ - gnix_bitmap_block_t *block = &bitmap->arr[index]; - - ofi_spin_lock(&block->lock); - block->val = value; - ofi_spin_unlock(&block->lock); -} - -static inline uint64_t __gnix_load_block(gnix_bitmap_t *bitmap, int index) -{ - gnix_bitmap_block_t 
*block = &bitmap->arr[index]; - uint64_t ret; - - ofi_spin_lock(&block->lock); - ret = block->val; - ofi_spin_unlock(&block->lock); - - return ret; -} - -static inline uint64_t __gnix_set_bit(gnix_bitmap_t *bitmap, int bit) -{ - gnix_bitmap_block_t *block = &bitmap->arr[GNIX_BUCKET_INDEX(bit)]; - uint64_t ret; - - ofi_spin_lock(&block->lock); - ret = block->val; - block->val |= GNIX_BIT_VALUE(bit); - ofi_spin_unlock(&block->lock); - - return ret; -} - -static inline uint64_t __gnix_clear_bit(gnix_bitmap_t *bitmap, int bit) -{ - gnix_bitmap_block_t *block = &bitmap->arr[GNIX_BUCKET_INDEX(bit)]; - uint64_t ret; - - ofi_spin_lock(&block->lock); - ret = block->val; - block->val &= ~GNIX_BIT_VALUE(bit); - ofi_spin_unlock(&block->lock); - - return ret; -} - -static inline int __gnix_test_bit(gnix_bitmap_t *bitmap, int bit) -{ - gnix_bitmap_block_t *block = &bitmap->arr[GNIX_BUCKET_INDEX(bit)]; - int ret; - - ofi_spin_lock(&block->lock); - ret = (block->val & GNIX_BIT_VALUE(bit)) != 0; - ofi_spin_unlock(&block->lock); - - return ret; -} -#endif - -int _gnix_test_bit(gnix_bitmap_t *bitmap, uint32_t index) -{ - return __gnix_test_bit(bitmap, index); -} - -void _gnix_set_bit(gnix_bitmap_t *bitmap, uint32_t index) -{ - __gnix_set_bit(bitmap, index); -} - -void _gnix_clear_bit(gnix_bitmap_t *bitmap, uint32_t index) -{ - __gnix_clear_bit(bitmap, index); -} - -int _gnix_test_and_set_bit(gnix_bitmap_t *bitmap, uint32_t index) -{ - return (__gnix_set_bit(bitmap, index) & GNIX_BIT_VALUE(index)) != 0; -} - -int _gnix_test_and_clear_bit(gnix_bitmap_t *bitmap, uint32_t index) -{ - return (__gnix_clear_bit(bitmap, index) & GNIX_BIT_VALUE(index)) != 0; -} - -int _gnix_bitmap_full(gnix_bitmap_t *bitmap) -{ - return _gnix_find_first_zero_bit(bitmap) == -EAGAIN; -} - -int _gnix_bitmap_empty(gnix_bitmap_t *bitmap) -{ - return _gnix_find_first_set_bit(bitmap) == -FI_EAGAIN; -} - -int _gnix_find_first_zero_bit(gnix_bitmap_t *bitmap) -{ - int i, pos; - gnix_bitmap_value_t value; - - for (i = 
0, pos = 0; - i < GNIX_BITMAP_BLOCKS(bitmap->length); - ++i, pos += GNIX_BITMAP_BUCKET_LENGTH) { - /* invert the bits to check for first zero bit */ - value = ~(__gnix_load_block(bitmap, i)); - - if (value != 0) { - /* no need to check for errors because we have - established there is an unset bit */ - pos += ffsll(value) - 1; - - if (pos < bitmap->length) - return pos; - else - return -FI_EAGAIN; - } - } - - return -FI_EAGAIN; -} - -int _gnix_find_first_set_bit(gnix_bitmap_t *bitmap) -{ - int i, pos; - gnix_bitmap_value_t value; - - for (i = 0, pos = 0; - i < GNIX_BITMAP_BLOCKS(bitmap->length); - ++i, pos += GNIX_BITMAP_BUCKET_LENGTH) { - value = __gnix_load_block(bitmap, i); - - if (value != 0) { - /* no need to check for errors because we have - established there is a set bit */ - pos += ffsll(value) - 1; - - if (pos < bitmap->length) - return pos; - else - return -FI_EAGAIN; } - } - - return -FI_EAGAIN; -} - -void _gnix_fill_bitmap(gnix_bitmap_t *bitmap, uint64_t value) -{ - int i; - gnix_bitmap_value_t fill_value = (value != 0) ? 
~0 : 0; - - for (i = 0; i < GNIX_BITMAP_BLOCKS(bitmap->length); ++i) { - __gnix_set_block(bitmap, i, fill_value); - } -} - -int _gnix_alloc_bitmap(gnix_bitmap_t *bitmap, uint32_t nbits, void *addr) -{ - int i; - - if (bitmap->state == GNIX_BITMAP_STATE_READY) - return -FI_EINVAL; - - if (bitmap->length != 0 || nbits == 0) - return -FI_EINVAL; - - if (!addr) { - bitmap->arr = calloc(GNIX_BITMAP_BLOCKS(nbits), - sizeof(gnix_bitmap_block_t)); - bitmap->internal_buffer_allocation = 1; - } else { - bitmap->arr = addr; - bitmap->internal_buffer_allocation = 0; - } - - if (!bitmap->arr) - return -FI_ENOMEM; - - bitmap->length = nbits; - - for (i = 0; i < GNIX_BITMAP_BLOCKS(bitmap->length); ++i) - __gnix_init_block(&bitmap->arr[i]); - - bitmap->state = GNIX_BITMAP_STATE_READY; - - return 0; -} - -int _gnix_realloc_bitmap(gnix_bitmap_t *bitmap, uint32_t nbits) -{ - gnix_bitmap_block_t *new_allocation; - int blocks_to_allocate = GNIX_BITMAP_BLOCKS(nbits); - int i; - - if (bitmap->state != GNIX_BITMAP_STATE_READY) - return -FI_EINVAL; - - if (nbits == 0 || bitmap->arr == NULL) - return -FI_EINVAL; - - if (!bitmap->internal_buffer_allocation) - return -FI_EINVAL; - - new_allocation = realloc(bitmap->arr, - (blocks_to_allocate * - sizeof(gnix_bitmap_block_t))); - - if (!new_allocation) - return -FI_ENOMEM; - - bitmap->arr = new_allocation; - - /* Did we increase the size of the bitmap? 
- * If so, initialize new blocks */ - if (blocks_to_allocate > GNIX_BITMAP_BLOCKS(bitmap->length)) { - for (i = GNIX_BITMAP_BLOCKS(bitmap->length); - i < blocks_to_allocate; - ++i) { - __gnix_init_block(&bitmap->arr[i]); - } - } - - bitmap->length = nbits; - - return 0; -} - -int _gnix_free_bitmap(gnix_bitmap_t *bitmap) -{ - if (bitmap->state != GNIX_BITMAP_STATE_READY) - return -FI_EINVAL; - - bitmap->length = 0; - if (bitmap->arr && bitmap->internal_buffer_allocation) { - free(bitmap->arr); - bitmap->arr = NULL; - } - - bitmap->state = GNIX_BITMAP_STATE_FREE; - - return 0; -} diff --git a/prov/gni/src/gnix_buddy_allocator.c b/prov/gni/src/gnix_buddy_allocator.c deleted file mode 100644 index 38895ed4074..00000000000 --- a/prov/gni/src/gnix_buddy_allocator.c +++ /dev/null @@ -1,399 +0,0 @@ -/* - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All - * rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* - * The buddy allocator splits the "base" block being managed into smaller - * blocks. Each block is a "power-of-two length". These subblocks are kept - * track of in a doubly linked list, or free list. Here are the structures - * and format of the data structures used in the buddy allocator. For - * a description of each field please see gnix_buddy_allocator.h. - * - * Handle structure: - * ┌──────┬──────┬─────┬─────┬────────┬───────┐ - * │ BASE │ len │ min │ max │ nlists │ LISTS │ - * └──────┴──────┴─────┴─────┴────────┴───────┘ - * The LISTS pointer points to an array of dlist structures, each containing a - * head pointer to the beginning of a free list. Note that the first element of - * LISTS is a pointer to the head of the "min block size" free list, the second - * element is the head of the "min * 2 block size" free list and so on. - * - * Node format as stored in a free block: - * ┌──────┬──────┬──────────────────────┐ - * │ NEXT │ PREV │ Remaining free bytes │ - * └──────┴──────┴──────────────────────┘ - * Each NEXT and PREV pointer is stored in the first 16 bytes of the free block. - * This means that there is a hard limit of 16 bytes on the minimum block size. - * - * Bitmap layout with a min block size of 16: - * ┌──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┐ - * │16│16│16│16│..│32│32│32│32│..│64│64│64│64│..│ - * └──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┘ - * All the blocks that the buddy allocator allocates from the base block are of - * size X, where X = MBS * 2^Z, MBS is the minimum block size and Z is a - * non-negative integer. 
- * - * The bitmap has 2 * (Len / MBS) bits and it's setup so that the first - * Len / MBS bits in the bitmap flag each block of size MBS as free or - * allocated. - * - * Len is the number of bytes in the base block. - * The base block is pointed to by void *base. - * - * The first bit in the bitmap flags the first block of size MBS. - * The first block of size MBS uses the address range: - * base to (base + MBS - 1). - * - * The second bit in the bitmap flags the second block of size MBS. - * The second block of size MBS uses the address range: - * (base + MBS) to (base + 2 * MBS - 1) - * - * The third bit in the bitmap flags the third block of size MBS. - * The third block of size MBS uses the address range: - * (base + 2 * MBS) to (base + 3 * MBS - 1) - * - * And so on until we reach the Len / MBS bit in the bitmap. - * - * The second Len / MBS bits in the bitmap flag the remaining blocks of size X - * as free allocated, or split where X > MBS. - * - * So, the first bit in the second Len / MBS bits in the bitmap flags the first - * block of size MBS * 2. The first block of size MBS * 2 uses the address - * range: - * base to (base + MBS * 2 - 1) - * - * And so on until we reach the next block size. - * - * A bit is set to 1 when a block is allocated, or when the block is split into - * two smaller blocks. - * - * A bit is reset to 0 when a block is freed, or when a free block is coalesced - * with another already free and unsplit block. - * - * The bitmap is only read for coalescing blocks. When a block Y is freed we - * look at the bit in the bitmap for the buddy block of Y, if that bit is set - * then the buddy of Y is allocated, split, or both in which case we cannot - * coalesce Y with its buddy block. 
However, if the bitmap bit for the buddy of - * Y is reset, then the buddy block of Y is free and not split, so we coalesce Y - * with the buddy of block of Y and continue to coalesce this new larger block - * with its buddy block until we reach the max block size or a buddy block that - * is allocated, split, or both. - * - * TODO: dlist_insert_sorted for fragmentation reduction. - * TODO: Lock in __gnix_buddy_split and allow __gnix_buddy_find_block to run - * concurrently. - * TODO: Allow __gnix_buddy_coalesce to run concurrently and return to - * caller of _gnix_buddy_free sooner. __gnix_buddy_coalesce is spending ~23% - * of the time on top of the call stack compared to other functions when running - * random_alloc_free. - * TODO: Find a better solution for finding the address of a buddy block. - */ - -#include "gnix_buddy_allocator.h" - -static inline int __gnix_buddy_create_lists(gnix_buddy_alloc_handle_t - *alloc_handle) -{ - uint32_t i, offset = 0; - - alloc_handle->nlists = (uint32_t) __gnix_buddy_log2(alloc_handle->max / - MIN_BLOCK_SIZE) + 1; - alloc_handle->lists = calloc(1, sizeof(struct dlist_entry) * - alloc_handle->nlists); - - if (OFI_UNLIKELY(!alloc_handle->lists)) { - GNIX_WARN(FI_LOG_EP_CTRL, - "Could not create buddy allocator lists.\n"); - return -FI_ENOMEM; - } - - for (i = 0; i < alloc_handle->nlists; i++) { - dlist_init(alloc_handle->lists + i); - } - - /* Insert free blocks of size max in sorted order into last list */ - for (i = 0; i < alloc_handle->len / alloc_handle->max; i++) { - dlist_insert_tail((void *) ((uint8_t *) alloc_handle->base + - offset), - alloc_handle->lists + - alloc_handle->nlists - 1); - offset += alloc_handle->max; - } - - return FI_SUCCESS; -} - -/** - * Split a block in list "j" until list "i" is reached. 
- */ -static inline void __gnix_buddy_split(gnix_buddy_alloc_handle_t *alloc_handle, - uint32_t j, uint32_t i, void **ptr) -{ - void *tmp = alloc_handle->lists[j].next; - - dlist_remove(tmp); - - /* Split the block until we reach list "i" */ - for (; j > i; j--) { - _gnix_set_bit(&alloc_handle->bitmap, - __gnix_buddy_bitmap_index(tmp, - OFFSET(MIN_BLOCK_SIZE, j), - alloc_handle->base, - alloc_handle->len, - MIN_BLOCK_SIZE)); - - dlist_insert_tail((void *) ((uint8_t *) tmp + - OFFSET(MIN_BLOCK_SIZE, j - 1)), - alloc_handle->lists + j - 1); - } - - /* Allocate the block */ - *ptr = tmp; -} - -/** - * Find the first free block in list i. - * - * @return 1 if the block cannot be found. - * - * @return 0 if the block is found. - */ -static inline int __gnix_buddy_find_block(gnix_buddy_alloc_handle_t - *alloc_handle, uint32_t i, void **ptr) -{ - uint32_t j; - - for (j = i; j < alloc_handle->nlists; j++) { - if (!dlist_empty(alloc_handle->lists + j)) { - __gnix_buddy_split(alloc_handle, j, i, ptr); - return 0; - } - } - - return 1; -} - - -/** - * If the buddy block is on the free list then coalesce and insert into the next - * list until we reach an allocated or split buddy block, or the max list size. 
- */ -static inline uint32_t __gnix_buddy_coalesce(gnix_buddy_alloc_handle_t *alloc_handle - , void **ptr, uint32_t block_size) -{ - void *buddy; - - for (buddy = __gnix_buddy_address(*ptr, block_size, alloc_handle->base); - block_size < alloc_handle->max && - !_gnix_test_bit(&alloc_handle->bitmap, - __gnix_buddy_bitmap_index(buddy, - block_size, - alloc_handle->base, - alloc_handle->len, - MIN_BLOCK_SIZE)); - buddy = __gnix_buddy_address(*ptr, block_size, alloc_handle->base)) { - - dlist_remove(buddy); - - /* Ensure ptr is the beginning of the new block */ - if (*ptr > buddy) - *ptr = buddy; - - block_size *= 2; - - _gnix_clear_bit(&alloc_handle->bitmap, - __gnix_buddy_bitmap_index(*ptr, block_size, - alloc_handle->base, - alloc_handle->len, - MIN_BLOCK_SIZE)); - } - return block_size; -} - -int _gnix_buddy_allocator_create(void *base, uint32_t len, uint32_t max, - gnix_buddy_alloc_handle_t **alloc_handle) -{ - char err_buf[256] = {0}, *error = NULL; - int fi_errno; - uint32_t size_check = len / MIN_BLOCK_SIZE * 2; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - /* Ensure parameters are valid */ - if (OFI_UNLIKELY(!base || !len || !max || max > len || !alloc_handle || - IS_NOT_POW_TWO(max) || (len % max) || - !size_check)) { - - GNIX_WARN(FI_LOG_EP_CTRL, - "Invalid parameter to _gnix_buddy_allocator_create." - "\n"); - return -FI_EINVAL; - } - - *alloc_handle = calloc(1, sizeof(gnix_buddy_alloc_handle_t)); - - if (OFI_UNLIKELY(!alloc_handle)) { - error = strerror_r(errno, err_buf, sizeof(err_buf)); - GNIX_WARN(FI_LOG_EP_CTRL, - "Could not create buddy allocator handle.\n", - error); - return -FI_ENOMEM; - } - - ofi_spin_init(&alloc_handle[0]->lock); - alloc_handle[0]->base = base; - alloc_handle[0]->len = len; - alloc_handle[0]->max = max; - - if (__gnix_buddy_create_lists(alloc_handle[0])) { - free(*alloc_handle); - return -FI_ENOMEM; - } - - /* The bitmap needs len / MIN_BLOCK_SIZE * 2 bits to flag every possible - * block of size: min, min * 2, min * 4, ... 
, max that fits in the - * base. block. The maximum number of bits used would be if max = len. - */ - if ((fi_errno = _gnix_alloc_bitmap(&alloc_handle[0]->bitmap, - len / MIN_BLOCK_SIZE * 2, NULL))) { - - free(&alloc_handle[0]->lists); - free(*alloc_handle); - } - - return fi_errno; -} - -int _gnix_buddy_allocator_destroy(gnix_buddy_alloc_handle_t *alloc_handle) -{ - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (OFI_UNLIKELY(!alloc_handle)) { - GNIX_WARN(FI_LOG_EP_CTRL, - "Invalid parameter to _gnix_buddy_allocator_destroy." - "\n"); - return -FI_EINVAL; - } - - ofi_spin_lock(&alloc_handle->lock); - - free(alloc_handle->lists); - - while (_gnix_free_bitmap(&alloc_handle->bitmap)) { - GNIX_WARN(FI_LOG_EP_CTRL, - "Trying to free buddy allocator handle bitmap.\n"); - sleep(1); - } - - ofi_spin_unlock(&alloc_handle->lock); - ofi_spin_destroy(&alloc_handle->lock); - - free(alloc_handle); - - return FI_SUCCESS; -} - -int _gnix_buddy_alloc(gnix_buddy_alloc_handle_t *alloc_handle, void **ptr, - uint32_t len) -{ - uint32_t block_size, i = 0; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (OFI_UNLIKELY(!alloc_handle || !ptr || !len || - len > alloc_handle->max)) { - - GNIX_WARN(FI_LOG_EP_CTRL, - "Invalid parameter to _gnix_buddy_alloc.\n"); - return -FI_EINVAL; - } - - block_size = BLOCK_SIZE(len, MIN_BLOCK_SIZE); - i = (uint32_t) LIST_INDEX(block_size, MIN_BLOCK_SIZE); - - ofi_spin_lock(&alloc_handle->lock); - - if (__gnix_buddy_find_block(alloc_handle, i, ptr)) { - ofi_spin_unlock(&alloc_handle->lock); - GNIX_WARN(FI_LOG_EP_CTRL, - "Could not allocate buddy block.\n"); - return -FI_ENOMEM; - } - - ofi_spin_unlock(&alloc_handle->lock); - - _gnix_set_bit(&alloc_handle->bitmap, - __gnix_buddy_bitmap_index(*ptr, block_size, - alloc_handle->base, - alloc_handle->len, - MIN_BLOCK_SIZE)); - - return FI_SUCCESS; -} - -int _gnix_buddy_free(gnix_buddy_alloc_handle_t *alloc_handle, void *ptr, - uint32_t len) -{ - uint32_t block_size; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if 
(OFI_UNLIKELY(!alloc_handle || !len || len > alloc_handle->max || - ptr >= (void *) ((uint8_t *) alloc_handle->base + - alloc_handle->len) || - ptr < alloc_handle->base)) { - - GNIX_WARN(FI_LOG_EP_CTRL, - "Invalid parameter to _gnix_buddy_free.\n"); - return -FI_EINVAL; - } - - block_size = BLOCK_SIZE(len, MIN_BLOCK_SIZE); - - _gnix_clear_bit(&alloc_handle->bitmap, - __gnix_buddy_bitmap_index(ptr, block_size, - alloc_handle->base, - alloc_handle->len, - MIN_BLOCK_SIZE)); - - ofi_spin_lock(&alloc_handle->lock); - - block_size = __gnix_buddy_coalesce(alloc_handle, &ptr, block_size); - - dlist_insert_tail(ptr, alloc_handle->lists + - LIST_INDEX(block_size, MIN_BLOCK_SIZE)); - - ofi_spin_unlock(&alloc_handle->lock); - - return FI_SUCCESS; -} diff --git a/prov/gni/src/gnix_cm.c b/prov/gni/src/gnix_cm.c deleted file mode 100644 index eebbb33f916..00000000000 --- a/prov/gni/src/gnix_cm.c +++ /dev/null @@ -1,1337 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2020 Triad National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include - -#include "gnix_cm.h" -#include "gnix.h" -#include "gnix_util.h" -#include "gnix_nic.h" -#include "gnix_cm_nic.h" -#include "gnix_nameserver.h" -#include "gnix_eq.h" -#include "gnix_vc.h" -#include "gnix_av.h" - -struct fi_ops gnix_pep_fi_ops; -struct fi_ops_ep gnix_pep_ops_ep; -struct fi_ops_cm gnix_pep_ops_cm; - -int _gnix_ep_name_to_str(struct gnix_ep_name *ep_name, char **out_buf) -{ - char *str; - size_t len = GNIX_FI_ADDR_STR_LEN; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (*out_buf == NULL) { - str = calloc(len, sizeof(char)); - if (str == NULL) { - GNIX_WARN(FI_LOG_FABRIC, fi_strerror(FI_ENOMEM)); - return -FI_ENOMEM; - } - } else { - str = *out_buf; - } - - /* Convert raw address info to string */ - snprintf(str, len, "gni;NONE;NONE;%04i;0x%08" PRIx32 ";0x%08" PRIx32 - ";%02i;0x%06" PRIx32 ";0x%08" PRIx32 ";%02i", - GNIX_AV_STR_ADDR_VERSION, - ep_name->gnix_addr.device_addr, - ep_name->gnix_addr.cdm_id, - ep_name->name_type, - ep_name->cm_nic_cdm_id, - ep_name->cookie, - ep_name->rx_ctx_cnt); - - return FI_SUCCESS; -} - -int _gnix_ep_name_from_str(const char *addr, - struct gnix_ep_name *resolved_addr) -{ - char *tok, *endptr; - int ret; - struct gnix_ep_name ep_name; - long tok_val; - char *dup_addr; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (!addr || !resolved_addr) { - GNIX_WARN(FI_LOG_EP_CTRL, "NULL parameter in " - "__gnix_resolved_name_from_str"); - return -FI_EINVAL; - } - - dup_addr = strdup(addr); - if 
(!dup_addr) { - return -FI_ENOMEM; - } - - tok = strtok(dup_addr, ";"); - if (!tok) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid address.\n"); - return -FI_EINVAL; - } - - ret = memcmp(tok, "gni", 3); - if (ret) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid address.\n"); - free(dup_addr); - return -FI_EINVAL; - } - - tok = strtok(NULL, ";");/*node*/ - if (!tok) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid address.\n"); - free(dup_addr); - return -FI_EINVAL; - } - - tok = strtok(NULL, ";");/*service*/ - if (!tok) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid address.\n"); - free(dup_addr); - return -FI_EINVAL; - } - - tok = strtok(NULL, ";");/*GNIX_AV_STR_ADDR_VERSION*/ - if (!tok) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid address.\n"); - free(dup_addr); - return -FI_EINVAL; - } - - /*device_addr*/ - tok = strtok(NULL, ";"); - if (!tok) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid address.\n"); - free(dup_addr); - return -FI_EINVAL; - } - tok_val = strtol(tok, &endptr, 16); - if (*endptr) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid device_addr.\n"); - free(dup_addr); - return -FI_EINVAL; - } - ep_name.gnix_addr.device_addr = (uint32_t) tok_val; - - /*cdm_id*/ - tok = strtok(NULL, ";"); - if (!tok) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid address.\n"); - free(dup_addr); - return -FI_EINVAL; - } - tok_val = strtol(tok, &endptr, 16); - if (*endptr) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid cdm_id.\n"); - free(dup_addr); - return -FI_EINVAL; - } - ep_name.gnix_addr.cdm_id = (uint32_t) tok_val; - - /*name_type*/ - tok = strtok(NULL, ";"); - if (!tok) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid address.\n"); - free(dup_addr); - return -FI_EINVAL; - } - tok_val = strtol(tok, &endptr, 10); - if (*endptr) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid name_type.\n"); - free(dup_addr); - return -FI_EINVAL; - } - ep_name.name_type = (uint32_t) tok_val; - - /*cm_nic_cdm_id*/ - tok = strtok(NULL, ";"); - if (!tok) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid address.\n"); - free(dup_addr); - return -FI_EINVAL; - } - tok_val = strtol(tok, 
&endptr, 16); - if (*endptr) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid cm_nic_cdm_id.\n"); - free(dup_addr); - return -FI_EINVAL; - } - ep_name.cm_nic_cdm_id = (uint32_t) tok_val; - - /*cookie*/ - tok = strtok(NULL, ";"); - if (!tok) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid address.\n"); - free(dup_addr); - return -FI_EINVAL; - } - tok_val = strtol(tok, &endptr, 16); - if (*endptr) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid cookie.\n"); - free(dup_addr); - return -FI_EINVAL; - } - ep_name.cookie = (uint32_t) tok_val; - - /*rx_ctx_cnt*/ - tok = strtok(NULL, ";"); - if (!tok) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid address.\n"); - free(dup_addr); - return -FI_EINVAL; - } - tok_val = strtol(tok, &endptr, 10); - if (*endptr) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid rx_ctx_cnt.\n"); - free(dup_addr); - return -FI_EINVAL; - } - ep_name.rx_ctx_cnt = (uint32_t) tok_val; - - *resolved_addr = ep_name; - free(dup_addr); - - return FI_SUCCESS; -} - -/****************************************************************************** - * - * Common CM handling (supported for all types of endpoints). - * - *****************************************************************************/ - -/** - * Retrieve the local endpoint address. - * - * addrlen: Should indicate the size of the addr buffer. On output will contain - * the size necessary to copy the proper address structure. - * - * addr: Pointer to memory that will contain the address structure. Should be - * allocated and of size addrlen. If addrlen is less than necessary to copy - * the proper address structure then addr will contain a truncated address. - * Depending on what hints were used during setup, addr will either be in - * the FI_ADDR_STR or FI_ADDR_GNI format. - * - * return: FI_SUCCESS or negative error value. 
- */ -DIRECT_FN STATIC int gnix_getname(fid_t fid, void *addr, size_t *addrlen) -{ - struct gnix_fid_ep *ep = NULL; - struct gnix_fid_sep *sep = NULL; - struct gnix_fid_pep *pep = NULL; - size_t len = 0, cpylen; - bool is_fi_addr_str; - struct fi_info *info; - struct gnix_ep_name *ep_name; - int ret; - - if (OFI_UNLIKELY(addrlen == NULL)) { - GNIX_INFO(FI_LOG_EP_CTRL, "parameter \"addrlen\" is NULL in " - "gnix_getname\n"); - return -FI_EINVAL; - } - - switch (fid->fclass) { - case FI_CLASS_EP: - ep = container_of(fid, struct gnix_fid_ep, ep_fid.fid); - info = ep->info; - ep_name = &ep->src_addr; - break; - case FI_CLASS_SEP: - sep = container_of(fid, struct gnix_fid_sep, ep_fid); - info = sep->info; - ep_name = &sep->my_name; - break; - case FI_CLASS_PEP: - pep = container_of(fid, struct gnix_fid_pep, - pep_fid.fid); - info = pep->info; - ep_name = &pep->src_addr; - break; - default: - GNIX_INFO(FI_LOG_EP_CTRL, - "Invalid fid class: %d\n", - fid->fclass); - return -FI_EINVAL; - } - - is_fi_addr_str = info->addr_format == FI_ADDR_STR; - - if (!addr) { - if (OFI_UNLIKELY(is_fi_addr_str)) { - *addrlen = GNIX_FI_ADDR_STR_LEN; - } else { - *addrlen = sizeof(struct gnix_ep_name); - } - - return -FI_ETOOSMALL; - } - - if (OFI_UNLIKELY(is_fi_addr_str)) { - ret = _gnix_ep_name_to_str(ep_name, (char **) &addr); - - if (ret) - return ret; - - len = GNIX_FI_ADDR_STR_LEN; - cpylen = MIN(len, *addrlen); - } else { - len = sizeof(struct gnix_ep_name); - cpylen = MIN(len, *addrlen); - memcpy(addr, ep_name, cpylen); - } - - *addrlen = len; - return (len == cpylen) ? 
FI_SUCCESS : -FI_ETOOSMALL; -} - -DIRECT_FN STATIC int gnix_setname(fid_t fid, void *addr, size_t addrlen) -{ - struct gnix_fid_ep *ep = NULL; - struct gnix_fid_sep *sep = NULL; - struct gnix_fid_pep *pep = NULL; - struct fi_info *info; - struct gnix_ep_name *ep_name; - size_t len; - int ret; - - if (OFI_UNLIKELY(addr == NULL)) { - GNIX_INFO(FI_LOG_EP_CTRL, "parameter \"addr\" is NULL in " - "gnix_setname\n"); - return -FI_EINVAL; - } - - len = sizeof(struct gnix_ep_name); - - switch (fid->fclass) { - case FI_CLASS_EP: - ep = container_of(fid, struct gnix_fid_ep, ep_fid.fid); - info = ep->info; - ep_name = &ep->src_addr; - break; - case FI_CLASS_SEP: - sep = container_of(fid, struct gnix_fid_sep, ep_fid); - info = sep->info; - ep_name = &sep->my_name; - break; - case FI_CLASS_PEP: - pep = container_of(fid, struct gnix_fid_pep, pep_fid.fid); - /* TODO: make sure we're unconnected. */ - pep->bound = 1; - info = pep->info; - ep_name = &pep->src_addr; - break; - default: - GNIX_INFO(FI_LOG_EP_CTRL, "Invalid fid class: %d\n", - fid->fclass); - return -FI_EINVAL; - } - - if (OFI_UNLIKELY(info->addr_format == FI_ADDR_STR)) { - len = GNIX_FI_ADDR_STR_LEN; - - if (addrlen != len) - return -FI_EINVAL; - - ret = _gnix_ep_name_from_str((const char *) addr, - ep_name); - - if (ret) - return ret; - - return FI_SUCCESS; - } - - if (addrlen != len) - return -FI_EINVAL; - - memcpy(ep_name, addr, len); - - return FI_SUCCESS; -} - -DIRECT_FN STATIC int gnix_getpeer(struct fid_ep *ep, void *addr, - size_t *addrlen) -{ - struct gnix_fid_ep *ep_priv = NULL; - struct gnix_fid_sep *sep_priv = NULL; - struct gnix_ep_name *ep_name = NULL; - size_t len = 0, cpylen = 0; - struct fi_info *info = NULL; - int ret; - - if (OFI_UNLIKELY(addrlen == NULL || addr == NULL)) { - GNIX_INFO(FI_LOG_EP_CTRL, - "parameter is NULL in gnix_getpeer\n"); - return -FI_EINVAL; - } - - switch (ep->fid.fclass) { - case FI_CLASS_EP: - ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid.fid); - info = 
ep_priv->info; - ep_name = &ep_priv->dest_addr; - break; - - case FI_CLASS_SEP: - sep_priv = container_of(ep, struct gnix_fid_sep, ep_fid); - info = sep_priv->info; - ep_name = info->dest_addr; - break; - - default: - GNIX_INFO(FI_LOG_EP_CTRL, "Invalid fid class: %d\n", - ep->fid.fclass); - return -FI_EINVAL; - } - - if (info->addr_format == FI_ADDR_STR) { - ret = _gnix_ep_name_to_str(ep_name, (char **) &addr); - - if (ret) - return ret; - - len = GNIX_FI_ADDR_STR_LEN; - cpylen = MIN(len, *addrlen); - } else { - len = sizeof(struct gnix_ep_name); - cpylen = MIN(len, *addrlen); - memcpy(addr, ep_name, cpylen); - } - - *addrlen = len; - - return (len == cpylen) ? FI_SUCCESS : -FI_ETOOSMALL; -} - -struct fi_ops_cm gnix_ep_ops_cm = { - .size = sizeof(struct fi_ops_cm), - .setname = gnix_setname, - .getname = gnix_getname, - .getpeer = gnix_getpeer, - .connect = fi_no_connect, - .listen = fi_no_listen, - .accept = fi_no_accept, - .reject = fi_no_reject, - .shutdown = fi_no_shutdown, - .join = fi_no_join, -}; - -/****************************************************************************** - * - * FI_EP_MSG endpoint handling - * - *****************************************************************************/ - -/* Process a connection response on an FI_EP_MSG. */ -static int __gnix_ep_connresp(struct gnix_fid_ep *ep, - struct gnix_pep_sock_connresp *resp) -{ - int ret = FI_SUCCESS; - struct fi_eq_cm_entry *eq_entry; - int eqe_size; - - switch (resp->cmd) { - case GNIX_PEP_SOCK_RESP_ACCEPT: - ep->vc->peer_caps = resp->peer_caps; - ep->vc->peer_key_offset = resp->key_offset; - ep->vc->peer_id = resp->vc_id; - - /* Initialize the GNI connection. 
*/ - ret = _gnix_vc_smsg_init(ep->vc, resp->vc_id, - &resp->vc_mbox_attr, - &resp->cq_irq_mdh); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_vc_smsg_init returned %s\n", - fi_strerror(-ret)); - return ret; - } - - ep->vc->conn_state = GNIX_VC_CONNECTED; - ep->conn_state = GNIX_EP_CONNECTED; - - /* Notify user that this side is connected. */ - eq_entry = (struct fi_eq_cm_entry *)resp->eqe_buf; - eq_entry->fid = &ep->ep_fid.fid; - - eqe_size = sizeof(*eq_entry) + resp->cm_data_len; - ret = fi_eq_write(&ep->eq->eq_fid, FI_CONNECTED, eq_entry, - eqe_size, 0); - if (ret != eqe_size) { - GNIX_WARN(FI_LOG_EP_CTRL, - "fi_eq_write failed, err: %d\n", ret); - return ret; - } - - GNIX_DEBUG(FI_LOG_EP_CTRL, "Received conn accept: %p\n", ep); - - break; - case GNIX_PEP_SOCK_RESP_REJECT: - /* Undo the connect and generate a failure EQE. */ - close(ep->conn_fd); - ep->conn_fd = -1; - - _gnix_mbox_free(ep->vc->smsg_mbox); - ep->vc->smsg_mbox = NULL; - - _gnix_vc_destroy(ep->vc); - ep->vc = NULL; - - ep->conn_state = GNIX_EP_UNCONNECTED; - - /* Generate EQE. */ - eq_entry = (struct fi_eq_cm_entry *)resp->eqe_buf; - eq_entry->fid = &ep->ep_fid.fid; - - eq_entry = (struct fi_eq_cm_entry *)resp->eqe_buf; - ret = _gnix_eq_write_error(ep->eq, &ep->ep_fid.fid, NULL, 0, - FI_ECONNREFUSED, FI_ECONNREFUSED, - &eq_entry->data, resp->cm_data_len); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "fi_eq_write failed, err: %d\n", ret); - return ret; - } - - GNIX_DEBUG(FI_LOG_EP_CTRL, "Conn rejected: %p\n", ep); - - break; - default: - GNIX_INFO(FI_LOG_EP_CTRL, "Invalid response command: %d\n", - resp->cmd); - return -FI_EINVAL; - } - - return FI_SUCCESS; -} - -/* Check for a connection response on an FI_EP_MSG. */ -int _gnix_ep_progress(struct gnix_fid_ep *ep) -{ - int ret, bytes_read, errno_keep; - struct gnix_pep_sock_connresp resp; - - /* No lock, fast exit. 
*/ - if (ep->conn_state != GNIX_EP_CONNECTING) { - return FI_SUCCESS; - } - - COND_ACQUIRE(ep->requires_lock, &ep->vc_lock); - - if (ep->conn_state != GNIX_EP_CONNECTING) { - COND_RELEASE(ep->requires_lock, &ep->vc_lock); - return FI_SUCCESS; - } - - /* Check for a connection response. */ - bytes_read = read(ep->conn_fd, &resp, sizeof(resp)); - if (bytes_read >= 0) { - if (bytes_read == sizeof(resp)) { - /* Received response. */ - ret = __gnix_ep_connresp(ep, &resp); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "__gnix_pep_connreq failed, %d\n", - ret); - } - } else { - errno_keep = errno; - GNIX_FATAL(FI_LOG_EP_CTRL, - "Unexpected read size: %d err: %s\n", - bytes_read, strerror(errno_keep)); - } - } else if (errno != EAGAIN) { - errno_keep = errno; - GNIX_WARN(FI_LOG_EP_CTRL, "Read error: %s\n", - strerror(errno_keep)); - } - - COND_RELEASE(ep->requires_lock, &ep->vc_lock); - - return FI_SUCCESS; -} - -DIRECT_FN STATIC int gnix_connect(struct fid_ep *ep, const void *addr, - const void *param, size_t paramlen) -{ - int ret, errno_keep; - struct gnix_fid_ep *ep_priv; - struct sockaddr_in saddr; - struct gnix_pep_sock_connreq req; - struct fi_eq_cm_entry *eqe_ptr; - struct gnix_vc *vc; - struct gnix_mbox *mbox = NULL; - struct gnix_av_addr_entry av_entry; - - if (!ep || !addr || (paramlen && !param) || - paramlen > GNIX_CM_DATA_MAX_SIZE) - return -FI_EINVAL; - - ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid.fid); - - COND_ACQUIRE(ep_priv->requires_lock, &ep_priv->vc_lock); - - if (ep_priv->conn_state != GNIX_EP_UNCONNECTED) { - ret = -FI_EINVAL; - goto err_unlock; - } - - ret = _gnix_pe_to_ip(addr, &saddr); - if (ret != FI_SUCCESS) { - GNIX_INFO(FI_LOG_EP_CTRL, - "Failed to translate gnix_ep_name to IP\n"); - goto err_unlock; - } - - /* Create new VC without CM data. 
*/ - av_entry.gnix_addr = ep_priv->dest_addr.gnix_addr; - av_entry.cm_nic_cdm_id = ep_priv->dest_addr.cm_nic_cdm_id; - ret = _gnix_vc_alloc(ep_priv, &av_entry, &vc); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "Failed to create VC:: %d\n", - ret); - goto err_unlock; - } - ep_priv->vc = vc; - - ret = _gnix_mbox_alloc(vc->ep->nic->mbox_hndl, &mbox); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_mbox_alloc returned %s\n", - fi_strerror(-ret)); - goto err_mbox_alloc; - } - vc->smsg_mbox = mbox; - - ep_priv->conn_fd = socket(AF_INET, SOCK_STREAM, 0); - if (ep_priv->conn_fd < 0) { - errno_keep = errno; - GNIX_WARN(FI_LOG_EP_CTRL, - "Failed to create connect socket, err: %s\n", - strerror(errno_keep)); - ret = -FI_ENOSPC; - goto err_socket; - } - - /* Currently blocks until connected. */ - ret = connect(ep_priv->conn_fd, (struct sockaddr *)&saddr, - sizeof(saddr)); - if (ret) { - errno_keep = errno; - GNIX_WARN(FI_LOG_EP_CTRL, - "Failed to connect, err: %s\n", - strerror(errno_keep)); - ret = -FI_EIO; - goto err_connect; - } - - memset(&req, 0, sizeof(req)); - req.info = *ep_priv->info; - - /* Note addrs are swapped. 
*/ - memcpy(&req.dest_addr, (void *)&ep_priv->src_addr, - sizeof(req.dest_addr)); - memcpy(&ep_priv->dest_addr, addr, sizeof(ep_priv->dest_addr)); - memcpy(&req.src_addr, addr, sizeof(req.src_addr)); - - if (ep_priv->info->tx_attr) - req.tx_attr = *ep_priv->info->tx_attr; - if (ep_priv->info->rx_attr) - req.rx_attr = *ep_priv->info->rx_attr; - if (ep_priv->info->ep_attr) - req.ep_attr = *ep_priv->info->ep_attr; - if (ep_priv->info->domain_attr) - req.domain_attr = *ep_priv->info->domain_attr; - if (ep_priv->info->fabric_attr) - req.fabric_attr = *ep_priv->info->fabric_attr; - - req.vc_id = vc->vc_id; - req.vc_mbox_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT; - req.vc_mbox_attr.msg_buffer = mbox->base; - req.vc_mbox_attr.buff_size = vc->ep->nic->mem_per_mbox; - req.vc_mbox_attr.mem_hndl = *mbox->memory_handle; - req.vc_mbox_attr.mbox_offset = (uint64_t)mbox->offset; - req.vc_mbox_attr.mbox_maxcredit = - ep_priv->domain->params.mbox_maxcredit; - req.vc_mbox_attr.msg_maxsize = ep_priv->domain->params.mbox_msg_maxsize; - req.cq_irq_mdh = ep_priv->nic->irq_mem_hndl; - req.peer_caps = ep_priv->caps; - req.key_offset = ep_priv->auth_key->key_offset; - - req.cm_data_len = paramlen; - if (paramlen) { - eqe_ptr = (struct fi_eq_cm_entry *)req.eqe_buf; - memcpy(eqe_ptr->data, param, paramlen); - } - - ret = write(ep_priv->conn_fd, &req, sizeof(req)); - if (ret != sizeof(req)) { - errno_keep = errno; - GNIX_WARN(FI_LOG_EP_CTRL, - "Failed to send req, err: %s\n", - strerror(errno_keep)); - ret = -FI_EIO; - goto err_write; - } - /* set fd to non-blocking now since we can't block within the eq - * progress system - */ - fi_fd_nonblock(ep_priv->conn_fd); - - ep_priv->conn_state = GNIX_EP_CONNECTING; - - COND_RELEASE(ep_priv->requires_lock, &ep_priv->vc_lock); - - GNIX_DEBUG(FI_LOG_EP_CTRL, "Sent conn req: %p, %s\n", - ep_priv, inet_ntoa(saddr.sin_addr)); - - return FI_SUCCESS; - -err_write: -err_connect: - close(ep_priv->conn_fd); - ep_priv->conn_fd = -1; -err_socket: - 
_gnix_mbox_free(ep_priv->vc->smsg_mbox); - ep_priv->vc->smsg_mbox = NULL; -err_mbox_alloc: - _gnix_vc_destroy(ep_priv->vc); - ep_priv->vc = NULL; -err_unlock: - COND_RELEASE(ep_priv->requires_lock, &ep_priv->vc_lock); - - return ret; -} - -DIRECT_FN STATIC int gnix_accept(struct fid_ep *ep, const void *param, - size_t paramlen) -{ - int ret, errno_keep; - struct gnix_vc *vc; - struct gnix_fid_ep *ep_priv; - struct gnix_pep_sock_conn *conn; - struct gnix_pep_sock_connresp resp; - struct fi_eq_cm_entry eq_entry, *eqe_ptr; - struct gnix_mbox *mbox = NULL; - struct gnix_av_addr_entry av_entry; - - if (!ep || (paramlen && !param) || paramlen > GNIX_CM_DATA_MAX_SIZE) - return -FI_EINVAL; - - ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid.fid); - - COND_ACQUIRE(ep_priv->requires_lock, &ep_priv->vc_lock); - - /* Look up and unpack the connection request used to create this EP. */ - conn = (struct gnix_pep_sock_conn *)ep_priv->info->handle; - if (!conn || conn->fid.fclass != FI_CLASS_CONNREQ) { - ret = -FI_EINVAL; - goto err_unlock; - } - - /* Create new VC without CM data. */ - av_entry.gnix_addr = ep_priv->dest_addr.gnix_addr; - av_entry.cm_nic_cdm_id = ep_priv->dest_addr.cm_nic_cdm_id; - ret = _gnix_vc_alloc(ep_priv, &av_entry, &vc); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, "Failed to create VC: %d\n", ret); - goto err_unlock; - } - ep_priv->vc = vc; - ep_priv->vc->peer_caps = conn->req.peer_caps; - ep_priv->vc->peer_key_offset = conn->req.key_offset; - ep_priv->vc->peer_id = conn->req.vc_id; - - ret = _gnix_mbox_alloc(vc->ep->nic->mbox_hndl, &mbox); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, "_gnix_mbox_alloc returned %s\n", - fi_strerror(-ret)); - goto err_mbox_alloc; - } - vc->smsg_mbox = mbox; - - /* Initialize the GNI connection. 
*/ - ret = _gnix_vc_smsg_init(vc, conn->req.vc_id, - &conn->req.vc_mbox_attr, - &conn->req.cq_irq_mdh); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_vc_smsg_init returned %s\n", - fi_strerror(-ret)); - goto err_smsg_init; - } - - vc->conn_state = GNIX_VC_CONNECTED; - - /* Send ACK with VC attrs to allow peer to initialize GNI connection. */ - resp.cmd = GNIX_PEP_SOCK_RESP_ACCEPT; - - resp.vc_id = vc->vc_id; - resp.vc_mbox_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT; - resp.vc_mbox_attr.msg_buffer = mbox->base; - resp.vc_mbox_attr.buff_size = vc->ep->nic->mem_per_mbox; - resp.vc_mbox_attr.mem_hndl = *mbox->memory_handle; - resp.vc_mbox_attr.mbox_offset = (uint64_t)mbox->offset; - resp.vc_mbox_attr.mbox_maxcredit = - ep_priv->domain->params.mbox_maxcredit; - resp.vc_mbox_attr.msg_maxsize = - ep_priv->domain->params.mbox_msg_maxsize; - resp.cq_irq_mdh = ep_priv->nic->irq_mem_hndl; - resp.peer_caps = ep_priv->caps; - resp.key_offset = ep_priv->auth_key->key_offset; - - resp.cm_data_len = paramlen; - if (paramlen) { - eqe_ptr = (struct fi_eq_cm_entry *)resp.eqe_buf; - memcpy(eqe_ptr->data, param, paramlen); - } - - ret = write(conn->sock_fd, &resp, sizeof(resp)); - if (ret != sizeof(resp)) { - errno_keep = errno; - GNIX_WARN(FI_LOG_EP_CTRL, - "Failed to send resp, err: %s\n", - strerror(errno_keep)); - ret = -FI_EIO; - goto err_write; - } - - /* Notify user that this side is connected. */ - eq_entry.fid = &ep_priv->ep_fid.fid; - - ret = fi_eq_write(&ep_priv->eq->eq_fid, FI_CONNECTED, &eq_entry, - sizeof(eq_entry), 0); - if (ret != sizeof(eq_entry)) { - GNIX_WARN(FI_LOG_EP_CTRL, - "fi_eq_write failed, err: %d\n", ret); - goto err_eq_write; - } - - /* Free the connection request. 
*/ - free(conn); - - ep_priv->conn_state = GNIX_EP_CONNECTED; - - COND_RELEASE(ep_priv->requires_lock, &ep_priv->vc_lock); - - GNIX_DEBUG(FI_LOG_EP_CTRL, "Sent conn accept: %p\n", ep_priv); - - return FI_SUCCESS; - -err_eq_write: -err_write: -err_smsg_init: - _gnix_mbox_free(ep_priv->vc->smsg_mbox); - ep_priv->vc->smsg_mbox = NULL; -err_mbox_alloc: - _gnix_vc_destroy(ep_priv->vc); - ep_priv->vc = NULL; -err_unlock: - COND_RELEASE(ep_priv->requires_lock, &ep_priv->vc_lock); - - return ret; -} - -DIRECT_FN STATIC int gnix_shutdown(struct fid_ep *ep, uint64_t flags) -{ - int ret; - struct gnix_fid_ep *ep_priv; - struct fi_eq_cm_entry eq_entry = {0}; - - if (!ep) - return -FI_EINVAL; - - ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid.fid); - - COND_ACQUIRE(ep_priv->requires_lock, &ep_priv->vc_lock); - - eq_entry.fid = &ep_priv->ep_fid.fid; - - ret = fi_eq_write(&ep_priv->eq->eq_fid, FI_SHUTDOWN, &eq_entry, - sizeof(eq_entry), 0); - if (ret != sizeof(eq_entry)) { - GNIX_WARN(FI_LOG_EP_CTRL, - "fi_eq_write failed, err: %d\n", ret); - } else { - ret = FI_SUCCESS; - } - - COND_RELEASE(ep_priv->requires_lock, &ep_priv->vc_lock); - - return ret; -} - -struct fi_ops_cm gnix_ep_msg_ops_cm = { - .size = sizeof(struct fi_ops_cm), - .setname = gnix_setname, - .getname = gnix_getname, - .getpeer = gnix_getpeer, - .connect = gnix_connect, - .listen = fi_no_listen, - .accept = gnix_accept, - .reject = fi_no_reject, - .shutdown = gnix_shutdown, - .join = fi_no_join, -}; - -/****************************************************************************** - * - * Passive endpoint handling - * - *****************************************************************************/ - -DIRECT_FN STATIC int gnix_pep_getopt(fid_t fid, int level, int optname, - void *optval, size_t *optlen) -{ - if (!fid || !optval || !optlen) - return -FI_EINVAL; - else if (level != FI_OPT_ENDPOINT) - return -FI_ENOPROTOOPT; - - switch (optname) { - case FI_OPT_CM_DATA_SIZE: - *(size_t *)optval = 
GNIX_CM_DATA_MAX_SIZE; - *optlen = sizeof(size_t); - break; - default: - return -FI_ENOPROTOOPT; - } - - return 0; -} - -/* Process an incoming connection request at a listening PEP. */ -static int __gnix_pep_connreq(struct gnix_fid_pep *pep, int fd) -{ - int ret; - struct gnix_pep_sock_conn *conn; - struct fi_eq_cm_entry *eq_entry; - int eqe_size; - - /* Create and initialize a new connection request. */ - conn = calloc(1, sizeof(*conn)); - if (!conn) { - GNIX_WARN(FI_LOG_EP_CTRL, - "Failed to alloc accepted socket conn\n"); - return -FI_ENOMEM; - } - - conn->fid.fclass = FI_CLASS_CONNREQ; - conn->fid.context = pep; - conn->sock_fd = fd; - - /* Pull request data from the listening socket. */ - conn->bytes_read += read(fd, &conn->req, sizeof(conn->req)); - if (conn->bytes_read != sizeof(conn->req)) { - /* TODO Wait for more bytes. */ - GNIX_FATAL(FI_LOG_EP_CTRL, "Unexpected read size\n"); - } - - conn->req.info.src_addr = &conn->req.src_addr; - conn->req.info.dest_addr = &conn->req.dest_addr; - conn->req.info.tx_attr = &conn->req.tx_attr; - conn->req.info.rx_attr = &conn->req.rx_attr; - conn->req.info.ep_attr = &conn->req.ep_attr; - conn->req.info.domain_attr = &conn->req.domain_attr; - conn->req.info.fabric_attr = &conn->req.fabric_attr; - conn->req.info.domain_attr->name = NULL; - conn->req.info.fabric_attr->name = NULL; - conn->req.info.fabric_attr->prov_name = NULL; - - conn->info = &conn->req.info; - conn->info->handle = &conn->fid; - - /* Tell user of a new conn req via the EQ. 
*/ - eq_entry = (struct fi_eq_cm_entry *)conn->req.eqe_buf; - eq_entry->fid = &pep->pep_fid.fid; - eq_entry->info = fi_dupinfo(conn->info); - - eqe_size = sizeof(*eq_entry) + conn->req.cm_data_len; - ret = fi_eq_write(&pep->eq->eq_fid, FI_CONNREQ, eq_entry, eqe_size, 0); - if (ret != eqe_size) { - GNIX_WARN(FI_LOG_EP_CTRL, "fi_eq_write failed, err: %d\n", ret); - fi_freeinfo(conn->info); - free(conn); - return ret; - } - - GNIX_DEBUG(FI_LOG_EP_CTRL, "Added FI_CONNREQ EQE: %p, %p\n", - pep->eq, pep); - - return FI_SUCCESS; -} - -/* Process incoming connection requests on a listening PEP. */ -int _gnix_pep_progress(struct gnix_fid_pep *pep) -{ - int accept_fd, ret, errno_keep; - - ofi_spin_lock(&pep->lock); - - accept_fd = accept(pep->listen_fd, NULL, NULL); - if (accept_fd >= 0) { - /* New Connection. */ - ret = __gnix_pep_connreq(pep, accept_fd); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "__gnix_pep_connreq failed, err: %d\n", - ret); - } - } else if (errno != EAGAIN) { - errno_keep = errno; - GNIX_WARN(FI_LOG_EP_CTRL, - "(accept) Unexpected errno on listen socket: %s\n", - strerror(errno_keep)); - } - - ofi_spin_unlock(&pep->lock); - - return FI_SUCCESS; -} - -static void __pep_destruct(void *obj) -{ - struct gnix_fid_pep *pep = (struct gnix_fid_pep *)obj; - - GNIX_DEBUG(FI_LOG_EP_CTRL, "Destroying PEP: %p\n", pep); - - ofi_spin_destroy(&pep->lock); - - if (pep->listen_fd >= 0) - close(pep->listen_fd); - - if (pep->eq) { - _gnix_eq_poll_obj_rem(pep->eq, &pep->pep_fid.fid); - _gnix_ref_put(pep->eq); - } - - free(pep); -} - -static int gnix_pep_close(fid_t fid) -{ - int ret = FI_SUCCESS; - struct gnix_fid_pep *pep; - int references_held; - - pep = container_of(fid, struct gnix_fid_pep, pep_fid.fid); - - references_held = _gnix_ref_put(pep); - if (references_held) - GNIX_INFO(FI_LOG_EP_CTRL, "failed to fully close pep due " - "to lingering references. 
references=%i pep=%p\n", - references_held, pep); - - return ret; -} - -DIRECT_FN int gnix_pep_bind(struct fid *fid, struct fid *bfid, uint64_t flags) -{ - int ret = FI_SUCCESS; - struct gnix_fid_pep *pep; - struct gnix_fid_eq *eq; - - if (!fid || !bfid) - return -FI_EINVAL; - - pep = container_of(fid, struct gnix_fid_pep, pep_fid.fid); - - ofi_spin_lock(&pep->lock); - - switch (bfid->fclass) { - case FI_CLASS_EQ: - eq = container_of(bfid, struct gnix_fid_eq, eq_fid.fid); - if (pep->fabric != eq->fabric) { - ret = -FI_EINVAL; - break; - } - - if (pep->eq) { - ret = -FI_EINVAL; - break; - } - - pep->eq = eq; - _gnix_eq_poll_obj_add(eq, &pep->pep_fid.fid); - _gnix_ref_get(eq); - - GNIX_DEBUG(FI_LOG_EP_CTRL, "Bound EQ to PEP: %p, %p\n", - eq, pep); - break; - default: - ret = -FI_ENOSYS; - break; - } - - ofi_spin_unlock(&pep->lock); - - return ret; -} - -DIRECT_FN int gnix_pep_listen(struct fid_pep *pep) -{ - int ret, errno_keep; - struct gnix_fid_pep *pep_priv; - struct sockaddr_in saddr; - int sockopt = 1; - - if (!pep) - return -FI_EINVAL; - - pep_priv = container_of(pep, struct gnix_fid_pep, pep_fid.fid); - - ofi_spin_lock(&pep_priv->lock); - - if (!pep_priv->eq) { - ret = -FI_EINVAL; - goto err_unlock; - } - - pep_priv->listen_fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0); - if (pep_priv->listen_fd < 0) { - errno_keep = errno; - GNIX_WARN(FI_LOG_EP_CTRL, - "Failed to create listening socket, err: %s\n", - strerror(errno_keep)); - ret = -FI_ENOSPC; - goto err_unlock; - } - - ret = setsockopt(pep_priv->listen_fd, SOL_SOCKET, SO_REUSEADDR, - &sockopt, sizeof(sockopt)); - if (ret < 0) { - errno_keep = errno; - GNIX_WARN(FI_LOG_EP_CTRL, - "setsockopt(SO_REUSEADDR) failed, err: %s\n", - strerror(errno_keep)); - } - - /* Bind to the ipogif interface using resolved service number as CDM - * ID. 
*/ - ret = _gnix_local_ipaddr(&saddr); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, "Failed to find local IP\n"); - ret = -FI_ENOSPC; - goto err_sock; - } - - /* If source addr was not specified, use auto assigned port. */ - if (pep_priv->bound) - saddr.sin_port = pep_priv->src_addr.gnix_addr.cdm_id; - else - saddr.sin_port = 0; - - ret = bind(pep_priv->listen_fd, &saddr, sizeof(struct sockaddr_in)); - if (ret < 0) { - errno_keep = errno; - GNIX_WARN(FI_LOG_EP_CTRL, - "Failed to bind listening socket, err: %s\n", - strerror(errno_keep)); - ret = -FI_ENOSPC; - goto err_sock; - } - - ret = listen(pep_priv->listen_fd, pep_priv->backlog); - if (ret < 0) { - errno_keep = errno; - GNIX_WARN(FI_LOG_EP_CTRL, - "Failed to start listening socket, err: %s\n", - strerror(errno_keep)); - ret = -FI_ENOSPC; - goto err_sock; - } - - ofi_spin_unlock(&pep_priv->lock); - - GNIX_DEBUG(FI_LOG_EP_CTRL, - "Configured PEP for listening: %p (%s:%d)\n", - pep, inet_ntoa(saddr.sin_addr), saddr.sin_port); - - return FI_SUCCESS; - -err_sock: - close(pep_priv->listen_fd); -err_unlock: - ofi_spin_unlock(&pep_priv->lock); - return ret; -} - -__attribute__((unused)) -DIRECT_FN STATIC int gnix_listen(struct fid_pep *pep) -{ - return -FI_ENOSYS; -} - -DIRECT_FN STATIC int gnix_reject(struct fid_pep *pep, fid_t handle, - const void *param, size_t paramlen) -{ - struct gnix_fid_pep *pep_priv; - struct gnix_pep_sock_conn *conn; - struct gnix_pep_sock_connresp resp; - struct fi_eq_cm_entry *eqe_ptr; - int ret; - - if (!pep) - return -FI_EINVAL; - - pep_priv = container_of(pep, struct gnix_fid_pep, pep_fid.fid); - - ofi_spin_lock(&pep_priv->lock); - - conn = (struct gnix_pep_sock_conn *)handle; - if (!conn || conn->fid.fclass != FI_CLASS_CONNREQ) { - ofi_spin_unlock(&pep_priv->lock); - return -FI_EINVAL; - } - - resp.cmd = GNIX_PEP_SOCK_RESP_REJECT; - - resp.cm_data_len = paramlen; - if (paramlen) { - eqe_ptr = (struct fi_eq_cm_entry *)resp.eqe_buf; - memcpy(eqe_ptr->data, param, paramlen); - } 
- - ret = write(conn->sock_fd, &resp, sizeof(resp)); - if (ret != sizeof(resp)) { - ofi_spin_unlock(&pep_priv->lock); - GNIX_WARN(FI_LOG_EP_CTRL, - "Failed to send resp, errno: %d\n", - errno); - return -FI_EIO; - } - - close(conn->sock_fd); - free(conn); - - ofi_spin_unlock(&pep_priv->lock); - - GNIX_DEBUG(FI_LOG_EP_CTRL, "Sent conn reject: %p\n", pep_priv); - - return FI_SUCCESS; -} - -DIRECT_FN int gnix_pep_open(struct fid_fabric *fabric, - struct fi_info *info, struct fid_pep **pep, - void *context) -{ - struct gnix_fid_fabric *fabric_priv; - struct gnix_fid_pep *pep_priv; - struct gnix_ep_name *ep_name; - - if (!fabric || !info || !pep) - return -FI_EINVAL; - - fabric_priv = container_of(fabric, struct gnix_fid_fabric, fab_fid); - - pep_priv = calloc(1, sizeof(*pep_priv)); - if (!pep_priv) - return -FI_ENOMEM; - - pep_priv->pep_fid.fid.fclass = FI_CLASS_PEP; - pep_priv->pep_fid.fid.context = context; - - pep_priv->pep_fid.fid.ops = &gnix_pep_fi_ops; - pep_priv->pep_fid.ops = &gnix_pep_ops_ep; - pep_priv->pep_fid.cm = &gnix_pep_ops_cm; - pep_priv->fabric = fabric_priv; - pep_priv->info = fi_dupinfo(info); - pep_priv->info->addr_format = info->addr_format; - - pep_priv->listen_fd = -1; - pep_priv->backlog = 5; /* TODO set via fi_control parameter. 
*/ - ofi_spin_init(&pep_priv->lock); - - if (info->src_addr) { - ep_name = info->src_addr; - info->src_addrlen = sizeof(struct sockaddr_in); - - pep_priv->bound = 1; - memcpy(&pep_priv->src_addr, ep_name, info->src_addrlen); - } else { - pep_priv->bound = 0; - } - - _gnix_ref_init(&pep_priv->ref_cnt, 1, __pep_destruct); - - *pep = &pep_priv->pep_fid; - - GNIX_DEBUG(FI_LOG_EP_CTRL, "Opened PEP: %p\n", pep_priv); - - return FI_SUCCESS; -} - -struct fi_ops gnix_pep_fi_ops = { - .size = sizeof(struct fi_ops), - .close = gnix_pep_close, - .bind = gnix_pep_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open, -}; - -struct fi_ops_ep gnix_pep_ops_ep = { - .size = sizeof(struct fi_ops_ep), - .cancel = fi_no_cancel, - .getopt = gnix_pep_getopt, - .setopt = fi_no_setopt, - .tx_ctx = fi_no_tx_ctx, - .rx_ctx = fi_no_rx_ctx, - .rx_size_left = fi_no_rx_size_left, - .tx_size_left = fi_no_tx_size_left, -}; - -struct fi_ops_cm gnix_pep_ops_cm = { - .size = sizeof(struct fi_ops_cm), - .setname = gnix_setname, - .getname = gnix_getname, - .getpeer = fi_no_getpeer, - .connect = fi_no_connect, - .listen = gnix_pep_listen, - .accept = fi_no_accept, - .reject = gnix_reject, - .shutdown = fi_no_shutdown, - .join = fi_no_join, -}; - diff --git a/prov/gni/src/gnix_cm_nic.c b/prov/gni/src/gnix_cm_nic.c deleted file mode 100644 index 821871beccc..00000000000 --- a/prov/gni/src/gnix_cm_nic.c +++ /dev/null @@ -1,736 +0,0 @@ -/* - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#if HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include - -#include "gnix.h" -#include "gnix_datagram.h" -#include "gnix_cm_nic.h" -#include "gnix_cm.h" -#include "gnix_nic.h" -#include "gnix_hashtable.h" - - -#define GNIX_CM_NIC_BND_TAG (100) -#define GNIX_CM_NIC_WC_TAG (99) - -DLIST_HEAD(gnix_cm_nic_list); -pthread_mutex_t gnix_cm_nic_list_lock = PTHREAD_MUTEX_INITIALIZER; - -/******************************************************************************* - * Helper functions - ******************************************************************************/ - -static void __dgram_set_tag(struct gnix_datagram *d, uint8_t tag) -{ - - _gnix_dgram_pack_buf(d, GNIX_DGRAM_IN_BUF, - &tag, sizeof(uint8_t)); -} - -/* - * we unpack the out tag instead of getting it - * since we need to pass the partially advanced - * out buf to the receive callback function - * associated with the cm_nic instance. - */ -static void __dgram_unpack_out_tag(struct gnix_datagram *d, uint8_t *tag) -{ - - _gnix_dgram_rewind_buf(d, GNIX_DGRAM_OUT_BUF); - _gnix_dgram_unpack_buf(d, GNIX_DGRAM_OUT_BUF, - tag, sizeof(uint8_t)); -} - -static void __dgram_get_in_tag(struct gnix_datagram *d, uint8_t *tag) -{ - - _gnix_dgram_rewind_buf(d, GNIX_DGRAM_IN_BUF); - _gnix_dgram_unpack_buf(d, GNIX_DGRAM_IN_BUF, - tag, sizeof(uint8_t)); - _gnix_dgram_rewind_buf(d, GNIX_DGRAM_IN_BUF); - -} - -static int __process_dgram_w_error(struct gnix_cm_nic *cm_nic, - struct gnix_datagram *dgram, - struct gnix_address peer_address, - gni_post_state_t state) -{ - return -FI_ENOSYS; -} - -static int __process_datagram(struct gnix_datagram *dgram, - struct gnix_address peer_address, - gni_post_state_t state) -{ - int ret = FI_SUCCESS; - struct gnix_cm_nic *cm_nic = NULL; - uint8_t in_tag = 0, out_tag = 0; - char rcv_buf[GNIX_CM_NIC_MAX_MSG_SIZE]; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - cm_nic = (struct gnix_cm_nic *)dgram->cache; - if (cm_nic == NULL) { - GNIX_WARN(FI_LOG_EP_CTRL, - 
"process_datagram, null cache\n"); - goto err; - } - - if (state != GNI_POST_COMPLETED) { - ret = __process_dgram_w_error(cm_nic, - dgram, - peer_address, - state); - GNIX_WARN(FI_LOG_EP_CTRL, - "process_datagram bad post state %d\n", state); - goto err; - } - - __dgram_get_in_tag(dgram, &in_tag); - if ((in_tag != GNIX_CM_NIC_BND_TAG) && - (in_tag != GNIX_CM_NIC_WC_TAG)) { - GNIX_WARN(FI_LOG_EP_CTRL, - "datagram with unknown in tag %d\n", in_tag); - goto err; - } - - __dgram_unpack_out_tag(dgram, &out_tag); - if ((out_tag != GNIX_CM_NIC_BND_TAG) && - (out_tag != GNIX_CM_NIC_WC_TAG)) { - GNIX_WARN(FI_LOG_EP_CTRL, - "datagram with unknown out tag %d\n", out_tag); - goto err; - } - - /* - * if out buf actually has data, call consumer's - * receive callback - */ - - if (out_tag == GNIX_CM_NIC_BND_TAG) { - _gnix_dgram_unpack_buf(dgram, - GNIX_DGRAM_OUT_BUF, - rcv_buf, - GNIX_CM_NIC_MAX_MSG_SIZE); - ret = cm_nic->rcv_cb_fn(cm_nic, - rcv_buf, - peer_address); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "cm_nic->rcv_cb_fn returned %s\n", - fi_strerror(-ret)); - goto err; - } - - ret = _gnix_cm_nic_progress(cm_nic); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_cm_nic_progress returned %s\n", - fi_strerror(-ret)); - } - - /* - * if we are processing a WC datagram, repost, otherwise - * just put back on the freelist. 
- */ - if (in_tag == GNIX_CM_NIC_WC_TAG) { - dgram->callback_fn = __process_datagram; - dgram->cache = cm_nic; - __dgram_set_tag(dgram, in_tag); - ret = _gnix_dgram_wc_post(dgram); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_dgram_wc_post returned %s\n", - fi_strerror(-ret)); - goto err; - } - } else { - ret = _gnix_dgram_free(dgram); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_dgram_free returned %s\n", - fi_strerror(-ret)); - } - - return ret; - -err: - if (in_tag == GNIX_CM_NIC_BND_TAG) - _gnix_dgram_free(dgram); - return ret; -} - -static bool __gnix_cm_nic_timeout_needed(void *data) -{ - struct gnix_cm_nic *cm_nic = (struct gnix_cm_nic *)data; - return _gnix_cm_nic_need_progress(cm_nic); -} - -static void __gnix_cm_nic_timeout_progress(void *data) -{ - int ret; - struct gnix_cm_nic *cm_nic = (struct gnix_cm_nic *)data; - ret = _gnix_cm_nic_progress(cm_nic); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_cm_nic_progress returned %s\n", - fi_strerror(-ret)); -} - - -/******************************************************************************* - * Internal API functions - ******************************************************************************/ - -int _gnix_cm_nic_create_cdm_id(struct gnix_fid_domain *domain, uint32_t *id) -{ - uint32_t cdm_id; - int v; - - if (*id != GNIX_CREATE_CDM_ID) { - return FI_SUCCESS; - } - - /* - * generate a cdm_id, use the 16 LSB of base_id from domain - * with 16 MSBs being obtained from atomic increment of - * a local variable. 
- */ - - v = ofi_atomic_inc32(&gnix_id_counter); - - cdm_id = ((domain->cdm_id_seed & 0xFFF) << 12) | v; - *id = cdm_id; - return FI_SUCCESS; -} - -/** - * This function will return a block of id's starting at id through nids - * - * @param domain gnix domain - * @param nids number of id's - * @param id if -1 return an id based on the counter and seed - */ -int _gnix_get_new_cdm_id_set(struct gnix_fid_domain *domain, int nids, - uint32_t *id) -{ - uint32_t cdm_id; - int v; - - if (*id == -1) { - v = ofi_atomic_add32(&gnix_id_counter, nids); - cdm_id = ((domain->cdm_id_seed & 0xFFF) << 12) | v; - *id = cdm_id; - } else { - /* - * asking for a block starting at a chosen base - * TODO: sanity check that requested base is reasonable - */ - if (*id <= ofi_atomic_get32(&gnix_id_counter)) - return -FI_ENOSPC; - ofi_atomic_set32(&gnix_id_counter, (*(int *)id + nids)); - } - return FI_SUCCESS; -} - -int _gnix_cm_nic_progress(void *arg) -{ - struct gnix_cm_nic *cm_nic = (struct gnix_cm_nic *)arg; - int ret = FI_SUCCESS; - int complete; - struct gnix_work_req *p = NULL; - - /* - * if we're doing FI_PROGRESS_MANUAL, - * see what's going on inside kgni's datagram - * box... - */ - - if (cm_nic->ctrl_progress == FI_PROGRESS_MANUAL) { - ++cm_nic->poll_cnt; - if (((cm_nic->poll_cnt % 512) == 0) || - !dlist_empty(&cm_nic->cm_nic_wq)) { - ret = _gnix_dgram_poll(cm_nic->dgram_hndl, - GNIX_DGRAM_NOBLOCK); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_dgram_poll returned %s\n", - fi_strerror(-ret)); - goto err; - } - } - } - - /* - * do a quick check if queue doesn't have anything yet, - * don't need this to be atomic - */ - -check_again: - if (dlist_empty(&cm_nic->cm_nic_wq)) - return ret; - - /* - * okay, stuff to do, lock work queue, - * dequeue head, unlock, process work element, - * if it doesn't compete, put back at the tail - * of the queue. 
- */ - - ofi_spin_lock(&cm_nic->wq_lock); - p = dlist_first_entry(&cm_nic->cm_nic_wq, struct gnix_work_req, - list); - if (p == NULL) { - ofi_spin_unlock(&cm_nic->wq_lock); - return ret; - } - - dlist_remove_init(&p->list); - ofi_spin_unlock(&cm_nic->wq_lock); - - assert(p->progress_fn); - - ret = p->progress_fn(p->data, &complete); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "dgram prog fn returned %s\n", - fi_strerror(-ret)); - } - - if (complete == 1) { - if (p->completer_fn) { - ret = p->completer_fn(p->completer_data); - free(p); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "dgram completer fn returned %s\n", - fi_strerror(-ret)); - goto err; - } - } else { - free(p); - } - goto check_again; - } else { - ofi_spin_lock(&cm_nic->wq_lock); - dlist_insert_before(&p->list, &cm_nic->cm_nic_wq); - ofi_spin_unlock(&cm_nic->wq_lock); - } - -err: - return ret; -} - -static void __cm_nic_destruct(void *obj) -{ - int ret; - struct gnix_cm_nic *cm_nic = (struct gnix_cm_nic *)obj; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - pthread_mutex_lock(&gnix_cm_nic_list_lock); - dlist_remove(&cm_nic->cm_nic_list); - pthread_mutex_unlock(&gnix_cm_nic_list_lock); - - if (cm_nic->dgram_hndl != NULL) { - ret = _gnix_dgram_hndl_free(cm_nic->dgram_hndl); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_CTRL, - "gnix_dgram_hndl_free returned %d\n", - ret); - } - - if (cm_nic->addr_to_ep_ht != NULL) { - ret = _gnix_ht_destroy(cm_nic->addr_to_ep_ht); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_CTRL, - "gnix_ht_destroy returned %d\n", - ret); - free(cm_nic->addr_to_ep_ht); - cm_nic->addr_to_ep_ht = NULL; - } - - if (cm_nic->nic != NULL) { - _gnix_ref_put(cm_nic->nic); - cm_nic->nic = NULL; - } - - cm_nic->domain->cm_nic = NULL; - free(cm_nic); -} - -static int __gnix_cm_nic_intra_progress_fn(void *data, int *complete_ptr) -{ - struct gnix_datagram *dgram; - struct gnix_cm_nic *cm_nic; - int ret; - - GNIX_INFO(FI_LOG_EP_CTRL, "\n"); - - dgram = (struct gnix_datagram 
*)data; - cm_nic = (struct gnix_cm_nic *)dgram->cache; - ret = __process_datagram(dgram, - cm_nic->my_name.gnix_addr, - GNI_POST_COMPLETED); - if (ret == FI_SUCCESS) { - GNIX_INFO(FI_LOG_EP_CTRL, "Intra-CM NIC dgram completed\n"); - *complete_ptr = 1; - } - - return FI_SUCCESS; -} - -int _gnix_cm_nic_send(struct gnix_cm_nic *cm_nic, - char *sbuf, size_t len, - struct gnix_address target_addr) -{ - int ret = FI_SUCCESS; - struct gnix_datagram *dgram = NULL; - ssize_t __attribute__((unused)) plen; - uint8_t tag; - struct gnix_work_req *work_req; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if ((cm_nic == NULL) || (sbuf == NULL)) - return -FI_EINVAL; - - if (len > GNI_DATAGRAM_MAXSIZE) - return -FI_ENOSPC; - - ret = _gnix_dgram_alloc(cm_nic->dgram_hndl, - GNIX_DGRAM_BND, - &dgram); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_dgram_alloc returned %s\n", - fi_strerror(-ret)); - goto exit; - } - - dgram->target_addr = target_addr; - dgram->callback_fn = __process_datagram; - dgram->cache = cm_nic; - - tag = GNIX_CM_NIC_BND_TAG; - __dgram_set_tag(dgram, tag); - - plen = _gnix_dgram_pack_buf(dgram, GNIX_DGRAM_IN_BUF, - sbuf, len); - assert (plen == len); - - /* If connecting with the same CM NIC, skip datagram exchange. The - * caller could be holding an endpoint lock, so schedule connection - * completion for later. */ - if (GNIX_ADDR_EQUAL(target_addr, cm_nic->my_name.gnix_addr)) { - char tmp_buf[GNIX_CM_NIC_MAX_MSG_SIZE]; - - /* Pack output buffer with input data. 
*/ - _gnix_dgram_unpack_buf(dgram, GNIX_DGRAM_IN_BUF, tmp_buf, - GNIX_CM_NIC_MAX_MSG_SIZE); - _gnix_dgram_pack_buf(dgram, GNIX_DGRAM_OUT_BUF, tmp_buf, - GNIX_CM_NIC_MAX_MSG_SIZE); - - work_req = calloc(1, sizeof(*work_req)); - if (work_req == NULL) { - _gnix_dgram_free(dgram); - return -FI_ENOMEM; - } - - work_req->progress_fn = __gnix_cm_nic_intra_progress_fn; - work_req->data = dgram; - work_req->completer_fn = NULL; - - ofi_spin_lock(&cm_nic->wq_lock); - dlist_insert_before(&work_req->list, &cm_nic->cm_nic_wq); - ofi_spin_unlock(&cm_nic->wq_lock); - - GNIX_INFO(FI_LOG_EP_CTRL, "Initiated intra-CM NIC connect\n"); - } else { - ret = _gnix_dgram_bnd_post(dgram); - if (ret == -FI_EBUSY) { - ret = -FI_EAGAIN; - _gnix_dgram_free(dgram); - } - } - -exit: - return ret; -} - -int _gnix_cm_nic_reg_recv_fn(struct gnix_cm_nic *cm_nic, - gnix_cm_nic_rcv_cb_func *recv_fn, - gnix_cm_nic_rcv_cb_func **prev_fn) -{ - int ret = FI_SUCCESS; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (cm_nic == NULL) - return -FI_EINVAL; - - *prev_fn = cm_nic->rcv_cb_fn; - cm_nic->rcv_cb_fn = recv_fn; - - return ret; -} - -int _gnix_cm_nic_enable(struct gnix_cm_nic *cm_nic) -{ - int i, ret = FI_SUCCESS; - struct gnix_fid_fabric *fabric; - struct gnix_datagram *dg_ptr; - uint8_t tag = GNIX_CM_NIC_WC_TAG; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (cm_nic == NULL) - return -FI_EINVAL; - - if (cm_nic->domain == NULL) { - GNIX_FATAL(FI_LOG_EP_CTRL, "domain is NULL\n"); - } - - if (cm_nic->domain->fabric == NULL) { - GNIX_FATAL(FI_LOG_EP_CTRL, "fabric is NULL\n"); - } - - fabric = cm_nic->domain->fabric; - - assert(cm_nic->dgram_hndl != NULL); - - for (i = 0; i < fabric->n_wc_dgrams; i++) { - ret = _gnix_dgram_alloc(cm_nic->dgram_hndl, GNIX_DGRAM_WC, - &dg_ptr); - - /* - * wildcards may already be posted to the cm_nic, - * so just break if -FI_EAGAIN is returned by - * _gnix_dgram_alloc - */ - - if (ret == -FI_EAGAIN) { - ret = FI_SUCCESS; - break; - } - - if (ret != FI_SUCCESS) { - 
GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_dgram_alloc call returned %d\n", ret); - goto err; - } - - dg_ptr->callback_fn = __process_datagram; - dg_ptr->cache = cm_nic; - __dgram_set_tag(dg_ptr, tag); - - ret = _gnix_dgram_wc_post(dg_ptr); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_dgram_wc_post returned %d\n", ret); - _gnix_dgram_free(dg_ptr); - goto err; - } - } - - /* - * TODO: better cleanup in error case - */ -err: - return ret; -} - -int _gnix_cm_nic_free(struct gnix_cm_nic *cm_nic) -{ - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (cm_nic == NULL) - return -FI_EINVAL; - - _gnix_ref_put(cm_nic); - - return FI_SUCCESS; -} - -int _gnix_cm_nic_alloc(struct gnix_fid_domain *domain, - struct fi_info *info, - uint32_t cdm_id, - struct gnix_auth_key *auth_key, - struct gnix_cm_nic **cm_nic_ptr) -{ - int ret = FI_SUCCESS; - struct gnix_cm_nic *cm_nic = NULL; - gnix_hashtable_attr_t gnix_ht_attr = {0}; - uint32_t name_type = GNIX_EPN_TYPE_UNBOUND; - struct gnix_nic_attr nic_attr = {0}; - struct gnix_ep_name ep_name; - struct gnix_dgram_hndl_attr dgram_hndl_attr = {0}; - struct gnix_dgram_hndl_attr *dgram_hndl_attr_ptr = NULL; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - *cm_nic_ptr = NULL; - - /* - * if app has specified a src_addr in the info - * argument and length matches that for gnix_ep_name - * we must allocate a cm_nic, otherwise we first - * check to see if there is a cm_nic already for this domain - * and just use it. 
- */ - - if (info->src_addr) { - /*TODO (optimization): strchr to name_type and strtol */ - _gnix_get_ep_name(info->src_addr, 0, &ep_name, domain); - name_type = ep_name.name_type; - } - - GNIX_INFO(FI_LOG_EP_CTRL, "creating cm_nic for %u/0x%x/%u\n", - auth_key->ptag, auth_key->cookie, cdm_id); - - cm_nic = (struct gnix_cm_nic *)calloc(1, sizeof(*cm_nic)); - if (cm_nic == NULL) { - ret = -FI_ENOMEM; - goto err; - } - - /* - * we have to force allocation of a new nic since we want - * an a particular cdm id - */ - nic_attr.must_alloc = true; - nic_attr.use_cdm_id = true; - nic_attr.cdm_id = cdm_id; - nic_attr.auth_key = auth_key; - - ret = gnix_nic_alloc(domain, &nic_attr, &cm_nic->nic); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "gnix_nic_alloc returned %s\n", - fi_strerror(-ret)); - goto err; - } - - cm_nic->my_name.gnix_addr.cdm_id = cdm_id; - cm_nic->ptag = auth_key->ptag; - cm_nic->my_name.cookie = auth_key->cookie; - cm_nic->my_name.gnix_addr.device_addr = cm_nic->nic->device_addr; - cm_nic->domain = domain; - cm_nic->ctrl_progress = domain->control_progress; - cm_nic->my_name.name_type = name_type; - cm_nic->poll_cnt = 0; - ofi_spin_init(&cm_nic->wq_lock); - dlist_init(&cm_nic->cm_nic_wq); - - /* - * prep the cm nic's dgram component - */ - if (domain->control_progress == FI_PROGRESS_AUTO) { - dgram_hndl_attr.timeout_needed = __gnix_cm_nic_timeout_needed; - dgram_hndl_attr.timeout_progress = __gnix_cm_nic_timeout_progress; - dgram_hndl_attr.timeout_data = (void *)cm_nic; - dgram_hndl_attr.timeout = domain->params.dgram_progress_timeout; - dgram_hndl_attr_ptr = &dgram_hndl_attr; - }; - - ret = _gnix_dgram_hndl_alloc(cm_nic, - domain->control_progress, - dgram_hndl_attr_ptr, - &cm_nic->dgram_hndl); - if (ret != FI_SUCCESS) - goto err; - - /* - * allocate hash table for translating ep addresses - * to ep's. 
- * This table will not be large - how many FI_EP_RDM ep's - * will an app create using one domain?, nor in the critical path - * so just use defaults. - */ - cm_nic->addr_to_ep_ht = calloc(1, sizeof(struct gnix_hashtable)); - if (cm_nic->addr_to_ep_ht == NULL) - goto err; - - gnix_ht_attr.ht_initial_size = 64; - gnix_ht_attr.ht_maximum_size = 1024; - gnix_ht_attr.ht_increase_step = 2; - gnix_ht_attr.ht_increase_type = GNIX_HT_INCREASE_MULT; - gnix_ht_attr.ht_collision_thresh = 500; - gnix_ht_attr.ht_hash_seed = 0xdeadbeefbeefdead; - gnix_ht_attr.ht_internal_locking = 1; - gnix_ht_attr.destructor = NULL; - - ret = _gnix_ht_init(cm_nic->addr_to_ep_ht, &gnix_ht_attr); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "gnix_ht_init returned %s\n", - fi_strerror(-ret)); - goto err; - } - - _gnix_ref_init(&cm_nic->ref_cnt, 1, __cm_nic_destruct); - - *cm_nic_ptr = cm_nic; - - pthread_mutex_lock(&gnix_cm_nic_list_lock); - dlist_insert_tail(&cm_nic->cm_nic_list, &gnix_cm_nic_list); - pthread_mutex_unlock(&gnix_cm_nic_list_lock); - - return ret; - -err: - if (cm_nic->dgram_hndl) - _gnix_dgram_hndl_free(cm_nic->dgram_hndl); - - if (cm_nic->nic) - _gnix_nic_free(cm_nic->nic); - - if (cm_nic->addr_to_ep_ht) { - _gnix_ht_destroy(cm_nic->addr_to_ep_ht); - free(cm_nic->addr_to_ep_ht); - } - - if (cm_nic != NULL) - free(cm_nic); - - return ret; -} diff --git a/prov/gni/src/gnix_cntr.c b/prov/gni/src/gnix_cntr.c deleted file mode 100644 index 5ecff2aa9f4..00000000000 --- a/prov/gni/src/gnix_cntr.c +++ /dev/null @@ -1,510 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. - * All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* - * CNTR common code - */ -#include -#include -#include - -#include "gnix.h" -#include "gnix_cntr.h" -#include "gnix_nic.h" -#include "gnix_trigger.h" - -/******************************************************************************* - * Forward declarations for filling functions. - ******************************************************************************/ - -/******************************************************************************* - * Forward declarations for ops structures. 
- ******************************************************************************/ -static struct fi_ops gnix_cntr_fi_ops; -static struct fi_ops_cntr gnix_cntr_ops; - -/******************************************************************************* - * Internal helper functions - ******************************************************************************/ - -static int __verify_cntr_attr(struct fi_cntr_attr *attr) -{ - int ret = FI_SUCCESS; - - GNIX_TRACE(FI_LOG_CQ, "\n"); - - if (!attr) - return -FI_EINVAL; - - if (attr->events != FI_CNTR_EVENTS_COMP) { - GNIX_WARN(FI_LOG_CQ, "cntr event type: %d unsupported.\n", - attr->events); - return -FI_EINVAL; - } - - switch (attr->wait_obj) { - case FI_WAIT_UNSPEC: - case FI_WAIT_NONE: - case FI_WAIT_SET: - break; - case FI_WAIT_FD: - case FI_WAIT_MUTEX_COND: - default: - GNIX_WARN(FI_LOG_CQ, "wait type: %d unsupported.\n", - attr->wait_obj); - return -FI_EINVAL; - } - - return ret; -} - -static int gnix_cntr_set_wait(struct gnix_fid_cntr *cntr) -{ - int ret = FI_SUCCESS; - - GNIX_TRACE(FI_LOG_EQ, "\n"); - - struct fi_wait_attr requested = { - .wait_obj = cntr->attr.wait_obj, - .flags = 0 - }; - - switch (cntr->attr.wait_obj) { - case FI_WAIT_UNSPEC: - ret = gnix_wait_open(&cntr->domain->fabric->fab_fid, - &requested, &cntr->wait); - break; - case FI_WAIT_SET: - ret = _gnix_wait_set_add(cntr->attr.wait_set, - &cntr->cntr_fid.fid); - - if (!ret) - cntr->wait = cntr->attr.wait_set; - break; - default: - break; - } - - return ret; -} - -static int __gnix_cntr_progress(struct gnix_fid_cntr *cntr) -{ - return _gnix_prog_progress(&cntr->pset); -} - -/******************************************************************************* - * Exposed helper functions - ******************************************************************************/ - -int _gnix_cntr_inc(struct gnix_fid_cntr *cntr) -{ - if (cntr == NULL) - return -FI_EINVAL; - - ofi_atomic_inc32(&cntr->cnt); - - if (cntr->wait) - _gnix_signal_wait_obj(cntr->wait); - - if 
(_gnix_trigger_pending(cntr)) - _gnix_trigger_check_cntr(cntr); - - return FI_SUCCESS; -} - -int _gnix_cntr_inc_err(struct gnix_fid_cntr *cntr) -{ - if (cntr == NULL) - return -FI_EINVAL; - - ofi_atomic_inc32(&cntr->cnt_err); - - if (cntr->wait) - _gnix_signal_wait_obj(cntr->wait); - - return FI_SUCCESS; -} - -int _gnix_cntr_poll_obj_add(struct gnix_fid_cntr *cntr, void *obj, - int (*prog_fn)(void *data)) -{ - return _gnix_prog_obj_add(&cntr->pset, obj, prog_fn); -} - -int _gnix_cntr_poll_obj_rem(struct gnix_fid_cntr *cntr, void *obj, - int (*prog_fn)(void *data)) -{ - return _gnix_prog_obj_rem(&cntr->pset, obj, prog_fn); -} - -/******************************************************************************* - * API functions. - ******************************************************************************/ - -static int gnix_cntr_wait_sleep(struct gnix_fid_cntr *cntr_priv, - uint64_t threshold, int timeout) -{ - int ret = FI_SUCCESS; - struct timespec ts0, ts; - int msec_passed = 0; - - clock_gettime(CLOCK_REALTIME, &ts0); - while (ofi_atomic_get32(&cntr_priv->cnt) < threshold && - ofi_atomic_get32(&cntr_priv->cnt_err) == 0) { - - ret = gnix_wait_wait((struct fid_wait *)cntr_priv->wait, - timeout - msec_passed); - if (ret == -FI_ETIMEDOUT) - break; - - if (ret) { - GNIX_WARN(FI_LOG_CQ, - " fi_wait returned %d.\n", - ret); - break; - } - - if (ofi_atomic_get32(&cntr_priv->cnt) >= threshold) - break; - - if (timeout < 0) - continue; - - clock_gettime(CLOCK_REALTIME, &ts); - msec_passed = (ts.tv_sec - ts0.tv_sec) * 1000 + - (ts.tv_nsec - ts0.tv_nsec) / 100000; - - if (msec_passed >= timeout) { - ret = -FI_ETIMEDOUT; - break; - } - } - - return (ofi_atomic_get32(&cntr_priv->cnt_err)) ? 
-FI_EAVAIL : ret; -} - - -DIRECT_FN STATIC int gnix_cntr_wait(struct fid_cntr *cntr, uint64_t threshold, - int timeout) -{ - struct gnix_fid_cntr *cntr_priv; - - cntr_priv = container_of(cntr, struct gnix_fid_cntr, cntr_fid); - if (!cntr_priv->wait) - return -FI_EINVAL; - - if (cntr_priv->attr.wait_obj == FI_WAIT_SET || - cntr_priv->attr.wait_obj == FI_WAIT_NONE) - return -FI_EINVAL; - - return gnix_cntr_wait_sleep(cntr_priv, threshold, timeout); -} - -DIRECT_FN STATIC int gnix_cntr_adderr(struct fid_cntr *cntr, uint64_t value) -{ - struct gnix_fid_cntr *cntr_priv; - - cntr_priv = container_of(cntr, struct gnix_fid_cntr, cntr_fid); - if (FI_VERSION_LT(cntr_priv->domain->fabric->fab_fid.api_version, FI_VERSION(1, 5))) - return -FI_EOPNOTSUPP; - - ofi_atomic_add32(&cntr_priv->cnt_err, (int)value); - - if (cntr_priv->wait) - _gnix_signal_wait_obj(cntr_priv->wait); - - return FI_SUCCESS; -} - -DIRECT_FN STATIC int gnix_cntr_seterr(struct fid_cntr *cntr, uint64_t value) -{ - struct gnix_fid_cntr *cntr_priv; - - cntr_priv = container_of(cntr, struct gnix_fid_cntr, cntr_fid); - - if (FI_VERSION_LT(cntr_priv->domain->fabric->fab_fid.api_version, FI_VERSION(1, 5))) - return -FI_EOPNOTSUPP; - - ofi_atomic_set32(&cntr_priv->cnt_err, (int)value); - - if (cntr_priv->wait) - _gnix_signal_wait_obj(cntr_priv->wait); - - return FI_SUCCESS; -} - -static void __cntr_destruct(void *obj) -{ - struct gnix_fid_cntr *cntr = (struct gnix_fid_cntr *) obj; - - _gnix_ref_put(cntr->domain); - - switch (cntr->attr.wait_obj) { - case FI_WAIT_NONE: - break; - case FI_WAIT_SET: - _gnix_wait_set_remove(cntr->wait, &cntr->cntr_fid.fid); - break; - case FI_WAIT_UNSPEC: - case FI_WAIT_FD: - case FI_WAIT_MUTEX_COND: - assert(cntr->wait); - gnix_wait_close(&cntr->wait->fid); - break; - default: - GNIX_WARN(FI_LOG_CQ, "format: %d unsupported.\n", - cntr->attr.wait_obj); - break; - } - - _gnix_prog_fini(&cntr->pset); - - free(cntr); -} - -static int gnix_cntr_close(fid_t fid) -{ - struct gnix_fid_cntr 
*cntr; - int references_held; - - GNIX_TRACE(FI_LOG_CQ, "\n"); - - cntr = container_of(fid, struct gnix_fid_cntr, cntr_fid.fid); - - /* applications should never call close more than once. */ - references_held = _gnix_ref_put(cntr); - if (references_held) { - GNIX_INFO(FI_LOG_CQ, "failed to fully close cntr due to lingering " - "references. references=%i cntr=%p\n", - references_held, cntr); - } - - return FI_SUCCESS; -} - -DIRECT_FN STATIC uint64_t gnix_cntr_readerr(struct fid_cntr *cntr) -{ - int v, ret; - struct gnix_fid_cntr *cntr_priv; - - if (cntr == NULL) - return -FI_EINVAL; - - cntr_priv = container_of(cntr, struct gnix_fid_cntr, cntr_fid); - v = ofi_atomic_get32(&cntr_priv->cnt_err); - - ret = __gnix_cntr_progress(cntr_priv); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_CQ, " __gnix_cntr_progress returned %d.\n", - ret); - - return (uint64_t)v; -} - -DIRECT_FN STATIC uint64_t gnix_cntr_read(struct fid_cntr *cntr) -{ - int v, ret; - struct gnix_fid_cntr *cntr_priv; - - if (cntr == NULL) - return -FI_EINVAL; - - cntr_priv = container_of(cntr, struct gnix_fid_cntr, cntr_fid); - - if (cntr_priv->wait) - gnix_wait_wait((struct fid_wait *)cntr_priv->wait, 0); - - ret = __gnix_cntr_progress(cntr_priv); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_CQ, " __gnix_cntr_progress returned %d.\n", - ret); - - v = ofi_atomic_get32(&cntr_priv->cnt); - - return (uint64_t)v; -} - -DIRECT_FN STATIC int gnix_cntr_add(struct fid_cntr *cntr, uint64_t value) -{ - struct gnix_fid_cntr *cntr_priv; - - if (cntr == NULL) - return -FI_EINVAL; - - cntr_priv = container_of(cntr, struct gnix_fid_cntr, cntr_fid); - ofi_atomic_add32(&cntr_priv->cnt, (int)value); - - if (cntr_priv->wait) - _gnix_signal_wait_obj(cntr_priv->wait); - - _gnix_trigger_check_cntr(cntr_priv); - - return FI_SUCCESS; -} - -DIRECT_FN STATIC int gnix_cntr_set(struct fid_cntr *cntr, uint64_t value) -{ - struct gnix_fid_cntr *cntr_priv; - - if (cntr == NULL) - return -FI_EINVAL; - - cntr_priv = container_of(cntr, struct 
gnix_fid_cntr, cntr_fid); - ofi_atomic_set32(&cntr_priv->cnt, (int)value); - - if (cntr_priv->wait) - _gnix_signal_wait_obj(cntr_priv->wait); - - _gnix_trigger_check_cntr(cntr_priv); - - return FI_SUCCESS; -} - -static int gnix_cntr_control(struct fid *cntr, int command, void *arg) -{ - struct gnix_fid_cntr *cntr_priv; - - if (cntr == NULL) - return -FI_EINVAL; - - cntr_priv = container_of(cntr, struct gnix_fid_cntr, cntr_fid); - - switch (command) { - case FI_SETOPSFLAG: - cntr_priv->attr.flags = *(uint64_t *)arg; - break; - case FI_GETOPSFLAG: - if (!arg) - return -FI_EINVAL; - *(uint64_t *)arg = cntr_priv->attr.flags; - break; - case FI_GETWAIT: - /* return _gnix_get_wait_obj(cntr_priv->wait, arg); */ - return -FI_ENOSYS; - default: - return -FI_EINVAL; - } - - return FI_SUCCESS; - -} - - -DIRECT_FN int gnix_cntr_open(struct fid_domain *domain, - struct fi_cntr_attr *attr, - struct fid_cntr **cntr, void *context) -{ - int ret = FI_SUCCESS; - struct gnix_fid_domain *domain_priv; - struct gnix_fid_cntr *cntr_priv; - - GNIX_TRACE(FI_LOG_CQ, "\n"); - - ret = __verify_cntr_attr(attr); - if (ret) - goto err; - - domain_priv = container_of(domain, struct gnix_fid_domain, domain_fid); - if (!domain_priv) { - ret = -FI_EINVAL; - goto err; - } - - cntr_priv = calloc(1, sizeof(*cntr_priv)); - if (!cntr_priv) { - ret = -FI_ENOMEM; - goto err; - } - - cntr_priv->requires_lock = (domain_priv->thread_model != - FI_THREAD_COMPLETION); - - cntr_priv->domain = domain_priv; - cntr_priv->attr = *attr; - /* ref count is initialized to one to show that the counter exists */ - _gnix_ref_init(&cntr_priv->ref_cnt, 1, __cntr_destruct); - - /* initialize atomics */ - ofi_atomic_initialize32(&cntr_priv->cnt, 0); - ofi_atomic_initialize32(&cntr_priv->cnt_err, 0); - - _gnix_ref_get(cntr_priv->domain); - - _gnix_prog_init(&cntr_priv->pset); - - dlist_init(&cntr_priv->trigger_list); - ofi_spin_init(&cntr_priv->trigger_lock); - - ret = gnix_cntr_set_wait(cntr_priv); - if (ret) - goto err_wait; 
- - cntr_priv->cntr_fid.fid.fclass = FI_CLASS_CNTR; - cntr_priv->cntr_fid.fid.context = context; - cntr_priv->cntr_fid.fid.ops = &gnix_cntr_fi_ops; - cntr_priv->cntr_fid.ops = &gnix_cntr_ops; - - *cntr = &cntr_priv->cntr_fid; - return ret; - -err_wait: - _gnix_ref_put(cntr_priv->domain); - free(cntr_priv); -err: - return ret; -} - - -/******************************************************************************* - * FI_OPS_* data structures. - ******************************************************************************/ -static struct fi_ops gnix_cntr_fi_ops = { - .size = sizeof(struct fi_ops), - .close = gnix_cntr_close, - .bind = fi_no_bind, - .control = gnix_cntr_control, - .ops_open = fi_no_ops_open, -}; - -static struct fi_ops_cntr gnix_cntr_ops = { - .size = sizeof(struct fi_ops_cntr), - .readerr = gnix_cntr_readerr, - .read = gnix_cntr_read, - .add = gnix_cntr_add, - .set = gnix_cntr_set, - .wait = gnix_cntr_wait, - .adderr = gnix_cntr_adderr, - .seterr = gnix_cntr_seterr -}; diff --git a/prov/gni/src/gnix_cq.c b/prov/gni/src/gnix_cq.c deleted file mode 100644 index 36b43a20278..00000000000 --- a/prov/gni/src/gnix_cq.c +++ /dev/null @@ -1,766 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* - * CQ common code - */ -#include -#include -#include - -#include "gnix.h" -#include "gnix_cq.h" -#include "gnix_nic.h" -#include "gnix_cm_nic.h" - -/******************************************************************************* - * Function pointer for filling specific entry format type. - ******************************************************************************/ -typedef void (*fill_entry)(void *cq_entry, void *op_context, uint64_t flags, - size_t len, void *buf, uint64_t data, uint64_t tag); - -/******************************************************************************* - * Forward declarations for filling functions. 
- ******************************************************************************/ -static void fill_cq_entry(void *cq_entry, void *op_context, uint64_t flags, - size_t len, void *buf, uint64_t data, uint64_t tag); -static void fill_cq_msg(void *cq_entry, void *op_context, uint64_t flags, - size_t len, void *buf, uint64_t data, uint64_t tag); -static void fill_cq_data(void *cq_entry, void *op_context, uint64_t flags, - size_t len, void *buf, uint64_t data, uint64_t tag); -static void fill_cq_tagged(void *cq_entry, void *op_context, uint64_t flags, - size_t len, void *buf, uint64_t data, uint64_t tag); - -/******************************************************************************* - * Forward declarations for ops structures. - ******************************************************************************/ -static const struct fi_ops gnix_cq_fi_ops; -static const struct fi_ops_cq gnix_cq_ops; - -/******************************************************************************* - * Size array corresponding format type to format size. 
- ******************************************************************************/ -static const size_t format_sizes[] = { - [FI_CQ_FORMAT_UNSPEC] = sizeof(GNIX_CQ_DEFAULT_FORMAT), - [FI_CQ_FORMAT_CONTEXT] = sizeof(struct fi_cq_entry), - [FI_CQ_FORMAT_MSG] = sizeof(struct fi_cq_msg_entry), - [FI_CQ_FORMAT_DATA] = sizeof(struct fi_cq_data_entry), - [FI_CQ_FORMAT_TAGGED] = sizeof(struct fi_cq_tagged_entry) -}; - -static const fill_entry fill_function[] = { - [FI_CQ_FORMAT_UNSPEC] = fill_cq_entry, - [FI_CQ_FORMAT_CONTEXT] = fill_cq_entry, - [FI_CQ_FORMAT_MSG] = fill_cq_msg, - [FI_CQ_FORMAT_DATA] = fill_cq_data, - [FI_CQ_FORMAT_TAGGED] = fill_cq_tagged -}; - -/******************************************************************************* - * Internal helper functions - ******************************************************************************/ -static void fill_cq_entry(void *cq_entry, void *op_context, uint64_t flags, - size_t len, void *buf, uint64_t data, uint64_t tag) -{ - struct fi_cq_entry *entry = cq_entry; - - entry->op_context = op_context; -} - -static void fill_cq_msg(void *cq_entry, void *op_context, uint64_t flags, - size_t len, void *buf, uint64_t data, uint64_t tag) -{ - struct fi_cq_msg_entry *entry = cq_entry; - - entry->op_context = op_context; - entry->flags = flags; - entry->len = len; -} - -static void fill_cq_data(void *cq_entry, void *op_context, uint64_t flags, - size_t len, void *buf, uint64_t data, uint64_t tag) -{ - struct fi_cq_data_entry *entry = cq_entry; - - entry->op_context = op_context; - entry->flags = flags; - entry->len = len; - entry->buf = buf; - entry->data = data; -} - -static void fill_cq_tagged(void *cq_entry, void *op_context, uint64_t flags, - size_t len, void *buf, uint64_t data, uint64_t tag) -{ - struct fi_cq_tagged_entry *entry = cq_entry; - - entry->op_context = op_context; - entry->flags = flags; - entry->buf = buf; - entry->data = data; - entry->tag = tag; - entry->len = len; -} - -static int verify_cq_attr(struct 
fi_cq_attr *attr, struct fi_ops_cq *ops, - struct fi_ops *fi_cq_ops) -{ - GNIX_TRACE(FI_LOG_CQ, "\n"); - - if (!attr || !ops || !fi_cq_ops) - return -FI_EINVAL; - - if (!attr->size) - attr->size = GNIX_CQ_DEFAULT_SIZE; - - switch (attr->format) { - case FI_CQ_FORMAT_UNSPEC: - attr->format = FI_CQ_FORMAT_CONTEXT; - case FI_CQ_FORMAT_CONTEXT: - case FI_CQ_FORMAT_MSG: - case FI_CQ_FORMAT_DATA: - case FI_CQ_FORMAT_TAGGED: - break; - default: - GNIX_WARN(FI_LOG_CQ, "format: %d unsupported.\n", - attr->format); - return -FI_EINVAL; - } - - switch (attr->wait_obj) { - case FI_WAIT_NONE: - ops->sread = fi_no_cq_sread; - ops->signal = fi_no_cq_signal; - ops->sreadfrom = fi_no_cq_sreadfrom; - fi_cq_ops->control = fi_no_control; - break; - case FI_WAIT_SET: - if (!attr->wait_set) { - GNIX_WARN(FI_LOG_CQ, - "FI_WAIT_SET is set, but wait_set field doesn't reference a wait object.\n"); - return -FI_EINVAL; - } - break; - case FI_WAIT_UNSPEC: - break; - case FI_WAIT_FD: - case FI_WAIT_MUTEX_COND: - default: - GNIX_WARN(FI_LOG_CQ, "wait type: %d unsupported.\n", - attr->wait_obj); - return -FI_EINVAL; - } - - return FI_SUCCESS; -} - -static int gnix_cq_set_wait(struct gnix_fid_cq *cq) -{ - int ret = FI_SUCCESS; - - GNIX_TRACE(FI_LOG_CQ, "\n"); - - struct fi_wait_attr requested = { - .wait_obj = cq->attr.wait_obj, - .flags = 0 - }; - - switch (cq->attr.wait_obj) { - case FI_WAIT_UNSPEC: - case FI_WAIT_FD: - case FI_WAIT_MUTEX_COND: - ret = gnix_wait_open(&cq->domain->fabric->fab_fid, - &requested, &cq->wait); - break; - case FI_WAIT_SET: - ret = _gnix_wait_set_add(cq->attr.wait_set, &cq->cq_fid.fid); - if (!ret) - cq->wait = cq->attr.wait_set; - - break; - default: - break; - } - - return ret; -} - -static void free_cq_entry(struct slist_entry *item) -{ - struct gnix_cq_entry *entry; - - entry = container_of(item, struct gnix_cq_entry, item); - - free(entry->the_entry); - free(entry); -} - -static struct slist_entry *alloc_cq_entry(size_t size) -{ - struct gnix_cq_entry *entry = 
malloc(sizeof(*entry)); - - if (!entry) { - GNIX_DEBUG(FI_LOG_CQ, "out of memory\n"); - goto err; - } - - entry->the_entry = malloc(size); - if (!entry->the_entry) { - GNIX_DEBUG(FI_LOG_CQ, "out of memory\n"); - goto cleanup; - } - - return &entry->item; - -cleanup: - free(entry); -err: - return NULL; -} - -static int __gnix_cq_progress(struct gnix_fid_cq *cq) -{ - return _gnix_prog_progress(&cq->pset); -} - -/******************************************************************************* - * Exposed helper functions - ******************************************************************************/ -ssize_t _gnix_cq_add_event(struct gnix_fid_cq *cq, struct gnix_fid_ep *ep, - void *op_context, uint64_t flags, size_t len, - void *buf, uint64_t data, uint64_t tag, - fi_addr_t src_addr) -{ - struct gnix_cq_entry *event; - struct slist_entry *item; - uint64_t mask; - ssize_t ret = FI_SUCCESS; - - if (ep) { - if (ep->info && ep->info->mode & FI_NOTIFY_FLAGS_ONLY) { - mask = (FI_REMOTE_CQ_DATA | FI_MULTI_RECV); - - if (flags & FI_RMA_EVENT) { - mask |= (FI_REMOTE_READ | FI_REMOTE_WRITE | - FI_RMA); - } - - flags &= mask; - } - } - - COND_ACQUIRE(cq->requires_lock, &cq->lock); - - item = _gnix_queue_get_free(cq->events); - if (!item) { - GNIX_DEBUG(FI_LOG_CQ, "error creating cq_entry\n"); - ret = -FI_ENOMEM; - goto err; - } - - event = container_of(item, struct gnix_cq_entry, item); - - assert(event->the_entry); - - fill_function[cq->attr.format](event->the_entry, op_context, flags, - len, buf, data, tag); - event->src_addr = src_addr; - - _gnix_queue_enqueue(cq->events, &event->item); - GNIX_DEBUG(FI_LOG_CQ, "Added event: %lx\n", op_context); - - if (cq->wait) - _gnix_signal_wait_obj(cq->wait); - -err: - COND_RELEASE(cq->requires_lock, &cq->lock); - - return ret; -} - -ssize_t _gnix_cq_add_error(struct gnix_fid_cq *cq, void *op_context, - uint64_t flags, size_t len, void *buf, - uint64_t data, uint64_t tag, size_t olen, - int err, int prov_errno, void *err_data, - size_t 
err_data_size) -{ - struct fi_cq_err_entry *error; - struct gnix_cq_entry *event; - struct slist_entry *item; - - ssize_t ret = FI_SUCCESS; - - GNIX_INFO(FI_LOG_CQ, "creating error event entry\n"); - - - COND_ACQUIRE(cq->requires_lock, &cq->lock); - - item = _gnix_queue_get_free(cq->errors); - if (!item) { - GNIX_WARN(FI_LOG_CQ, "error creating error entry\n"); - ret = -FI_ENOMEM; - goto err; - } - - event = container_of(item, struct gnix_cq_entry, item); - - error = event->the_entry; - - error->op_context = op_context; - error->flags = flags; - error->len = len; - error->buf = buf; - error->data = data; - error->tag = tag; - error->olen = olen; - error->err = err; - error->prov_errno = prov_errno; - error->err_data = err_data; - error->err_data_size = err_data_size; - - _gnix_queue_enqueue(cq->errors, &event->item); - - if (cq->wait) - _gnix_signal_wait_obj(cq->wait); - -err: - COND_RELEASE(cq->requires_lock, &cq->lock); - - return ret; -} - -int _gnix_cq_poll_obj_add(struct gnix_fid_cq *cq, void *obj, - int (*prog_fn)(void *data)) -{ - return _gnix_prog_obj_add(&cq->pset, obj, prog_fn); -} - -int _gnix_cq_poll_obj_rem(struct gnix_fid_cq *cq, void *obj, - int (*prog_fn)(void *data)) -{ - return _gnix_prog_obj_rem(&cq->pset, obj, prog_fn); -} - -static void __cq_destruct(void *obj) -{ - struct gnix_fid_cq *cq = (struct gnix_fid_cq *) obj; - - _gnix_ref_put(cq->domain); - - switch (cq->attr.wait_obj) { - case FI_WAIT_NONE: - break; - case FI_WAIT_SET: - _gnix_wait_set_remove(cq->wait, &cq->cq_fid.fid); - break; - case FI_WAIT_UNSPEC: - case FI_WAIT_FD: - case FI_WAIT_MUTEX_COND: - assert(cq->wait); - gnix_wait_close(&cq->wait->fid); - break; - default: - GNIX_WARN(FI_LOG_CQ, "format: %d unsupported.\n", - cq->attr.wait_obj); - break; - } - - _gnix_prog_fini(&cq->pset); - - _gnix_queue_destroy(cq->events); - _gnix_queue_destroy(cq->errors); - - ofi_spin_destroy(&cq->lock); - free(cq->cq_fid.ops); - free(cq->cq_fid.fid.ops); - free(cq); -} - 
-/******************************************************************************* - * API functions. - ******************************************************************************/ -static int gnix_cq_close(fid_t fid) -{ - struct gnix_fid_cq *cq; - int references_held; - - GNIX_TRACE(FI_LOG_CQ, "\n"); - - cq = container_of(fid, struct gnix_fid_cq, cq_fid); - - references_held = _gnix_ref_put(cq); - - if (references_held) { - GNIX_INFO(FI_LOG_CQ, "failed to fully close cq due to lingering " - "references. references=%i cq=%p\n", - references_held, cq); - } - - return FI_SUCCESS; -} - -static ssize_t __gnix_cq_readfrom(struct fid_cq *cq, void *buf, - size_t count, fi_addr_t *src_addr) -{ - struct gnix_fid_cq *cq_priv; - struct gnix_cq_entry *event; - struct slist_entry *temp; - - ssize_t read_count = 0; - - if (!cq || !buf || !count) - return -FI_EINVAL; - - cq_priv = container_of(cq, struct gnix_fid_cq, cq_fid); - - __gnix_cq_progress(cq_priv); - - if (_gnix_queue_peek(cq_priv->errors)) - return -FI_EAVAIL; - - COND_ACQUIRE(cq_priv->requires_lock, &cq_priv->lock); - - while (_gnix_queue_peek(cq_priv->events) && count--) { - temp = _gnix_queue_dequeue(cq_priv->events); - event = container_of(temp, struct gnix_cq_entry, item); - - assert(event->the_entry); - memcpy(buf, event->the_entry, cq_priv->entry_size); - if (src_addr) - memcpy(&src_addr[read_count], &event->src_addr, sizeof(fi_addr_t)); - - _gnix_queue_enqueue_free(cq_priv->events, &event->item); - - buf = (void *) ((uint8_t *) buf + cq_priv->entry_size); - - read_count++; - } - - COND_RELEASE(cq_priv->requires_lock, &cq_priv->lock); - - return read_count ?: -FI_EAGAIN; -} - -static ssize_t __gnix_cq_sreadfrom(int blocking, struct fid_cq *cq, void *buf, - size_t count, fi_addr_t *src_addr, - const void *cond, int timeout) -{ - struct gnix_fid_cq *cq_priv; - - cq_priv = container_of(cq, struct gnix_fid_cq, cq_fid); - if ((blocking && !cq_priv->wait) || - (blocking && cq_priv->attr.wait_obj == FI_WAIT_SET)) - 
return -FI_EINVAL; - - if (_gnix_queue_peek(cq_priv->errors)) - return -FI_EAVAIL; - - if (cq_priv->wait) - gnix_wait_wait((struct fid_wait *)cq_priv->wait, timeout); - - - return __gnix_cq_readfrom(cq, buf, count, src_addr); - -} - -DIRECT_FN STATIC ssize_t gnix_cq_sreadfrom(struct fid_cq *cq, void *buf, - size_t count, fi_addr_t *src_addr, - const void *cond, int timeout) -{ - return __gnix_cq_sreadfrom(1, cq, buf, count, src_addr, cond, timeout); -} - -DIRECT_FN STATIC ssize_t gnix_cq_read(struct fid_cq *cq, - void *buf, - size_t count) -{ - return __gnix_cq_sreadfrom(0, cq, buf, count, NULL, NULL, 0); -} - -DIRECT_FN STATIC ssize_t gnix_cq_sread(struct fid_cq *cq, void *buf, - size_t count, const void *cond, - int timeout) -{ - return __gnix_cq_sreadfrom(1, cq, buf, count, NULL, cond, timeout); -} - -DIRECT_FN STATIC ssize_t gnix_cq_readfrom(struct fid_cq *cq, void *buf, - size_t count, fi_addr_t *src_addr) -{ - return __gnix_cq_sreadfrom(0, cq, buf, count, src_addr, NULL, 0); -} - -DIRECT_FN STATIC ssize_t gnix_cq_readerr(struct fid_cq *cq, - struct fi_cq_err_entry *buf, - uint64_t flags) -{ - struct gnix_fid_cq *cq_priv; - struct gnix_cq_entry *event; - struct slist_entry *entry; - size_t err_data_cpylen; - struct fi_cq_err_entry *gnix_cq_err; - - ssize_t read_count = 0; - - if (!cq || !buf) - return -FI_EINVAL; - - cq_priv = container_of(cq, struct gnix_fid_cq, cq_fid); - - /* - * we need to progress cq. some apps may be only using - * cq to check for errors. 
- */ - - _gnix_prog_progress(&cq_priv->pset); - - COND_ACQUIRE(cq_priv->requires_lock, &cq_priv->lock); - - entry = _gnix_queue_dequeue(cq_priv->errors); - if (!entry) { - read_count = -FI_EAGAIN; - goto err; - } - - event = container_of(entry, struct gnix_cq_entry, item); - gnix_cq_err = event->the_entry; - - buf->op_context = gnix_cq_err->op_context; - buf->flags = gnix_cq_err->flags; - buf->len = gnix_cq_err->len; - buf->buf = gnix_cq_err->buf; - buf->data = gnix_cq_err->data; - buf->tag = gnix_cq_err->tag; - buf->olen = gnix_cq_err->olen; - buf->err = gnix_cq_err->err; - buf->prov_errno = gnix_cq_err->prov_errno; - - if (gnix_cq_err->err_data != NULL) { - /* - * Note: If the api version is >= 1.5 then copy err_data into - * buf->err_data and copy at most buf->err_data_size. - * If buf->err_data_size is zero or the api version is < 1.5, - * use the old method of allocating space in provider. - */ - if (FI_VERSION_LT(cq_priv->domain->fabric->fab_fid.api_version, - FI_VERSION(1, 5)) || buf->err_data_size == 0) { - err_data_cpylen = sizeof(cq_priv->err_data); - - memcpy(cq_priv->err_data, gnix_cq_err->err_data, - err_data_cpylen); - - buf->err_data = cq_priv->err_data; - } else { - if (buf->err_data == NULL) - return -FI_EINVAL; - - err_data_cpylen = MIN(buf->err_data_size, - gnix_cq_err->err_data_size); - memcpy(buf->err_data, gnix_cq_err->err_data, err_data_cpylen); - buf->err_data_size = err_data_cpylen; - } - free(gnix_cq_err->err_data); - gnix_cq_err->err_data = NULL; - } else { - if (FI_VERSION_LT(cq_priv->domain->fabric->fab_fid.api_version, - FI_VERSION(1, 5))) { - buf->err_data = NULL; - } else { - buf->err_data_size = 0; - } - } - - _gnix_queue_enqueue_free(cq_priv->errors, &event->item); - - read_count++; - -err: - COND_RELEASE(cq_priv->requires_lock, &cq_priv->lock); - - return read_count; -} - -DIRECT_FN STATIC const char *gnix_cq_strerror(struct fid_cq *cq, int prov_errno, - const void *prov_data, char *buf, - size_t len) -{ - return NULL; -} - 
-DIRECT_FN STATIC int gnix_cq_signal(struct fid_cq *cq) -{ - struct gnix_fid_cq *cq_priv; - - cq_priv = container_of(cq, struct gnix_fid_cq, cq_fid); - - if (cq_priv->wait) - _gnix_signal_wait_obj(cq_priv->wait); - - return FI_SUCCESS; -} - -static int gnix_cq_control(struct fid *cq, int command, void *arg) -{ - - switch (command) { - case FI_GETWAIT: - return -FI_ENOSYS; - default: - return -FI_EINVAL; - } -} - - -DIRECT_FN int gnix_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, - struct fid_cq **cq, void *context) -{ - struct gnix_fid_domain *domain_priv; - struct gnix_fid_cq *cq_priv; - struct fi_ops_cq *cq_ops; - struct fi_ops *fi_cq_ops; - - int ret = FI_SUCCESS; - - GNIX_TRACE(FI_LOG_CQ, "\n"); - - cq_ops = calloc(1, sizeof(*cq_ops)); - if (!cq_ops) { - return -FI_ENOMEM; - } - - fi_cq_ops = calloc(1, sizeof(*fi_cq_ops)); - if (!fi_cq_ops) { - ret = -FI_ENOMEM; - goto free_cq_ops; - } - - *cq_ops = gnix_cq_ops; - *fi_cq_ops = gnix_cq_fi_ops; - - ret = verify_cq_attr(attr, cq_ops, fi_cq_ops); - if (ret) - goto free_fi_cq_ops; - - domain_priv = container_of(domain, struct gnix_fid_domain, domain_fid); - if (!domain_priv) { - ret = -FI_EINVAL; - goto free_fi_cq_ops; - } - - cq_priv = calloc(1, sizeof(*cq_priv)); - if (!cq_priv) { - ret = -FI_ENOMEM; - goto free_fi_cq_ops; - } - - cq_priv->requires_lock = (domain_priv->thread_model != - FI_THREAD_COMPLETION); - - cq_priv->domain = domain_priv; - cq_priv->attr = *attr; - - _gnix_ref_init(&cq_priv->ref_cnt, 1, __cq_destruct); - _gnix_ref_get(cq_priv->domain); - - _gnix_prog_init(&cq_priv->pset); - - cq_priv->cq_fid.fid.fclass = FI_CLASS_CQ; - cq_priv->cq_fid.fid.context = context; - cq_priv->cq_fid.fid.ops = fi_cq_ops; - cq_priv->cq_fid.ops = cq_ops; - - /* - * Although we don't need to store entry_size since we're already - * storing the format, this might provide a performance benefit - * when allocating storage. 
- */ - cq_priv->entry_size = format_sizes[cq_priv->attr.format]; - - ofi_spin_init(&cq_priv->lock); - ret = gnix_cq_set_wait(cq_priv); - if (ret) - goto free_cq_priv; - - ret = _gnix_queue_create(&cq_priv->events, alloc_cq_entry, - free_cq_entry, cq_priv->entry_size, - cq_priv->attr.size); - if (ret) - goto free_cq_priv; - - ret = _gnix_queue_create(&cq_priv->errors, alloc_cq_entry, - free_cq_entry, sizeof(struct fi_cq_err_entry), - 0); - if (ret) - goto free_gnix_queue; - - *cq = &cq_priv->cq_fid; - return ret; - -free_gnix_queue: - _gnix_queue_destroy(cq_priv->events); -free_cq_priv: - _gnix_ref_put(cq_priv->domain); - ofi_spin_destroy(&cq_priv->lock); - free(cq_priv); -free_fi_cq_ops: - free(fi_cq_ops); -free_cq_ops: - free(cq_ops); - - return ret; -} - - -/******************************************************************************* - * FI_OPS_* data structures. - ******************************************************************************/ -static const struct fi_ops gnix_cq_fi_ops = { - .size = sizeof(struct fi_ops), - .close = gnix_cq_close, - .bind = fi_no_bind, - .control = gnix_cq_control, - .ops_open = fi_no_ops_open -}; - -static const struct fi_ops_cq gnix_cq_ops = { - .size = sizeof(struct fi_ops_cq), - .read = gnix_cq_read, - .readfrom = gnix_cq_readfrom, - .readerr = gnix_cq_readerr, - .sread = gnix_cq_sread, - .sreadfrom = gnix_cq_sreadfrom, - .signal = gnix_cq_signal, - .strerror = gnix_cq_strerror -}; diff --git a/prov/gni/src/gnix_datagram.c b/prov/gni/src/gnix_datagram.c deleted file mode 100644 index 0245dc4643e..00000000000 --- a/prov/gni/src/gnix_datagram.c +++ /dev/null @@ -1,820 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -#include "gnix.h" -#include "gnix_datagram.h" -#include "gnix_util.h" -#include "gnix_cm_nic.h" -#include "gnix_nic.h" - - -/******************************************************************************* - * Helper functions. 
- ******************************************************************************/ - -/* - * this function is intended to be invoked as an argument to pthread_create, - */ -static void *_gnix_dgram_prog_thread_fn(void *the_arg) -{ - int ret = FI_SUCCESS, prev_state; - struct gnix_dgram_hndl *the_hndl = (struct gnix_dgram_hndl *)the_arg; - sigset_t sigmask; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - /* - * temporarily disable cancelability while we set up - * some stuff - */ - - pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &prev_state); - - /* - * help out Cray core-spec, say we're not an app thread - * and can be run on core-spec cpus. - */ - - ret = _gnix_task_is_not_app(); - if (ret) - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_task_is_not_app call returned %d\n", ret); - - /* - * block all signals, don't want this thread to catch - * signals that may be for app threads - */ - - memset(&sigmask, 0, sizeof(sigset_t)); - ret = sigfillset(&sigmask); - if (ret) { - GNIX_WARN(FI_LOG_EP_CTRL, - "sigfillset call returned %d\n", ret); - } else { - - ret = pthread_sigmask(SIG_SETMASK, - &sigmask, NULL); - if (ret) - GNIX_WARN(FI_LOG_EP_CTRL, - "pthread_sigmask call returned %d\n", ret); - } - - /* - * okay now we're ready to be cancelable. - */ - - pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &prev_state); - - pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - -retry: - ret = _gnix_dgram_poll(the_hndl, GNIX_DGRAM_BLOCK); - if ((ret == -FI_ETIMEDOUT) || (ret == FI_SUCCESS)) - goto retry; - - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_dgram_poll returned %s\n", fi_strerror(-ret)); - - /* - * TODO: need to be able to enqueue events on to the - * ep associated with the cm_nic. - */ - return NULL; -} - -/******************************************************************************* - * API function implementations. - ******************************************************************************/ - -/* - * function to pack data into datagram in/out buffers. 
- * On success, returns number of bytes packed in to the buffer, - * otherwise -FI errno. - */ -ssize_t _gnix_dgram_pack_buf(struct gnix_datagram *d, enum gnix_dgram_buf buf, - void *data, uint32_t nbytes) -{ - char *dptr; - uint32_t index; - - assert(d != NULL); - if (buf == GNIX_DGRAM_IN_BUF) { - index = d->w_index_in_buf; - dptr = &d->dgram_in_buf[index]; - } else { - index = d->w_index_out_buf; - dptr = &d->dgram_out_buf[index]; - } - - /* - * make sure there's room - */ - if ((index + nbytes) > GNI_DATAGRAM_MAXSIZE) - return -FI_ENOSPC; - - memcpy(dptr, data, nbytes); - - if (buf == GNIX_DGRAM_IN_BUF) - d->w_index_in_buf += nbytes; - else - d->w_index_out_buf += nbytes; - - return nbytes; -} - - -/* - * function to unpack data from datagram in/out buffers. - * On success, returns number of bytes unpacked, - * otherwise -FI errno. - */ -ssize_t _gnix_dgram_unpack_buf(struct gnix_datagram *d, enum gnix_dgram_buf buf, - void *data, uint32_t nbytes) -{ - char *dptr; - uint32_t index, bytes_left; - - assert(d != NULL); - if (buf == GNIX_DGRAM_IN_BUF) { - index = d->r_index_in_buf; - dptr = &d->dgram_in_buf[index]; - } else { - index = d->r_index_out_buf; - dptr = &d->dgram_out_buf[index]; - } - - /* - * only copy out up to GNI_DATAGRAM_MAXSIZE - */ - - bytes_left = GNI_DATAGRAM_MAXSIZE - index; - - nbytes = (nbytes > bytes_left) ? bytes_left : nbytes; - - memcpy(data, dptr, nbytes); - - if (buf == GNIX_DGRAM_IN_BUF) - d->r_index_in_buf += nbytes; - else - d->r_index_out_buf += nbytes; - - return nbytes; -} - -/* - * function to rewind the internal pointers to - * datagram in/out buffers. 
- */ -int _gnix_dgram_rewind_buf(struct gnix_datagram *d, enum gnix_dgram_buf buf) -{ - assert(d != NULL); - if (buf == GNIX_DGRAM_IN_BUF) { - d->r_index_in_buf = 0; - d->w_index_in_buf = 0; - } else { - d->r_index_out_buf = 0; - d->w_index_out_buf = 0; - } - return FI_SUCCESS; -} - -int _gnix_dgram_alloc(struct gnix_dgram_hndl *hndl, enum gnix_dgram_type type, - struct gnix_datagram **d_ptr) -{ - int ret = -FI_EAGAIN; - struct gnix_datagram *d = NULL; - struct dlist_entry *the_free_list; - struct dlist_entry *the_active_list; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - ofi_spin_lock(&hndl->lock); - - if (type == GNIX_DGRAM_WC) { - the_free_list = &hndl->wc_dgram_free_list; - the_active_list = &hndl->wc_dgram_active_list; - } else { - the_free_list = &hndl->bnd_dgram_free_list; - the_active_list = &hndl->bnd_dgram_active_list; - } - - if (!dlist_empty(the_free_list)) { - d = dlist_first_entry(the_free_list, struct gnix_datagram, - list); - if (d != NULL) { - dlist_remove_init(&d->list); - dlist_insert_head(&d->list, the_active_list); - d->type = type; - ret = FI_SUCCESS; - } - - } - - ofi_spin_unlock(&hndl->lock); - - if (d != NULL) { - d->r_index_in_buf = 0; - d->w_index_in_buf = 0; - d->w_index_in_buf = 0; - d->w_index_out_buf = 0; - } - - *d_ptr = d; - return ret; -} - -int _gnix_dgram_free(struct gnix_datagram *d) -{ - int ret = FI_SUCCESS; - gni_return_t status; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (d->type == GNIX_DGRAM_BND) { - status = GNI_EpUnbind(d->gni_ep); - if (status != GNI_RC_SUCCESS) { - /* TODO: have to handle this */ - GNIX_FATAL(FI_LOG_EP_CTRL, - "GNI_EpUnbind returned %s (ep=%p)\n", - gni_err_str[status], d->gni_ep); - } - } - - ofi_spin_lock(&d->d_hndl->lock); - dlist_remove_init(&d->list); - d->state = GNIX_DGRAM_STATE_FREE; - dlist_insert_head(&d->list, d->free_list_head); - ofi_spin_unlock(&d->d_hndl->lock); - return ret; -} - -int _gnix_dgram_wc_post(struct gnix_datagram *d) -{ - int ret = FI_SUCCESS; - gni_return_t status; - struct 
gnix_nic *nic = d->cm_nic->nic; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - COND_ACQUIRE(nic->requires_lock, &nic->lock); - status = GNI_EpPostDataWId(d->gni_ep, - d->dgram_in_buf, - GNI_DATAGRAM_MAXSIZE, - d->dgram_out_buf, - GNI_DATAGRAM_MAXSIZE, - (uint64_t)d); - if (status != GNI_RC_SUCCESS) { - ret = gnixu_to_fi_errno(status); - } else { - /* - * datagram is active now, listening - */ - d->state = GNIX_DGRAM_STATE_ACTIVE; - } - COND_RELEASE(nic->requires_lock, &nic->lock); - - return ret; -} - -int _gnix_dgram_bnd_post(struct gnix_datagram *d) -{ - gni_return_t status = GNI_RC_SUCCESS; - int ret = FI_SUCCESS; - struct gnix_nic *nic = d->cm_nic->nic; - int post = 1; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - /* - * bind the datagram ep - */ - - status = GNI_EpBind(d->gni_ep, - d->target_addr.device_addr, - d->target_addr.cdm_id); - if (status != GNI_RC_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_EpBind returned %s\n", gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - goto err; - } - - COND_ACQUIRE(nic->requires_lock, &nic->lock); - if (d->pre_post_clbk_fn != NULL) { - ret = d->pre_post_clbk_fn(d, &post); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_CTRL, - "pre_post_callback_fn: %d\n", - ret); - } - - if (post) { - /* - * if we get GNI_RC_ERROR_RESOURCE status return from - * GNI_EpPostDataWId that means that either a previously posted - * wildcard datagram has matched up with an incoming - * bound datagram or we have a previously posted bound - * datagram whose transfer to the target node has - * not yet completed. Don't treat this case as an error. 
- */ - status = GNI_EpPostDataWId(d->gni_ep, - d->dgram_in_buf, - GNI_DATAGRAM_MAXSIZE, - d->dgram_out_buf, - GNI_DATAGRAM_MAXSIZE, - (uint64_t)d); - if (d->post_post_clbk_fn != NULL) { - ret = d->post_post_clbk_fn(d, status); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_CTRL, - "post_post_callback_fn: %d\n", - ret); - } - } - - COND_RELEASE(nic->requires_lock, &nic->lock); - - if (post) { - if ((status != GNI_RC_SUCCESS) && - (status != GNI_RC_ERROR_RESOURCE)) { - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_EpPostDataWId returned %s\n", - gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - goto err; - } - - if (status == GNI_RC_SUCCESS) { - /* - * datagram is active now, connecting - */ - d->state = GNIX_DGRAM_STATE_ACTIVE; - } else { - ret = -FI_EBUSY; - } - } - -err: - return ret; -} - -int _gnix_dgram_poll(struct gnix_dgram_hndl *hndl, - enum gnix_dgram_poll_type type) -{ - int ret = FI_SUCCESS; - gni_return_t status; - gni_post_state_t post_state = GNI_POST_PENDING; - uint32_t responding_remote_id; - uint32_t timeout = -1; - unsigned int responding_remote_addr; - struct gnix_datagram *dg_ptr; - uint64_t datagram_id = 0UL; - struct gnix_cm_nic *cm_nic = NULL; - struct gnix_nic *nic = NULL; - struct gnix_address responding_addr; - - cm_nic = hndl->cm_nic; - assert(cm_nic != NULL); - nic = cm_nic->nic; - assert(nic != NULL); - - if (type == GNIX_DGRAM_BLOCK) { - if (hndl->timeout_needed && - (hndl->timeout_needed(hndl->timeout_data) == true)) - timeout = hndl->timeout; - - status = GNI_PostdataProbeWaitById(nic->gni_nic_hndl, - timeout, - &datagram_id); - if ((status != GNI_RC_SUCCESS) && - (status != GNI_RC_TIMEOUT)) { - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_PostdataProbeWaitById returned %s\n", - gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - goto err; - } - } else { - status = GNI_PostDataProbeById(nic->gni_nic_hndl, - &datagram_id); - if ((status != GNI_RC_SUCCESS) && - (status != GNI_RC_NO_MATCH)) { - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_PostdataProbeById 
returned %s\n", - gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - goto err; - } - } - - switch (status) { - case GNI_RC_SUCCESS: - dg_ptr = (struct gnix_datagram *)datagram_id; - assert(dg_ptr != NULL); - - /* - * do need to take lock here - */ - COND_ACQUIRE(nic->requires_lock, &nic->lock); - - status = GNI_EpPostDataTestById(dg_ptr->gni_ep, - datagram_id, - &post_state, - &responding_remote_addr, - &responding_remote_id); - if ((status != GNI_RC_SUCCESS) && - (status !=GNI_RC_NO_MATCH)) { - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_EpPostDataTestById: %s\n", - gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - COND_RELEASE(nic->requires_lock, &nic->lock); - goto err; - } else { - if ((status == GNI_RC_SUCCESS) && - (dg_ptr->state != GNIX_DGRAM_STATE_ACTIVE)) { - GNIX_DEBUG(FI_LOG_EP_CTRL, - "GNI_EpPostDataTestById ", - "returned success but dgram not active\n"); - } - } - - COND_RELEASE(nic->requires_lock, &nic->lock); - - /* - * no match is okay, it means another thread - * won the race to get this datagram - */ - - if (status == GNI_RC_NO_MATCH) { - ret = FI_SUCCESS; - goto err; - } - - /* - * pass COMPLETED and error post state cases to - * callback function if present. If a callback funciton - * is not present, the error states set ret to -FI_EIO. - * - * TODO should we also pass pending,remote_data states to - * the callback? maybe useful for debugging weird - * datagram problems? 
- */ - switch (post_state) { - case GNI_POST_TIMEOUT: - case GNI_POST_TERMINATED: - case GNI_POST_ERROR: - ret = -FI_EIO; - break; - case GNI_POST_COMPLETED: - if (dg_ptr->callback_fn != NULL) { - responding_addr.device_addr = - responding_remote_addr; - responding_addr.cdm_id = - responding_remote_id; - ret = dg_ptr->callback_fn((void *)datagram_id, - responding_addr, - post_state); - } - break; - case GNI_POST_PENDING: - case GNI_POST_REMOTE_DATA: - break; - default: - GNIX_FATAL(FI_LOG_EP_CTRL, "Invalid post_state: %d\n", - post_state); - break; - } - break; - case GNI_RC_TIMEOUT: - /* call progress function */ - if (hndl->timeout_progress) - hndl->timeout_progress(hndl->timeout_data); - break; - case GNI_RC_NO_MATCH: - break; - default: - /* an error */ - break; - } - -err: - return ret; -} - -int _gnix_dgram_hndl_alloc(struct gnix_cm_nic *cm_nic, - enum fi_progress progress, - const struct gnix_dgram_hndl_attr *attr, - struct gnix_dgram_hndl **hndl_ptr) -{ - int i, ret = FI_SUCCESS; - int n_dgrams_tot; - struct gnix_datagram *dgram_base = NULL, *dg_ptr; - struct gnix_dgram_hndl *the_hndl = NULL; - struct gnix_fid_domain *dom = cm_nic->domain; - struct gnix_fid_fabric *fabric = NULL; - struct gnix_nic *nic; - gni_return_t status; - uint32_t num_corespec_cpus = 0; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - nic = cm_nic->nic; - - if (dom == NULL) - return -FI_EINVAL; - - fabric = dom->fabric; - - the_hndl = calloc(1, sizeof(struct gnix_dgram_hndl)); - if (the_hndl == NULL) { - ret = -FI_ENOMEM; - goto err; - } - - the_hndl->cm_nic = cm_nic; - - dlist_init(&the_hndl->bnd_dgram_free_list); - dlist_init(&the_hndl->bnd_dgram_active_list); - - dlist_init(&the_hndl->wc_dgram_free_list); - dlist_init(&the_hndl->wc_dgram_active_list); - - the_hndl->timeout = -1; - - /* - * inherit some stuff from the fabric object being - * used to open the domain which will use this cm nic. 
- */ - - the_hndl->n_dgrams = fabric->n_bnd_dgrams; - the_hndl->n_wc_dgrams = fabric->n_wc_dgrams; - ofi_spin_init(&the_hndl->lock); - - n_dgrams_tot = the_hndl->n_dgrams + the_hndl->n_wc_dgrams; - - /* - * set up the free lists for datagrams - */ - - dgram_base = calloc(n_dgrams_tot, - sizeof(struct gnix_datagram)); - if (dgram_base == NULL) { - ret = -FI_ENOMEM; - goto err; - } - - dg_ptr = dgram_base; - - /* - * first build up the list for connection requests - */ - - for (i = 0; i < fabric->n_bnd_dgrams; i++, dg_ptr++) { - dg_ptr->d_hndl = the_hndl; - dg_ptr->cm_nic = cm_nic; - status = GNI_EpCreate(nic->gni_nic_hndl, - NULL, - &dg_ptr->gni_ep); - if (status != GNI_RC_SUCCESS) { - ret = gnixu_to_fi_errno(status); - goto err; - } - dlist_node_init(&dg_ptr->list); - dlist_insert_head(&dg_ptr->list, - &the_hndl->bnd_dgram_free_list); - dg_ptr->free_list_head = &the_hndl->bnd_dgram_free_list; - } - - /* - * now the wild card (WC) dgrams - */ - - for (i = 0; i < fabric->n_wc_dgrams; i++, dg_ptr++) { - dg_ptr->d_hndl = the_hndl; - dg_ptr->cm_nic = cm_nic; - status = GNI_EpCreate(nic->gni_nic_hndl, - NULL, - &dg_ptr->gni_ep); - if (status != GNI_RC_SUCCESS) { - ret = gnixu_to_fi_errno(status); - goto err; - } - dlist_node_init(&dg_ptr->list); - dlist_insert_head(&dg_ptr->list, &the_hndl->wc_dgram_free_list); - dg_ptr->free_list_head = &the_hndl->wc_dgram_free_list; - } - - /* - * check the progress model, if FI_PROGRESS_AUTO, fire off - * a progress thread - */ - - if (progress == FI_PROGRESS_AUTO) { - - if (attr != NULL) { - the_hndl->timeout_needed = attr->timeout_needed; - the_hndl->timeout_progress = attr->timeout_progress; - the_hndl->timeout_data = attr->timeout_data; - the_hndl->timeout = attr->timeout; - } - - /* - * tell CLE job container that next thread should be - * runnable anywhere in the cpuset, don't treat as - * an error if one is returned, may have perf issues - * though... 
- */ - - ret = _gnix_get_num_corespec_cpus(&num_corespec_cpus); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "failed to get num corespec cpus\n"); - } - - if (num_corespec_cpus > 0) { - ret = _gnix_job_disable_affinity_apply(); - } else { - ret = _gnix_job_enable_unassigned_cpus(); - } - if (ret != 0) - GNIX_WARN(FI_LOG_EP_CTRL, - "disable_affinity/unassigned_cpus call returned %d\n", - ret); - - ret = pthread_create(&the_hndl->progress_thread, - NULL, - _gnix_dgram_prog_thread_fn, - (void *)the_hndl); - if (ret) { - GNIX_WARN(FI_LOG_EP_CTRL, - "pthread_ceate call returned %d\n", ret); - goto err1; - } - } - - the_hndl->dgram_base = dgram_base; - - *hndl_ptr = the_hndl; - - return ret; - -err1: - -err: - dg_ptr = dgram_base; - if (dg_ptr) { - - for (i = 0; i < n_dgrams_tot; i++, dg_ptr++) { - if (dg_ptr->gni_ep != NULL) - GNI_EpDestroy(dg_ptr->gni_ep); - } - free(dgram_base); - } - if (the_hndl) - free(the_hndl); - return ret; -} - -int _gnix_dgram_hndl_free(struct gnix_dgram_hndl *the_hndl) -{ - int i; - int n_dgrams; - int ret = FI_SUCCESS; - struct gnix_datagram *p, *next, *dg_ptr; - gni_return_t status; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (the_hndl->dgram_base == NULL) { - ret = -FI_EINVAL; - goto err; - } - - /* - * cancel any active datagrams - GNI_RC_NO_MATCH is okay. 
- */ - dlist_for_each_safe(&the_hndl->bnd_dgram_active_list, p, next, list) { - dg_ptr = p; - if (dg_ptr->state != GNIX_DGRAM_STATE_FREE) { - status = GNI_EpPostDataCancel(dg_ptr->gni_ep); - if ((status != GNI_RC_SUCCESS) && - (status != GNI_RC_NO_MATCH)) { - ret = gnixu_to_fi_errno(status); - goto err; - } - } - dlist_remove_init(&dg_ptr->list); - } - - dlist_for_each_safe(&the_hndl->wc_dgram_active_list, p, next, list) { - dg_ptr = p; - if (dg_ptr->state == GNIX_DGRAM_STATE_FREE) { - status = GNI_EpPostDataCancel(dg_ptr->gni_ep); - if ((status != GNI_RC_SUCCESS) && - (status != GNI_RC_NO_MATCH)) { - ret = gnixu_to_fi_errno(status); - goto err; - } - } - dlist_remove_init(&dg_ptr->list); - } - - /* - * destroy all the endpoints - */ - - n_dgrams = the_hndl->n_dgrams + the_hndl->n_wc_dgrams; - dg_ptr = the_hndl->dgram_base; - - for (i = 0; i < n_dgrams; i++, dg_ptr++) { - if (dg_ptr->gni_ep != NULL) - GNI_EpDestroy(dg_ptr->gni_ep); - } - - /* - * cancel the progress thread, if any - */ - - if (the_hndl->progress_thread) { - - ret = pthread_cancel(the_hndl->progress_thread); - if ((ret != 0) && (ret != ESRCH)) { - GNIX_WARN(FI_LOG_EP_CTRL, - "pthread_cancel returned %d\n", ret); - goto err; - } - - ret = pthread_join(the_hndl->progress_thread, - NULL); - if ((ret != 0) && (ret != ESRCH)) { - GNIX_WARN(FI_LOG_EP_CTRL, - "pthread_join returned %d\n", ret); - goto err; - } - - GNIX_INFO(FI_LOG_EP_CTRL, "pthread_join returned %d\n", ret); - } -err: - if (ret != FI_SUCCESS) - GNIX_INFO(FI_LOG_EP_CTRL, "returning error %d\n", ret); - free(the_hndl->dgram_base); - free(the_hndl); - - return ret; -} diff --git a/prov/gni/src/gnix_dom.c b/prov/gni/src/gnix_dom.c deleted file mode 100644 index a16f85989c0..00000000000 --- a/prov/gni/src/gnix_dom.c +++ /dev/null @@ -1,766 +0,0 @@ -/* - * Copyright (c) 2015-2018 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#if HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include - -#include "gnix.h" -#include "gnix_nic.h" -#include "gnix_util.h" -#include "gnix_xpmem.h" -#include "gnix_hashtable.h" -#include "gnix_auth_key.h" -#include "gnix_smrn.h" - -#define GNIX_MR_MODE_DEFAULT FI_MR_BASIC -#define GNIX_NUM_PTAGS 256 - -gni_cq_mode_t gnix_def_gni_cq_modes = GNI_CQ_PHYS_PAGES; - -static char *__gnix_mr_type_to_str[GNIX_MR_MAX_TYPE] = { - [GNIX_MR_TYPE_INTERNAL] = "internal", - [GNIX_MR_TYPE_UDREG] = "udreg", - [GNIX_MR_TYPE_NONE] = "none", -}; - -/******************************************************************************* - * Forward declaration for ops structures. - ******************************************************************************/ - -static struct fi_ops gnix_stx_ops; -static struct fi_ops gnix_domain_fi_ops; -static struct fi_ops_mr gnix_domain_mr_ops; -static struct fi_ops_domain gnix_domain_ops; - -static void __domain_destruct(void *obj) -{ - int ret = FI_SUCCESS; - struct gnix_fid_domain *domain = (struct gnix_fid_domain *) obj; - struct gnix_mr_cache_info *info; - int i; - - GNIX_TRACE(FI_LOG_DOMAIN, "\n"); - - for (i = 0; i < GNIX_NUM_PTAGS; i++) { - info = &domain->mr_cache_info[i]; - - ofi_spin_lock(&info->mr_cache_lock); - ret = _gnix_close_cache(domain, info); - ofi_spin_unlock(&info->mr_cache_lock); - - if (ret != FI_SUCCESS) - GNIX_FATAL(FI_LOG_MR, - "failed to close memory " - "registration cache\n"); - } - - free(domain->mr_cache_info); - - ret = _gnix_smrn_close(domain->mr_cache_attr.smrn); - if (ret != FI_SUCCESS) - GNIX_FATAL(FI_LOG_MR, "failed to close MR notifier\n"); - - /* - * remove from the list of cdms attached to fabric - */ - dlist_remove_init(&domain->list); - - _gnix_ref_put(domain->fabric); - - memset(domain, 0, sizeof *domain); - free(domain); -} - -static void __stx_destruct(void *obj) -{ - int ret; - struct gnix_fid_stx *stx = (struct gnix_fid_stx *) obj; - - GNIX_TRACE(FI_LOG_DOMAIN, "\n"); - - if 
(stx->nic) { - ret = _gnix_nic_free(stx->nic); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_nic_free call returned %s\n", - fi_strerror(-ret)); - } - - memset(stx, 0, sizeof(*stx)); - free(stx); -} - -/******************************************************************************* - * API function implementations. - ******************************************************************************/ - -/** - * Creates a shared transmit context. - * - * @param[in] val value to be sign extended - * @param[in] len length to sign extend the value - * @return FI_SUCCESS if shared tx context successfully created - * @return -FI_EINVAL if invalid arg(s) supplied - * @return -FI_ENOMEM insufficient memory - */ -DIRECT_FN STATIC int gnix_stx_open(struct fid_domain *dom, - struct fi_tx_attr *tx_attr, - struct fid_stx **stx, void *context) -{ - int ret = FI_SUCCESS; - struct gnix_fid_domain *domain; - struct gnix_fid_stx *stx_priv; - - GNIX_TRACE(FI_LOG_DOMAIN, "\n"); - - domain = container_of(dom, struct gnix_fid_domain, domain_fid.fid); - if (domain->domain_fid.fid.fclass != FI_CLASS_DOMAIN) { - ret = -FI_EINVAL; - goto err; - } - - stx_priv = calloc(1, sizeof(*stx_priv)); - if (!stx_priv) { - ret = -FI_ENOMEM; - goto err; - } - - stx_priv->domain = domain; - stx_priv->auth_key = NULL; - stx_priv->nic = NULL; - - _gnix_ref_init(&stx_priv->ref_cnt, 1, __stx_destruct); - - _gnix_ref_get(stx_priv->domain); - - stx_priv->stx_fid.fid.fclass = FI_CLASS_STX_CTX; - stx_priv->stx_fid.fid.context = context; - stx_priv->stx_fid.fid.ops = &gnix_stx_ops; - stx_priv->stx_fid.ops = NULL; - domain->num_allocd_stxs++; - - *stx = &stx_priv->stx_fid; - -err: - return ret; -} - -/** - * Destroy a shared transmit context. 
- * - * @param[in] fid fid for previously allocated gnix_fid_stx - * structure - * @return FI_SUCCESS if shared tx context successfully closed - * @return -FI_EINVAL if invalid arg(s) supplied - * - * @note - the structure will actually not be freed till all - * references to the structure have released their references - * to the stx structure. - */ -static int gnix_stx_close(fid_t fid) -{ - struct gnix_fid_stx *stx; - - GNIX_TRACE(FI_LOG_DOMAIN, "\n"); - - stx = container_of(fid, struct gnix_fid_stx, stx_fid.fid); - if (stx->stx_fid.fid.fclass != FI_CLASS_STX_CTX) - return -FI_EINVAL; - - _gnix_ref_put(stx->domain); - _gnix_ref_put(stx); - - return FI_SUCCESS; -} - -static int gnix_domain_close(fid_t fid) -{ - int ret = FI_SUCCESS, references_held; - struct gnix_fid_domain *domain; - int i; - struct gnix_mr_cache_info *info; - - GNIX_TRACE(FI_LOG_DOMAIN, "\n"); - - domain = container_of(fid, struct gnix_fid_domain, domain_fid.fid); - if (domain->domain_fid.fid.fclass != FI_CLASS_DOMAIN) { - ret = -FI_EINVAL; - goto err; - } - - for (i = 0; i < GNIX_NUM_PTAGS; i++) { - info = &domain->mr_cache_info[i]; - - if (!domain->mr_cache_info[i].inuse) - continue; - - /* before checking the refcnt, - * flush the memory registration cache - */ - if (info->mr_cache_ro) { - ofi_spin_lock(&info->mr_cache_lock); - ret = _gnix_mr_cache_flush(info->mr_cache_ro); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_DOMAIN, - "failed to flush memory cache on domain close\n"); - ofi_spin_unlock(&info->mr_cache_lock); - goto err; - } - ofi_spin_unlock(&info->mr_cache_lock); - } - - if (info->mr_cache_rw) { - ofi_spin_lock(&info->mr_cache_lock); - ret = _gnix_mr_cache_flush(info->mr_cache_rw); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_DOMAIN, - "failed to flush memory cache on domain close\n"); - ofi_spin_unlock(&info->mr_cache_lock); - goto err; - } - ofi_spin_unlock(&info->mr_cache_lock); - } - } - - /* - * if non-zero refcnt, there are eps, mrs, and/or an eq associated - * with this 
domain which have not been closed. - */ - - references_held = _gnix_ref_put(domain); - - if (references_held) { - GNIX_INFO(FI_LOG_DOMAIN, "failed to fully close domain due to " - "lingering references. references=%i dom=%p\n", - references_held, domain); - } - - GNIX_INFO(FI_LOG_DOMAIN, "gnix_domain_close invoked returning %d\n", - ret); -err: - return ret; -} - -/* - * gnix_domain_ops provides a means for an application to better - * control allocation of underlying aries resources associated with - * the domain. Examples will include controlling size of underlying - * hardware CQ sizes, max size of RX ring buffers, etc. - */ - -static const uint32_t default_msg_rendezvous_thresh = 16*1024; -static const uint32_t default_rma_rdma_thresh = 8*1024; -static const uint32_t default_ct_init_size = 64; -static const uint32_t default_ct_max_size = 16384; -static const uint32_t default_ct_step = 2; -static const uint32_t default_vc_id_table_capacity = 128; -static const uint32_t default_mbox_page_size = GNIX_PAGE_2MB; -static const uint32_t default_mbox_num_per_slab = 2048; -static const uint32_t default_mbox_maxcredit = 64; -static const uint32_t default_mbox_msg_maxsize = 16384; -/* rx cq bigger to avoid having to deal with rx overruns so much */ -static const uint32_t default_rx_cq_size = 16384; -static const uint32_t default_tx_cq_size = 2048; -static const uint32_t default_max_retransmits = 5; -static const int32_t default_err_inject_count; /* static var is zeroed */ -static const uint32_t default_dgram_progress_timeout = 100; -static const uint32_t default_eager_auto_progress = 0; - -static int __gnix_string_to_mr_type(const char *name) -{ - int i; - for (i = 0; i < GNIX_MR_MAX_TYPE; i++) - if (strncmp(name, __gnix_mr_type_to_str[i], - strlen(__gnix_mr_type_to_str[i])) == 0) - return i; - - return -1; -} - -static int -__gnix_dom_ops_flush_cache(struct fid *fid) -{ - struct gnix_fid_domain *domain; - - GNIX_TRACE(FI_LOG_DOMAIN, "\n"); - - domain = container_of(fid, 
struct gnix_fid_domain, domain_fid.fid); - if (domain->domain_fid.fid.fclass != FI_CLASS_DOMAIN) { - GNIX_WARN(FI_LOG_DOMAIN, ("Invalid domain\n")); - return -FI_EINVAL; - } - - return _gnix_flush_registration_cache(domain); -} - -static int -__gnix_dom_ops_get_val(struct fid *fid, dom_ops_val_t t, void *val) -{ - struct gnix_fid_domain *domain; - - GNIX_TRACE(FI_LOG_DOMAIN, "\n"); - - assert(val); - - domain = container_of(fid, struct gnix_fid_domain, domain_fid.fid); - if (domain->domain_fid.fid.fclass != FI_CLASS_DOMAIN) { - GNIX_WARN(FI_LOG_DOMAIN, ("Invalid domain\n")); - return -FI_EINVAL; - } - - switch (t) { - case GNI_MSG_RENDEZVOUS_THRESHOLD: - *(uint32_t *)val = domain->params.msg_rendezvous_thresh; - break; - case GNI_RMA_RDMA_THRESHOLD: - *(uint32_t *)val = domain->params.rma_rdma_thresh; - break; - case GNI_CONN_TABLE_INITIAL_SIZE: - *(uint32_t *)val = domain->params.ct_init_size; - break; - case GNI_CONN_TABLE_MAX_SIZE: - *(uint32_t *)val = domain->params.ct_max_size; - break; - case GNI_CONN_TABLE_STEP_SIZE: - *(uint32_t *)val = domain->params.ct_step; - break; - case GNI_VC_ID_TABLE_CAPACITY: - *(uint32_t *)val = domain->params.vc_id_table_capacity; - break; - case GNI_MBOX_PAGE_SIZE: - *(uint32_t *)val = domain->params.mbox_page_size; - break; - case GNI_MBOX_NUM_PER_SLAB: - *(uint32_t *)val = domain->params.mbox_num_per_slab; - break; - case GNI_MBOX_MAX_CREDIT: - *(uint32_t *)val = domain->params.mbox_maxcredit; - break; - case GNI_MBOX_MSG_MAX_SIZE: - *(uint32_t *)val = domain->params.mbox_msg_maxsize; - break; - case GNI_RX_CQ_SIZE: - *(uint32_t *)val = domain->params.rx_cq_size; - break; - case GNI_TX_CQ_SIZE: - *(uint32_t *)val = domain->params.tx_cq_size; - break; - case GNI_MAX_RETRANSMITS: - *(uint32_t *)val = domain->params.max_retransmits; - break; - case GNI_ERR_INJECT_COUNT: - *(int32_t *)val = domain->params.err_inject_count; - break; - case GNI_MR_CACHE_LAZY_DEREG: - *(int32_t *)val = domain->mr_cache_attr.lazy_deregistration; - 
break; - case GNI_MR_CACHE: - *(char **) val = __gnix_mr_type_to_str[domain->mr_cache_type]; - break; - case GNI_MR_UDREG_REG_LIMIT: - *(int32_t *)val = domain->udreg_reg_limit; - break; - case GNI_MR_HARD_REG_LIMIT: - *(int32_t *)val = domain->mr_cache_attr.hard_reg_limit; - break; - case GNI_MR_SOFT_REG_LIMIT: - *(int32_t *)val = domain->mr_cache_attr.soft_reg_limit; - break; - case GNI_MR_HARD_STALE_REG_LIMIT: - *(int32_t *)val = domain->mr_cache_attr.hard_stale_limit; - break; - case GNI_XPMEM_ENABLE: - *(bool *)val = domain->params.xpmem_enabled; -#if !HAVE_XPMEM - GNIX_WARN(FI_LOG_DOMAIN, - "GNI provider XPMEM support not configured\n"); -#endif - break; - case GNI_DGRAM_PROGRESS_TIMEOUT: - *(uint32_t *)val = domain->params.dgram_progress_timeout; - break; - case GNI_EAGER_AUTO_PROGRESS: - *(uint32_t *)val = domain->params.eager_auto_progress; - break; - default: - GNIX_WARN(FI_LOG_DOMAIN, ("Invalid dom_ops_val\n")); - return -FI_EINVAL; - } - - return FI_SUCCESS; -} - -static int -__gnix_dom_ops_set_val(struct fid *fid, dom_ops_val_t t, void *val) -{ - struct gnix_fid_domain *domain; - int ret, type; - - GNIX_TRACE(FI_LOG_DOMAIN, "\n"); - - assert(val); - - domain = container_of(fid, struct gnix_fid_domain, domain_fid.fid); - if (domain->domain_fid.fid.fclass != FI_CLASS_DOMAIN) { - GNIX_WARN(FI_LOG_DOMAIN, ("Invalid domain\n")); - return -FI_EINVAL; - } - - switch (t) { - case GNI_MSG_RENDEZVOUS_THRESHOLD: - domain->params.msg_rendezvous_thresh = *(uint32_t *)val; - break; - case GNI_RMA_RDMA_THRESHOLD: - domain->params.rma_rdma_thresh = *(uint32_t *)val; - break; - case GNI_CONN_TABLE_INITIAL_SIZE: - domain->params.ct_init_size = *(uint32_t *)val; - break; - case GNI_CONN_TABLE_MAX_SIZE: - domain->params.ct_max_size = *(uint32_t *)val; - break; - case GNI_CONN_TABLE_STEP_SIZE: - domain->params.ct_step = *(uint32_t *)val; - break; - case GNI_VC_ID_TABLE_CAPACITY: - domain->params.vc_id_table_capacity = *(uint32_t *)val; - break; - case GNI_MBOX_PAGE_SIZE: - 
domain->params.mbox_page_size = *(uint32_t *)val; - break; - case GNI_MBOX_NUM_PER_SLAB: - domain->params.mbox_num_per_slab = *(uint32_t *)val; - break; - case GNI_MBOX_MAX_CREDIT: - domain->params.mbox_maxcredit = *(uint32_t *)val; - break; - case GNI_MBOX_MSG_MAX_SIZE: - domain->params.mbox_msg_maxsize = *(uint32_t *)val; - break; - case GNI_RX_CQ_SIZE: - domain->params.rx_cq_size = *(uint32_t *)val; - break; - case GNI_TX_CQ_SIZE: - domain->params.tx_cq_size = *(uint32_t *)val; - break; - case GNI_MAX_RETRANSMITS: - domain->params.max_retransmits = *(uint32_t *)val; - break; - case GNI_ERR_INJECT_COUNT: - domain->params.err_inject_count = *(int32_t *)val; - break; - case GNI_MR_CACHE_LAZY_DEREG: - domain->mr_cache_attr.lazy_deregistration = *(int32_t *)val; - break; - case GNI_MR_CACHE: - if (val != NULL) { - GNIX_DEBUG(FI_LOG_DOMAIN, "user provided value=%s\n", - *(char **) val); - - type = __gnix_string_to_mr_type(*(const char **) val); - if (type < 0 || type >= GNIX_MR_MAX_TYPE) - return -FI_EINVAL; - - GNIX_DEBUG(FI_LOG_DOMAIN, "setting domain mr type to %s\n", - __gnix_mr_type_to_str[type]); - - ret = _gnix_open_cache(domain, type); - if (ret != FI_SUCCESS) - return -FI_EINVAL; - } - break; - case GNI_MR_HARD_REG_LIMIT: - domain->mr_cache_attr.hard_reg_limit = *(int32_t *) val; - break; - case GNI_MR_SOFT_REG_LIMIT: - domain->mr_cache_attr.soft_reg_limit = *(int32_t *) val; - break; - case GNI_MR_HARD_STALE_REG_LIMIT: - domain->mr_cache_attr.hard_stale_limit = *(int32_t *) val; - break; - case GNI_MR_UDREG_REG_LIMIT: - if (*(int32_t *) val < 0) - return -FI_EINVAL; - domain->udreg_reg_limit = *(int32_t *) val; - break; - case GNI_XPMEM_ENABLE: -#if HAVE_XPMEM - domain->params.xpmem_enabled = *(bool *)val; -#else - GNIX_WARN(FI_LOG_DOMAIN, - "GNI provider XPMEM support not configured\n"); -#endif - break; - case GNI_DGRAM_PROGRESS_TIMEOUT: - domain->params.dgram_progress_timeout = *(uint32_t *)val; - break; - case GNI_EAGER_AUTO_PROGRESS: - 
domain->params.eager_auto_progress = *(uint32_t *)val; - break; - default: - GNIX_WARN(FI_LOG_DOMAIN, ("Invalid dom_ops_val\n")); - return -FI_EINVAL; - } - - return FI_SUCCESS; -} - -static struct fi_gni_ops_domain gnix_ops_domain = { - .set_val = __gnix_dom_ops_set_val, - .get_val = __gnix_dom_ops_get_val, - .flush_cache = __gnix_dom_ops_flush_cache, -}; - -DIRECT_FN int gnix_domain_bind(struct fid_domain *domain, struct fid *fid, - uint64_t flags) -{ - return -FI_ENOSYS; -} - -static int -gnix_domain_ops_open(struct fid *fid, const char *ops_name, uint64_t flags, - void **ops, void *context) -{ - int ret = FI_SUCCESS; - - if (strcmp(ops_name, FI_GNI_DOMAIN_OPS_1) == 0) - *ops = &gnix_ops_domain; - else - ret = -FI_EINVAL; - - return ret; -} - -DIRECT_FN int gnix_domain_open(struct fid_fabric *fabric, struct fi_info *info, - struct fid_domain **dom, void *context) -{ - struct gnix_fid_domain *domain = NULL; - int ret = FI_SUCCESS; - struct gnix_fid_fabric *fabric_priv; - struct gnix_auth_key *auth_key = NULL; - int i; - int requesting_vmdh = 0; - - GNIX_TRACE(FI_LOG_DOMAIN, "\n"); - - fabric_priv = container_of(fabric, struct gnix_fid_fabric, fab_fid); - - if (FI_VERSION_LT(fabric->api_version, FI_VERSION(1, 5)) && - (info->domain_attr->auth_key_size || info->domain_attr->auth_key)) - return -FI_EINVAL; - - requesting_vmdh = !(info->domain_attr->mr_mode & - (FI_MR_BASIC | FI_MR_VIRT_ADDR)); - - auth_key = GNIX_GET_AUTH_KEY(info->domain_attr->auth_key, - info->domain_attr->auth_key_size, requesting_vmdh); - if (!auth_key) - return -FI_EINVAL; - - GNIX_INFO(FI_LOG_DOMAIN, - "authorization key=%p ptag %u cookie 0x%x\n", - auth_key, auth_key->ptag, auth_key->cookie); - - if (auth_key->using_vmdh != requesting_vmdh) { - GNIX_WARN(FI_LOG_DOMAIN, - "GNIX provider cannot support multiple " - "FI_MR_BASIC and FI_MR_SCALABLE for the same ptag. 
" - "ptag=%d current_mode=%x requested_mode=%x\n", - auth_key->ptag, - auth_key->using_vmdh, info->domain_attr->mr_mode); - return -FI_EINVAL; - } - - domain = calloc(1, sizeof *domain); - if (domain == NULL) { - ret = -FI_ENOMEM; - goto err; - } - - domain->mr_cache_info = calloc(sizeof(*domain->mr_cache_info), - GNIX_NUM_PTAGS); - if (!domain->mr_cache_info) { - ret = -FI_ENOMEM; - goto err; - } - - domain->auth_key = auth_key; - - domain->mr_cache_attr = _gnix_default_mr_cache_attr; - domain->mr_cache_attr.reg_context = (void *) domain; - domain->mr_cache_attr.dereg_context = NULL; - domain->mr_cache_attr.destruct_context = NULL; - - ret = _gnix_smrn_open(&domain->mr_cache_attr.smrn); - if (ret != FI_SUCCESS) - goto err; - - ofi_spin_init(&domain->mr_cache_lock); - for (i = 0; i < GNIX_NUM_PTAGS; i++) { - domain->mr_cache_info[i].inuse = 0; - domain->mr_cache_info[i].domain = domain; - ofi_spin_init(&domain->mr_cache_info[i].mr_cache_lock); - } - - /* - * we are likely sharing udreg entries with Craypich if we're using udreg - * cache, so ask for only half the entries by default. 
- */ - domain->udreg_reg_limit = 2048; - - dlist_init(&domain->nic_list); - dlist_init(&domain->list); - - dlist_insert_after(&domain->list, &fabric_priv->domain_list); - - domain->fabric = fabric_priv; - _gnix_ref_get(domain->fabric); - - domain->cdm_id_seed = getpid(); /* TODO: direct syscall better */ - domain->addr_format = info->addr_format; - - /* user tunables */ - domain->params.msg_rendezvous_thresh = default_msg_rendezvous_thresh; - domain->params.rma_rdma_thresh = default_rma_rdma_thresh; - domain->params.ct_init_size = default_ct_init_size; - domain->params.ct_max_size = default_ct_max_size; - domain->params.ct_step = default_ct_step; - domain->params.vc_id_table_capacity = default_vc_id_table_capacity; - domain->params.mbox_page_size = default_mbox_page_size; - domain->params.mbox_num_per_slab = default_mbox_num_per_slab; - domain->params.mbox_maxcredit = default_mbox_maxcredit; - domain->params.mbox_msg_maxsize = default_mbox_msg_maxsize; - domain->params.rx_cq_size = default_rx_cq_size; - domain->params.tx_cq_size = default_tx_cq_size; - domain->params.max_retransmits = default_max_retransmits; - domain->params.err_inject_count = default_err_inject_count; -#if HAVE_XPMEM - domain->params.xpmem_enabled = true; -#else - domain->params.xpmem_enabled = false; -#endif - domain->params.dgram_progress_timeout = default_dgram_progress_timeout; - domain->params.eager_auto_progress = default_eager_auto_progress; - - domain->gni_cq_modes = gnix_def_gni_cq_modes; - _gnix_ref_init(&domain->ref_cnt, 1, __domain_destruct); - - domain->domain_fid.fid.fclass = FI_CLASS_DOMAIN; - domain->domain_fid.fid.context = context; - domain->domain_fid.fid.ops = &gnix_domain_fi_ops; - domain->domain_fid.ops = &gnix_domain_ops; - domain->domain_fid.mr = &gnix_domain_mr_ops; - - domain->control_progress = info->domain_attr->control_progress; - domain->data_progress = info->domain_attr->data_progress; - domain->thread_model = info->domain_attr->threading; - domain->mr_is_init = 0; 
- domain->mr_iov_limit = info->domain_attr->mr_iov_limit; - - ofi_spin_init(&domain->cm_nic_lock); - - domain->using_vmdh = requesting_vmdh; - - auth_key->using_vmdh = domain->using_vmdh; - _gnix_auth_key_enable(auth_key); - domain->auth_key = auth_key; - - if (!requesting_vmdh) { - _gnix_open_cache(domain, GNIX_DEFAULT_CACHE_TYPE); - } else { - domain->mr_cache_type = GNIX_MR_TYPE_NONE; - _gnix_open_cache(domain, GNIX_MR_TYPE_NONE); - } - - *dom = &domain->domain_fid; - return FI_SUCCESS; - -err: - if (domain && domain->mr_cache_info) - free(domain->mr_cache_info); - - if (domain != NULL) { - free(domain); - } - return ret; -} - -DIRECT_FN int gnix_srx_context(struct fid_domain *domain, - struct fi_rx_attr *attr, - struct fid_ep **rx_ep, void *context) -{ - return -FI_ENOSYS; -} - -/******************************************************************************* - * FI_OPS_* data structures. - ******************************************************************************/ - -static struct fi_ops gnix_stx_ops = { - .close = gnix_stx_close, - .bind = fi_no_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open -}; - -static struct fi_ops gnix_domain_fi_ops = { - .size = sizeof(struct fi_ops), - .close = gnix_domain_close, - .bind = fi_no_bind, - .control = fi_no_control, - .ops_open = gnix_domain_ops_open -}; - -static struct fi_ops_mr gnix_domain_mr_ops = { - .size = sizeof(struct fi_ops_mr), - .reg = gnix_mr_reg, - .regv = gnix_mr_regv, - .regattr = gnix_mr_regattr, -}; - -static struct fi_ops_domain gnix_domain_ops = { - .size = sizeof(struct fi_ops_domain), - .av_open = gnix_av_open, - .cq_open = gnix_cq_open, - .endpoint = gnix_ep_open, - .scalable_ep = gnix_sep_open, - .cntr_open = gnix_cntr_open, - .poll_open = fi_no_poll_open, - .stx_ctx = gnix_stx_open, - .srx_ctx = fi_no_srx_context -}; diff --git a/prov/gni/src/gnix_ep.c b/prov/gni/src/gnix_ep.c deleted file mode 100644 index 7f80fdd6d71..00000000000 --- a/prov/gni/src/gnix_ep.c +++ /dev/null @@ 
-1,3301 +0,0 @@ -/* - * Copyright (c) 2015-2019 Cray Inc. All rights reserved. - * Copyright (c) 2015-2018 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2019-2020 Triad National Security, LLC. - * All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -/* - * Endpoint common code - */ -#include -#include -#include - -#include "gnix.h" -#include "gnix_cm_nic.h" -#include "gnix_nic.h" -#include "gnix_util.h" -#include "gnix_ep.h" -#include "gnix_hashtable.h" -#include "gnix_vc.h" -#include "gnix_vector.h" -#include "gnix_msg.h" -#include "gnix_rma.h" -#include "gnix_atomic.h" -#include "gnix_cntr.h" -#include "gnix_xpmem.h" -#include "gnix_eq.h" -#include "gnix_cm.h" -#include "gnix_auth_key.h" - -/******************************************************************************* - * gnix_fab_req freelist functions - * - * These are wrappers around the gnix_freelist - * - ******************************************************************************/ - -#define GNIX_FAB_REQ_FL_MIN_SIZE 100 -#define GNIX_FAB_REQ_FL_REFILL_SIZE 10 - -int _gnix_ep_int_tx_pool_grow(struct gnix_fid_ep *ep) -{ - int ret, i; - uint8_t *tx_bufs; - struct fid_mr *auto_mr = NULL; - struct gnix_fid_mem_desc *md = NULL; - struct gnix_int_tx_buf *tx_buf_list; - struct gnix_int_tx_ptrs *tx_ptrs; - - assert(ep); - - if (ep->int_tx_pool.nbufs >= GNIX_INT_TX_POOL_COUNT) { - GNIX_WARN(FI_LOG_EP_DATA, "int_tx_pool is at max size\n"); - return -FI_ENOSPC; - } - - tx_bufs = malloc(GNIX_INT_TX_POOL_SIZE * GNIX_INT_TX_BUF_SZ); - if (tx_bufs == NULL) { - GNIX_WARN(FI_LOG_EP_DATA, "tx_bufs allocation failed\n"); - goto tx_buf_err; - } - - tx_buf_list = malloc(GNIX_INT_TX_POOL_SIZE * - sizeof(struct gnix_int_tx_buf)); - if (tx_buf_list == NULL) { - GNIX_WARN(FI_LOG_EP_DATA, "tx_bufs_list allocation failed\n"); - goto tx_buf_list_err; - } - - tx_ptrs = malloc(sizeof(struct gnix_int_tx_ptrs)); - if (tx_buf_list == NULL) { - GNIX_WARN(FI_LOG_EP_DATA, "tx_ptrs allocation failed\n"); - goto tx_ptrs_err; - } - - ret = _gnix_mr_reg(&ep->domain->domain_fid.fid, tx_bufs, - GNIX_INT_TX_BUF_SZ * GNIX_INT_TX_POOL_SIZE, - FI_READ | FI_WRITE, 0, 0, 0, - &auto_mr, NULL, ep->auth_key, GNIX_PROV_REG); - - if (OFI_UNLIKELY(ret != FI_SUCCESS)) { - 
GNIX_WARN(FI_LOG_EP_DATA, "gnix_mr_req returned: %s\n", - fi_strerror(-ret)); - goto reg_err; - } - - md = container_of(auto_mr, struct gnix_fid_mem_desc, mr_fid); - - ofi_spin_lock(&ep->int_tx_pool.lock); - - for (i = 0; i < GNIX_INT_TX_POOL_SIZE; i++) { - GNIX_DEBUG(FI_LOG_EP_DATA, - "tx_bufs + (%d * GNIX_INT_TX_BUF_SZ) = %p\n", - i, tx_bufs + (i * GNIX_INT_TX_BUF_SZ)); - tx_buf_list[i].buf = tx_bufs + (i * GNIX_INT_TX_BUF_SZ); - tx_buf_list[i].md = md; - slist_insert_tail(&tx_buf_list[i].e, &ep->int_tx_pool.sl); - } - - tx_ptrs->md = md; - tx_ptrs->buf_ptr = (void *) tx_bufs; - tx_ptrs->sl_ptr = (void *) tx_buf_list; - slist_insert_tail(&tx_ptrs->e, &ep->int_tx_pool.bl); - - ep->int_tx_pool.nbufs++; - - ofi_spin_unlock(&ep->int_tx_pool.lock); - - return FI_SUCCESS; - -reg_err: - free(tx_ptrs); -tx_ptrs_err: - free(tx_buf_list); -tx_buf_list_err: - free(tx_bufs); -tx_buf_err: - return -FI_ENOSPC; -} - -int _gnix_ep_int_tx_pool_init(struct gnix_fid_ep *ep) -{ - int ret; - - assert(ep); - - ep->int_tx_pool.nbufs = 0; - slist_init(&ep->int_tx_pool.sl); - slist_init(&ep->int_tx_pool.bl); - ofi_spin_init(&ep->int_tx_pool.lock); - - ret = _gnix_ep_int_tx_pool_grow(ep); - if (ret != FI_SUCCESS) - return ret; - - ep->int_tx_pool.enabled = true; - - return FI_SUCCESS; -} - -void _gnix_ep_int_tx_pool_fini(struct gnix_fid_ep *ep) -{ - int ret; - struct slist_entry *e; - struct gnix_int_tx_ptrs *tx_ptrs; - - assert(ep); - - if (ep->int_tx_pool.enabled == false) - return; - - ofi_spin_lock(&ep->int_tx_pool.lock); - - while (!slist_empty(&ep->int_tx_pool.bl)) { - e = slist_remove_head(&ep->int_tx_pool.bl); - tx_ptrs = (struct gnix_int_tx_ptrs *)e; - - ret = fi_close(&tx_ptrs->md->mr_fid.fid); - - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, "fi_close returned: %s\n", - fi_strerror(-ret)); - } - - if (tx_ptrs->buf_ptr != NULL) { - free(tx_ptrs->buf_ptr); - tx_ptrs->buf_ptr = NULL; - } - - if (tx_ptrs->sl_ptr != NULL) { - free(tx_ptrs->sl_ptr); - tx_ptrs->sl_ptr = NULL; 
- } - - free(tx_ptrs); - ep->int_tx_pool.nbufs--; - } - - ep->int_tx_pool.enabled = false; - - ofi_spin_unlock(&ep->int_tx_pool.lock); -} - -static int __fr_freelist_init(struct gnix_fid_ep *ep) -{ - int ret; - - assert(ep); - ret = _gnix_fl_init_ts(sizeof(struct gnix_fab_req), - offsetof(struct gnix_fab_req, dlist), - GNIX_FAB_REQ_FL_MIN_SIZE, - GNIX_FAB_REQ_FL_REFILL_SIZE, - 0, 0, &ep->fr_freelist); - - if (ret != FI_SUCCESS) { - GNIX_DEBUG(FI_LOG_EP_DATA, "_gnix_fl_init_ts returned: %s\n", - fi_strerror(-ret)); - return ret; - } - - return ret; -} - -static void __fr_freelist_destroy(struct gnix_fid_ep *ep) -{ - assert(ep); - - _gnix_fl_destroy(&ep->fr_freelist); -} - -int _gnix_ep_rx_enable(struct gnix_fid_ep *ep) -{ - if (ep->recv_cq) { - _gnix_cq_poll_obj_add(ep->recv_cq, ep->nic, - _gnix_nic_progress); - if (ep->cm_nic) /* No CM NIC for MSG EPs */ - _gnix_cq_poll_obj_add(ep->recv_cq, ep->cm_nic, - _gnix_cm_nic_progress); - ep->rx_enabled = true; - } - - if (ep->rwrite_cntr) { - _gnix_cntr_poll_obj_add(ep->rwrite_cntr, ep->nic, - _gnix_nic_progress); - if (ep->cm_nic) /* No CM NIC for MSG EPs */ - _gnix_cntr_poll_obj_add(ep->rwrite_cntr, ep->cm_nic, - _gnix_cm_nic_progress); - } - - if (ep->rread_cntr) { - _gnix_cntr_poll_obj_add(ep->rread_cntr, ep->nic, - _gnix_nic_progress); - if (ep->cm_nic) /* No CM NIC for MSG EPs */ - _gnix_cntr_poll_obj_add(ep->rread_cntr, ep->cm_nic, - _gnix_cm_nic_progress); - } - - return FI_SUCCESS; -} - -int _gnix_ep_tx_enable(struct gnix_fid_ep *ep) -{ - - if (ep->send_cq) { - _gnix_cq_poll_obj_add(ep->send_cq, ep->nic, - _gnix_nic_progress); - if (ep->cm_nic) /* No CM NIC for MSG EPs */ - _gnix_cq_poll_obj_add(ep->send_cq, ep->cm_nic, - _gnix_cm_nic_progress); - ep->tx_enabled = true; - } - - if (ep->send_cntr) { - _gnix_cntr_poll_obj_add(ep->send_cntr, ep->nic, - _gnix_nic_progress); - if (ep->cm_nic) /* No CM NIC for MSG EPs */ - _gnix_cntr_poll_obj_add(ep->send_cntr, ep->cm_nic, - _gnix_cm_nic_progress); - } - - if 
(ep->write_cntr) { - _gnix_cntr_poll_obj_add(ep->write_cntr, ep->nic, - _gnix_nic_progress); - if (ep->cm_nic) /* No CM NIC for MSG EPs */ - _gnix_cntr_poll_obj_add(ep->write_cntr, ep->cm_nic, - _gnix_cm_nic_progress); - } - - if (ep->read_cntr) { - _gnix_cntr_poll_obj_add(ep->read_cntr, ep->nic, - _gnix_nic_progress); - if (ep->cm_nic) /* No CM NIC for MSG EPs */ - _gnix_cntr_poll_obj_add(ep->read_cntr, ep->cm_nic, - _gnix_cm_nic_progress); - } - - return FI_SUCCESS; -} - - -/******************************************************************************* - * Forward declaration for ops structures - ******************************************************************************/ - -static struct fi_ops gnix_ep_fi_ops; -static struct fi_ops_ep gnix_ep_ops; -static struct fi_ops_msg gnix_ep_msg_ops; -static struct fi_ops_rma gnix_ep_rma_ops; -struct fi_ops_tagged gnix_ep_tagged_ops; -struct fi_ops_atomic gnix_ep_atomic_ops; - -/******************************************************************************* - * EP common messaging wrappers. 
- ******************************************************************************/ -ssize_t _ep_recv(struct fid_ep *ep, void *buf, size_t len, - void *desc, fi_addr_t src_addr, void *context, - uint64_t flags, uint64_t tag, uint64_t ignore) -{ - struct gnix_fid_ep *ep_priv; - - if (!ep) { - return -FI_EINVAL; - } - - ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(ep_priv->type)); - - if (!(ep_priv->op_flags & FI_MULTI_RECV)) { - return _gnix_recv(ep_priv, - (uint64_t)buf, - len, desc, - src_addr, - context, - ep_priv->op_flags | flags, - tag, - ignore, - NULL); - } else { - return _gnix_recv_mr(ep_priv, - (uint64_t)buf, - len, - desc, - src_addr, - context, - ep_priv->op_flags | flags, - tag, - ignore); - } -} - -ssize_t _ep_recvv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t src_addr, - void *context, uint64_t flags, uint64_t tag, - uint64_t ignore) -{ - struct gnix_fid_ep *ep_priv; - - if (!ep || !iov || count > GNIX_MAX_MSG_IOV_LIMIT) { - return -FI_EINVAL; - } - - ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(ep_priv->type)); - - if (count <= 1) { - if (!(ep_priv->op_flags & FI_MULTI_RECV)) { - return _gnix_recv(ep_priv, - (uint64_t)iov[0].iov_base, - iov[0].iov_len, - desc ? desc[0] : NULL, - src_addr, - context, - ep_priv->op_flags | flags, - tag, - ignore, - NULL); - } else { - return _gnix_recv_mr(ep_priv, - (uint64_t)iov[0].iov_base, - iov[0].iov_len, - desc ? 
desc[0] : NULL, - src_addr, - context, - ep_priv->op_flags | flags, - tag, - ignore); - } - } - - return _gnix_recvv(ep_priv, iov, desc, count, src_addr, - context, ep_priv->op_flags | flags, ignore, tag); -} - -ssize_t _ep_recvmsg(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags, uint64_t tag, - uint64_t ignore) -{ - struct iovec iov; - struct gnix_fid_ep *ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid); - - ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(ep_priv->type)); - - iov.iov_base = NULL; - iov.iov_len = 0; - - if (!msg) { - return -FI_EINVAL; - } - - if (flags & FI_MULTI_RECV) { - return _gnix_recv_mr(ep_priv, - (uint64_t)msg->msg_iov[0].iov_base, - msg->msg_iov[0].iov_len, - msg->desc ? msg->desc[0] : NULL, - msg->addr, - msg->context, - ep_priv->op_flags | flags, - tag, - ignore); - } - - /* msg_iov can be undefined when using FI_PEEK, etc. */ - return _ep_recvv(ep, msg->msg_iov ? msg->msg_iov : &iov, msg->desc, - msg->iov_count, msg->addr, msg->context, flags, tag, - ignore); -} - -ssize_t _ep_send(struct fid_ep *ep, const void *buf, size_t len, - void *desc, fi_addr_t dest_addr, void *context, - uint64_t flags, uint64_t tag) -{ - struct gnix_fid_ep *gnix_ep; - - if (!ep) { - return -FI_EINVAL; - } - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - return _gnix_send(gnix_ep, (uint64_t)buf, len, desc, dest_addr, context, - gnix_ep->op_flags | flags, 0, tag); -} - -ssize_t _ep_sendv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t dest_addr, - void *context, uint64_t flags, uint64_t tag) -{ - struct gnix_fid_ep *gnix_ep; - - if (!ep || !iov || !count || count > GNIX_MAX_MSG_IOV_LIMIT) { - return -FI_EINVAL; - } - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - if (count == 1) { - return _gnix_send(gnix_ep, (uint64_t)iov[0].iov_base, - 
iov[0].iov_len, desc ? desc[0] : NULL, - dest_addr, context, gnix_ep->op_flags | flags, - 0, tag); - } - - return _gnix_sendv(gnix_ep, iov, desc, count, dest_addr, context, - gnix_ep->op_flags | flags, tag); -} - -ssize_t _ep_sendmsg(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags, uint64_t tag) -{ - struct gnix_fid_ep *gnix_ep; - - if (!ep || !msg || !msg->msg_iov || !msg->iov_count) { - return -FI_EINVAL; - } - - /* Must check the iov count here, can't send msg->data to sendv */ - if (msg->iov_count > 1) { - return _ep_sendv(ep, msg->msg_iov, msg->desc, msg->iov_count, - msg->addr, msg->context, flags, tag); - } - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - return _gnix_send(gnix_ep, (uint64_t)msg->msg_iov[0].iov_base, - msg->msg_iov[0].iov_len, - msg->desc ? msg->desc[0] : NULL, msg->addr, - msg->context, flags, msg->data, tag); -} - -ssize_t _ep_inject(struct fid_ep *ep, const void *buf, - size_t len, uint64_t data, fi_addr_t dest_addr, - uint64_t flags, uint64_t tag) -{ - struct gnix_fid_ep *gnix_ep; - uint64_t inject_flags; - - if (!ep) { - return -FI_EINVAL; - } - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - inject_flags = (gnix_ep->op_flags | FI_INJECT | - GNIX_SUPPRESS_COMPLETION | flags); - - return _gnix_send(gnix_ep, (uint64_t)buf, len, NULL, dest_addr, - NULL, inject_flags, data, tag); -} - -ssize_t _ep_senddata(struct fid_ep *ep, const void *buf, - size_t len, void *desc, uint64_t data, - fi_addr_t dest_addr, void *context, - uint64_t flags, uint64_t tag) -{ - struct gnix_fid_ep *gnix_ep; - uint64_t sd_flags; - - if (!ep) { - return -FI_EINVAL; - } - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - sd_flags = gnix_ep->op_flags | FI_REMOTE_CQ_DATA | flags; - - return _gnix_send(gnix_ep, (uint64_t)buf, len, desc, dest_addr, - context, sd_flags, data, tag); 
-} - -static void __gnix_vc_destroy_ht_entry(void *val) -{ - struct gnix_vc *vc = (struct gnix_vc *) val; - - _gnix_vc_destroy(vc); -} - -/******************************************************************************* - * EP vc initialization helper - ******************************************************************************/ - -int _gnix_ep_init_vc(struct gnix_fid_ep *ep_priv) -{ - int ret; - gnix_hashtable_attr_t gnix_ht_attr; - gnix_vec_attr_t gnix_vec_attr; - - if (ep_priv->av == NULL) { - GNIX_FATAL(FI_LOG_EP_CTRL, - "_gnix_ep_init_vc av field NULL\n"); - } - - if (ep_priv->av->type == FI_AV_TABLE) { - /* Use array to store EP VCs when using FI_AV_TABLE. */ - ep_priv->vc_table = calloc(1, sizeof(struct gnix_vector)); - if(ep_priv->vc_table == NULL) - return -FI_ENOMEM; - - gnix_vec_attr.vec_initial_size = - ep_priv->domain->params.ct_init_size; - /* TODO: ep_priv->domain->params.ct_max_size; */ - gnix_vec_attr.vec_maximum_size = 1024*1024; - gnix_vec_attr.vec_increase_step = ep_priv->domain->params.ct_step; - gnix_vec_attr.vec_increase_type = GNIX_VEC_INCREASE_MULT; - gnix_vec_attr.vec_internal_locking = GNIX_VEC_UNLOCKED; - - ret = _gnix_vec_init(ep_priv->vc_table, &gnix_vec_attr); - GNIX_DEBUG(FI_LOG_EP_CTRL, - "ep_priv->vc_table = %p, ep_priv->vc_table->vector = %p\n", - ep_priv->vc_table, ep_priv->vc_table->vector); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, "_gnix_vec_init returned %s\n", - fi_strerror(ret)); - goto err; - } - } else { - /* Use hash table to store EP VCs when using FI_AV_MAP. 
*/ - ep_priv->vc_ht = calloc(1, sizeof(struct gnix_hashtable)); - if (ep_priv->vc_ht == NULL) - return -FI_ENOMEM; - - gnix_ht_attr.ht_initial_size = - ep_priv->domain->params.ct_init_size; - gnix_ht_attr.ht_maximum_size = - ep_priv->domain->params.ct_max_size; - gnix_ht_attr.ht_increase_step = ep_priv->domain->params.ct_step; - gnix_ht_attr.ht_increase_type = GNIX_HT_INCREASE_MULT; - gnix_ht_attr.ht_collision_thresh = 500; - gnix_ht_attr.ht_hash_seed = 0xdeadbeefbeefdead; - gnix_ht_attr.ht_internal_locking = 0; - gnix_ht_attr.destructor = __gnix_vc_destroy_ht_entry; - - ret = _gnix_ht_init(ep_priv->vc_ht, &gnix_ht_attr); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_ht_init returned %s\n", - fi_strerror(-ret)); - - goto err; - } - } - - dlist_init(&ep_priv->unmapped_vcs); - - return FI_SUCCESS; - -err: - if (ep_priv->av->type == FI_AV_TABLE) { - free(ep_priv->vc_table); - ep_priv->vc_table = NULL; - } else { - free(ep_priv->vc_ht); - ep_priv->vc_ht = NULL; - } - - return ret; -} - -static inline int __gnix_ep_fini_vc(struct gnix_fid_ep *ep) -{ - int ret; - GNIX_VECTOR_ITERATOR(ep->vc_table, iter); - struct gnix_vc *vc; - - /* Free unmapped VCs. */ - dlist_for_each(&ep->unmapped_vcs, vc, list) { - _gnix_vc_destroy(vc); - } - - if (!ep->av) { - /* No AV bound, no mapped VCs clean up. 
*/ - return FI_SUCCESS; - } - - if (ep->av->type == FI_AV_TABLE) { - /* Destroy all VCs */ - while ((vc = (struct gnix_vc *) - _gnix_vec_iterator_next(&iter))) { - _gnix_vec_remove_at(ep->vc_table, - GNIX_VECTOR_ITERATOR_IDX(iter)); - _gnix_vc_destroy(vc); - } - - /* Destroy vector storage */ - ret = _gnix_vec_close(ep->vc_table); - if (ret == FI_SUCCESS) { - free(ep->vc_table); - ep->vc_table = NULL; - } else { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_vec_close returned %s\n", - fi_strerror(-ret)); - } - } else { - /* Destroy VC storage, it automatically tears down VCs */ - ret = _gnix_ht_destroy(ep->vc_ht); - if (ret == FI_SUCCESS) { - free(ep->vc_ht); - ep->vc_ht = NULL; - } else { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_ht_destroy returned %s\n", - fi_strerror(-ret)); - } - } - - return FI_SUCCESS; -} - -/******************************************************************************* - * EP messaging API function implementations. - ******************************************************************************/ - -DIRECT_FN STATIC ssize_t gnix_ep_recv(struct fid_ep *ep, void *buf, size_t len, - void *desc, fi_addr_t src_addr, - void *context) -{ - return _ep_recv(ep, buf, len, desc, src_addr, context, 0, 0, 0); -} - -DIRECT_FN STATIC ssize_t gnix_ep_recvv(struct fid_ep *ep, - const struct iovec *iov, - void **desc, size_t count, - fi_addr_t src_addr, - void *context) -{ - return _ep_recvv(ep, iov, desc, count, src_addr, context, 0, 0, 0); -} - -DIRECT_FN STATIC ssize_t gnix_ep_recvmsg(struct fid_ep *ep, - const struct fi_msg *msg, - uint64_t flags) -{ - return _ep_recvmsg(ep, msg, flags & GNIX_RECVMSG_FLAGS, 0, 0); -} - -DIRECT_FN STATIC ssize_t gnix_ep_send(struct fid_ep *ep, const void *buf, - size_t len, void *desc, - fi_addr_t dest_addr, void *context) -{ - return _ep_send(ep, buf, len, desc, dest_addr, context, 0, 0); -} - -DIRECT_FN STATIC ssize_t gnix_ep_sendv(struct fid_ep *ep, - const struct iovec *iov, - void **desc, size_t count, - fi_addr_t dest_addr, - 
void *context) -{ - return _ep_sendv(ep, iov, desc, count, dest_addr, context, 0, 0); -} - -DIRECT_FN STATIC ssize_t gnix_ep_sendmsg(struct fid_ep *ep, - const struct fi_msg *msg, - uint64_t flags) -{ - return _ep_sendmsg(ep, msg, flags & GNIX_SENDMSG_FLAGS, 0); -} - -DIRECT_FN STATIC ssize_t gnix_ep_msg_inject(struct fid_ep *ep, const void *buf, - size_t len, fi_addr_t dest_addr) -{ - return _ep_inject(ep, buf, len, 0, dest_addr, 0, 0); -} - -DIRECT_FN STATIC ssize_t gnix_ep_senddata(struct fid_ep *ep, const void *buf, - size_t len, void *desc, uint64_t data, - fi_addr_t dest_addr, void *context) -{ - return _ep_senddata(ep, buf, len, desc, data, dest_addr, context, 0, 0); -} - -DIRECT_FN STATIC ssize_t -gnix_ep_msg_injectdata(struct fid_ep *ep, const void *buf, size_t len, - uint64_t data, fi_addr_t dest_addr) -{ - struct gnix_fid_ep *gnix_ep; - uint64_t flags; - - if (!ep) { - return -FI_EINVAL; - } - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - flags = gnix_ep->op_flags | FI_INJECT | FI_REMOTE_CQ_DATA | - GNIX_SUPPRESS_COMPLETION; - - return _gnix_send(gnix_ep, (uint64_t)buf, len, NULL, dest_addr, - NULL, flags, data, 0); -} - -/******************************************************************************* - * EP RMA API function implementations. 
- ******************************************************************************/ - -DIRECT_FN STATIC ssize_t gnix_ep_read(struct fid_ep *ep, void *buf, size_t len, - void *desc, fi_addr_t src_addr, uint64_t addr, - uint64_t key, void *context) -{ - struct gnix_fid_ep *gnix_ep; - uint64_t flags; - - if (!ep) { - return -FI_EINVAL; - } - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - flags = gnix_ep->op_flags | GNIX_RMA_READ_FLAGS_DEF; - - return _gnix_rma(gnix_ep, GNIX_FAB_RQ_RDMA_READ, - (uint64_t)buf, len, desc, - src_addr, addr, key, - context, flags, 0); -} - -DIRECT_FN STATIC ssize_t -gnix_ep_readv(struct fid_ep *ep, const struct iovec *iov, void **desc, - size_t count, fi_addr_t src_addr, uint64_t addr, uint64_t key, - void *context) -{ - struct gnix_fid_ep *gnix_ep; - uint64_t flags; - - if (!ep || !iov || count > GNIX_MAX_RMA_IOV_LIMIT) { - return -FI_EINVAL; - } - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - flags = gnix_ep->op_flags | GNIX_RMA_READ_FLAGS_DEF; - - return _gnix_rma(gnix_ep, GNIX_FAB_RQ_RDMA_READ, - (uint64_t)iov[0].iov_base, iov[0].iov_len, desc? desc[0] : NULL, - src_addr, addr, key, - context, flags, 0); -} - -DIRECT_FN STATIC ssize_t -gnix_ep_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg, uint64_t flags) -{ - struct gnix_fid_ep *gnix_ep; - - if (!ep || !msg || !msg->msg_iov || !msg->rma_iov || - msg->iov_count != 1 || msg->rma_iov_count != 1 || - msg->rma_iov[0].len > msg->msg_iov[0].iov_len) { - return -FI_EINVAL; - } - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - flags = (flags & GNIX_READMSG_FLAGS) | GNIX_RMA_READ_FLAGS_DEF; - - return _gnix_rma(gnix_ep, GNIX_FAB_RQ_RDMA_READ, - (uint64_t)msg->msg_iov[0].iov_base, - msg->msg_iov[0].iov_len, msg->desc? 
msg->desc[0] : NULL, - msg->addr, msg->rma_iov[0].addr, msg->rma_iov[0].key, - msg->context, flags, msg->data); -} - -DIRECT_FN STATIC ssize_t -gnix_ep_write(struct fid_ep *ep, const void *buf, size_t len, void *desc, - fi_addr_t dest_addr, uint64_t addr, uint64_t key, void *context) -{ - struct gnix_fid_ep *gnix_ep; - uint64_t flags; - - if (!ep) { - return -FI_EINVAL; - } - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - flags = gnix_ep->op_flags | GNIX_RMA_WRITE_FLAGS_DEF; - - return _gnix_rma(gnix_ep, GNIX_FAB_RQ_RDMA_WRITE, - (uint64_t)buf, len, desc, dest_addr, addr, key, - context, flags, 0); -} - -DIRECT_FN STATIC ssize_t -gnix_ep_writev(struct fid_ep *ep, const struct iovec *iov, void **desc, - size_t count, fi_addr_t dest_addr, uint64_t addr, uint64_t key, - void *context) -{ - struct gnix_fid_ep *gnix_ep; - uint64_t flags; - - if (!ep || !iov || count > GNIX_MAX_RMA_IOV_LIMIT) { - return -FI_EINVAL; - } - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - flags = gnix_ep->op_flags | GNIX_RMA_WRITE_FLAGS_DEF; - - return _gnix_rma(gnix_ep, GNIX_FAB_RQ_RDMA_WRITE, - (uint64_t)iov[0].iov_base, iov[0].iov_len, desc? desc[0] : NULL, - dest_addr, addr, key, context, flags, 0); -} - -DIRECT_FN STATIC ssize_t gnix_ep_writemsg(struct fid_ep *ep, const struct fi_msg_rma *msg, - uint64_t flags) -{ - struct gnix_fid_ep *gnix_ep; - - if (!ep || !msg || !msg->msg_iov || !msg->rma_iov || - msg->iov_count != 1 || - msg->rma_iov_count > GNIX_MAX_RMA_IOV_LIMIT || - msg->rma_iov[0].len > msg->msg_iov[0].iov_len) { - return -FI_EINVAL; - } - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - flags = (flags & GNIX_WRITEMSG_FLAGS) | GNIX_RMA_WRITE_FLAGS_DEF; - - return _gnix_rma(gnix_ep, GNIX_FAB_RQ_RDMA_WRITE, - (uint64_t)msg->msg_iov[0].iov_base, - msg->msg_iov[0].iov_len, msg->desc ? 
msg->desc[0] : NULL, - msg->addr, msg->rma_iov[0].addr, msg->rma_iov[0].key, - msg->context, flags, msg->data); -} - -DIRECT_FN STATIC ssize_t gnix_ep_rma_inject(struct fid_ep *ep, const void *buf, - size_t len, fi_addr_t dest_addr, - uint64_t addr, uint64_t key) -{ - struct gnix_fid_ep *gnix_ep; - uint64_t flags; - - if (!ep) { - return -FI_EINVAL; - } - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - flags = gnix_ep->op_flags | FI_INJECT | GNIX_SUPPRESS_COMPLETION | - GNIX_RMA_WRITE_FLAGS_DEF; - - return _gnix_rma(gnix_ep, GNIX_FAB_RQ_RDMA_WRITE, - (uint64_t)buf, len, NULL, - dest_addr, addr, key, - NULL, flags, 0); -} - -DIRECT_FN STATIC ssize_t -gnix_ep_writedata(struct fid_ep *ep, const void *buf, size_t len, void *desc, - uint64_t data, fi_addr_t dest_addr, uint64_t addr, - uint64_t key, void *context) -{ - struct gnix_fid_ep *gnix_ep; - uint64_t flags; - - if (!ep) { - return -FI_EINVAL; - } - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - flags = gnix_ep->op_flags | FI_REMOTE_CQ_DATA | - GNIX_RMA_WRITE_FLAGS_DEF; - - return _gnix_rma(gnix_ep, GNIX_FAB_RQ_RDMA_WRITE, - (uint64_t)buf, len, desc, - dest_addr, addr, key, - context, flags, data); -} - -DIRECT_FN STATIC ssize_t -gnix_ep_rma_injectdata(struct fid_ep *ep, const void *buf, size_t len, - uint64_t data, fi_addr_t dest_addr, uint64_t addr, - uint64_t key) -{ - struct gnix_fid_ep *gnix_ep; - uint64_t flags; - - if (!ep) { - return -FI_EINVAL; - } - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - flags = gnix_ep->op_flags | FI_INJECT | FI_REMOTE_CQ_DATA | - GNIX_SUPPRESS_COMPLETION | GNIX_RMA_WRITE_FLAGS_DEF; - - return _gnix_rma(gnix_ep, GNIX_FAB_RQ_RDMA_WRITE, - (uint64_t)buf, len, NULL, - dest_addr, addr, key, - NULL, flags, data); -} - -/******************************************************************************* - * EP 
Tag matching API function implementations. - ******************************************************************************/ - -DIRECT_FN STATIC ssize_t gnix_ep_trecv(struct fid_ep *ep, void *buf, size_t len, - void *desc, fi_addr_t src_addr, - uint64_t tag, uint64_t ignore, - void *context) -{ - return _ep_recv(ep, buf, len, desc, src_addr, context, - FI_TAGGED, tag, ignore); -} - -DIRECT_FN STATIC ssize_t gnix_ep_trecvv(struct fid_ep *ep, - const struct iovec *iov, - void **desc, size_t count, - fi_addr_t src_addr, - uint64_t tag, uint64_t ignore, - void *context) -{ - return _ep_recvv(ep, iov, desc, count, src_addr, context, - FI_TAGGED, tag, ignore); -} - -DIRECT_FN STATIC ssize_t gnix_ep_trecvmsg(struct fid_ep *ep, - const struct fi_msg_tagged *msg, - uint64_t flags) -{ - const struct fi_msg _msg = { - .msg_iov = msg->msg_iov, - .desc = msg->desc, - .iov_count = msg->iov_count, - .addr = msg->addr, - .context = msg->context, - .data = msg->data - }; - - if (flags & ~GNIX_TRECVMSG_FLAGS) - return -FI_EINVAL; - - /* From the fi_tagged man page regarding the use of FI_CLAIM: - * - * In order to use the FI_CLAIM flag, an application must supply a - * struct fi_context structure as the context for the receive opera- - * tion. The same fi_context structure used for an FI_PEEK + FI_CLAIM - * operation must be used by the paired FI_CLAIM requests - */ - if ((flags & FI_CLAIM) && _msg.context == NULL) - return -FI_EINVAL; - - /* From the fi_tagged man page regarding the use of FI_DISCARD: - * - * This flag must be used in conjunction with either - * FI_PEEK or FI_CLAIM. - * - * Note: I suspect the use of all three flags at the same time is invalid, - * but the man page does not say that it is. 
- */ - if ((flags & FI_DISCARD) && !(flags & (FI_PEEK | FI_CLAIM))) - return -FI_EINVAL; - - return _ep_recvmsg(ep, &_msg, flags | FI_TAGGED, msg->tag, - msg->ignore); -} - -DIRECT_FN STATIC ssize_t gnix_ep_tsend(struct fid_ep *ep, const void *buf, - size_t len, void *desc, - fi_addr_t dest_addr, uint64_t tag, - void *context) -{ - return _ep_send(ep, buf, len, desc, dest_addr, context, - FI_TAGGED, tag); -} - -DIRECT_FN STATIC ssize_t gnix_ep_tsendv(struct fid_ep *ep, - const struct iovec *iov, - void **desc, size_t count, - fi_addr_t dest_addr, - uint64_t tag, void *context) -{ - return _ep_sendv(ep, iov, desc, count, dest_addr, context, - FI_TAGGED, tag); -} - -DIRECT_FN STATIC ssize_t gnix_ep_tsendmsg(struct fid_ep *ep, - const struct fi_msg_tagged *msg, - uint64_t flags) -{ - const struct fi_msg _msg = { - .msg_iov = msg->msg_iov, - .desc = msg->desc, - .iov_count = msg->iov_count, - .addr = msg->addr, - .context = msg->context, - .data = msg->data - }; - - if (flags & ~(GNIX_SENDMSG_FLAGS)) - return -FI_EINVAL; - - return _ep_sendmsg(ep, &_msg, flags | FI_TAGGED, msg->tag); -} - -DIRECT_FN STATIC ssize_t gnix_ep_tinject(struct fid_ep *ep, const void *buf, - size_t len, fi_addr_t dest_addr, - uint64_t tag) -{ - return _ep_inject(ep, buf, len, 0, dest_addr, FI_TAGGED, tag); -} - -DIRECT_FN STATIC ssize_t gnix_ep_tsenddata(struct fid_ep *ep, const void *buf, - size_t len, void *desc, - uint64_t data, fi_addr_t dest_addr, - uint64_t tag, void *context) -{ - return _ep_senddata(ep, buf, len, desc, data, dest_addr, context, - FI_TAGGED, tag); -} - -/** - * Injects data into the data buffer and returns immediately. 
- * - * @param[in] ep the endpoint we are sending data from - * @param[in] buf the data to send - * @param[in] len the length of buf - * @param[in] data remote CQ data to transfer with the data from buf - * @param[in] dest_addr the desitnation address for connectionless transfers - * @param[in] tag the tag associated with the message - * - * @return FI_SUCCESS upon successfully writing to the destination - * @return -FI_ERRNO upon an error - * @return -FI_ENOSYS if this operation is not supported - */ -DIRECT_FN STATIC ssize_t gnix_ep_tinjectdata(struct fid_ep *ep, const void *buf, - size_t len, uint64_t data, - fi_addr_t dest_addr, uint64_t tag) -{ - return _ep_inject(ep, buf, len, data, dest_addr, - FI_TAGGED | FI_REMOTE_CQ_DATA, tag); -} - -/******************************************************************************* - * EP atomic API implementation. - ******************************************************************************/ - -DIRECT_FN int gnix_ep_atomic_valid(struct fid_ep *ep, - enum fi_datatype datatype, - enum fi_op op, size_t *count) -{ - if (count) - *count = 1; - - return _gnix_atomic_cmd(datatype, op, GNIX_FAB_RQ_AMO) >= 0 ? - 0 : -FI_EOPNOTSUPP; -} - -DIRECT_FN int gnix_ep_fetch_atomic_valid(struct fid_ep *ep, - enum fi_datatype datatype, - enum fi_op op, size_t *count) -{ - if (count) - *count = 1; - - return _gnix_atomic_cmd(datatype, op, GNIX_FAB_RQ_FAMO) >= 0 ? - 0 : -FI_EOPNOTSUPP; -} - -DIRECT_FN int gnix_ep_cmp_atomic_valid(struct fid_ep *ep, - enum fi_datatype datatype, - enum fi_op op, size_t *count) -{ - if (count) - *count = 1; - - return _gnix_atomic_cmd(datatype, op, GNIX_FAB_RQ_CAMO) >= 0 ? 
- 0 : -FI_EOPNOTSUPP; -} - -size_t -__gnix_fabric_ops_native_amo(struct fid_ep *ep, const void *buf, size_t count, - void *desc, void *result, void *result_desc, - fi_addr_t dest_addr, - uint64_t addr, uint64_t key, - enum fi_datatype datatype, - int req_type, - void *context) -{ - struct gnix_fid_ep *gnix_ep; - struct fi_msg_atomic msg; - struct fi_ioc msg_iov; - struct fi_rma_ioc rma_iov; - struct fi_ioc result_iov; - uint64_t flags; - - if (!ep) - return -FI_EINVAL; - if ((req_type < 0) || (req_type > GNIX_FAB_RQ_MAX_TYPES) || - (req_type >= GNIX_FAB_RQ_END_NON_NATIVE && - req_type < GNIX_FAB_RQ_START_NATIVE)) - return -FI_EINVAL; - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - msg_iov.addr = (void *)buf; - msg_iov.count = count; - rma_iov.addr = addr; - rma_iov.count = 1; - rma_iov.key = key; - msg.msg_iov = &msg_iov; - msg.desc = &desc; - msg.iov_count = 1; - msg.addr = dest_addr; - msg.rma_iov = &rma_iov; - msg.datatype = datatype; - msg.op = FI_ATOMIC_OP_LAST; /* not FI_ATOMIC_OP */ - msg.context = context; - result_iov.addr = result; - result_iov.count = 1; - - flags = gnix_ep->op_flags | GNIX_ATOMIC_WRITE_FLAGS_DEF; - - return _gnix_atomic(gnix_ep, req_type, &msg, NULL, - NULL, 0, &result_iov, &result_desc, 1, flags); -} - -DIRECT_FN STATIC ssize_t -gnix_ep_atomic_write(struct fid_ep *ep, const void *buf, size_t count, - void *desc, fi_addr_t dest_addr, uint64_t addr, - uint64_t key, enum fi_datatype datatype, enum fi_op op, - void *context) -{ - struct gnix_fid_ep *gnix_ep; - struct fi_msg_atomic msg; - struct fi_ioc msg_iov; - struct fi_rma_ioc rma_iov; - uint64_t flags; - - if (gnix_ep_atomic_valid(ep, datatype, op, NULL)) - return -FI_EOPNOTSUPP; - - if (!ep) - return -FI_EINVAL; - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - msg_iov.addr = (void *)buf; - msg_iov.count = count; - msg.msg_iov = &msg_iov; - msg.desc = &desc; - 
msg.iov_count = 1; - msg.addr = dest_addr; - rma_iov.addr = addr; - rma_iov.count = 1; - rma_iov.key = key; - msg.rma_iov = &rma_iov; - msg.datatype = datatype; - msg.op = op; - msg.context = context; - - flags = gnix_ep->op_flags | GNIX_ATOMIC_WRITE_FLAGS_DEF; - - return _gnix_atomic(gnix_ep, GNIX_FAB_RQ_AMO, &msg, - NULL, NULL, 0, NULL, NULL, 0, flags); -} - -DIRECT_FN STATIC ssize_t -gnix_ep_atomic_writev(struct fid_ep *ep, const struct fi_ioc *iov, void **desc, - size_t count, fi_addr_t dest_addr, uint64_t addr, - uint64_t key, enum fi_datatype datatype, enum fi_op op, - void *context) -{ - if (!iov || count > 1) { - return -FI_EINVAL; - } - - return gnix_ep_atomic_write(ep, iov[0].addr, iov[0].count, - desc ? desc[0] : NULL, - dest_addr, addr, key, datatype, op, - context); -} - -DIRECT_FN STATIC ssize_t -gnix_ep_atomic_writemsg(struct fid_ep *ep, const struct fi_msg_atomic *msg, - uint64_t flags) -{ - struct gnix_fid_ep *gnix_ep; - - if (gnix_ep_atomic_valid(ep, msg->datatype, msg->op, NULL)) - return -FI_EOPNOTSUPP; - - if (!ep) - return -FI_EINVAL; - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - flags = (flags & GNIX_ATOMICMSG_FLAGS) | GNIX_ATOMIC_WRITE_FLAGS_DEF; - - return _gnix_atomic(gnix_ep, GNIX_FAB_RQ_AMO, msg, - NULL, NULL, 0, NULL, NULL, 0, flags); -} - -DIRECT_FN STATIC ssize_t -gnix_ep_atomic_inject(struct fid_ep *ep, const void *buf, size_t count, - fi_addr_t dest_addr, uint64_t addr, uint64_t key, - enum fi_datatype datatype, enum fi_op op) -{ - struct gnix_fid_ep *gnix_ep; - struct fi_msg_atomic msg; - struct fi_ioc msg_iov; - struct fi_rma_ioc rma_iov; - uint64_t flags; - - if (gnix_ep_atomic_valid(ep, datatype, op, NULL)) - return -FI_EOPNOTSUPP; - - if (!ep) - return -FI_EINVAL; - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - msg_iov.addr = (void *)buf; - msg_iov.count = count; - msg.msg_iov = &msg_iov; - msg.desc = 
NULL; - msg.iov_count = 1; - msg.addr = dest_addr; - rma_iov.addr = addr; - rma_iov.count = 1; - rma_iov.key = key; - msg.rma_iov = &rma_iov; - msg.datatype = datatype; - msg.op = op; - - flags = gnix_ep->op_flags | FI_INJECT | GNIX_SUPPRESS_COMPLETION | - GNIX_ATOMIC_WRITE_FLAGS_DEF; - - return _gnix_atomic(gnix_ep, GNIX_FAB_RQ_AMO, &msg, - NULL, NULL, 0, NULL, NULL, 0, flags); -} - -DIRECT_FN STATIC ssize_t -gnix_ep_atomic_readwrite(struct fid_ep *ep, const void *buf, size_t count, - void *desc, void *result, void *result_desc, - fi_addr_t dest_addr, uint64_t addr, uint64_t key, - enum fi_datatype datatype, enum fi_op op, - void *context) -{ - struct gnix_fid_ep *gnix_ep; - struct fi_msg_atomic msg; - struct fi_ioc msg_iov; - struct fi_rma_ioc rma_iov; - struct fi_ioc result_iov; - uint64_t flags; - - if (gnix_ep_fetch_atomic_valid(ep, datatype, op, NULL)) - return -FI_EOPNOTSUPP; - - if (!ep) - return -FI_EINVAL; - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - msg_iov.addr = (void *)buf; - msg_iov.count = count; - msg.msg_iov = &msg_iov; - msg.desc = &desc; - msg.iov_count = 1; - msg.addr = dest_addr; - rma_iov.addr = addr; - rma_iov.count = 1; - rma_iov.key = key; - msg.rma_iov = &rma_iov; - msg.datatype = datatype; - msg.op = op; - msg.context = context; - result_iov.addr = result; - result_iov.count = 1; - - flags = gnix_ep->op_flags | GNIX_ATOMIC_READ_FLAGS_DEF; - - return _gnix_atomic(gnix_ep, GNIX_FAB_RQ_FAMO, &msg, - NULL, NULL, 0, - &result_iov, &result_desc, 1, - flags); -} - -DIRECT_FN STATIC ssize_t -gnix_ep_atomic_readwritev(struct fid_ep *ep, const struct fi_ioc *iov, - void **desc, size_t count, struct fi_ioc *resultv, - void **result_desc, size_t result_count, - fi_addr_t dest_addr, uint64_t addr, uint64_t key, - enum fi_datatype datatype, enum fi_op op, - void *context) -{ - if (!iov || count > 1 || !resultv) - return -FI_EINVAL; - - return gnix_ep_atomic_readwrite(ep, iov[0].addr, 
iov[0].count, - desc ? desc[0] : NULL, - resultv[0].addr, - result_desc ? result_desc[0] : NULL, - dest_addr, addr, key, datatype, op, - context); -} - -DIRECT_FN STATIC ssize_t -gnix_ep_atomic_readwritemsg(struct fid_ep *ep, const struct fi_msg_atomic *msg, - struct fi_ioc *resultv, void **result_desc, - size_t result_count, uint64_t flags) -{ - struct gnix_fid_ep *gnix_ep; - - if (gnix_ep_fetch_atomic_valid(ep, msg->datatype, msg->op, NULL)) - return -FI_EOPNOTSUPP; - - if (!ep) - return -FI_EINVAL; - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - flags = (flags & GNIX_FATOMICMSG_FLAGS) | GNIX_ATOMIC_READ_FLAGS_DEF; - - return _gnix_atomic(gnix_ep, GNIX_FAB_RQ_FAMO, msg, - NULL, NULL, 0, - resultv, result_desc, result_count, - flags); -} - -DIRECT_FN STATIC ssize_t -gnix_ep_atomic_compwrite(struct fid_ep *ep, const void *buf, size_t count, - void *desc, const void *compare, void *compare_desc, - void *result, void *result_desc, fi_addr_t dest_addr, - uint64_t addr, uint64_t key, enum fi_datatype datatype, - enum fi_op op, void *context) -{ - struct gnix_fid_ep *gnix_ep; - struct fi_msg_atomic msg; - struct fi_ioc msg_iov; - struct fi_rma_ioc rma_iov; - struct fi_ioc result_iov; - struct fi_ioc compare_iov; - uint64_t flags; - - if (gnix_ep_cmp_atomic_valid(ep, datatype, op, NULL)) - return -FI_EOPNOTSUPP; - - if (!ep) - return -FI_EINVAL; - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - msg_iov.addr = (void *)buf; - msg_iov.count = count; - msg.msg_iov = &msg_iov; - msg.desc = &desc; - msg.iov_count = 1; - msg.addr = dest_addr; - rma_iov.addr = addr; - rma_iov.count = 1; - rma_iov.key = key; - msg.rma_iov = &rma_iov; - msg.datatype = datatype; - msg.op = op; - msg.context = context; - result_iov.addr = result; - result_iov.count = 1; - compare_iov.addr = (void *)compare; - compare_iov.count = 1; - - flags = gnix_ep->op_flags | 
GNIX_ATOMIC_READ_FLAGS_DEF; - - return _gnix_atomic(gnix_ep, GNIX_FAB_RQ_CAMO, &msg, - &compare_iov, &compare_desc, 1, - &result_iov, &result_desc, 1, - flags); -} - -DIRECT_FN STATIC ssize_t gnix_ep_atomic_compwritev(struct fid_ep *ep, - const struct fi_ioc *iov, - void **desc, - size_t count, - const struct fi_ioc *comparev, - void **compare_desc, - size_t compare_count, - struct fi_ioc *resultv, - void **result_desc, - size_t result_count, - fi_addr_t dest_addr, - uint64_t addr, uint64_t key, - enum fi_datatype datatype, - enum fi_op op, - void *context) -{ - if (!iov || count > 1 || !resultv || !comparev) - return -FI_EINVAL; - - return gnix_ep_atomic_compwrite(ep, iov[0].addr, iov[0].count, - desc ? desc[0] : NULL, - comparev[0].addr, - compare_desc ? compare_desc[0] : NULL, - resultv[0].addr, - result_desc ? result_desc[0] : NULL, - dest_addr, addr, key, datatype, op, - context); -} - -DIRECT_FN STATIC ssize_t gnix_ep_atomic_compwritemsg(struct fid_ep *ep, - const struct fi_msg_atomic *msg, - const struct fi_ioc *comparev, - void **compare_desc, - size_t compare_count, - struct fi_ioc *resultv, - void **result_desc, - size_t result_count, - uint64_t flags) -{ - struct gnix_fid_ep *gnix_ep; - - if (gnix_ep_cmp_atomic_valid(ep, msg->datatype, msg->op, NULL)) - return -FI_EOPNOTSUPP; - - if (!ep) - return -FI_EINVAL; - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(gnix_ep->type)); - - flags = (flags & GNIX_CATOMICMSG_FLAGS) | GNIX_ATOMIC_READ_FLAGS_DEF; - - return _gnix_atomic(gnix_ep, GNIX_FAB_RQ_CAMO, msg, - comparev, compare_desc, compare_count, - resultv, result_desc, result_count, - flags); -} - -/******************************************************************************* - * Base EP API function implementations. 
- ******************************************************************************/ - -DIRECT_FN STATIC int gnix_ep_control(fid_t fid, int command, void *arg) -{ - int ret = FI_SUCCESS; - struct gnix_fid_ep *ep; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - ep = container_of(fid, struct gnix_fid_ep, ep_fid); - - switch (command) { - /* - * for FI_EP_RDM/DGRAM, enable the cm_nic associated - * with this ep. - */ - case FI_ENABLE: - - if (GNIX_EP_RDM_DGM(ep->type)) { - if ((ep->send_cq && ep->tx_enabled)) { - ret = -FI_EOPBADSTATE; - goto err; - } - if ((ep->recv_cq && ep->rx_enabled)) { - ret = -FI_EOPBADSTATE; - goto err; - } - ret = _gnix_vc_cm_init(ep->cm_nic); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_vc_cm_nic_init call returned %d\n", - ret); - goto err; - } - ret = _gnix_cm_nic_enable(ep->cm_nic); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_cm_nic_enable call returned %d\n", - ret); - goto err; - } - } - - ret = _gnix_ep_tx_enable(ep); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_ep_tx_enable call returned %d\n", - ret); - goto err; - } - - ret = _gnix_ep_rx_enable(ep); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_ep_rx_enable call returned %d\n", - ret); - goto err; - } - - ret = _gnix_ep_int_tx_pool_init(ep); - break; - - case FI_GETOPSFLAG: - case FI_SETOPSFLAG: - case FI_ALIAS: - default: - return -FI_ENOSYS; - } - -err: - return ret; -} - -static int __destruct_tag_storages(struct gnix_fid_ep *ep) -{ - int ret; - - GNIX_INFO(FI_LOG_EP_CTRL, "destroying tag storage\n"); - - ret = _gnix_tag_storage_destroy(&ep->unexp_recv_queue); - if (ret) - return ret; - - ret = _gnix_tag_storage_destroy(&ep->posted_recv_queue); - if (ret) - return ret; - - ret = _gnix_tag_storage_destroy(&ep->tagged_unexp_recv_queue); - if (ret) - return ret; - - ret = _gnix_tag_storage_destroy(&ep->tagged_posted_recv_queue); - - return ret; -} - -static void __ep_destruct(void *obj) -{ - int ret; - struct 
gnix_fid_domain *domain; - struct gnix_fid_av *av; - gnix_ht_key_t *key_ptr; - struct gnix_fid_ep *ep = (struct gnix_fid_ep *) obj; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (ep->type == FI_EP_MSG) { - if (GNIX_EP_CONNECTED(ep)) { - assert(ep->vc); - ret = _gnix_vc_destroy(ep->vc); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_vc_destroy returned %s\n", - fi_strerror(-ret)); - } - } - } else if (ep->av) { - /* Remove EP from CM NIC lookup list. */ - key_ptr = (gnix_ht_key_t *)&ep->src_addr.gnix_addr; - ret = _gnix_ht_remove(ep->cm_nic->addr_to_ep_ht, - *key_ptr); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_ht_remove returned %s\n", - fi_strerror(-ret)); - } - - /* Destroy EP VC storage. */ - ret = __gnix_ep_fini_vc(ep); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_ht_remove returned %s\n", - fi_strerror(-ret)); - } - } - - if (ep->eq) { - _gnix_eq_poll_obj_rem(ep->eq, &ep->ep_fid.fid); - _gnix_ref_put(ep->eq); - } - - if (ep->send_cq) { - _gnix_cq_poll_obj_rem(ep->send_cq, ep->nic, - _gnix_nic_progress); - if (ep->cm_nic) /* No CM NIC for MSG EPs */ - _gnix_cq_poll_obj_rem(ep->send_cq, ep->cm_nic, - _gnix_cm_nic_progress); - _gnix_ref_put(ep->send_cq); - } - - if (ep->recv_cq) { - _gnix_cq_poll_obj_rem(ep->recv_cq, ep->nic, - _gnix_nic_progress); - if (ep->cm_nic) /* No CM NIC for MSG EPs */ - _gnix_cq_poll_obj_rem(ep->recv_cq, ep->cm_nic, - _gnix_cm_nic_progress); - _gnix_ref_put(ep->recv_cq); - } - - if (ep->send_cntr) { - _gnix_cntr_poll_obj_rem(ep->send_cntr, ep->nic, - _gnix_nic_progress); - if (ep->cm_nic) /* No CM NIC for MSG EPs */ - _gnix_cntr_poll_obj_rem(ep->send_cntr, ep->cm_nic, - _gnix_cm_nic_progress); - _gnix_ref_put(ep->send_cntr); - } - - if (ep->recv_cntr) { - _gnix_cntr_poll_obj_rem(ep->recv_cntr, ep->nic, - _gnix_nic_progress); - if (ep->cm_nic) /* No CM NIC for MSG EPs */ - _gnix_cntr_poll_obj_rem(ep->recv_cntr, ep->cm_nic, - _gnix_cm_nic_progress); - _gnix_ref_put(ep->recv_cntr); - } 
- - if (ep->write_cntr) { - _gnix_cntr_poll_obj_rem(ep->write_cntr, ep->nic, - _gnix_nic_progress); - if (ep->cm_nic) /* No CM NIC for MSG EPs */ - _gnix_cntr_poll_obj_rem(ep->write_cntr, ep->cm_nic, - _gnix_cm_nic_progress); - _gnix_ref_put(ep->write_cntr); - } - - if (ep->read_cntr) { - _gnix_cntr_poll_obj_rem(ep->read_cntr, ep->nic, - _gnix_nic_progress); - if (ep->cm_nic) /* No CM NIC for MSG EPs */ - _gnix_cntr_poll_obj_rem(ep->read_cntr, ep->cm_nic, - _gnix_cm_nic_progress); - _gnix_ref_put(ep->read_cntr); - } - - if (ep->rwrite_cntr) { - _gnix_cntr_poll_obj_rem(ep->rwrite_cntr, ep->nic, - _gnix_nic_progress); - if (ep->cm_nic) /* No CM NIC for MSG EPs */ - _gnix_cntr_poll_obj_rem(ep->rwrite_cntr, ep->cm_nic, - _gnix_cm_nic_progress); - _gnix_ref_put(ep->rwrite_cntr); - } - - if (ep->rread_cntr) { - _gnix_cntr_poll_obj_rem(ep->rread_cntr, ep->nic, - _gnix_nic_progress); - if (ep->cm_nic) /* No CM NIC for MSG EPs */ - _gnix_cntr_poll_obj_rem(ep->rread_cntr, ep->cm_nic, - _gnix_cm_nic_progress); - _gnix_ref_put(ep->rread_cntr); - } - - if (ep->stx_ctx) - _gnix_ref_put(ep->stx_ctx); - - if (ep->xpmem_hndl) { - ret = _gnix_xpmem_handle_destroy(ep->xpmem_hndl); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_xpmem_handle_destroy returned %s\n", - fi_strerror(-ret)); - } - - domain = ep->domain; - assert(domain != NULL); - _gnix_ref_put(domain); - - av = ep->av; - if (av != NULL) - _gnix_ref_put(av); - - if (ep->nic) { - ret = _gnix_nic_free(ep->nic); - if (ret != FI_SUCCESS) - GNIX_FATAL(FI_LOG_EP_CTRL, - "_gnix_nic_free failed: %d\n"); - } - - if (ep->cm_nic) { - ret = _gnix_cm_nic_free(ep->cm_nic); - if (ret != FI_SUCCESS) - GNIX_FATAL(FI_LOG_EP_CTRL, - "_gnix_cm_nic_free failed: %d\n"); - } - - __destruct_tag_storages(ep); - - /* - * Free fab_reqs - */ - - __fr_freelist_destroy(ep); - _gnix_ep_int_tx_pool_fini(ep); - - fi_freeinfo(ep->info); - - free(ep); -} - -int gnix_ep_close(fid_t fid) -{ - int ret = FI_SUCCESS; - struct gnix_fid_ep *ep; - int 
references_held; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - ep = container_of(fid, struct gnix_fid_ep, ep_fid.fid); - - references_held = _gnix_ref_put(ep); - if (references_held) - GNIX_INFO(FI_LOG_EP_CTRL, "failed to fully close ep due " - "to lingering references. references=%i ep=%p\n", - references_held, ep); - - return ret; -} - -DIRECT_FN int gnix_ep_bind(fid_t fid, struct fid *bfid, uint64_t flags) -{ - int ret = FI_SUCCESS; - struct gnix_fid_ep *ep; - struct gnix_fid_eq *eq; - struct gnix_fid_av *av; - struct gnix_fid_cq *cq; - struct gnix_fid_stx *stx; - struct gnix_fid_cntr *cntr; - struct gnix_fid_trx *trx_priv; - struct gnix_nic_attr nic_attr = {0}; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - switch (fid->fclass) { - case FI_CLASS_TX_CTX: - case FI_CLASS_RX_CTX: - trx_priv = container_of(fid, struct gnix_fid_trx, ep_fid); - ep = trx_priv->ep; - break; - default: - ep = container_of(fid, struct gnix_fid_ep, ep_fid.fid); - } - - ret = ofi_ep_bind_valid(&gnix_prov, bfid, flags); - if (ret) - return ret; - - /* - * Per fi_endpoint man page, can't bind an object - * to an ep after its been enabled. - * For scalable endpoints, the rx/tx contexts are bound to the same - * gnix_ep so we allow enabling of the tx before binding the rx and - * vice versa. 
- */ - switch (fid->fclass) { - case FI_CLASS_TX_CTX: - if (ep->send_cq && ep->tx_enabled) { - return -FI_EOPBADSTATE; - } - break; - case FI_CLASS_RX_CTX: - if (ep->recv_cq && ep->rx_enabled) { - return -FI_EOPBADSTATE; - } - break; - default: - if ((ep->send_cq && ep->tx_enabled) || - (ep->recv_cq && ep->rx_enabled)) { - return -FI_EOPBADSTATE; - } - } - - switch (bfid->fclass) { - case FI_CLASS_EQ: - eq = container_of(bfid, struct gnix_fid_eq, eq_fid.fid); - if (ep->domain->fabric != eq->fabric) { - ret = -FI_EINVAL; - break; - } - - if (ep->eq) { - ret = -FI_EINVAL; - break; - } - - ep->eq = eq; - _gnix_eq_poll_obj_add(eq, &ep->ep_fid.fid); - _gnix_ref_get(eq); - - GNIX_DEBUG(FI_LOG_EP_CTRL, "Bound EQ to EP: %p, %p\n", eq, ep); - break; - case FI_CLASS_CQ: - cq = container_of(bfid, struct gnix_fid_cq, cq_fid.fid); - if (ep->domain != cq->domain) { - ret = -FI_EINVAL; - break; - } - if (flags & FI_TRANSMIT) { - /* don't allow rebinding */ - if (ep->send_cq) { - ret = -FI_EINVAL; - break; - } - - ep->send_cq = cq; - if (flags & FI_SELECTIVE_COMPLETION) { - ep->send_selective_completion = 1; - } - - _gnix_ref_get(cq); - } - if (flags & FI_RECV) { - /* don't allow rebinding */ - if (ep->recv_cq) { - ret = -FI_EINVAL; - break; - } - - ep->recv_cq = cq; - if (flags & FI_SELECTIVE_COMPLETION) { - ep->recv_selective_completion = 1; - } - - _gnix_ref_get(cq); - } - break; - case FI_CLASS_AV: - av = container_of(bfid, struct gnix_fid_av, av_fid.fid); - if (ep->domain != av->domain) { - ret = -FI_EINVAL; - break; - } - ep->av = av; - _gnix_ep_init_vc(ep); - _gnix_ref_get(ep->av); - break; - case FI_CLASS_CNTR: - cntr = container_of(bfid, struct gnix_fid_cntr, cntr_fid.fid); - if (ep->domain != cntr->domain) { - ret = -FI_EINVAL; - break; - } - - if (flags & FI_SEND) { - /* don't allow rebinding */ - if (ep->send_cntr) { - GNIX_WARN(FI_LOG_EP_CTRL, - "cannot rebind send counter (%p)\n", - cntr); - ret = -FI_EINVAL; - break; - } - ep->send_cntr = cntr; - 
_gnix_ref_get(cntr); - } - - if (flags & FI_RECV) { - /* don't allow rebinding */ - if (ep->recv_cntr) { - GNIX_WARN(FI_LOG_EP_CTRL, - "cannot rebind recv counter (%p)\n", - cntr); - ret = -FI_EINVAL; - break; - } - ep->recv_cntr = cntr; - _gnix_ref_get(cntr); - } - - if (flags & FI_WRITE) { - /* don't allow rebinding */ - if (ep->write_cntr) { - GNIX_WARN(FI_LOG_EP_CTRL, - "cannot rebind write counter (%p)\n", - cntr); - ret = -FI_EINVAL; - break; - } - ep->write_cntr = cntr; - _gnix_ref_get(cntr); - } - - if (flags & FI_READ) { - /* don't allow rebinding */ - if (ep->read_cntr) { - GNIX_WARN(FI_LOG_EP_CTRL, - "cannot rebind read counter (%p)\n", - cntr); - ret = -FI_EINVAL; - break; - } - ep->read_cntr = cntr; - _gnix_ref_get(cntr); - } - - if (flags & FI_REMOTE_WRITE) { - /* don't allow rebinding */ - if (ep->rwrite_cntr) { - GNIX_WARN(FI_LOG_EP_CTRL, - "cannot rebind rwrite counter (%p)\n", - cntr); - ret = -FI_EINVAL; - break; - } - ep->rwrite_cntr = cntr; - _gnix_ref_get(cntr); - } - - if (flags & FI_REMOTE_READ) { - /* don't allow rebinding */ - if (ep->rread_cntr) { - GNIX_WARN(FI_LOG_EP_CTRL, - "cannot rebind rread counter (%p)\n", - cntr); - ret = -FI_EINVAL; - break; - } - ep->rread_cntr = cntr; - _gnix_ref_get(cntr); - } - - break; - - case FI_CLASS_STX_CTX: - stx = container_of(bfid, struct gnix_fid_stx, stx_fid.fid); - if (ep->domain != stx->domain) { - ret = -FI_EINVAL; - break; - } - - /* - * can only bind an STX to an ep opened with - * FI_SHARED_CONTEXT ep_attr->tx_ctx_cnt and also - * if a nic has not been previously bound - */ - - if (ep->shared_tx == false || ep->nic) { - ret = -FI_EOPBADSTATE; - break; - } - - /* - * we force allocation of a nic to make semantics - * match the intent fi_endpoint man page, provide - * a TX context (aka gnix nic) that can be shared - * explicitly amongst endpoints - */ - if (stx->auth_key && ep->auth_key != stx->auth_key) { - ret = -FI_EINVAL; - break; - } - - if (!stx->nic) { - nic_attr.must_alloc = true; - 
nic_attr.auth_key = ep->auth_key; - ret = gnix_nic_alloc(ep->domain, &nic_attr, - &stx->nic); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_nic_alloc call returned %d\n", - ret); - break; - } - stx->auth_key = nic_attr.auth_key; - } - - ep->stx_ctx = stx; - _gnix_ref_get(ep->stx_ctx); - - ep->nic = stx->nic; - if (ep->nic->smsg_callbacks == NULL) - ep->nic->smsg_callbacks = gnix_ep_smsg_callbacks; - _gnix_ref_get(ep->nic); - break; - - case FI_CLASS_MR:/*TODO: got to figure this one out */ - default: - ret = -FI_ENOSYS; - break; - } - - return ret; -} - -static void gnix_ep_caps(struct gnix_fid_ep *ep_priv, uint64_t caps) -{ - if (ofi_recv_allowed(caps & ~FI_TAGGED)) - ep_priv->ep_ops.msg_recv_allowed = 1; - - if (ofi_send_allowed(caps & ~FI_TAGGED)) - ep_priv->ep_ops.msg_send_allowed = 1; - - if (ofi_recv_allowed(caps & ~FI_MSG)) - ep_priv->ep_ops.tagged_recv_allowed = 1; - - if (ofi_send_allowed(caps & ~FI_MSG)) - ep_priv->ep_ops.tagged_send_allowed = 1; - -} - -static int __init_tag_storages(struct gnix_fid_ep *ep, int tag_type, - int use_addrs) -{ - int tsret; - struct gnix_tag_storage_attr untagged_attr = { - .type = tag_type, - .use_src_addr_matching = use_addrs, - }; - struct gnix_tag_storage_attr tagged_attr = { - .type = tag_type, - .use_src_addr_matching = use_addrs, - }; - - GNIX_INFO(FI_LOG_EP_CTRL, "initializing tag storage, tag_type=%d\n", - tag_type); - - /* init untagged storages */ - tsret = _gnix_posted_tag_storage_init( - &ep->posted_recv_queue, &untagged_attr); - if (tsret) - return tsret; - - tsret = _gnix_unexpected_tag_storage_init( - &ep->unexp_recv_queue, &untagged_attr); - if (tsret) - return tsret; - - /* init tagged storages */ - tsret = _gnix_posted_tag_storage_init( - &ep->tagged_posted_recv_queue, &tagged_attr); - if (tsret) - return tsret; - - tsret = _gnix_unexpected_tag_storage_init( - &ep->tagged_unexp_recv_queue, &tagged_attr); - - return tsret; -} - -static int _gnix_ep_nic_init(struct gnix_fid_domain *domain, 
- struct fi_info *info, - struct gnix_fid_ep *ep) -{ - int ret = FI_SUCCESS; - uint32_t cdm_id = GNIX_CREATE_CDM_ID; - struct gnix_ep_name *name; - struct gnix_nic_attr nic_attr = {0}; - - if (ep->type == FI_EP_MSG) { - if (ep->shared_tx == false) { - nic_attr.auth_key = ep->auth_key; - - ret = gnix_nic_alloc(domain, &nic_attr, - &ep->nic); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_nic_alloc call returned %d\n", - ret); - } - } - return ret; - } - - name = (struct gnix_ep_name *)info->src_addr; - if (name && name->name_type == GNIX_EPN_TYPE_BOUND) { - /* Endpoint was bound to a specific source address. Create a - * new CM NIC to listen on this address. */ - ret = _gnix_cm_nic_alloc(domain, info, name->gnix_addr.cdm_id, - ep->auth_key, &ep->cm_nic); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_cm_nic_alloc returned %s\n", - fi_strerror(-ret)); - return ret; - } - - ep->src_addr = ep->cm_nic->my_name; - - /* - * if this endpoint is not going to use a shared TX - * aka gnix_nic, link its nic with the one being - * used for the cm_nic to reduce pressure on underlying - * hardware resources. - */ - if (ep->shared_tx == false) { - ep->nic = ep->cm_nic->nic; - _gnix_ref_get(ep->nic); - - GNIX_INFO(FI_LOG_EP_CTRL, - "Allocated new NIC for bound EP: %p (ID:%d)\n", - ep->src_addr.gnix_addr.cdm_id); - } - } else { - ofi_spin_lock(&domain->cm_nic_lock); - - /* Allocate a domain CM NIC, if needed. 
*/ - if (domain->cm_nic == NULL) { - ret = _gnix_cm_nic_create_cdm_id(domain, &cdm_id); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "gnix_cm_nic_create_cdm_id returned %s\n", - fi_strerror(-ret)); - return ret; - } - - ret = _gnix_cm_nic_alloc(domain, info, cdm_id, - ep->auth_key, &domain->cm_nic); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_cm_nic_alloc returned %s\n", - fi_strerror(-ret)); - ofi_spin_unlock(&domain->cm_nic_lock); - return ret; - } - - /* Use the newly allocated domain CM NIC for data - * movement on this EP if not using STX. */ - ep->cm_nic = domain->cm_nic; - if (ep->shared_tx == false) { - ep->nic = ep->cm_nic->nic; - _gnix_ref_get(ep->nic); - } - - GNIX_INFO(FI_LOG_EP_CTRL, - "Allocated new NIC for EP: %p (ID:%d)\n", - ep->src_addr.gnix_addr.cdm_id); - } else { - /* Re-use the existing domain CM NIC. */ - ep->cm_nic = domain->cm_nic; - _gnix_ref_get(ep->cm_nic); - - if (ep->shared_tx == false) { - nic_attr.auth_key = ep->auth_key; - - /* Allocate a new NIC for data - movement on this EP. 
*/ - ret = gnix_nic_alloc(domain, - &nic_attr, &ep->nic); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_nic_alloc call returned %d\n", - ret); - ofi_spin_unlock(&domain->cm_nic_lock); - return ret; - } - - GNIX_INFO(FI_LOG_EP_CTRL, - "Allocated new NIC for xfers: %p (ID:%d)\n", - ep->src_addr.gnix_addr.cdm_id); - } - } - - ofi_spin_unlock(&domain->cm_nic_lock); - - ep->src_addr.gnix_addr.device_addr = - ep->cm_nic->my_name.gnix_addr.device_addr; - ep->src_addr.cm_nic_cdm_id = - ep->cm_nic->my_name.gnix_addr.cdm_id; - - ret = _gnix_cm_nic_create_cdm_id(domain, &cdm_id); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "gnix_cm_nic_create_cdm_id returned %s\n", - fi_strerror(-ret)); - if(ep->nic != NULL) - _gnix_ref_put(ep->nic); - return ret; - } - ep->src_addr.gnix_addr.cdm_id = cdm_id; - } - - return FI_SUCCESS; -} - -static int _gnix_ep_msg_open(struct gnix_fid_domain *domain, - struct fi_info *info, - struct gnix_fid_ep *ep) -{ - ep->ep_fid.cm = &gnix_ep_msg_ops_cm; - ep->conn_fd = -1; - ep->conn_state = GNIX_EP_UNCONNECTED; - - return FI_SUCCESS; -} - -static int _gnix_ep_unconn_open(struct gnix_fid_domain *domain, - struct fi_info *info, - struct gnix_fid_ep *ep) -{ - int ret; - gnix_ht_key_t *key_ptr; - - key_ptr = (gnix_ht_key_t *)&ep->src_addr.gnix_addr; - ret = _gnix_ht_insert(ep->cm_nic->addr_to_ep_ht, - *key_ptr, ep); - if ((ret != FI_SUCCESS) && (ret != -FI_ENOSPC)) { - GNIX_WARN(FI_LOG_EP_CTRL, - "__gnix_ht_insert returned %d\n", - ret); - return ret; - } - - /* Unconnected endpoints use a limited set of CM ops. 
*/ - ep->ep_fid.cm = &gnix_ep_ops_cm; - - return FI_SUCCESS; -} - -DIRECT_FN int gnix_ep_open(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **ep, void *context) -{ - int ret = FI_SUCCESS; - int err_ret; - struct gnix_fid_domain *domain_priv; - struct gnix_fid_ep *ep_priv; - struct gnix_auth_key *auth_key; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if ((domain == NULL) || (info == NULL) || (ep == NULL) || - (info->ep_attr == NULL)) - return -FI_EINVAL; - - domain_priv = container_of(domain, struct gnix_fid_domain, domain_fid); - - if (FI_VERSION_LT(domain_priv->fabric->fab_fid.api_version, - FI_VERSION(1, 5)) && - (info->ep_attr->auth_key || info->ep_attr->auth_key_size)) - return -FI_EINVAL; - - if (info->ep_attr->auth_key_size) { - auth_key = GNIX_GET_AUTH_KEY(info->ep_attr->auth_key, - info->ep_attr->auth_key_size, - domain_priv->using_vmdh); - if (!auth_key) - return -FI_EINVAL; - } else { - auth_key = domain_priv->auth_key; - assert(auth_key); - } - - ep_priv = calloc(1, sizeof *ep_priv); - if (!ep_priv) - return -FI_ENOMEM; - - /* Set up libfabric fid data. */ - ep_priv->ep_fid.fid.fclass = FI_CLASS_EP; - ep_priv->ep_fid.fid.context = context; - ep_priv->ep_fid.fid.ops = &gnix_ep_fi_ops; - ep_priv->ep_fid.ops = &gnix_ep_ops; - ep_priv->ep_fid.msg = &gnix_ep_msg_ops; - ep_priv->ep_fid.rma = &gnix_ep_rma_ops; - ep_priv->ep_fid.tagged = &gnix_ep_tagged_ops; - ep_priv->ep_fid.atomic = &gnix_ep_atomic_ops; - - /* Init GNIX data. 
*/ - ep_priv->auth_key = auth_key; - ep_priv->type = info->ep_attr->type; - ep_priv->domain = domain_priv; - _gnix_ref_init(&ep_priv->ref_cnt, 1, __ep_destruct); - ep_priv->min_multi_recv = GNIX_OPT_MIN_MULTI_RECV_DEFAULT; - ofi_spin_init(&ep_priv->vc_lock); - ep_priv->progress_fn = NULL; - ep_priv->rx_progress_fn = NULL; - ep_priv->tx_enabled = false; - ep_priv->rx_enabled = false; - ep_priv->requires_lock = (domain_priv->thread_model != - FI_THREAD_COMPLETION); - ep_priv->info = fi_dupinfo(info); - ep_priv->info->addr_format = info->addr_format; - - GNIX_DEBUG(FI_LOG_EP_CTRL, "ep(%p) is using addr_format(%s)\n", ep_priv, - ep_priv->info->addr_format == FI_ADDR_STR ? "FI_ADDR_STR" : - "FI_ADDR_GNI"); - - if (info->src_addr) { - memcpy(&ep_priv->src_addr, info->src_addr, - sizeof(struct gnix_ep_name)); - } - - if (info->dest_addr) { - memcpy(&ep_priv->dest_addr, info->dest_addr, - sizeof(struct gnix_ep_name)); - } - - ret = __init_tag_storages(ep_priv, GNIX_TAG_LIST, - ep_priv->type == FI_EP_MSG ? 0 : 1); - if (ret) - goto err_tag_init; - - ret = __fr_freelist_init(ep_priv); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "Error allocating gnix_fab_req freelist (%s)", - fi_strerror(-ret)); - goto err_fl_init; - } - - ep_priv->shared_tx = (info->ep_attr->tx_ctx_cnt == FI_SHARED_CONTEXT) ? - true : false; - /* - * try out XPMEM - */ - ret = _gnix_xpmem_handle_create(domain_priv, - &ep_priv->xpmem_hndl); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_xpmem_handl_create returned %s\n", - fi_strerror(-ret)); - } - - /* Initialize caps, modes, permissions, behaviors. 
*/ - ep_priv->caps = info->caps & GNIX_EP_CAPS_FULL; - - if (ep_priv->info->tx_attr) - ep_priv->op_flags = ep_priv->info->tx_attr->op_flags; - if (ep_priv->info->rx_attr) - ep_priv->op_flags |= ep_priv->info->rx_attr->op_flags; - ep_priv->op_flags &= GNIX_EP_OP_FLAGS; - - gnix_ep_caps(ep_priv, ep_priv->caps); - - ret = _gnix_ep_nic_init(domain_priv, ep_priv->info, ep_priv); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_ep_nic_init returned %d\n", - ret); - goto err_nic_init; - } - - /* Do EP type specific initialization. */ - switch (ep_priv->type) { - case FI_EP_DGRAM: - case FI_EP_RDM: - ret = _gnix_ep_unconn_open(domain_priv, ep_priv->info, ep_priv); - if (ret != FI_SUCCESS) { - GNIX_INFO(FI_LOG_EP_CTRL, - "_gnix_ep_unconn_open() failed, err: %d\n", - ret); - goto err_type_init; - } - break; - case FI_EP_MSG: - ret = _gnix_ep_msg_open(domain_priv, ep_priv->info, ep_priv); - if (ret != FI_SUCCESS) { - GNIX_INFO(FI_LOG_EP_CTRL, - "_gnix_ep_msg_open() failed, err: %d\n", - ret); - goto err_type_init; - } - break; - default: - ret = -FI_EINVAL; - goto err_type_init; - } - - _gnix_ref_get(ep_priv->domain); - - *ep = &ep_priv->ep_fid; - - return ret; - -err_type_init: - if (ep_priv->nic) - _gnix_nic_free(ep_priv->nic); - _gnix_cm_nic_free(ep_priv->cm_nic); -err_nic_init: - if (ep_priv->xpmem_hndl) { - err_ret = _gnix_xpmem_handle_destroy(ep_priv->xpmem_hndl); - if (err_ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_xpmem_handle_destroy returned %s\n", - fi_strerror(-err_ret)); - } - } - - __fr_freelist_destroy(ep_priv); -err_fl_init: - __destruct_tag_storages(ep_priv); -err_tag_init: - free(ep_priv); - - return ret; -} - -int _gnix_ep_alloc(struct fid_domain *domain, struct fi_info *info, - struct gnix_ep_attr *attr, - struct fid_ep **ep, void *context) -{ - int ret = FI_SUCCESS; - int err_ret; - struct gnix_fid_domain *domain_priv; - struct gnix_fid_ep *ep_priv; - gnix_ht_key_t *key_ptr; - struct gnix_auth_key *auth_key; - uint32_t 
cdm_id; - bool free_list_inited = false; - struct gnix_nic_attr nic_attr = {0}; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if ((domain == NULL) || (info == NULL) || (ep == NULL) || - (info->ep_attr == NULL)) - return -FI_EINVAL; - - domain_priv = container_of(domain, struct gnix_fid_domain, domain_fid); - - if (info->ep_attr->auth_key_size) { - auth_key = GNIX_GET_AUTH_KEY(info->ep_attr->auth_key, - info->ep_attr->auth_key_size, - domain_priv->using_vmdh); - if (!auth_key) - return -FI_EINVAL; - } else { - auth_key = domain_priv->auth_key; - assert(auth_key); - } - - ep_priv = calloc(1, sizeof(*ep_priv)); - if (!ep_priv) - return -FI_ENOMEM; - - ep_priv->auth_key = auth_key; - - ep_priv->requires_lock = (domain_priv->thread_model != - FI_THREAD_COMPLETION); - - ep_priv->ep_fid.fid.fclass = FI_CLASS_EP; - ep_priv->ep_fid.fid.context = context; - - ep_priv->ep_fid.fid.ops = &gnix_ep_fi_ops; - ep_priv->ep_fid.ops = &gnix_ep_ops; - ep_priv->domain = domain_priv; - ep_priv->type = info->ep_attr->type; - ep_priv->info = fi_dupinfo(info); - - _gnix_ref_init(&ep_priv->ref_cnt, 1, __ep_destruct); - - ep_priv->caps = info->caps & GNIX_EP_CAPS_FULL; - - if (info->tx_attr) - ep_priv->op_flags = info->tx_attr->op_flags; - if (info->rx_attr) - ep_priv->op_flags |= info->rx_attr->op_flags; - ep_priv->op_flags &= GNIX_EP_OP_FLAGS; - - ep_priv->min_multi_recv = GNIX_OPT_MIN_MULTI_RECV_DEFAULT; - - if (attr && attr->msg_ops) - ep_priv->ep_fid.msg = attr->msg_ops; - else - ep_priv->ep_fid.msg = &gnix_ep_msg_ops; - - if (attr && attr->rma_ops) - ep_priv->ep_fid.rma = attr->rma_ops; - else - ep_priv->ep_fid.rma = &gnix_ep_rma_ops; - - if (attr && attr->tagged_ops) - ep_priv->ep_fid.tagged = attr->tagged_ops; - else - ep_priv->ep_fid.tagged = &gnix_ep_tagged_ops; - - if (attr && attr->atomic_ops) - ep_priv->ep_fid.atomic = attr->atomic_ops; - else - ep_priv->ep_fid.atomic = &gnix_ep_atomic_ops; - - if (attr && attr->cm_ops) - ep_priv->ep_fid.cm = attr->cm_ops; - else - ep_priv->ep_fid.cm 
= &gnix_ep_ops_cm; - - gnix_ep_caps(ep_priv, ep_priv->caps); - - ret = __init_tag_storages(ep_priv, GNIX_TAG_LIST, 1); - if (ret) { - goto err; - } - - ret = __fr_freelist_init(ep_priv); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "Error allocating gnix_fab_req freelist (%s)", - fi_strerror(-ret)); - goto err; - } else - free_list_inited = true; - - /* - * try out XPMEM - */ - - ret = _gnix_xpmem_handle_create(domain_priv, - &ep_priv->xpmem_hndl); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, "xpmem_handl_create returned %s\n", - fi_strerror(-ret)); - } - - if (attr && attr->cm_nic) { - ep_priv->cm_nic = attr->cm_nic; - _gnix_ref_get(ep_priv->cm_nic); - } else { - - /* - * if a cm_nic has not yet been allocated for this - * domain, do it now. Reuse the embedded gnix_nic - * in the cm_nic as the nic for this endpoint - * to reduce demand on Aries hw resources. - */ - - ofi_spin_lock(&domain_priv->cm_nic_lock); - if (domain_priv->cm_nic == NULL) { - ret = _gnix_cm_nic_alloc(domain_priv, info, - cdm_id, - ep_priv->auth_key, - &domain_priv->cm_nic); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_cm_nic_alloc returned %s\n", - fi_strerror(-ret)); - ofi_spin_unlock( - &domain_priv->cm_nic_lock); - goto err; - } - ep_priv->cm_nic = domain_priv->cm_nic; - ep_priv->nic = ep_priv->cm_nic->nic; - _gnix_ref_get(ep_priv->nic); - } else { - ep_priv->cm_nic = domain_priv->cm_nic; - _gnix_ref_get(ep_priv->cm_nic); - } - - ofi_spin_unlock(&domain_priv->cm_nic_lock); - - } - - ep_priv->src_addr.gnix_addr.device_addr = - ep_priv->cm_nic->my_name.gnix_addr.device_addr; - ep_priv->src_addr.cm_nic_cdm_id = - ep_priv->cm_nic->my_name.gnix_addr.cdm_id; - - if (attr && attr->use_cdm_id) { - cdm_id = attr->cdm_id; - } else { - ret = _gnix_cm_nic_create_cdm_id(domain_priv, &cdm_id); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "gnix_cm_nic_create_cdm_id returned %s\n", - fi_strerror(-ret)); - goto err; - } - } - 
ep_priv->src_addr.gnix_addr.cdm_id = cdm_id; - - key_ptr = (gnix_ht_key_t *)&ep_priv->src_addr.gnix_addr; - ret = _gnix_ht_insert(ep_priv->cm_nic->addr_to_ep_ht, - *key_ptr, - ep_priv); - if ((ret != FI_SUCCESS) && (ret != -FI_ENOSPC)) { - GNIX_WARN(FI_LOG_EP_CTRL, - "__gnix_ht_insert returned %d\n", - ret); - goto err; - } - - ofi_spin_init(&ep_priv->vc_lock); - - ep_priv->progress_fn = NULL; - ep_priv->rx_progress_fn = NULL; - ep_priv->tx_enabled = false; - ep_priv->rx_enabled = false; - - if (attr && attr->nic) { - ep_priv->nic = attr->nic; - } else { - assert(ep_priv->nic == NULL); - nic_attr.auth_key = ep_priv->auth_key; - - ret = gnix_nic_alloc(domain_priv, &nic_attr, - &ep_priv->nic); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "gnix_nic_alloc call returned %d\n", ret); - goto err; - } - if (!(attr && attr->cm_nic)) { - ep_priv->cm_nic = domain_priv->cm_nic; - } - _gnix_ref_get(ep_priv->nic); - } - - /* - * if smsg callbacks not present hook them up now - */ - - if (ep_priv->nic->smsg_callbacks == NULL) - ep_priv->nic->smsg_callbacks = gnix_ep_smsg_callbacks; - - _gnix_ref_get(ep_priv->domain); - *ep = &ep_priv->ep_fid; - return ret; - -err: - if (ep_priv->xpmem_hndl) { - err_ret = _gnix_xpmem_handle_destroy(ep_priv->xpmem_hndl); - if (err_ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_xpmem_handle_destroy returned %s\n", - fi_strerror(-err_ret)); - } - } - - err_ret = __destruct_tag_storages(ep_priv); - if (err_ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "__destruct_tag_stroages returned %s\n", - fi_strerror(-err_ret)); - } - - if (free_list_inited == true) - __fr_freelist_destroy(ep_priv); - - if (ep_priv->cm_nic != NULL) { - err_ret = _gnix_cm_nic_free(ep_priv->cm_nic); - if (err_ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_cm_nic_free returned %s\n", - fi_strerror(-err_ret)); - } - } - - if (ep_priv->nic != NULL) { - err_ret = _gnix_nic_free(ep_priv->nic); - if (err_ret != FI_SUCCESS) { - 
GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_nic_free returned %s\n", - fi_strerror(-err_ret)); - } - } - - free(ep_priv); - return ret; -} - -static int __match_context(struct dlist_entry *item, const void *arg) -{ - struct gnix_fab_req *req; - - req = container_of(item, struct gnix_fab_req, dlist); - - return req->user_context == arg; -} - -static inline struct gnix_fab_req *__find_tx_req( - struct gnix_fid_ep *ep, - void *context) -{ - struct gnix_fab_req *req = NULL; - struct dlist_entry *entry; - struct gnix_vc *vc; - - GNIX_DEBUG(FI_LOG_EP_CTRL, "searching VCs for the correct context to" - " cancel, context=%p", context); - - COND_ACQUIRE(ep->requires_lock, &ep->vc_lock); - - if (ep->av->type == FI_AV_TABLE) { - GNIX_VECTOR_ITERATOR(ep->vc_table, iter); - - while ((vc = (struct gnix_vc *) - _gnix_vec_iterator_next(&iter))) { - entry = dlist_remove_first_match(&vc->tx_queue, - __match_context, - context); - - if (entry) { - req = container_of(entry, - struct gnix_fab_req, - dlist); - break; - } - } - } else { - GNIX_HASHTABLE_ITERATOR(ep->vc_ht, iter); - - while ((vc = _gnix_ht_iterator_next(&iter))) { - entry = dlist_remove_first_match(&vc->tx_queue, - __match_context, - context); - - if (entry) { - req = container_of(entry, - struct gnix_fab_req, - dlist); - break; - } - } - } - - COND_RELEASE(ep->requires_lock, &ep->vc_lock); - - return req; -} - -static inline struct gnix_fab_req *__find_rx_req( - struct gnix_fid_ep *ep, - void *context) -{ - struct gnix_fab_req *req = NULL; - - COND_ACQUIRE(ep->requires_lock, &ep->vc_lock); - req = _gnix_remove_req_by_context(&ep->posted_recv_queue, context); - if (req) { - COND_RELEASE(ep->requires_lock, &ep->vc_lock); - return req; - } - - req = _gnix_remove_req_by_context(&ep->tagged_posted_recv_queue, - context); - COND_RELEASE(ep->requires_lock, &ep->vc_lock); - - return req; -} - -DIRECT_FN STATIC ssize_t gnix_ep_cancel(fid_t fid, void *context) -{ - int ret = FI_SUCCESS; - struct gnix_fid_ep *ep; - struct gnix_fab_req *req; 
- struct gnix_fid_cq *err_cq = NULL; - struct gnix_fid_cntr *err_cntr = NULL; - void *addr; - uint64_t tag, flags; - size_t len; - int is_send = 0; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - ep = container_of(fid, struct gnix_fid_ep, ep_fid.fid); - - if (!ep->domain) - return -FI_EDOMAIN; - - /* without context, we will have to find a request that matches - * a recv or send request. Try the send requests first. - */ - GNIX_INFO(FI_LOG_EP_CTRL, "looking for event to cancel\n"); - - req = __find_tx_req(ep, context); - if (!req) { - req = __find_rx_req(ep, context); - if (req) { - err_cq = ep->recv_cq; - err_cntr = ep->recv_cntr; - } - } else { - is_send = 1; - err_cq = ep->send_cq; - err_cntr = ep->send_cntr; - } - GNIX_INFO(FI_LOG_EP_CTRL, "finished searching\n"); - - if (!req) - return -FI_ENOENT; - - if (err_cq) { - /* add canceled event */ - if (!(req->type == GNIX_FAB_RQ_RDMA_READ || - req->type == GNIX_FAB_RQ_RDMA_WRITE)) { - if (!is_send) { - addr = (void *) req->msg.recv_info[0].recv_addr; - len = req->msg.cum_recv_len; - } else { - addr = (void *) req->msg.send_info[0].send_addr; - len = req->msg.cum_send_len; - } - tag = req->msg.tag; - } else { - /* rma information */ - addr = (void *) req->rma.loc_addr; - len = req->rma.len; - tag = 0; - } - flags = req->flags; - - _gnix_cq_add_error(err_cq, context, flags, len, addr, 0 /* data */, - tag, len, FI_ECANCELED, FI_ECANCELED, 0, 0); - - } - - if (err_cntr) { - /* signal increase in cntr errs */ - _gnix_cntr_inc_err(err_cntr); - } - - if (req->flags & FI_LOCAL_MR) { - fi_close(&req->amo.loc_md->mr_fid.fid); - req->flags &= ~FI_LOCAL_MR; - } - - _gnix_fr_free(ep, req); - - return ret; -} - -ssize_t gnix_cancel(fid_t fid, void *context) -{ - ssize_t ret; - struct gnix_fid_ep *ep; - struct gnix_fid_trx *trx_ep; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - switch (fid->fclass) { - case FI_CLASS_EP: - ret = gnix_ep_cancel(fid, context); - break; - - case FI_CLASS_RX_CTX: - case FI_CLASS_TX_CTX: - trx_ep = 
container_of(fid, struct gnix_fid_trx, ep_fid); - ep = trx_ep->ep; - ret = gnix_ep_cancel(&ep->ep_fid.fid, context); - break; - /* not supported yet */ - case FI_CLASS_SRX_CTX: - case FI_CLASS_STX_CTX: - return -FI_ENOENT; - - default: - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid fid type\n"); - return -FI_EINVAL; - } - - return ret; -} - -static int -__gnix_ep_ops_get_val(struct fid *fid, ep_ops_val_t t, void *val) -{ - struct gnix_fid_ep *ep; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - assert(val); - - if (fid->fclass != FI_CLASS_EP) { - GNIX_WARN(FI_LOG_DOMAIN, "Invalid ep\n"); - return -FI_EINVAL; - } - - ep = container_of(fid, struct gnix_fid_ep, ep_fid.fid); - - switch (t) { - case GNI_HASH_TAG_IMPL: - *(uint32_t *)val = (ep->use_tag_hlist) ? 1 : 0; - break; - - default: - GNIX_WARN(FI_LOG_DOMAIN, ("Invalid dom_ops_val\n")); - return -FI_EINVAL; - } - - return FI_SUCCESS; -} - -static int -__gnix_ep_ops_set_val(struct fid *fid, ep_ops_val_t t, void *val) -{ - struct gnix_fid_ep *ep; - int v; - int ret; - int tag_type; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - assert(val); - - if (fid->fclass != FI_CLASS_EP) { - GNIX_WARN(FI_LOG_DOMAIN, "Invalid ep\n"); - return -FI_EINVAL; - } - - ep = container_of(fid, struct gnix_fid_ep, ep_fid.fid); - switch (t) { - case GNI_HASH_TAG_IMPL: - if (ep->tx_enabled || ep->rx_enabled) { - GNIX_WARN(FI_LOG_EP_CTRL, - "EP enabled, cannot modify tag matcher\n"); - return -FI_EINVAL; - } - - v = *(uint32_t *) val; - if ((v && !(ep->use_tag_hlist)) || - (!v && (ep->use_tag_hlist))) { - ret = __destruct_tag_storages(ep); - if (ret) { - GNIX_FATAL(FI_LOG_EP_CTRL, - "failed to destroy existing tag storage\n"); - } - - tag_type = (v) ? 
GNIX_TAG_HLIST : GNIX_TAG_LIST; - - ret = __init_tag_storages(ep, tag_type, 1); - if (ret) - return ret; - - ep->use_tag_hlist = v; - } - break; - default: - GNIX_WARN(FI_LOG_DOMAIN, ("Invalid dom_ops_val\n")); - return -FI_EINVAL; - } - - return FI_SUCCESS; -} - -static struct fi_gni_ops_ep gnix_ops_ep = { - .set_val = __gnix_ep_ops_set_val, - .get_val = __gnix_ep_ops_get_val, - .native_amo = __gnix_fabric_ops_native_amo -}; - -static int -gnix_ep_ops_open(struct fid *fid, const char *ops_name, uint64_t flags, - void **ops, void *context) -{ - int ret = FI_SUCCESS; - - if (strcmp(ops_name, FI_GNI_EP_OPS_1) == 0) - *ops = &gnix_ops_ep; - else - ret = -FI_EINVAL; - - return ret; -} - -DIRECT_FN STATIC int gnix_ep_getopt(fid_t fid, int level, int optname, - void *optval, size_t *optlen) -{ - struct gnix_fid_ep *gnix_ep; - - gnix_ep = container_of(fid, struct gnix_fid_ep, ep_fid.fid); - - switch (optname) { - case FI_OPT_MIN_MULTI_RECV: - *(size_t *)optval = gnix_ep->min_multi_recv; - *optlen = sizeof(size_t); - break; - case FI_OPT_CM_DATA_SIZE: - *(size_t *)optval = GNIX_CM_DATA_MAX_SIZE; - *optlen = sizeof(size_t); - break; - default: - return -FI_ENOPROTOOPT; - } - - return 0; -} - -int gnix_getopt(fid_t fid, int level, int optname, - void *optval, size_t *optlen) -{ - ssize_t ret; - struct gnix_fid_ep *ep; - struct gnix_fid_trx *trx_ep; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (!fid || !optval || !optlen) - return -FI_EINVAL; - else if (level != FI_OPT_ENDPOINT) - return -FI_ENOPROTOOPT; - - switch (fid->fclass) { - case FI_CLASS_EP: - ret = gnix_ep_getopt(fid, level, optname, optval, optlen); - break; - - case FI_CLASS_RX_CTX: - case FI_CLASS_TX_CTX: - trx_ep = container_of(fid, struct gnix_fid_trx, ep_fid); - ep = trx_ep->ep; - ret = gnix_ep_getopt(&ep->ep_fid.fid, level, optname, optval, - optlen); - break; - /* not supported yet */ - case FI_CLASS_SRX_CTX: - case FI_CLASS_STX_CTX: - return -FI_ENOENT; - - default: - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid fid 
type\n"); - return -FI_EINVAL; - } - - return ret; -} - -DIRECT_FN STATIC int gnix_ep_setopt(fid_t fid, int level, int optname, - const void *optval, size_t optlen) -{ - struct gnix_fid_ep *gnix_ep; - - gnix_ep = container_of(fid, struct gnix_fid_ep, ep_fid.fid); - - switch (optname) { - case FI_OPT_MIN_MULTI_RECV: - if (optlen != sizeof(size_t)) - return -FI_EINVAL; - /* - * see https://github.com/ofi-cray/libfabric-cray/issues/1120 - */ - if (*(size_t *)optval == 0UL) - return -FI_EINVAL; - gnix_ep->min_multi_recv = *(size_t *)optval; - break; - default: - return -FI_ENOPROTOOPT; - } - - return 0; -} - -int gnix_setopt(fid_t fid, int level, int optname, - const void *optval, size_t optlen) -{ - ssize_t ret; - struct gnix_fid_ep *ep; - struct gnix_fid_trx *trx_ep; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (!fid || !optval) - return -FI_EINVAL; - else if (level != FI_OPT_ENDPOINT) - return -FI_ENOPROTOOPT; - - switch (fid->fclass) { - case FI_CLASS_EP: - ret = gnix_ep_setopt(fid, level, optname, optval, optlen); - break; - - case FI_CLASS_RX_CTX: - case FI_CLASS_TX_CTX: - trx_ep = container_of(fid, struct gnix_fid_trx, ep_fid); - ep = trx_ep->ep; - ret = gnix_ep_setopt(&ep->ep_fid.fid, level, optname, optval, - optlen); - break; - /* not supported yet */ - case FI_CLASS_SRX_CTX: - case FI_CLASS_STX_CTX: - return -FI_ENOENT; - - default: - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid fid type\n"); - return -FI_EINVAL; - } - - return ret; -} - -DIRECT_FN STATIC ssize_t gnix_ep_rx_size_left(struct fid_ep *ep) -{ - if (!ep) { - return -FI_EINVAL; - } - - struct gnix_fid_ep *ep_priv = container_of(ep, - struct gnix_fid_ep, - ep_fid); - - /* A little arbitrary... 
*/ - if (ep_priv->int_tx_pool.enabled == false) { - return -FI_EOPBADSTATE; - } - - switch (ep->fid.fclass) { - case FI_CLASS_EP: - break; - case FI_CLASS_RX_CTX: - case FI_CLASS_SRX_CTX: - break; - default: - GNIX_INFO(FI_LOG_EP_CTRL, "Invalid EP type\n"); - return -FI_EINVAL; - } - - /* We can queue RXs indefinitely, so just return the default size. */ - return GNIX_RX_SIZE_DEFAULT; -} - -DIRECT_FN STATIC ssize_t gnix_ep_tx_size_left(struct fid_ep *ep) -{ - if (!ep) { - return -FI_EINVAL; - } - - struct gnix_fid_ep *ep_priv = container_of(ep, - struct gnix_fid_ep, - ep_fid); - - /* A little arbitrary... */ - if (ep_priv->int_tx_pool.enabled == false) { - return -FI_EOPBADSTATE; - } - - switch (ep->fid.fclass) { - case FI_CLASS_EP: - break; - case FI_CLASS_TX_CTX: - break; - default: - GNIX_INFO(FI_LOG_EP_CTRL, "Invalid EP type\n"); - return -FI_EINVAL; - } - - /* We can queue TXs indefinitely, so just return the default size. */ - return GNIX_TX_SIZE_DEFAULT; -} - -__attribute__((unused)) -DIRECT_FN STATIC int gnix_tx_context(struct fid_ep *ep, int index, - struct fi_tx_attr *attr, - struct fid_ep **tx_ep, void *context) -{ - return -FI_ENOSYS; -} - -__attribute__((unused)) -DIRECT_FN STATIC int gnix_rx_context(struct fid_ep *ep, int index, - struct fi_rx_attr *attr, - struct fid_ep **rx_ep, void *context) -{ - return -FI_ENOSYS; -} - -/******************************************************************************* - * FI_OPS_* data structures. 
- ******************************************************************************/ - -static struct fi_ops gnix_ep_fi_ops = { - .size = sizeof(struct fi_ops), - .close = gnix_ep_close, - .bind = gnix_ep_bind, - .control = gnix_ep_control, - .ops_open = gnix_ep_ops_open, -}; - -static struct fi_ops_ep gnix_ep_ops = { - .size = sizeof(struct fi_ops_ep), - .cancel = gnix_cancel, - .getopt = gnix_getopt, - .setopt = gnix_setopt, - .tx_ctx = fi_no_tx_ctx, - .rx_ctx = fi_no_rx_ctx, - .rx_size_left = gnix_ep_rx_size_left, - .tx_size_left = gnix_ep_tx_size_left, -}; - -static struct fi_ops_msg gnix_ep_msg_ops = { - .size = sizeof(struct fi_ops_msg), - .recv = gnix_ep_recv, - .recvv = gnix_ep_recvv, - .recvmsg = gnix_ep_recvmsg, - .send = gnix_ep_send, - .sendv = gnix_ep_sendv, - .sendmsg = gnix_ep_sendmsg, - .inject = gnix_ep_msg_inject, - .senddata = gnix_ep_senddata, - .injectdata = gnix_ep_msg_injectdata, -}; - -static struct fi_ops_rma gnix_ep_rma_ops = { - .size = sizeof(struct fi_ops_rma), - .read = gnix_ep_read, - .readv = gnix_ep_readv, - .readmsg = gnix_ep_readmsg, - .write = gnix_ep_write, - .writev = gnix_ep_writev, - .writemsg = gnix_ep_writemsg, - .inject = gnix_ep_rma_inject, - .writedata = gnix_ep_writedata, - .injectdata = gnix_ep_rma_injectdata, -}; - -struct fi_ops_tagged gnix_ep_tagged_ops = { - .size = sizeof(struct fi_ops_tagged), - .recv = gnix_ep_trecv, - .recvv = gnix_ep_trecvv, - .recvmsg = gnix_ep_trecvmsg, - .send = gnix_ep_tsend, - .sendv = gnix_ep_tsendv, - .sendmsg = gnix_ep_tsendmsg, - .inject = gnix_ep_tinject, - .senddata = gnix_ep_tsenddata, - .injectdata = gnix_ep_tinjectdata, -}; - -struct fi_ops_atomic gnix_ep_atomic_ops = { - .size = sizeof(struct fi_ops_atomic), - .write = gnix_ep_atomic_write, - .writev = gnix_ep_atomic_writev, - .writemsg = gnix_ep_atomic_writemsg, - .inject = gnix_ep_atomic_inject, - .readwrite = gnix_ep_atomic_readwrite, - .readwritev = gnix_ep_atomic_readwritev, - .readwritemsg = gnix_ep_atomic_readwritemsg, - 
.compwrite = gnix_ep_atomic_compwrite, - .compwritev = gnix_ep_atomic_compwritev, - .compwritemsg = gnix_ep_atomic_compwritemsg, - .writevalid = gnix_ep_atomic_valid, - .readwritevalid = gnix_ep_fetch_atomic_valid, - .compwritevalid = gnix_ep_cmp_atomic_valid, -}; diff --git a/prov/gni/src/gnix_eq.c b/prov/gni/src/gnix_eq.c deleted file mode 100644 index ae4071461ab..00000000000 --- a/prov/gni/src/gnix_eq.c +++ /dev/null @@ -1,700 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include - -#include - -#include "gnix.h" -#include "gnix_eq.h" -#include "gnix_util.h" -#include "gnix_cm.h" - -/******************************************************************************* - * Global declarations - ******************************************************************************/ -DLIST_HEAD(gnix_eq_list); -pthread_mutex_t gnix_eq_list_lock = PTHREAD_MUTEX_INITIALIZER; - -/******************************************************************************* - * Forward declaration for ops structures. - ******************************************************************************/ -static struct fi_ops_eq gnix_eq_ops; -static struct fi_ops gnix_fi_eq_ops; - - -/******************************************************************************* - * Helper functions. - ******************************************************************************/ - -static void gnix_eq_cleanup_err_bufs(struct gnix_fid_eq *eq, int free_all) -{ - struct gnix_eq_err_buf *ebuf, *tmp; - - dlist_for_each_safe(&eq->err_bufs, ebuf, tmp, dlist) { - if (free_all || ebuf->do_free) { - dlist_remove(&ebuf->dlist); - free(ebuf); - } - } -} - -static int gnix_eq_set_wait(struct gnix_fid_eq *eq) -{ - int ret = FI_SUCCESS; - - GNIX_TRACE(FI_LOG_EQ, "\n"); - - struct fi_wait_attr requested = { - .wait_obj = eq->attr.wait_obj, - .flags = 0 - }; - - switch (eq->attr.wait_obj) { - case FI_WAIT_UNSPEC: - ret = gnix_wait_open(&eq->fabric->fab_fid, &requested, - &eq->wait); - break; - case FI_WAIT_SET: - ret = _gnix_wait_set_add(eq->attr.wait_set, &eq->eq_fid.fid); - if (!ret) - eq->wait = eq->attr.wait_set; - break; - default: - break; - } - - return ret; -} - -static int gnix_verify_eq_attr(struct fi_eq_attr *attr) -{ - - GNIX_TRACE(FI_LOG_EQ, "\n"); - - if (!attr) - return -FI_EINVAL; - - if (!attr->size) - attr->size = GNIX_EQ_DEFAULT_SIZE; - - /* - * We only support FI_WAIT_SET and FI_WAIT_UNSPEC - */ - switch (attr->wait_obj) { - case FI_WAIT_NONE: - break; - case FI_WAIT_SET: - if 
(!attr->wait_set) { - GNIX_WARN(FI_LOG_EQ, - "FI_WAIT_SET is set, but wait_set field doesn't reference a wait object.\n"); - return -FI_EINVAL; - } - break; - case FI_WAIT_UNSPEC: - break; - case FI_WAIT_FD: - case FI_WAIT_MUTEX_COND: - default: - GNIX_WARN(FI_LOG_EQ, "wait type: %d unsupported.\n", - attr->wait_obj); - return -FI_ENOSYS; - } - - return FI_SUCCESS; -} - -static void free_eq_entry(struct slist_entry *item) -{ - struct gnix_eq_entry *entry; - - entry = container_of(item, struct gnix_eq_entry, item); - - free(entry->the_entry); - free(entry); -} - -static struct slist_entry *alloc_eq_entry(size_t size) -{ - struct gnix_eq_entry *entry = calloc(1, sizeof(*entry)); - - if (!entry) { - GNIX_WARN(FI_LOG_EQ, "out of memory\n"); - goto err; - } - - if (size) { - entry->the_entry = malloc(size); - if (!entry->the_entry) { - GNIX_WARN(FI_LOG_EQ, "out of memory\n"); - goto cleanup; - } - } - - return &entry->item; - -cleanup: - free(entry); -err: - return NULL; -} - -ssize_t _gnix_eq_write_error(struct gnix_fid_eq *eq, fid_t fid, - void *context, uint64_t index, int err, - int prov_errno, void *err_data, - size_t err_size) -{ - struct fi_eq_err_entry *error; - struct gnix_eq_entry *event; - struct slist_entry *item; - struct gnix_eq_err_buf *err_buf; - - ssize_t ret = FI_SUCCESS; - - if (!eq) - return -FI_EINVAL; - - ofi_spin_lock(&eq->lock); - - item = _gnix_queue_get_free(eq->errors); - if (!item) { - GNIX_WARN(FI_LOG_EQ, "error creating error entry\n"); - ret = -FI_ENOMEM; - goto err; - } - - event = container_of(item, struct gnix_eq_entry, item); - - error = event->the_entry; - - error->fid = fid; - error->context = context; - error->data = index; - error->err = err; - error->prov_errno = prov_errno; - - if (err_size) { - err_buf = malloc(sizeof(struct gnix_eq_err_buf) + err_size); - if (!err_buf) { - _gnix_queue_enqueue_free(eq->errors, &event->item); - ret = -FI_ENOMEM; - goto err; - } - err_buf->do_free = 0; - - memcpy(err_buf->buf, err_data, err_size); 
- error->err_data = err_buf->buf; - error->err_data_size = err_size; - - dlist_insert_tail(&err_buf->dlist, &eq->err_bufs); - } else { - error->err_data = NULL; - error->err_data_size = 0; - } - - _gnix_queue_enqueue(eq->errors, &event->item); - - if (eq->wait) - _gnix_signal_wait_obj(eq->wait); - -err: - ofi_spin_unlock(&eq->lock); - - return ret; -} - -static void __eq_destruct(void *obj) -{ - struct gnix_fid_eq *eq = (struct gnix_fid_eq *) obj; - pthread_mutex_lock(&gnix_eq_list_lock); - dlist_remove(&eq->gnix_fid_eq_list); - pthread_mutex_unlock(&gnix_eq_list_lock); - - _gnix_ref_put(eq->fabric); - - ofi_spin_destroy(&eq->lock); - - switch (eq->attr.wait_obj) { - case FI_WAIT_NONE: - break; - case FI_WAIT_SET: - _gnix_wait_set_remove(eq->wait, &eq->eq_fid.fid); - break; - case FI_WAIT_UNSPEC: - case FI_WAIT_FD: - case FI_WAIT_MUTEX_COND: - assert(eq->wait); - gnix_wait_close(&eq->wait->fid); - break; - default: - GNIX_WARN(FI_LOG_EQ, "format: %d unsupported\n.", - eq->attr.wait_obj); - break; - } - - _gnix_queue_destroy(eq->events); - _gnix_queue_destroy(eq->errors); - - gnix_eq_cleanup_err_bufs(eq, 1); - - free(eq); -} - -int _gnix_eq_poll_obj_add(struct gnix_fid_eq *eq, struct fid *obj_fid) -{ - struct gnix_eq_poll_obj *pobj; - - COND_WRITE_ACQUIRE(eq->requires_lock, &eq->poll_obj_lock); - - pobj = malloc(sizeof(struct gnix_eq_poll_obj)); - if (!pobj) { - GNIX_WARN(FI_LOG_EQ, "Failed to add object to EQ poll list.\n"); - COND_RW_RELEASE(eq->requires_lock, &eq->poll_obj_lock); - return -FI_ENOMEM; - } - - pobj->obj_fid = obj_fid; - dlist_init(&pobj->list); - dlist_insert_tail(&pobj->list, &eq->poll_objs); - - COND_RW_RELEASE(eq->requires_lock, &eq->poll_obj_lock); - - GNIX_INFO(FI_LOG_EQ, "Added object(%d, %p) to EQ(%p) poll list\n", - obj_fid->fclass, obj_fid, eq); - - return FI_SUCCESS; -} - -int _gnix_eq_poll_obj_rem(struct gnix_fid_eq *eq, struct fid *obj_fid) -{ - struct gnix_eq_poll_obj *pobj, *tmp; - - COND_WRITE_ACQUIRE(eq->requires_lock, 
&eq->poll_obj_lock); - - dlist_for_each_safe(&eq->poll_objs, pobj, tmp, list) { - if (pobj->obj_fid == obj_fid) { - dlist_remove(&pobj->list); - free(pobj); - GNIX_INFO(FI_LOG_EQ, - "Removed object(%d, %p) from EQ(%p) poll list\n", - pobj->obj_fid->fclass, pobj, eq); - COND_RW_RELEASE(eq->requires_lock, &eq->poll_obj_lock); - return FI_SUCCESS; - } - } - - COND_RW_RELEASE(eq->requires_lock, &eq->poll_obj_lock); - - GNIX_WARN(FI_LOG_EQ, "object not found on EQ poll list.\n"); - return -FI_EINVAL; -} - -int _gnix_eq_progress(struct gnix_fid_eq *eq) -{ - struct gnix_eq_poll_obj *pobj, *tmp; - int rc; - struct gnix_fid_pep *pep; - struct gnix_fid_ep *ep; - - COND_READ_ACQUIRE(eq->requires_lock, &eq->poll_obj_lock); - - dlist_for_each_safe(&eq->poll_objs, pobj, tmp, list) { - switch (pobj->obj_fid->fclass) { - case FI_CLASS_PEP: - pep = container_of(pobj->obj_fid, struct gnix_fid_pep, - pep_fid.fid); - rc = _gnix_pep_progress(pep); - if (rc) { - GNIX_WARN(FI_LOG_EQ, - "_gnix_pep_progress failed: %d\n", - rc); - } - break; - case FI_CLASS_EP: - ep = container_of(pobj->obj_fid, struct gnix_fid_ep, - ep_fid.fid); - rc = _gnix_ep_progress(ep); - if (rc) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_ep_progress failed: %d\n", - rc); - } - break; - default: - GNIX_WARN(FI_LOG_EQ, - "invalid poll object: %d %p\n", - pobj->obj_fid->fclass, pobj); - break; - } - } - - COND_RW_RELEASE(eq->requires_lock, &eq->poll_obj_lock); - - return FI_SUCCESS; -} - -/******************************************************************************* - * API function implementations. - ******************************************************************************/ -/* - * - Handle FI_WRITE flag. When not included, replace write function with - * fi_no_eq_write. 
- */ -DIRECT_FN int gnix_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr, - struct fid_eq **eq, void *context) -{ - struct gnix_fid_eq *eq_priv; - - int ret = FI_SUCCESS; - - GNIX_TRACE(FI_LOG_EQ, "\n"); - - if (!fabric) - return -FI_EINVAL; - - eq_priv = calloc(1, sizeof(*eq_priv)); - if (!eq_priv) - return -FI_ENOMEM; - - ret = gnix_verify_eq_attr(attr); - if (ret) - goto err; - - eq_priv->fabric = container_of(fabric, struct gnix_fid_fabric, - fab_fid); - - _gnix_ref_init(&eq_priv->ref_cnt, 1, __eq_destruct); - - _gnix_ref_get(eq_priv->fabric); - - eq_priv->eq_fid.fid.fclass = FI_CLASS_EQ; - eq_priv->eq_fid.fid.context = context; - eq_priv->eq_fid.fid.ops = &gnix_fi_eq_ops; - eq_priv->eq_fid.ops = &gnix_eq_ops; - eq_priv->requires_lock = 1; - eq_priv->attr = *attr; - - ofi_spin_init(&eq_priv->lock); - - rwlock_init(&eq_priv->poll_obj_lock); - dlist_init(&eq_priv->poll_objs); - - dlist_init(&eq_priv->err_bufs); - - ret = gnix_eq_set_wait(eq_priv); - if (ret) - goto err1; - - ret = _gnix_queue_create(&eq_priv->events, alloc_eq_entry, - free_eq_entry, 0, eq_priv->attr.size); - if (ret) - goto err1; - - ret = _gnix_queue_create(&eq_priv->errors, alloc_eq_entry, - free_eq_entry, sizeof(struct fi_eq_err_entry), - 0); - if (ret) - goto err2; - - *eq = &eq_priv->eq_fid; - - pthread_mutex_lock(&gnix_eq_list_lock); - dlist_insert_tail(&eq_priv->gnix_fid_eq_list, &gnix_eq_list); - pthread_mutex_unlock(&gnix_eq_list_lock); - - return ret; - -err2: - _gnix_queue_destroy(eq_priv->events); -err1: - _gnix_ref_put(eq_priv->fabric); - ofi_spin_destroy(&eq_priv->lock); -err: - free(eq_priv); - return ret; -} - -DIRECT_FN STATIC int gnix_eq_close(struct fid *fid) -{ - struct gnix_fid_eq *eq; - int references_held; - - GNIX_TRACE(FI_LOG_EQ, "\n"); - - if (!fid) - return -FI_EINVAL; - - eq = container_of(fid, struct gnix_fid_eq, eq_fid); - - references_held = _gnix_ref_put(eq); - if (references_held) { - GNIX_INFO(FI_LOG_EQ, "failed to fully close eq due " - "to lingering 
references. references=%i eq=%p\n", - references_held, eq); - } - - return FI_SUCCESS; -} - -static ssize_t __gnix_eq_sread(int blocking, struct fid_eq *eq, - uint32_t *event, void *buf, size_t len, - uint64_t flags, int timeout) -{ - struct gnix_fid_eq *eq_priv; - struct gnix_eq_entry *entry; - struct slist_entry *item; - ssize_t read_size; - - if (!eq || !event || (len && !buf)) - return -FI_EINVAL; - - eq_priv = container_of(eq, struct gnix_fid_eq, eq_fid); - - if ((blocking && !eq_priv->wait) || - (blocking && eq_priv->attr.wait_obj == FI_WAIT_SET)) { - GNIX_WARN(FI_LOG_EQ, "Invalid wait type\n"); - return -FI_EINVAL; - } - - gnix_eq_cleanup_err_bufs(eq_priv, 0); - - _gnix_eq_progress(eq_priv); - - if (_gnix_queue_peek(eq_priv->errors)) - return -FI_EAVAIL; - - if (eq_priv->wait) - gnix_wait_wait((struct fid_wait *) eq_priv->wait, timeout); - - ofi_spin_lock(&eq_priv->lock); - - if (_gnix_queue_peek(eq_priv->errors)) { - read_size = -FI_EAVAIL; - goto err; - } - - item = _gnix_queue_peek(eq_priv->events); - - if (!item) { - read_size = -FI_EAGAIN; - goto err; - } - - entry = container_of(item, struct gnix_eq_entry, item); - - if (len < entry->len) { - read_size = -FI_ETOOSMALL; - goto err; - } - - *event = entry->type; - - read_size = entry->len; - memcpy(buf, entry->the_entry, read_size); - - if (!(flags & FI_PEEK)) { - item = _gnix_queue_dequeue(eq_priv->events); - - free(entry->the_entry); - entry->the_entry = NULL; - - _gnix_queue_enqueue_free(eq_priv->events, &entry->item); - } - -err: - ofi_spin_unlock(&eq_priv->lock); - - return read_size; -} - -DIRECT_FN STATIC ssize_t gnix_eq_read(struct fid_eq *eq, uint32_t *event, - void *buf, size_t len, uint64_t flags) -{ - return __gnix_eq_sread(0, eq, event, buf, len, flags, 0); -} - -DIRECT_FN STATIC ssize_t gnix_eq_sread(struct fid_eq *eq, uint32_t *event, - void *buf, size_t len, int timeout, - uint64_t flags) -{ - return __gnix_eq_sread(1, eq, event, buf, len, flags, timeout); -} - -DIRECT_FN STATIC int 
gnix_eq_control(struct fid *eq, int command, void *arg) -{ - /* disabled until new trywait interface is implemented - struct gnix_fid_eq *eq_priv; - - eq_priv = container_of(eq, struct gnix_fid_eq, eq_fid); - */ - switch (command) { - case FI_GETWAIT: - /* return _gnix_get_wait_obj(eq_priv->wait, arg); */ - return -FI_ENOSYS; - default: - return -FI_EINVAL; - } -} - -DIRECT_FN STATIC ssize_t gnix_eq_readerr(struct fid_eq *eq, - struct fi_eq_err_entry *buf, - uint64_t flags) -{ - struct gnix_fid_eq *eq_priv; - struct gnix_eq_entry *entry; - struct slist_entry *item; - struct gnix_eq_err_buf *err_buf; - struct fi_eq_err_entry *fi_err; - - ssize_t read_size = sizeof(*buf); - - eq_priv = container_of(eq, struct gnix_fid_eq, eq_fid); - - ofi_spin_lock(&eq_priv->lock); - - if (flags & FI_PEEK) - item = _gnix_queue_peek(eq_priv->errors); - else - item = _gnix_queue_dequeue(eq_priv->errors); - - if (!item) { - read_size = -FI_EAGAIN; - goto err; - } - - entry = container_of(item, struct gnix_eq_entry, item); - fi_err = (struct fi_eq_err_entry *)entry->the_entry; - - memcpy(buf, entry->the_entry, read_size); - - /* If removing an event with err_data, mark err buf to be freed during - * the next EQ read. 
*/ - if (!(flags & FI_PEEK) && fi_err->err_data) { - err_buf = container_of(fi_err->err_data, - struct gnix_eq_err_buf, buf); - err_buf->do_free = 1; - } - - _gnix_queue_enqueue_free(eq_priv->errors, &entry->item); - -err: - ofi_spin_unlock(&eq_priv->lock); - - return read_size; -} - -DIRECT_FN STATIC ssize_t gnix_eq_write(struct fid_eq *eq, uint32_t event, - const void *buf, size_t len, - uint64_t flags) -{ - struct gnix_fid_eq *eq_priv; - struct slist_entry *item; - struct gnix_eq_entry *entry; - - ssize_t ret = len; - - eq_priv = container_of(eq, struct gnix_fid_eq, eq_fid); - - ofi_spin_lock(&eq_priv->lock); - - item = _gnix_queue_get_free(eq_priv->events); - if (!item) { - GNIX_WARN(FI_LOG_EQ, "error creating eq_entry\n"); - ret = -FI_ENOMEM; - goto err; - } - - entry = container_of(item, struct gnix_eq_entry, item); - - entry->the_entry = calloc(1, len); - if (!entry->the_entry) { - _gnix_queue_enqueue_free(eq_priv->events, &entry->item); - GNIX_WARN(FI_LOG_EQ, "error allocating buffer\n"); - ret = -FI_ENOMEM; - goto err; - } - - memcpy(entry->the_entry, buf, len); - - entry->len = len; - entry->type = event; - entry->flags = flags; - - _gnix_queue_enqueue(eq_priv->events, &entry->item); - - if (eq_priv->wait) - _gnix_signal_wait_obj(eq_priv->wait); - -err: - ofi_spin_unlock(&eq_priv->lock); - - return ret; -} - -/** - * Converts provider specific error information into a printable string. - * - * @param[in] eq the event queue - * @param[in] prov_errno the provider specific error number - * @param[in/out] buf optional buffer to print error information - * @param[in] len the length of buf - * - * @return the printable string - * @return NULL upon error or if the operation is not supported yet - */ -DIRECT_FN STATIC const char *gnix_eq_strerror(struct fid_eq *eq, int prov_errno, - const void *err_data, char *buf, - size_t len) -{ - return NULL; -} - -/******************************************************************************* - * FI_OPS_* data structures. 
- ******************************************************************************/ -static struct fi_ops_eq gnix_eq_ops = { - .size = sizeof(struct fi_ops_eq), - .read = gnix_eq_read, - .readerr = gnix_eq_readerr, - .write = gnix_eq_write, - .sread = gnix_eq_sread, - .strerror = gnix_eq_strerror -}; - -static struct fi_ops gnix_fi_eq_ops = { - .size = sizeof(struct fi_ops), - .close = gnix_eq_close, - .bind = fi_no_bind, - .control = gnix_eq_control, - .ops_open = fi_no_ops_open -}; diff --git a/prov/gni/src/gnix_fabric.c b/prov/gni/src/gnix_fabric.c deleted file mode 100644 index a67684d49ed..00000000000 --- a/prov/gni/src/gnix_fabric.c +++ /dev/null @@ -1,1057 +0,0 @@ -/* - * Copyright (c) 2014 Intel Corporation, Inc. All rights reserved. - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyrigth (c) 2019 Triad National Security, LLC. All rights - * reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#if HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include "ofi_prov.h" - -#include "gnix.h" -#include "gnix_nic.h" -#include "gnix_cm.h" -#include "gnix_cm_nic.h" -#include "gnix_util.h" -#include "gnix_nameserver.h" -#include "gnix_wait.h" -#include "gnix_xpmem.h" -#include "gnix_mbox_allocator.h" - -/* check if only one bit of a set is enabled, when one is required */ -#define IS_EXCLUSIVE(x) \ - ((x) && !((x) & ((x)-1))) - -/* optional basic bits */ -#define GNIX_MR_BASIC_OPT \ - (FI_MR_LOCAL) - -/* optional scalable bits */ -#define GNIX_MR_SCALABLE_OPT \ - (FI_MR_LOCAL) - -/* required basic bits */ -#define GNIX_MR_BASIC_REQ \ - (FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY) - -/* required scalable bits */ -#define GNIX_MR_SCALABLE_REQ \ - (FI_MR_MMU_NOTIFY) - -#define GNIX_MR_BASIC_BITS \ - (GNIX_MR_BASIC_OPT | GNIX_MR_BASIC_REQ) - -#define GNIX_MR_SCALABLE_BITS \ - (GNIX_MR_SCALABLE_OPT | GNIX_MR_SCALABLE_REQ) - -const char gnix_fab_name[] = "gni"; -const char gnix_dom_name[] = "/sys/class/gni/kgni0"; -const char gnix_prov_name[] = "gni"; - -uint32_t gnix_cdm_modes = - (GNI_CDM_MODE_FAST_DATAGRAM_POLL | GNI_CDM_MODE_FMA_SHARED | - GNI_CDM_MODE_FMA_SMALL_WINDOW | GNI_CDM_MODE_FORK_PARTCOPY | - GNI_CDM_MODE_ERR_NO_KILL); - -/* default number of directed datagrams per domain */ -static int gnix_def_gni_n_dgrams 
= 128; -/* default number of wildcard datagrams per domain */ -static int gnix_def_gni_n_wc_dgrams = 4; -static uint64_t gnix_def_gni_datagram_timeouts = -1; - -static struct fi_ops gnix_fab_fi_ops; -static struct fi_gni_ops_fab gnix_ops_fab; -static struct fi_gni_auth_key_ops_fab gnix_fab_ak_ops; - -static int __gnix_auth_key_initialize( - uint8_t *auth_key, - size_t auth_key_size, - struct gnix_auth_key_attr *attr); -static int __gnix_auth_key_set_val( - uint8_t *auth_key, - size_t auth_key_size, - gnix_auth_key_opt_t opt, - void *val); -static int __gnix_auth_key_get_val( - uint8_t *auth_key, - size_t auth_key_size, - gnix_auth_key_opt_t opt, - void *val); - -#define GNIX_DEFAULT_USER_REGISTRATION_LIMIT 192 -#define GNIX_DEFAULT_PROV_REGISTRATION_LIMIT 64 -#define GNIX_DEFAULT_SHARED_MEMORY_TIMEOUT 30 - -int gnix_default_user_registration_limit = GNIX_DEFAULT_USER_REGISTRATION_LIMIT; -int gnix_default_prov_registration_limit = GNIX_DEFAULT_PROV_REGISTRATION_LIMIT; -uint32_t gnix_wait_shared_memory_timeout = GNIX_DEFAULT_SHARED_MEMORY_TIMEOUT; - -/* assume that the user will open additional fabrics later and that - ptag information will need to be retained for the lifetime of the - process. If the user sets this value, we can assume that they - intend to be done with libfabric when the last fabric instance - closes so that we can free the ptag information. 
*/ -int gnix_dealloc_aki_on_fabric_close = 0; - -const struct fi_fabric_attr gnix_fabric_attr = { - .fabric = NULL, - .name = NULL, - .prov_name = NULL, - .prov_version = FI_VERSION(GNI_MAJOR_VERSION, GNI_MINOR_VERSION), -}; - -DIRECT_FN int gnix_fabric_trywait(struct fid_fabric *fabric, struct fid **fids, int count) -{ - return -FI_ENOSYS; -} - -static struct fi_ops_fabric gnix_fab_ops = { - .size = sizeof(struct fi_ops_fabric), - .domain = gnix_domain_open, - .passive_ep = gnix_pep_open, - .eq_open = gnix_eq_open, - .wait_open = gnix_wait_open, - .trywait = gnix_fabric_trywait -}; - -static void __fabric_destruct(void *obj) -{ - struct gnix_fid_fabric *fab = (struct gnix_fid_fabric *) obj; - - _gnix_app_cleanup(); - - free(fab); -} - -static int gnix_fab_ops_open(struct fid *fid, const char *ops_name, - uint64_t flags, void **ops, void *context) -{ - if (strcmp(ops_name, FI_GNI_FAB_OPS_1) == 0) - *ops = &gnix_ops_fab; - else if (strcmp(ops_name, FI_GNI_FAB_OPS_2) == 0) - *ops = &gnix_fab_ak_ops; - else - return -FI_EINVAL; - - return 0; -} - -static int gnix_fabric_close(fid_t fid) -{ - struct gnix_fid_fabric *fab; - int references_held; - - fab = container_of(fid, struct gnix_fid_fabric, fab_fid); - - references_held = _gnix_ref_put(fab); - if (references_held) - GNIX_INFO(FI_LOG_FABRIC, "failed to fully close fabric due " - "to lingering references. 
references=%i fabric=%p\n", - references_held, fab); - - return FI_SUCCESS; -} - -/* - * define methods needed for the GNI fabric provider - */ -static int gnix_fabric_open(struct fi_fabric_attr *attr, - struct fid_fabric **fabric, - void *context) -{ - struct gnix_fid_fabric *fab; - - if (strcmp(attr->name, gnix_fab_name)) { - return -FI_ENODATA; - } - - fab = calloc(1, sizeof(*fab)); - if (!fab) { - return -FI_ENOMEM; - } - - /* - * set defaults related to use of GNI datagrams - */ - fab->n_bnd_dgrams = gnix_def_gni_n_dgrams; - fab->n_wc_dgrams = gnix_def_gni_n_wc_dgrams; - fab->datagram_timeout = gnix_def_gni_datagram_timeouts; - - fab->fab_fid.fid.fclass = FI_CLASS_FABRIC; - fab->fab_fid.fid.context = context; - fab->fab_fid.fid.ops = &gnix_fab_fi_ops; - fab->fab_fid.ops = &gnix_fab_ops; - _gnix_ref_init(&fab->ref_cnt, 1, __fabric_destruct); - dlist_init(&fab->domain_list); - - *fabric = &fab->fab_fid; - - return FI_SUCCESS; -} - -static struct fi_info *_gnix_allocinfo(void) -{ - struct fi_info *gnix_info; - - gnix_info = fi_allocinfo(); - if (gnix_info == NULL) { - return NULL; - } - - gnix_info->caps = GNIX_EP_CAPS_FULL; - gnix_info->tx_attr->op_flags = 0; - gnix_info->rx_attr->op_flags = 0; - gnix_info->ep_attr->type = FI_EP_RDM; - gnix_info->ep_attr->protocol = FI_PROTO_GNI; - gnix_info->ep_attr->max_msg_size = GNIX_MAX_MSG_SIZE; - gnix_info->ep_attr->mem_tag_format = FI_TAG_GENERIC; - gnix_info->ep_attr->tx_ctx_cnt = 1; - gnix_info->ep_attr->rx_ctx_cnt = 1; - - gnix_info->domain_attr->threading = FI_THREAD_SAFE; - gnix_info->domain_attr->control_progress = FI_PROGRESS_AUTO; - gnix_info->domain_attr->data_progress = FI_PROGRESS_AUTO; - gnix_info->domain_attr->av_type = FI_AV_UNSPEC; - /* - * the cm_nic currently sucks up one of the gnix_nic's so - * we have to subtract one from the gnix_max_nics_per_ptag. - */ - gnix_info->domain_attr->tx_ctx_cnt = (gnix_max_nics_per_ptag == 1) ? 
- 1 : gnix_max_nics_per_ptag - 1; - gnix_info->domain_attr->rx_ctx_cnt = gnix_max_nics_per_ptag; - gnix_info->domain_attr->cntr_cnt = _gnix_get_cq_limit() / 2; - gnix_info->domain_attr->cq_cnt = _gnix_get_cq_limit() / 2; - gnix_info->domain_attr->ep_cnt = SIZE_MAX; - - gnix_info->domain_attr->name = strdup(gnix_dom_name); - gnix_info->domain_attr->cq_data_size = sizeof(uint64_t); - gnix_info->domain_attr->mr_mode = FI_MR_BASIC; - gnix_info->domain_attr->resource_mgmt = FI_RM_ENABLED; - gnix_info->domain_attr->mr_key_size = sizeof(uint64_t); - gnix_info->domain_attr->max_ep_tx_ctx = GNIX_SEP_MAX_CNT; - gnix_info->domain_attr->max_ep_rx_ctx = GNIX_SEP_MAX_CNT; - gnix_info->domain_attr->mr_iov_limit = 1; - gnix_info->domain_attr->caps = GNIX_DOM_CAPS; - gnix_info->domain_attr->mode = 0; - gnix_info->domain_attr->mr_cnt = 65535; - - gnix_info->next = NULL; - gnix_info->addr_format = FI_ADDR_GNI; - gnix_info->src_addrlen = sizeof(struct gnix_ep_name); - gnix_info->dest_addrlen = sizeof(struct gnix_ep_name); - gnix_info->src_addr = NULL; - gnix_info->dest_addr = NULL; - - gnix_info->tx_attr->msg_order = FI_ORDER_SAS; - gnix_info->tx_attr->comp_order = FI_ORDER_NONE; - gnix_info->tx_attr->size = GNIX_TX_SIZE_DEFAULT; - gnix_info->tx_attr->iov_limit = GNIX_MAX_MSG_IOV_LIMIT; - gnix_info->tx_attr->inject_size = GNIX_INJECT_SIZE; - gnix_info->tx_attr->rma_iov_limit = GNIX_MAX_RMA_IOV_LIMIT; - gnix_info->rx_attr->msg_order = FI_ORDER_SAS; - gnix_info->rx_attr->comp_order = FI_ORDER_NONE; - gnix_info->rx_attr->size = GNIX_RX_SIZE_DEFAULT; - gnix_info->rx_attr->iov_limit = GNIX_MAX_MSG_IOV_LIMIT; - - return gnix_info; -} - -static int __gnix_getinfo_resolve_node(const char *node, const char *service, - uint64_t flags, const struct fi_info *hints, - struct fi_info *info) -{ - int ret; - struct gnix_ep_name *dest_addr = NULL; - struct gnix_ep_name *src_addr = NULL; - bool is_fi_addr_str = false; - - /* TODO: Add version check when we decide on how to do it */ - if (hints && 
hints->addr_format == FI_ADDR_STR) { - is_fi_addr_str = true; - } - - if (OFI_UNLIKELY(is_fi_addr_str && node && service)) { - GNIX_WARN(FI_LOG_FABRIC, "service parameter must be NULL when " - "node parameter is not and using FI_ADDR_STR.\n"); - return -FI_EINVAL; - } - - if (flags & FI_SOURCE) { - /* -resolve node/port to make info->src_addr - * -ignore hints->src_addr - * -copy hints->dest_addr to output info */ - src_addr = malloc(sizeof(*src_addr)); - if (!src_addr) { - ret = -FI_ENOMEM; - goto err; - } - - if (is_fi_addr_str) { - ret = _gnix_ep_name_from_str(node, src_addr); - } else { - ret = _gnix_resolve_name(node, service, flags, - src_addr); - } - - if (ret != FI_SUCCESS) { - ret = -FI_ENODATA; - goto err; - } - - if (hints && hints->dest_addr) { - dest_addr = malloc(sizeof(*dest_addr)); - if (!dest_addr) { - ret = -FI_ENOMEM; - goto err; - } - - memcpy(dest_addr, hints->dest_addr, - hints->dest_addrlen); - } - } else { - /* -try to resolve node/port to make info->dest_addr - * -fallback to copying hints->dest_addr to output info - * -try to copy hints->src_addr to output info - * -falback to finding src_addr for output info */ - if (node || service) { - dest_addr = malloc(sizeof(*dest_addr)); - if (!dest_addr) { - ret = -FI_ENOMEM; - goto err; - } - - if (is_fi_addr_str) { - ret = _gnix_ep_name_from_str(node, dest_addr); - } else { - ret = _gnix_resolve_name(node, service, flags, - dest_addr); - } - - if (ret != FI_SUCCESS) { - ret = -FI_ENODATA; - goto err; - } - } else { - if (hints && hints->dest_addr) { - dest_addr = malloc(sizeof(*dest_addr)); - if (!dest_addr) { - ret = -FI_ENOMEM; - goto err; - } - - memcpy(dest_addr, hints->dest_addr, - hints->dest_addrlen); - } - } - - if (hints && hints->src_addr) { - src_addr = malloc(sizeof(*src_addr)); - if (!src_addr) { - ret = -FI_ENOMEM; - goto err; - } - - memcpy(src_addr, hints->src_addr, hints->src_addrlen); - } else { - src_addr = malloc(sizeof(*src_addr)); - if (!src_addr) { - ret = -FI_ENOMEM; - 
goto err; - } - - ret = _gnix_src_addr(src_addr); - if (ret != FI_SUCCESS) - goto err; - } - } - - GNIX_INFO(FI_LOG_FABRIC, "%snode: %s service: %s\n", - flags & FI_SOURCE ? "(FI_SOURCE) " : "", node, service); - - if (src_addr) - GNIX_INFO(FI_LOG_FABRIC, "src_pe: 0x%x src_port: 0x%lx\n", - src_addr->gnix_addr.device_addr, - src_addr->gnix_addr.cdm_id); - if (dest_addr) - GNIX_INFO(FI_LOG_FABRIC, "dest_pe: 0x%x dest_port: 0x%lx\n", - dest_addr->gnix_addr.device_addr, - dest_addr->gnix_addr.cdm_id); - - if (src_addr) { - info->src_addr = src_addr; - info->src_addrlen = sizeof(*src_addr); - } - - if (dest_addr) { - info->dest_addr = dest_addr; - info->dest_addrlen = sizeof(*dest_addr); - } - - return FI_SUCCESS; - -err: - free(src_addr); - free(dest_addr); - - return ret; -} - -static int _gnix_ep_getinfo(enum fi_ep_type ep_type, uint32_t version, - const char *node, const char *service, - uint64_t flags, const struct fi_info *hints, - struct fi_info **info) -{ - uint64_t mode = GNIX_FAB_MODES; - struct fi_info *gnix_info = NULL; - int ret = -FI_ENODATA; - int mr_mode; - - GNIX_TRACE(FI_LOG_FABRIC, "\n"); - - if ((hints && hints->ep_attr) && - (hints->ep_attr->type != FI_EP_UNSPEC && - hints->ep_attr->type != ep_type)) { - return -FI_ENODATA; - } - - gnix_info = _gnix_allocinfo(); - if (!gnix_info) - return -FI_ENOMEM; - - gnix_info->ep_attr->type = ep_type; - - if (hints) { - /* TODO: Add version check when we decide on how to do it */ - if (hints->addr_format == FI_ADDR_STR) { - gnix_info->addr_format = FI_ADDR_STR; - } - - if (hints->ep_attr) { - /* Only support FI_PROTO_GNI protocol. 
*/ - switch (hints->ep_attr->protocol) { - case FI_PROTO_UNSPEC: - case FI_PROTO_GNI: - break; - default: - goto err; - } - - if ((hints->ep_attr->tx_ctx_cnt > GNIX_SEP_MAX_CNT) && - (hints->ep_attr->tx_ctx_cnt != - FI_SHARED_CONTEXT)) { - goto err; - } - - if (hints->ep_attr->rx_ctx_cnt > GNIX_SEP_MAX_CNT) - goto err; - - if (hints->ep_attr->tx_ctx_cnt) - gnix_info->ep_attr->tx_ctx_cnt = - hints->ep_attr->tx_ctx_cnt; - if (hints->ep_attr->rx_ctx_cnt) - gnix_info->ep_attr->rx_ctx_cnt = - hints->ep_attr->rx_ctx_cnt; - - if (hints->ep_attr->max_msg_size > GNIX_MAX_MSG_SIZE) - goto err; - } - - GNIX_DEBUG(FI_LOG_FABRIC, "Passed EP attributes check\n"); - - /* - * check the mode field - */ - if (hints->mode) { - if ((hints->mode & GNIX_FAB_MODES) != GNIX_FAB_MODES) { - goto err; - } - mode = hints->mode & ~GNIX_FAB_MODES_CLEAR; - if (FI_VERSION_LT(version, FI_VERSION(1, 5))) { - mode = hints->mode & ~FI_NOTIFY_FLAGS_ONLY; - } - } - - GNIX_DEBUG(FI_LOG_FABRIC, "Passed mode check\n"); - - if (hints->caps) { - /* The provider must support all requested - * capabilities. 
*/ - if ((hints->caps & GNIX_EP_CAPS_FULL) != hints->caps) - goto err; - } - - GNIX_DEBUG(FI_LOG_FABRIC, "Passed caps check gnix_info->caps = 0x%016lx\n", - gnix_info->caps); - - if (hints->tx_attr) { - if ((hints->tx_attr->op_flags & GNIX_EP_OP_FLAGS) != - hints->tx_attr->op_flags) { - goto err; - } - if (hints->tx_attr->inject_size > GNIX_INJECT_SIZE) { - goto err; - } - - gnix_info->tx_attr->op_flags = - hints->tx_attr->op_flags & GNIX_EP_OP_FLAGS; - } - - GNIX_DEBUG(FI_LOG_FABRIC, "Passed TX attributes check\n"); - - if (hints->rx_attr) { - if ((hints->rx_attr->op_flags & GNIX_EP_OP_FLAGS) != - hints->rx_attr->op_flags) { - goto err; - } - - gnix_info->rx_attr->op_flags = - hints->rx_attr->op_flags & GNIX_EP_OP_FLAGS; - } - - if (hints->fabric_attr && hints->fabric_attr->name && - strncmp(hints->fabric_attr->name, gnix_fab_name, - strlen(gnix_fab_name))) { - goto err; - } - - GNIX_DEBUG(FI_LOG_FABRIC, "Passed fabric name check\n"); - - if (hints->domain_attr) { - mr_mode = hints->domain_attr->mr_mode; - - if (hints->domain_attr->name && - strncmp(hints->domain_attr->name, gnix_dom_name, - strlen(gnix_dom_name))) { - goto err; - } - - if (hints->domain_attr->control_progress != - FI_PROGRESS_UNSPEC) - gnix_info->domain_attr->control_progress = - hints->domain_attr->control_progress; - - if (hints->domain_attr->data_progress != - FI_PROGRESS_UNSPEC) - gnix_info->domain_attr->data_progress = - hints->domain_attr->data_progress; - - /* If basic registration isn't being requested, - require FI_MR_MMU_NOTIFY */ - if (!(hints->domain_attr->mr_mode & - (FI_MR_BASIC | FI_MR_ALLOCATED))) - gnix_info->domain_attr->mr_mode |= FI_MR_MMU_NOTIFY; - - if (ofi_check_mr_mode(&gnix_prov, version, - gnix_info->domain_attr->mr_mode, - hints) != FI_SUCCESS) { - GNIX_INFO(FI_LOG_DOMAIN, - "failed ofi_check_mr_mode, " - "ret=%d\n", ret); - goto err; - } - - if (FI_VERSION_LT(version, FI_VERSION(1, 5))) { - switch (mr_mode) { - case FI_MR_UNSPEC: - case FI_MR_BASIC: - mr_mode = 
FI_MR_BASIC; - break; - default: - GNIX_DEBUG(FI_LOG_FABRIC, - "unsupported mr_mode selected, " - "ret=%d\n", ret); - goto err; - } - } else { - /* define the mode we return to the user - * prefer basic until scalable - * has more testing time */ - if (mr_mode & FI_MR_BASIC) - mr_mode = OFI_MR_BASIC_MAP; - else if ((mr_mode & GNIX_MR_BASIC_REQ) == - GNIX_MR_BASIC_REQ) - mr_mode &= GNIX_MR_BASIC_BITS; - else - mr_mode &= GNIX_MR_SCALABLE_BITS; - } - - gnix_info->domain_attr->mr_mode = mr_mode; - - switch (hints->domain_attr->threading) { - case FI_THREAD_COMPLETION: - gnix_info->domain_attr->threading = - hints->domain_attr->threading; - break; - default: - break; - } - - if (hints->domain_attr->caps) { - if (hints->domain_attr->caps & ~GNIX_DOM_CAPS) { - GNIX_WARN(FI_LOG_FABRIC, - "Invalid domain caps\n"); - goto err; - } - - gnix_info->domain_attr->caps = - hints->domain_attr->caps; - } - - } - } - - ret = __gnix_getinfo_resolve_node(node, service, flags, hints, - gnix_info); - if (ret != FI_SUCCESS) - goto err; - - ofi_alter_info(gnix_info, hints, version); - - GNIX_DEBUG(FI_LOG_FABRIC, "Passed the domain attributes check\n"); - - /* The provider may silently enable secondary - * capabilities that do not introduce any overhead. 
*/ - if (hints && hints->caps) - gnix_info->caps = hints->caps | GNIX_EP_SEC_CAPS; - else - gnix_info->caps = GNIX_EP_CAPS_FULL | GNIX_EP_SEC_CAPS; - - gnix_info->mode = mode; - gnix_info->fabric_attr->name = strdup(gnix_fab_name); - gnix_info->tx_attr->caps = gnix_info->caps; - gnix_info->tx_attr->mode = gnix_info->mode; - gnix_info->rx_attr->caps = gnix_info->caps; - gnix_info->rx_attr->mode = gnix_info->mode; - - *info = gnix_info; - - GNIX_DEBUG(FI_LOG_FABRIC, "Returning EP type: %s\n", - fi_tostr(&ep_type, FI_TYPE_EP_TYPE)); - return FI_SUCCESS; -err: - fi_freeinfo(gnix_info); - return ret; -} - -static int gnix_getinfo(uint32_t version, const char *node, const char *service, - uint64_t flags, const struct fi_info *hints, - struct fi_info **info) -{ - int ret = 0; - struct fi_info *info_ptr; - - /* Note that info entries are added to the head of 'info', that is, - * they are preferred in the reverse order shown here. */ - - *info = NULL; - - ret = _gnix_ep_getinfo(FI_EP_MSG, version, node, service, flags, - hints, &info_ptr); - if (ret == FI_SUCCESS) { - info_ptr->next = *info; - *info = info_ptr; - } - - ret = _gnix_ep_getinfo(FI_EP_DGRAM, version, node, service, flags, - hints, &info_ptr); - if (ret == FI_SUCCESS) { - info_ptr->next = *info; - *info = info_ptr; - } - - ret = _gnix_ep_getinfo(FI_EP_RDM, version, node, service, flags, - hints, &info_ptr); - if (ret == FI_SUCCESS) { - info_ptr->next = *info; - *info = info_ptr; - } - - return *info ? 
FI_SUCCESS : -FI_ENODATA; -} - -static void gnix_fini(void) -{ -} - -struct fi_provider gnix_prov = { - .name = gnix_prov_name, - .version = FI_VERSION(GNI_MAJOR_VERSION, GNI_MINOR_VERSION), - .fi_version = OFI_VERSION_LATEST, - .getinfo = gnix_getinfo, - .fabric = gnix_fabric_open, - .cleanup = gnix_fini -}; - -GNI_INI -{ - struct fi_provider *provider = NULL; - gni_return_t status; - gni_version_info_t lib_version; - int num_devices; - int ret; - - /* - * if no GNI devices available, don't register as provider - */ - status = GNI_GetNumLocalDevices(&num_devices); - if ((status != GNI_RC_SUCCESS) || (num_devices == 0)) { - return NULL; - } - - /* - * ensure all globals are properly initialized - */ - _gnix_init(); - - /* sanity check that the 1 aries/node holds */ - assert(num_devices == 1); - - /* - * don't register if available ugni is older than one libfabric was - * built against - */ - status = GNI_GetVersionInformation(&lib_version); - if ((GNI_GET_MAJOR(lib_version.ugni_version) > GNI_MAJOR_REV) || - ((GNI_GET_MAJOR(lib_version.ugni_version) == GNI_MAJOR_REV) && - GNI_GET_MINOR(lib_version.ugni_version) >= GNI_MINOR_REV)) { - provider = &gnix_prov; - } - - /* Initialize global MR notifier. */ - ret = _gnix_smrn_init(); - if (ret != FI_SUCCESS) - GNIX_FATAL(FI_LOG_FABRIC, - "failed to initialize global mr notifier\n"); - - /* Initialize global NIC data. 
*/ - _gnix_nic_init(); - - if (getenv("GNIX_DISABLE_XPMEM") != NULL) - gnix_xpmem_disabled = true; - if (getenv("GNIX_MBOX_FALLBACK_DISABLE") != NULL) - gnix_mbox_alloc_allow_fallback = false; - - return (provider); -} - -static int -__gnix_fab_ops_get_val(struct fid *fid, fab_ops_val_t t, void *val) -{ - GNIX_TRACE(FI_LOG_FABRIC, "\n"); - - assert(val); - - if (fid->fclass != FI_CLASS_FABRIC) { - GNIX_WARN(FI_LOG_FABRIC, "Invalid fabric\n"); - return -FI_EINVAL; - } - - switch (t) { - case GNI_WAIT_THREAD_SLEEP: - *(uint32_t *)val = gnix_wait_thread_sleep_time; - break; - case GNI_DEFAULT_USER_REGISTRATION_LIMIT: - *(uint32_t *)val = gnix_default_user_registration_limit; - break; - case GNI_DEFAULT_PROV_REGISTRATION_LIMIT: - *(uint32_t *)val = gnix_default_prov_registration_limit; - break; - case GNI_WAIT_SHARED_MEMORY_TIMEOUT: - *(uint32_t *)val = gnix_wait_shared_memory_timeout; - break; - default: - GNIX_WARN(FI_LOG_FABRIC, ("Invalid fab_ops_val\n")); - } - - return FI_SUCCESS; -} - -static int -__gnix_fab_ops_set_val(struct fid *fid, fab_ops_val_t t, void *val) -{ - int v; - - assert(val); - - if (fid->fclass != FI_CLASS_FABRIC) { - GNIX_WARN(FI_LOG_FABRIC, "Invalid fabric\n"); - return -FI_EINVAL; - } - - switch (t) { - case GNI_WAIT_THREAD_SLEEP: - v = *(uint32_t *) val; - gnix_wait_thread_sleep_time = v; - break; - case GNI_DEFAULT_USER_REGISTRATION_LIMIT: - v = *(uint32_t *) val; - if (v > GNIX_MAX_SCALABLE_REGISTRATIONS) { - GNIX_ERR(FI_LOG_FABRIC, - "User specified an invalid user registration " - "limit, requested=%d maximum=%d\n", - v, GNIX_MAX_SCALABLE_REGISTRATIONS); - return -FI_EINVAL; - } - gnix_default_user_registration_limit = v; - break; - case GNI_DEFAULT_PROV_REGISTRATION_LIMIT: - v = *(uint32_t *) val; - if (v > GNIX_MAX_SCALABLE_REGISTRATIONS) { - GNIX_ERR(FI_LOG_FABRIC, - "User specified an invalid prov registration " - "limit, requested=%d maximum=%d\n", - v, GNIX_MAX_SCALABLE_REGISTRATIONS); - return -FI_EINVAL; - } - 
gnix_default_prov_registration_limit = v; - break; - case GNI_WAIT_SHARED_MEMORY_TIMEOUT: - v = *(uint32_t *) val; - gnix_wait_shared_memory_timeout = v; - break; - default: - GNIX_WARN(FI_LOG_FABRIC, ("Invalid fab_ops_val\n")); - return -FI_EINVAL; - } - - return FI_SUCCESS; -} - -static int __gnix_auth_key_initialize( - uint8_t *auth_key, - size_t auth_key_size, - struct gnix_auth_key_attr *attr) -{ - struct gnix_auth_key *info = NULL; - int ret = FI_SUCCESS; - - info = _gnix_auth_key_lookup(auth_key, auth_key_size); - - if (info) { - GNIX_WARN(FI_LOG_FABRIC, "authorization key is already " - "initialized, auth_key=%d auth_key_size=%d\n", - auth_key, auth_key_size); - return -FI_ENOSPC; /* already initialized*/ - } - - info = _gnix_auth_key_alloc(); - if (!info) - return -FI_ENOMEM; - - if (attr) - info->attr = *attr; - else { - info->attr.user_key_limit = - gnix_default_user_registration_limit; - info->attr.prov_key_limit = - gnix_default_prov_registration_limit; - } - - ret = _gnix_auth_key_insert(auth_key, auth_key_size, info); - if (ret) { - GNIX_INFO(FI_LOG_FABRIC, "failed to insert authorization key" - ", key=%p len=%d ret=%d\n", - auth_key, auth_key_size, ret); - _gnix_auth_key_free(info); - info = NULL; - } - - return ret; -} - -static int __gnix_auth_key_set_val( - uint8_t *auth_key, - size_t auth_key_size, - gnix_auth_key_opt_t opt, - void *val) -{ - struct gnix_auth_key *info; - int v; - int ret = FI_SUCCESS; - - if (!val) - return -FI_EINVAL; - - info = _gnix_auth_key_lookup(auth_key, auth_key_size); - - if (!info) { - ret = __gnix_auth_key_initialize(auth_key, auth_key_size, NULL); - assert(ret == FI_SUCCESS); - - info = _gnix_auth_key_lookup(auth_key, auth_key_size); - assert(info); - } - - /* if the limits have already been set, and the user is - * trying to modify it, kick it back */ - if (opt == GNIX_USER_KEY_LIMIT || opt == GNIX_PROV_KEY_LIMIT) { - ofi_spin_lock(&info->lock); - if (info->enabled) { - ofi_spin_unlock(&info->lock); - 
GNIX_INFO(FI_LOG_FABRIC, "authorization key already " - "enabled and cannot be modified\n"); - return -FI_EAGAIN; - } - } - - switch (opt) { - case GNIX_USER_KEY_LIMIT: - v = *(int *) val; - if (v >= GNIX_MAX_SCALABLE_REGISTRATIONS) { - GNIX_ERR(FI_LOG_FABRIC, - "User is requesting more registrations than is present on node\n"); - ret = -FI_EINVAL; - } else - info->attr.user_key_limit = v; - ofi_spin_unlock(&info->lock); - break; - case GNIX_PROV_KEY_LIMIT: - v = *(int *) val; - if (v >= GNIX_MAX_SCALABLE_REGISTRATIONS) { - GNIX_ERR(FI_LOG_FABRIC, - "User is requesting more registrations than is present on node\n"); - ret = -FI_EINVAL; - } - info->attr.prov_key_limit = v; - ofi_spin_unlock(&info->lock); - break; - case GNIX_TOTAL_KEYS_NEEDED: - GNIX_WARN(FI_LOG_FABRIC, - "GNIX_TOTAL_KEYS_NEEDED is not a definable value.\n"); - return -FI_EOPNOTSUPP; - case GNIX_USER_KEY_MAX_PER_RANK: - GNIX_WARN(FI_LOG_FABRIC, - "GNIX_USER_KEY_MAX_PER_RANK is not a definable " - "value.\n"); - return -FI_EOPNOTSUPP; - default: - GNIX_WARN(FI_LOG_FABRIC, ("Invalid fab_ops_val\n")); - return -FI_EINVAL; - } - - return ret; -} - -static int __gnix_auth_key_get_val( - uint8_t *auth_key, - size_t auth_key_size, - gnix_auth_key_opt_t opt, - void *val) -{ - struct gnix_auth_key *info; - uint32_t pes_on_node; - int ret; - - if (!val) - return -FI_EINVAL; - - info = _gnix_auth_key_lookup(auth_key, auth_key_size); - - switch (opt) { - case GNIX_USER_KEY_LIMIT: - *(int *)val = (info) ? - info->attr.user_key_limit : - gnix_default_user_registration_limit; - break; - case GNIX_PROV_KEY_LIMIT: - *(int *)val = (info) ? - info->attr.prov_key_limit : - gnix_default_prov_registration_limit; - break; - case GNIX_TOTAL_KEYS_NEEDED: - *(uint32_t *)val = ((info) ? 
- (info->attr.user_key_limit + - info->attr.prov_key_limit) : - (gnix_default_user_registration_limit + - gnix_default_prov_registration_limit)); - break; - case GNIX_USER_KEY_MAX_PER_RANK: - ret = _gnix_pes_on_node(&pes_on_node); - if (ret) { - GNIX_WARN(FI_LOG_FABRIC, - "failed to get pes on node count\n"); - return -FI_EINVAL; - } - - *(int *)val = ((info) ? - info->attr.user_key_limit : - gnix_default_user_registration_limit) / - pes_on_node; - break; - default: - GNIX_WARN(FI_LOG_FABRIC, ("Invalid fab_ops_val\n")); - return -FI_EINVAL; - } - - return FI_SUCCESS; -} - -/******************************************************************************* - * FI_OPS_* data structures. - ******************************************************************************/ - -static struct fi_gni_ops_fab gnix_ops_fab = { - .set_val = __gnix_fab_ops_set_val, - .get_val = __gnix_fab_ops_get_val -}; - -static struct fi_ops gnix_fab_fi_ops = { - .size = sizeof(struct fi_ops), - .close = gnix_fabric_close, - .bind = fi_no_bind, - .control = fi_no_control, - .ops_open = gnix_fab_ops_open, -}; - -static struct fi_gni_auth_key_ops_fab gnix_fab_ak_ops = { - .set_val = __gnix_auth_key_set_val, - .get_val = __gnix_auth_key_get_val, -}; diff --git a/prov/gni/src/gnix_freelist.c b/prov/gni/src/gnix_freelist.c deleted file mode 100644 index 9091651933a..00000000000 --- a/prov/gni/src/gnix_freelist.c +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2015 Cray Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* - * Endpoint common code - */ -#include -#include -#include - -#include "gnix_freelist.h" -#include "gnix_util.h" - -/* - * NOTES: - * - thread safe if initialized with _nix_fl_init_ts - * - Does not shrink - * - Cannot be used for data structures with alignment requirements - * - Refill size increases by growth_factor each time growth is needed - * (limited by max_refill_size) - * - Refills are allocated as chunks, which are managed by chunks slist - * - Allocate an extra element at the beginning of each chunk for the - * chunk slist - * - Individual elements are *not* zeroed before being returned - * - * Your structure doesn't really need to have an slist_entry pointer, - * it just has to be at least as big as an slist_entry. 
- */ - -int __gnix_fl_refill(struct gnix_freelist *fl, int n) -{ int i, ret = FI_SUCCESS; - unsigned char *elems; - - assert(fl); - assert(n > 0); - /* - * We allocate an extra element for use as the pointer to the - * memory chunk maintained in the chunks field for later - * freeing. Use an entire element, in case size was padded - * for alignment - */ - elems = calloc((n+1), fl->elem_size); - if (elems == NULL) { - ret = -FI_ENOMEM; - goto err; - } - - /* Save away the pointer to the chunk */ - slist_insert_tail((struct slist_entry *) elems, &fl->chunks); - - /* Start with slist_entry of first element */ - elems += fl->elem_size + fl->offset; - - for (i = 0; i < n; i++) { - dlist_init((struct dlist_entry *) elems); - dlist_insert_tail((struct dlist_entry *) elems, &fl->freelist); - elems += fl->elem_size; - } -err: - return ret; -} - -int _gnix_fl_init(int elem_size, int offset, int init_size, - int refill_size, int growth_factor, - int max_refill_size, struct gnix_freelist *fl) -{ - assert(elem_size > 0); - assert(offset >= 0); - assert(init_size >= 0); - assert(refill_size >= 0); - assert(growth_factor >= 0); - assert(max_refill_size >= 0); - - int fill_size = init_size != 0 ? init_size : GNIX_FL_INIT_SIZE; - - fl->refill_size = refill_size; - fl->growth_factor = (growth_factor != 0 ? - growth_factor : - GNIX_FL_GROWTH_FACTOR); - fl->max_refill_size = (max_refill_size != 0 ? - max_refill_size : - fill_size); - fl->elem_size = elem_size; - fl->offset = offset; - - dlist_init(&fl->freelist); - assert(slist_empty(&fl->chunks)); /* maybe should be a warning? 
*/ - slist_init(&fl->chunks); - - return __gnix_fl_refill(fl, fill_size); -} - -int _gnix_fl_init_ts(int elem_size, int offset, int init_size, - int refill_size, int growth_factor, - int max_refill_size, struct gnix_freelist *fl) -{ - int ret; - - ret = _gnix_fl_init(elem_size, - offset, - init_size, - refill_size, - growth_factor, - max_refill_size, - fl); - if (ret == FI_SUCCESS) { - fl->ts = 1; - ofi_spin_init(&fl->lock); - } - - return ret; - -} - -void _gnix_fl_destroy(struct gnix_freelist *fl) -{ - assert(fl); - - struct slist_entry *chunk; - - for (chunk = slist_remove_head(&fl->chunks); - chunk != NULL; - chunk = slist_remove_head(&fl->chunks)) { - free(chunk); - } - - if (fl->ts) - ofi_spin_destroy(&fl->lock); -} - - diff --git a/prov/gni/src/gnix_hashtable.c b/prov/gni/src/gnix_hashtable.c deleted file mode 100644 index a573d94d4cc..00000000000 --- a/prov/gni/src/gnix_hashtable.c +++ /dev/null @@ -1,852 +0,0 @@ -/* - * Copyright (c) 2015 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include - -#include "gnix_hashtable.h" -#include "fasthash.h" - -#include "gnix_util.h" - -#define __GNIX_HT_INITIAL_SIZE 128 -#define __GNIX_HT_MAXIMUM_SIZE 1024 -#define __GNIX_HT_INCREASE_STEP 2 - -#define __GNIX_HT_COLLISION_THRESH 400 /* average of 4 elements per bucket */ - -/* - * __gnix_ht_lf* prefixes denote lock free version of functions intended for - * use with hashtables that had attr->ht_internal_locking set to zero - * during initialization - * - * __gnix_ht_lk* prefixes denote locking versions of functions intended for - * use with hash tables that had attr->ht_internal_locking set to a non-zero - * value during initialization - */ - -/* - * default_attr is global for a criterion test. 
- */ -gnix_hashtable_attr_t default_attr = { - .ht_initial_size = __GNIX_HT_INITIAL_SIZE, - .ht_maximum_size = __GNIX_HT_MAXIMUM_SIZE, - .ht_increase_step = __GNIX_HT_INCREASE_STEP, - .ht_increase_type = GNIX_HT_INCREASE_MULT, - .ht_collision_thresh = __GNIX_HT_COLLISION_THRESH, - .ht_hash_seed = 0, - .ht_internal_locking = 0, - .destructor = NULL -}; - -static gnix_hashtable_ops_t __gnix_lockless_ht_ops; -static gnix_hashtable_ops_t __gnix_locked_ht_ops; - -static int __gnix_ht_check_attr_sanity(gnix_hashtable_attr_t *attr) -{ - if (attr->ht_initial_size == 0 || - attr->ht_initial_size > attr->ht_maximum_size) - return -FI_EINVAL; - - if (attr->ht_maximum_size == 0) - return -FI_EINVAL; - - if (attr->ht_increase_step == 0) - return -FI_EINVAL; - - if (!(attr->ht_increase_type == GNIX_HT_INCREASE_ADD || - attr->ht_increase_type == GNIX_HT_INCREASE_MULT)) - return -FI_EINVAL; - - if (attr->ht_increase_step == 1 && - attr->ht_increase_type == GNIX_HT_INCREASE_MULT) - return -FI_EINVAL; - - if (attr->ht_collision_thresh == 0) - return -FI_EINVAL; - - return 0; -} - -static inline void __gnix_ht_delete_entry(gnix_ht_entry_t *ht_entry) -{ - dlist_remove(&ht_entry->entry); - - ht_entry->value = NULL; - ht_entry->key = 0; - free(ht_entry); -} - -static inline void __gnix_ht_init_lk_list_head(gnix_ht_lk_lh_t *lh) -{ - dlist_init(&lh->head); - rwlock_init(&lh->lh_lock); -} - -static inline void __gnix_ht_init_lf_list_head(gnix_ht_lf_lh_t *lh) -{ - dlist_init(&lh->head); -} - -static inline gnix_ht_key_t __gnix_hash_func( - gnix_hashtable_t *ht, - gnix_ht_key_t key) -{ - return fasthash64(&key, sizeof(gnix_ht_key_t), - ht->ht_attr.ht_hash_seed) % ht->ht_size; -} - -static inline gnix_ht_entry_t *__gnix_ht_lookup_entry_collision( - struct dlist_entry *head, - gnix_ht_key_t key, - uint64_t *collision_count) -{ - gnix_ht_entry_t *ht_entry; - - dlist_for_each(head, ht_entry, entry) { - READ_PREFETCH(ht_entry->entry.next); - if (ht_entry->key == key) - return ht_entry; - - 
*collision_count += 1; - } - - return NULL; -} - -static inline gnix_ht_entry_t *__gnix_ht_lookup_entry( - struct dlist_entry *head, - gnix_ht_key_t key) -{ - gnix_ht_entry_t *ht_entry; - - dlist_for_each(head, ht_entry, entry) { - READ_PREFETCH(ht_entry->entry.next); - if (ht_entry->key == key) - return ht_entry; - } - - return NULL; -} - -static inline void *__gnix_ht_lookup_key( - struct dlist_entry *head, - gnix_ht_key_t key) -{ - gnix_ht_entry_t *ht_entry = __gnix_ht_lookup_entry(head, key); - - return ((ht_entry != NULL) ? ht_entry->value : NULL); -} - -static inline int __gnix_ht_destroy_list( - gnix_hashtable_t *ht, - struct dlist_entry *head) -{ - gnix_ht_entry_t *ht_entry, *iter; - void *value; - int entries_freed = 0; - - dlist_for_each_safe(head, ht_entry, iter, entry) { - value = ht_entry->value; - __gnix_ht_delete_entry(ht_entry); - if (ht->ht_attr.destructor != NULL) { - ht->ht_attr.destructor(value); - } - ++entries_freed; - } - - return entries_freed; -} - -static inline int __gnix_ht_insert_list( - struct dlist_entry *head, - gnix_ht_entry_t *ht_entry, - uint64_t *collisions) -{ - gnix_ht_entry_t *found; - - found = __gnix_ht_lookup_entry_collision(head, ht_entry->key, collisions); - if (!found) { - dlist_insert_tail(&ht_entry->entry, head); - } else { - return -FI_ENOSPC; - } - - return 0; -} - -static inline int __gnix_ht_remove_list( - struct dlist_entry *head, - gnix_ht_key_t key) -{ - gnix_ht_entry_t *ht_entry; - - ht_entry = __gnix_ht_lookup_entry(head, key); - if (!ht_entry) { - return -FI_ENOENT; - } - __gnix_ht_delete_entry(ht_entry); - - return 0; -} - -static inline void __gnix_ht_rehash_list( - gnix_hashtable_t *ht, - struct dlist_entry *head) -{ - gnix_ht_entry_t *ht_entry, *tmp; - gnix_ht_key_t bucket; - struct dlist_entry *ht_lh; - uint64_t trash; // No collision information is recorded - - if (dlist_empty(head)) - return; - - dlist_for_each_safe(head, ht_entry, tmp, entry) { - bucket = __gnix_hash_func(ht, ht_entry->key); - ht_lh = 
ht->ht_ops->retrieve_list(ht, bucket); - - dlist_remove(&ht_entry->entry); - - __gnix_ht_insert_list(ht_lh, ht_entry, &trash); - } -} - -static inline void __gnix_ht_resize_hashtable_inc(gnix_hashtable_t *ht) -{ - int old_size = ht->ht_size; - int new_size; - - /* set up the new bucket list size */ - if (ht->ht_attr.ht_increase_type == GNIX_HT_INCREASE_ADD) - new_size = old_size + ht->ht_attr.ht_increase_step; - else - new_size = old_size * ht->ht_attr.ht_increase_step; - - new_size = MIN(new_size, ht->ht_attr.ht_maximum_size); - - /* ignore ret code for now. In the future, we might provide an info - * if the hash table wont resize. It is generally a performance - * issue if we cannot, and not really a bug. - */ - - ht->ht_ops->resize(ht, new_size, old_size); -} - -static inline void __gnix_ht_resize_hashtable_dec(gnix_hashtable_t *ht) -{ - int old_size = ht->ht_size; - int new_size; - - /* set up the new bucket list size */ - if (ht->ht_attr.ht_increase_type == GNIX_HT_INCREASE_ADD) - new_size = old_size - ht->ht_attr.ht_increase_step; - else - new_size = old_size / ht->ht_attr.ht_increase_step; - - new_size = MAX(new_size, ht->ht_attr.ht_initial_size); - - /* ignore ret code for now. In the future, we might provide an info - * if the hash table wont resize. It is generally a performance - * issue if we cannot, and not really a bug. 
- */ - - ht->ht_ops->resize(ht, new_size, old_size); -} - -static inline void __gnix_ht_common_init(gnix_hashtable_t *ht) -{ - if (ht->ht_state == GNIX_HT_STATE_UNINITIALIZED) { - ofi_atomic_initialize32(&ht->ht_elements, 0); - ofi_atomic_initialize32(&ht->ht_collisions, 0); - ofi_atomic_initialize32(&ht->ht_insertions, 0); - } else { - ofi_atomic_set32(&ht->ht_elements, 0); - ofi_atomic_set32(&ht->ht_collisions, 0); - ofi_atomic_set32(&ht->ht_insertions, 0); - } - - ht->ht_state = GNIX_HT_STATE_READY; -} - -static inline void __gnix_ht_common_destroy(gnix_hashtable_t *ht) -{ - ht->ht_size = 0; - ofi_atomic_set32(&ht->ht_collisions, 0); - ofi_atomic_set32(&ht->ht_insertions, 0); - ofi_atomic_set32(&ht->ht_elements, 0); - ht->ht_state = GNIX_HT_STATE_DEAD; -} - -static gnix_ht_lf_lh_t *__gnix_ht_lf_init_new_table(int nelem) -{ - int i; - gnix_ht_lf_lh_t *tbl = calloc(nelem, sizeof(gnix_ht_lf_lh_t)); - - if (!tbl) - return NULL; - - for (i = 0; i < nelem; ++i) - __gnix_ht_init_lf_list_head(&tbl[i]); - - return tbl; -} - -static int __gnix_ht_lf_init(gnix_hashtable_t *ht) -{ - ht->ht_lf_tbl = __gnix_ht_lf_init_new_table(ht->ht_size); - if (!ht->ht_lf_tbl) - return -FI_ENOMEM; - - __gnix_ht_common_init(ht); - - return 0; -} - -static int __gnix_ht_lf_destroy(gnix_hashtable_t *ht) -{ - int i, freed_entries; - gnix_ht_lf_lh_t *lh; - - for (i = 0; i < ht->ht_size; ++i) { - lh = &ht->ht_lf_tbl[i]; - - freed_entries = __gnix_ht_destroy_list(ht, &lh->head); - - if (freed_entries) - ofi_atomic_sub32(&ht->ht_elements, freed_entries); - } - - free(ht->ht_lf_tbl); - ht->ht_lf_tbl = NULL; - - __gnix_ht_common_destroy(ht); - - return 0; -} - -static int __gnix_ht_lf_insert( - gnix_hashtable_t *ht, - gnix_ht_entry_t *entry, - uint64_t *collisions) -{ - int ret; - int bucket; - gnix_ht_lf_lh_t *lh; - - bucket = __gnix_hash_func(ht, entry->key); - lh = &ht->ht_lf_tbl[bucket]; - - ret = __gnix_ht_insert_list(&lh->head, entry, collisions); - - return ret; -} - -static int 
__gnix_ht_lf_remove(gnix_hashtable_t *ht, gnix_ht_key_t key) -{ - int ret; - int bucket; - - bucket = __gnix_hash_func(ht, key); - ret = __gnix_ht_remove_list(&ht->ht_lf_tbl[bucket].head, key); - - return ret; -} - -static void *__gnix_ht_lf_lookup(gnix_hashtable_t *ht, gnix_ht_key_t key) -{ - int bucket = __gnix_hash_func(ht, key); - gnix_ht_lf_lh_t *lh = &ht->ht_lf_tbl[bucket]; - - return __gnix_ht_lookup_key(&lh->head, key); -} - -static struct dlist_entry *__gnix_ht_lf_retrieve_list( - gnix_hashtable_t *ht, - int bucket) -{ - if (bucket < 0 || bucket >= ht->ht_size) - return NULL; - - return &ht->ht_lf_tbl[bucket].head; -} - -static int __gnix_ht_lf_resize( - gnix_hashtable_t *ht, - int new_size, - int old_size) -{ - gnix_ht_lf_lh_t *old_tbl, *new_tbl; - int i; - - if (ht->ht_size != old_size) - return -FI_EBUSY; - - new_tbl = __gnix_ht_lf_init_new_table(new_size); - if (!new_tbl) - return -FI_ENOMEM; - - old_tbl = ht->ht_lf_tbl; - ht->ht_lf_tbl = new_tbl; - ht->ht_size = new_size; - - for (i = 0; i < old_size; ++i) - __gnix_ht_rehash_list(ht, &old_tbl[i].head); - - free(old_tbl); - - return 0; -} - -static gnix_ht_lk_lh_t *__gnix_ht_lk_init_new_table(int nelem) -{ - int i; - gnix_ht_lk_lh_t *tbl = calloc(nelem, sizeof(gnix_ht_lk_lh_t)); - - if (!tbl) - return NULL; - - for (i = 0; i < nelem; ++i) - __gnix_ht_init_lk_list_head(&tbl[i]); - - return tbl; -} - -static int __gnix_ht_lk_init(gnix_hashtable_t *ht) -{ - if (ht->ht_state != GNIX_HT_STATE_DEAD) - rwlock_init(&ht->ht_lock); - - rwlock_wrlock(&ht->ht_lock); - - ht->ht_lk_tbl = __gnix_ht_lk_init_new_table(ht->ht_size); - if (!ht->ht_lk_tbl) { - rwlock_unlock(&ht->ht_lock); - return -FI_ENOMEM; - } - - __gnix_ht_common_init(ht); - - rwlock_unlock(&ht->ht_lock); - - return 0; -} - -static int __gnix_ht_lk_destroy(gnix_hashtable_t *ht) -{ - int i, freed_entries; - gnix_ht_lk_lh_t *lh; - - if (ht->ht_state != GNIX_HT_STATE_READY) - return -FI_EINVAL; - - rwlock_wrlock(&ht->ht_lock); - - for (i = 0; i < 
ht->ht_size; ++i) { - lh = &ht->ht_lk_tbl[i]; - - freed_entries = __gnix_ht_destroy_list(ht, &lh->head); - - if (freed_entries) - ofi_atomic_sub32(&ht->ht_elements, freed_entries); - } - - free(ht->ht_lk_tbl); - ht->ht_lk_tbl = NULL; - - __gnix_ht_common_destroy(ht); - - rwlock_unlock(&ht->ht_lock); - - return 0; -} - -static int __gnix_ht_lk_insert( - gnix_hashtable_t *ht, - gnix_ht_entry_t *entry, - uint64_t *collisions) -{ - int ret, bucket; - gnix_ht_lk_lh_t *lh; - - rwlock_rdlock(&ht->ht_lock); - - bucket = __gnix_hash_func(ht, entry->key); - lh = &ht->ht_lk_tbl[bucket]; - - rwlock_wrlock(&lh->lh_lock); - ret = __gnix_ht_insert_list(&lh->head, entry, collisions); - rwlock_unlock(&lh->lh_lock); - - rwlock_unlock(&ht->ht_lock); - - return ret; -} - -static int __gnix_ht_lk_remove(gnix_hashtable_t *ht, gnix_ht_key_t key) -{ - int ret; - int bucket; - gnix_ht_lk_lh_t *lh; - - rwlock_rdlock(&ht->ht_lock); - - bucket = __gnix_hash_func(ht, key); - lh = &ht->ht_lk_tbl[bucket]; - - rwlock_wrlock(&lh->lh_lock); - ret = __gnix_ht_remove_list(&lh->head, key); - rwlock_unlock(&lh->lh_lock); - - rwlock_unlock(&ht->ht_lock); - - return ret; -} - -static void *__gnix_ht_lk_lookup(gnix_hashtable_t *ht, gnix_ht_key_t key) -{ - void *ret; - int bucket; - gnix_ht_lk_lh_t *lh; - - rwlock_rdlock(&ht->ht_lock); - - bucket = __gnix_hash_func(ht, key); - lh = &ht->ht_lk_tbl[bucket]; - - rwlock_rdlock(&lh->lh_lock); - ret = __gnix_ht_lookup_key(&lh->head, key); - rwlock_unlock(&lh->lh_lock); - - rwlock_unlock(&ht->ht_lock); - - return ret; -} - -static struct dlist_entry *__gnix_ht_lk_retrieve_list( - gnix_hashtable_t *ht, - int bucket) -{ - if (bucket < 0 || bucket >= ht->ht_size) - return NULL; - - return &ht->ht_lk_tbl[bucket].head; -} - -static int __gnix_ht_lk_resize( - gnix_hashtable_t *ht, - int new_size, - int old_size) -{ - int i; - gnix_ht_lk_lh_t *old_tbl, *new_tbl; - - /* race to resize... 
let one of them resize the hash table and the rest - * can just release after the first is done. - */ - rwlock_wrlock(&ht->ht_lock); - if (ht->ht_size != old_size) { - rwlock_unlock(&ht->ht_lock); - return -FI_EBUSY; - } - - new_tbl = __gnix_ht_lk_init_new_table(new_size); - if (!new_tbl) { - rwlock_unlock(&ht->ht_lock); - return -FI_ENOMEM; - } - - old_tbl = ht->ht_lk_tbl; - ht->ht_lk_tbl = new_tbl; - ht->ht_size = new_size; - - for (i = 0; i < old_size; ++i) - __gnix_ht_rehash_list(ht, &old_tbl[i].head); - - free(old_tbl); - - rwlock_unlock(&ht->ht_lock); - - return 0; -} - -static inline int __gnix_ht_should_decrease_size(gnix_hashtable_t *ht) -{ - int decrease; - int desired_thresh = (ht->ht_attr.ht_collision_thresh >> 2) * 3; - - if (ht->ht_attr.ht_increase_type == GNIX_HT_INCREASE_ADD) - decrease = ht->ht_attr.ht_increase_step; - else - decrease = ht->ht_size / ht->ht_attr.ht_increase_step; - - /* This is just an approximation of the collision rate since we - * don't track collisions on removal - */ - return ((ofi_atomic_get32(&ht->ht_elements) * 100) / - (ht->ht_size - decrease)) <= desired_thresh; -} - -int _gnix_ht_init(gnix_hashtable_t *ht, gnix_hashtable_attr_t *attr) -{ - int ret; - gnix_hashtable_attr_t *tbl_attr = &default_attr; - - if (attr) { - ret = __gnix_ht_check_attr_sanity(attr); - if (ret < 0) - return ret; - - tbl_attr = attr; - } - - if (ht->ht_state == GNIX_HT_STATE_READY) - return -FI_EINVAL; - - memcpy(&ht->ht_attr, tbl_attr, sizeof(gnix_hashtable_attr_t)); - ht->ht_size = ht->ht_attr.ht_initial_size; - - if (ht->ht_attr.ht_internal_locking) - ht->ht_ops = &__gnix_locked_ht_ops; - else - ht->ht_ops = &__gnix_lockless_ht_ops; - - return ht->ht_ops->init(ht); -} - -int _gnix_ht_destroy(gnix_hashtable_t *ht) -{ - if (ht->ht_state != GNIX_HT_STATE_READY) - return -FI_EINVAL; - - return ht->ht_ops->destroy(ht); -} - -int _gnix_ht_insert(gnix_hashtable_t *ht, gnix_ht_key_t key, void *value) -{ - int ret; - int collisions, insertions; - uint64_t 
hits = 0; - gnix_ht_entry_t *list_entry; - - if (ht->ht_state != GNIX_HT_STATE_READY) - return -FI_EINVAL; - - list_entry = calloc(1, sizeof(gnix_ht_entry_t)); - if (!list_entry) - return -FI_ENOMEM; - - list_entry->value = value; - list_entry->key = key; - - ret = ht->ht_ops->insert(ht, list_entry, &hits); - if (ret != 0) { - free(list_entry); - return ret; - } - - if (ht->ht_size < ht->ht_attr.ht_maximum_size) { - collisions = ofi_atomic_add32(&ht->ht_collisions, hits); - insertions = ofi_atomic_inc32(&ht->ht_insertions); - if (insertions > 10 && - ((collisions * 100) / insertions) - > ht->ht_attr.ht_collision_thresh) { - - ofi_atomic_set32(&ht->ht_collisions, 0); - ofi_atomic_set32(&ht->ht_insertions, 0); - - __gnix_ht_resize_hashtable_inc(ht); - } - } - - ofi_atomic_inc32(&ht->ht_elements); - - return ret; -} - -int _gnix_ht_remove(gnix_hashtable_t *ht, gnix_ht_key_t key) -{ - int ret; - - if (ht->ht_state != GNIX_HT_STATE_READY) - return -FI_EINVAL; - - ret = ht->ht_ops->remove(ht, key); - - /* on success, we may have to resize */ - if (ret == 0) { - ofi_atomic_dec32(&ht->ht_elements); - - if (ht->ht_size > ht->ht_attr.ht_initial_size && - __gnix_ht_should_decrease_size(ht)) { - - /* since we are resizing the table, - * reset the collision info - */ - ofi_atomic_set32(&ht->ht_collisions, 0); - ofi_atomic_set32(&ht->ht_insertions, 0); - - __gnix_ht_resize_hashtable_dec(ht); - } - } - - return ret; -} - -void *_gnix_ht_lookup(gnix_hashtable_t *ht, gnix_ht_key_t key) -{ - return ht->ht_ops->lookup(ht, key); -} - -int _gnix_ht_empty(gnix_hashtable_t *ht) -{ - return ofi_atomic_get32(&ht->ht_elements) == 0; -} - -void *__gnix_ht_lf_iter_next(struct gnix_hashtable_iter *iter) -{ - gnix_ht_entry_t *ht_entry; - struct dlist_entry *head, *next; - int i; - - /* take next entry in bin */ - if (iter->cur_entry) { - head = &iter->ht->ht_lf_tbl[iter->cur_idx].head; - next = iter->cur_entry->entry.next; - if (next != head) { - ht_entry = dlist_entry(next, gnix_ht_entry_t, 
entry); - iter->cur_entry = ht_entry; - return ht_entry->value; - } - iter->cur_idx++; - } - - /* look for next bin with an entry */ - for (i = iter->cur_idx; i < iter->ht->ht_size; i++) { - head = &iter->ht->ht_lf_tbl[i].head; - if (dlist_empty(head)) - continue; - - ht_entry = dlist_first_entry(head, gnix_ht_entry_t, entry); - iter->cur_idx = i; - iter->cur_entry = ht_entry; - return ht_entry->value; - } - - return NULL; -} - -void *__gnix_ht_lk_iter_next(struct gnix_hashtable_iter *iter) -{ - gnix_ht_lk_lh_t *lh; - gnix_ht_entry_t *ht_entry; - struct dlist_entry *head, *next; - int i; - void *value; - - rwlock_rdlock(&iter->ht->ht_lock); - - /* take next entry in bin */ - if (iter->cur_entry) { - lh = &iter->ht->ht_lk_tbl[iter->cur_idx]; - - rwlock_rdlock(&lh->lh_lock); - head = &lh->head; - next = iter->cur_entry->entry.next; - if (next != head) { - ht_entry = dlist_entry(next, gnix_ht_entry_t, entry); - iter->cur_entry = ht_entry; - value = ht_entry->value; - rwlock_unlock(&lh->lh_lock); - - rwlock_unlock(&iter->ht->ht_lock); - return value; - } - rwlock_unlock(&lh->lh_lock); - - iter->cur_idx++; - } - - /* look for next bin with an entry */ - for (i = iter->cur_idx; i < iter->ht->ht_size; i++) { - lh = &iter->ht->ht_lk_tbl[i]; - - rwlock_rdlock(&lh->lh_lock); - head = &lh->head; - if (dlist_empty(head)) { - rwlock_unlock(&lh->lh_lock); - continue; - } - - ht_entry = dlist_first_entry(head, gnix_ht_entry_t, entry); - value = ht_entry->value; - rwlock_unlock(&lh->lh_lock); - - iter->cur_idx = i; - iter->cur_entry = ht_entry; - - rwlock_unlock(&iter->ht->ht_lock); - return value; - } - - rwlock_unlock(&iter->ht->ht_lock); - - return NULL; -} - -void *_gnix_ht_iterator_next(struct gnix_hashtable_iter *iter) -{ - return iter->ht->ht_ops->iter_next(iter); -} - -static gnix_hashtable_ops_t __gnix_lockless_ht_ops = { - .init = __gnix_ht_lf_init, - .destroy = __gnix_ht_lf_destroy, - .insert = __gnix_ht_lf_insert, - .remove = __gnix_ht_lf_remove, - .lookup = 
__gnix_ht_lf_lookup, - .resize = __gnix_ht_lf_resize, - .retrieve_list = __gnix_ht_lf_retrieve_list, - .iter_next = __gnix_ht_lf_iter_next -}; - -static gnix_hashtable_ops_t __gnix_locked_ht_ops = { - .init = __gnix_ht_lk_init, - .destroy = __gnix_ht_lk_destroy, - .insert = __gnix_ht_lk_insert, - .remove = __gnix_ht_lk_remove, - .lookup = __gnix_ht_lk_lookup, - .resize = __gnix_ht_lk_resize, - .retrieve_list = __gnix_ht_lk_retrieve_list, - .iter_next = __gnix_ht_lk_iter_next -}; diff --git a/prov/gni/src/gnix_init.c b/prov/gni/src/gnix_init.c deleted file mode 100644 index ecdd5cbd03d..00000000000 --- a/prov/gni/src/gnix_init.c +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include "gnix.h" -#include "gnix_auth_key.h" -#include "gnix_util.h" -#include "ofi.h" -#include "ofi_prov.h" - -/** - * @note To make sure that static linking will work, there must be at - * least one symbol in the file that requires gnix_init.o to have - * to be linked in when building the executable. This insures the - * ctor will run even with static linking. - */ - -ofi_atomic32_t gnix_id_counter; -ofi_atomic32_t file_id_counter; -#ifdef ENABLE_DEBUG -/* don't think this needs to be in tls */ -__thread pid_t gnix_debug_pid = ~(uint32_t) 0; -__thread uint32_t gnix_debug_tid = ~(uint32_t) 0; -ofi_atomic32_t gnix_debug_next_tid; -#endif - -extern ofi_spin_t __gnix_alps_lock; - -/** - * Helper for static computation of GNI CRC updating an intermediate crc - * value based on the status of one bit in the data value. 
- * - * @param[in] data value to compute crc for - * @param[in] lcrc intermediate crc to update - * @param[in] bit which bit (in range [0-7]) of 'data' to test - * @param[in] xor value to 'xor' into 'lcrc' iff bit 'bit' of 'data' is set - * - * @return updated intermediate crc - */ -#define CRC_HELPER(data, lcrc, bit, xor) (((((data)>>(bit))&1)*(xor))^(lcrc)) - -/* Parameterized helpers for each bit in GNI CRC */ -#define CRC_80(data, lcrc) CRC_HELPER(data, lcrc, 7, 0x8c) -#define CRC_40(data, lcrc) CRC_80(data, CRC_HELPER(data, lcrc, 6, 0x46)) -#define CRC_20(data, lcrc) CRC_40(data, CRC_HELPER(data, lcrc, 5, 0x23)) -#define CRC_10(data, lcrc) CRC_20(data, CRC_HELPER(data, lcrc, 4, 0x9d)) -#define CRC_08(data, lcrc) CRC_10(data, CRC_HELPER(data, lcrc, 3, 0xc2)) -#define CRC_04(data, lcrc) CRC_08(data, CRC_HELPER(data, lcrc, 2, 0x61)) -#define CRC_02(data, lcrc) CRC_04(data, CRC_HELPER(data, lcrc, 1, 0xbc)) -#define CRC_01(data, lcrc) CRC_02(data, CRC_HELPER(data, lcrc, 0, 0x5e)) - -/* Static computation of 8-bit GNI CRC of one 8-bit value */ -#define CRC(data) ((uint8_t)CRC_01(data, 0)) - -/* Helpers for declaring large array of precomputed CRCs */ -/* 4 elements starting at x */ -#define CRCS_4(x) CRC((x)), CRC((x)+1), CRC((x)+2), CRC((x)+3) - -/* 16 elements starting at x: ie, CRC(x),...,CRC(x+15) */ -#define CRCS_16(x) CRCS_4((x)), CRCS_4((x)+4), \ - CRCS_4((x)+8), CRCS_4((x)+12) - -/* 64 elements starting at x: ie, CRC(x),...,CRC(x+63) */ -#define CRCS_64(x) CRCS_16((x)), CRCS_16((x)+16), \ - CRCS_16((x)+32), CRCS_16((x)+48) - -/* 256 elements starting at x: ie, CRC(x),...,CRC(x+255) */ -#define CRCS_256(x) CRCS_64((x)), CRCS_64((x)+64), \ - CRCS_64((x)+128), CRCS_64((x)+192) - -uint8_t precomputed_crc_results[256] = { CRCS_256(0) }; - -#ifndef NDEBUG -static inline uint8_t __gni_crc_bits(uint8_t data) -{ - uint8_t lcrc = 0; - - if(data & 1) - lcrc ^= 0x5e; - if(data & 2) - lcrc ^= 0xbc; - if(data & 4) - lcrc ^= 0x61; - if(data & 8) - lcrc ^= 0xc2; - if(data & 
0x10) - lcrc ^= 0x9d; - if(data & 0x20) - lcrc ^= 0x23; - if(data & 0x40) - lcrc ^= 0x46; - if(data & 0x80) - lcrc ^= 0x8c; - - return lcrc; -} - -static void __validate_precomputed_crcs(void) -{ - int i; - uint8_t crc_i; - - for (i = 0; i < 256; i++) { - crc_i = __gni_crc_bits(i); - if (precomputed_crc_results[i] != crc_i) { - GNIX_WARN(FI_LOG_FABRIC, "precomputed_crc_results[%d]" - " initialized to 0x%x, expected 0x%x\n", - i, (int)precomputed_crc_results[i], (int)crc_i); - precomputed_crc_results[i] = crc_i; - } - } -} -#endif /* NDEBUG */ - -/** - * Initialization function for performing global setup - */ -__attribute__((constructor)) -void _gnix_init(void) -{ - static int called=0; - - if (called==0) { - ofi_spin_init(&__gnix_alps_lock); - - if (sizeof(struct gnix_mr_key) != sizeof(uint64_t)) { - GNIX_FATAL(FI_LOG_FABRIC, - "gnix_mr_key size is invalid, " - "size=%d expected=%d\n", - sizeof(struct gnix_mr_key), - sizeof(uint64_t)); - assert(0); - } - - _gnix_auth_key_subsys_init(); - - ofi_atomic_initialize32(&gnix_id_counter, 0); - ofi_atomic_initialize32(&file_id_counter, 0); -#ifndef NDEBUG - __validate_precomputed_crcs(); - ofi_atomic_initialize32(&gnix_debug_next_tid, 0); -#endif - called = 1; - } -} diff --git a/prov/gni/src/gnix_mbox_allocator.c b/prov/gni/src/gnix_mbox_allocator.c deleted file mode 100644 index c02f5644d99..00000000000 --- a/prov/gni/src/gnix_mbox_allocator.c +++ /dev/null @@ -1,805 +0,0 @@ -/* - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2015,2017-2018 Cray Inc. All rights reserved. - * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include - -#include "gnix_mbox_allocator.h" -#include "gnix_nic.h" -#include "fi_ext_gni.h" - -bool gnix_mbox_alloc_allow_fallback = true; - -/** - * Will attempt to find a directory in the hugetlbfs with the given page size. - * - * @param[in] page_size Page size to look for in the hugetlbfs - * @param[out] directory Double pointer to string that will point to - * directory name. - * - * @return FI_SUCCESS on successfully finding a huge page. The directory - * pointer contains the string that represents the directory name. - * - * @return -FI_EINVAL if an invalid parameter was given - * @return -FI_EIO if an error occurred while opening the /proc/mounts - * file. 
- */ -static int __find_huge_page(size_t page_size, char **directory) -{ - int rc = -FI_EINVAL; - struct statfs pg_size; - struct mntent *mntent; - FILE *fd; - - if (!directory || !page_size) { - GNIX_WARN(FI_LOG_EP_CTRL, - "Invalid page_size or directory provided.\n"); - return -FI_EINVAL; - } - - fd = setmntent ("/proc/mounts", "r"); - if (fd == NULL) { - GNIX_WARN(FI_LOG_EP_CTRL, - "Unable to open /proc/mounts - %s.\n", - strerror(errno)); - return -FI_EIO; - } - - while ((mntent = getmntent(fd)) != NULL) { - - if (strcmp (mntent->mnt_type, "hugetlbfs") != 0) { - continue; - } - - if (statfs(mntent->mnt_dir, &pg_size) == 0) { - if (pg_size.f_bsize == page_size) { - *directory = strdup(mntent->mnt_dir); - rc = FI_SUCCESS; - break; - } - } - } - - endmntent(fd); - - return rc; -} - -/** - * Will attempt to find a directory in hugetlbfs using the given page size and - * create a filename to use for backing an mmap. - * - * @param[in] page_size Page size to look for in the hugetlbfs - * @param[out] filename Pointer containing filename after generation. - * - * @return FI_SUCCESS On successfully finding a huge page and generating a - * file name. - * - * @return -FI_EINVAL if an invalid parameter was given - * @return -FI_EIO if an error occurred while opening the /proc/mounts - * file. This is propagated from __find_huge_page. - * @return -FI_ENOMEM if an error occurred while allocating space for the - * filename. 
- */ -static int __generate_file_name(size_t page_size, char **filename) -{ - static const char basename[] = "gnix_map"; - char *full_filename = NULL; - char *huge_page = NULL; - char *error; - char error_buf[256]; - int my_file_id; - int size; - int ret; - int file_name_size; - - if (!filename) { - GNIX_WARN(FI_LOG_EP_CTRL, "filename pointer is NULL.\n"); - ret = -FI_EINVAL; - goto err_invalid; - } - - ret = __find_huge_page(page_size, &huge_page); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "Find huge page returned error %s\n", - fi_strerror(-ret)); - goto err_invalid; - } - - my_file_id = ofi_atomic_inc32(&file_id_counter); - size = snprintf(NULL, 0, "%s/%s.%d.%d", huge_page, basename, getpid(), - my_file_id); - if (size < 0) { - error = strerror_r(errno, error_buf, sizeof(error_buf)); - GNIX_WARN(FI_LOG_EP_CTRL, - "Error while gathering size for snprintf: %s\n", - error); - goto err_snprintf; - } - - file_name_size = size + 1; - full_filename = malloc(file_name_size); - if (!full_filename) { - error = strerror_r(errno, error_buf, sizeof(error_buf)); - GNIX_WARN(FI_LOG_EP_CTRL, - "Error allocating full_filename: %s\n", - error); - ret = -FI_ENOMEM; - goto err_snprintf; - } - - snprintf(full_filename, file_name_size, "%s/%s.%d.%d", huge_page, basename, - getpid(), my_file_id); - - GNIX_DEBUG(FI_LOG_EP_CTRL, "Generated filename: %s\n", full_filename); - - *filename = full_filename; - -err_snprintf: - free(huge_page); -err_invalid: - return ret; -} - -/** - * Find huge page, generate filename, open huge page, and attach huge page - * descriptor to handle. - * - * @param[in] handle Handle to the allocator being used. - * - * @return FI_SUCCESS On successfully opening a huge page. - * - * @return -FI_EINVAL if an invalid parameter was given. Propagated from - * __generate_file_name. - * @return -FI_EIO if an error occurred while opening the hugepage - * @return -FI_ENOMEM if an error in space allocation occurred. Propagated - * from __generate_file_name. 
- */ -static int __open_huge_page(struct gnix_mbox_alloc_handle *handle) -{ - char *filename = NULL; - char error_buf[256]; - char *error; - int ret; - int fd; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - handle->fd = -1; - handle->filename = NULL; - - ret = __generate_file_name(handle->page_size, &filename); - if (ret < 0) { - GNIX_WARN(FI_LOG_EP_CTRL, "Error in generating file name.\n"); - goto err_filename; - } - - fd = open(filename, O_CREAT | O_RDWR | O_EXCL, 0700); - if (fd < 0) { - error = strerror_r(errno, error_buf, sizeof(error_buf)); - GNIX_WARN(FI_LOG_EP_CTRL, "IO Error: %s\n", error); - ret = -FI_EIO; - goto err_open; - } - - handle->fd = fd; - handle->filename = filename; - - GNIX_DEBUG(FI_LOG_EP_CTRL, - "Successfully opened: %s with handle : %d\n.", - handle->filename, handle->fd); - - unlink(handle->filename); - - return ret; - -err_open: - free(filename); -err_filename: - return ret; -} - -/** - * Determine how many pages need to be allocated. - * - * @param[in] handle Handle to the allocator being used. - * - * @return Number of pages that need to be allocated rounded up to the nearest - * multiple of the page size. - */ -static size_t __page_count(struct gnix_mbox_alloc_handle *handle) -{ - size_t total_size = CEILING((handle->mbox_size * handle->mpmmap), - handle->page_size); - size_t page_count; - - page_count = total_size / handle->page_size; - - GNIX_DEBUG(FI_LOG_EP_CTRL, - "Mbox_size: %zu, mpmmap: %zu, page_size: %zu\n", - handle->mbox_size, handle->mpmmap, handle->page_size); - - GNIX_DEBUG(FI_LOG_EP_CTRL, - "Total size: %zu, page_count: %zu\n", total_size, - page_count); - - if (page_count <= 0) { - GNIX_WARN(FI_LOG_EP_CTRL, - "Invalid size requested, truncating to single page.\n"); - page_count = 1; - } - - return page_count; -} - -/** - * Determine how many mboxes are in a requested allocation size. - * - * @param[in] handle Handle to the allocator being used. - * - * @return Number of mail boxes being allocated. 
- */ -static size_t __mbox_count(struct gnix_mbox_alloc_handle *handle) -{ - size_t mbox_count = (__page_count(handle) * handle->page_size) / - handle->mbox_size; - - GNIX_DEBUG(FI_LOG_EP_CTRL, - "Mbox_count: %zu.\n", mbox_count); - return mbox_count; -} - -/** - * Create a slab from a handle and append to the slab list. - * - * @param[in] handle Handle to the allocator being used. - * - * @return FI_SUCCESS On successful slab creation. - * - * @return -FI_ENOMEM if failure to allocate memory for slab or bitmap. - * @return [Unspec] if failure in alloc_bitmap. Will return error code from - * alloc_bitmap. - * @return [Unspec] if failure in GNI_MemRegister. Converts gni_return_t - * status code to FI_ERRNO value. - */ -static int __create_slab(struct gnix_mbox_alloc_handle *handle) -{ - struct gnix_slab *slab; - gni_return_t status; - char error_buf[256]; - char *error; - size_t total_size; - int ret, mflags; - int vmdh_index = -1; - int flags = GNI_MEM_READWRITE; - struct gnix_auth_key *info; - struct fi_gni_auth_key key; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - slab = calloc(1, sizeof(*slab)); - if (!slab) { - error = strerror_r(errno, error_buf, sizeof(error_buf)); - GNIX_WARN(FI_LOG_EP_CTRL, - "Error allocating slab: %s\n", - error); - ret = -FI_ENOMEM; - goto err_slab_calloc; - } - - total_size = handle->page_size * __page_count(handle); - GNIX_DEBUG(FI_LOG_EP_CTRL, "total_size requested for mmap: %zu.\n", - total_size); - - slab->used = calloc(1, sizeof(*(slab->used))); - if (!slab->used) { - error = strerror_r(errno, error_buf, sizeof(error_buf)); - GNIX_WARN(FI_LOG_EP_CTRL, - "Error allocating bitmap: %s\n", - error); - ret = -FI_ENOMEM; - goto err_bitmap_calloc; - } - - mflags = MAP_SHARED; - if (handle->fd == -1) - mflags |= MAP_ANONYMOUS; - - slab->base = mmap(0, total_size, (PROT_READ | PROT_WRITE), mflags, - handle->fd, handle->last_offset); - if (slab->base == MAP_FAILED) { - error = strerror_r(errno, error_buf, sizeof(error_buf)); - 
GNIX_WARN(FI_LOG_EP_CTRL, "%s\n", error); - ret = -FI_ENOMEM; - goto err_mmap; - } - - ret = _gnix_alloc_bitmap(slab->used, __mbox_count(handle), NULL); - if (ret) { - GNIX_WARN(FI_LOG_EP_CTRL, "Error allocating bitmap.\n"); - goto err_alloc_bitmap; - } - - COND_ACQUIRE(handle->nic_handle->requires_lock, &handle->nic_handle->lock); - if (handle->nic_handle->using_vmdh) { - key.type = GNIX_AKT_RAW; - key.raw.protection_key = handle->nic_handle->cookie; - - info = _gnix_auth_key_lookup((uint8_t *) &key, sizeof(key)); - assert(info); - - if (!handle->nic_handle->mdd_resources_set) { - /* check to see if the ptag registration limit was set - * yet or not -- becomes read-only after success */ - _gnix_auth_key_enable(info); - - status = GNI_SetMddResources( - handle->nic_handle->gni_nic_hndl, - (info->attr.prov_key_limit + - info->attr.user_key_limit)); - assert(status == GNI_RC_SUCCESS); - - handle->nic_handle->mdd_resources_set = 1; - } - - vmdh_index = _gnix_get_next_reserved_key(info); - if (vmdh_index <= 0) { - GNIX_FATAL(FI_LOG_DOMAIN, - "failed to get reserved key for mbox " - "registration, rc=%d\n", - vmdh_index); - } - flags |= GNI_MEM_USE_VMDH; - } - - status = GNI_MemRegister(handle->nic_handle->gni_nic_hndl, - (uint64_t) slab->base, total_size, - handle->cq_handle, - flags, vmdh_index, - &slab->memory_handle); - COND_RELEASE(handle->nic_handle->requires_lock, &handle->nic_handle->lock); - if (status != GNI_RC_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, "GNI_MemRegister failed: %s\n", - gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - goto err_memregister; - } - - slab->allocator = handle; - - gnix_slist_insert_tail(&slab->list_entry, &handle->slab_list); - - handle->last_offset += total_size; - - return ret; - -err_memregister: - _gnix_free_bitmap(slab->used); -err_alloc_bitmap: - munmap(slab->base, total_size); -err_mmap: - free(slab->used); -err_bitmap_calloc: - free(slab); -err_slab_calloc: - return ret; -} - -/** - * Destroy a slab. 
- * - * @param[in] handle Handle to the allocator being used. - * @param[in] slab Slab to be destroyed. - * - * @return FI_SUCCESS On successful slab destruction. - * - * @return -FI_EINVAL On invalid handle or slab being given as parameters. - */ -static int __destroy_slab(struct gnix_mbox_alloc_handle *handle, - struct gnix_slab *slab) -{ - size_t total_size; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (!handle || !slab) { - GNIX_WARN(FI_LOG_EP_CTRL, - "Invalid argument handle or slab.\n"); - return -FI_EINVAL; - } - - total_size = handle->page_size * __page_count(handle); - - _gnix_free_bitmap(slab->used); - free(slab->used); - - COND_ACQUIRE(handle->nic_handle->requires_lock, &handle->nic_handle->lock); - GNI_MemDeregister(handle->nic_handle->gni_nic_hndl, - &slab->memory_handle); - COND_RELEASE(handle->nic_handle->requires_lock, &handle->nic_handle->lock); - - munmap(slab->base, total_size); - - free(slab); - - return FI_SUCCESS; -} - -/** - * Iterate over all slab bitmaps associated with an allocation handle and run - * action function on each bitmap. - * - * @param[in] handle Handle to the allocator being used. - * @param[out] slab Contains slab which made action evaluate to true. - * - * @return Index into slab which made action evaluate to true. - * @return -FI_EAGAIN On failure of action across all slab bitmaps. - */ -static int __check_bitmap(struct gnix_mbox_alloc_handle *handle, - struct gnix_slab **slab, - int (*action)(gnix_bitmap_t *)) -{ - struct slist_entry *entry; - struct gnix_slab *temp; - int ret = FI_SUCCESS; - - *slab = NULL; - - for (entry = handle->slab_list.head; entry; entry = entry->next) { - temp = container_of(entry, struct gnix_slab, list_entry); - ret = action(temp->used); - if (ret >= 0) { - *slab = temp; - break; - } - } - - return ret; -} - -/** - * Iterate over all slab bitmaps associated with an allocation handle and - * search for the first free piece of memory. - * - * @param[in] handle Handle to the allocator being used. 
- * @param[out] slab Contains slab which contained a free piece of memory. - * - * @return Index into slab which is free. - * @return -FI_EAGAIN Upon not finding any free memory. - */ -static int __find_free(struct gnix_mbox_alloc_handle *handle, - struct gnix_slab **slab) -{ - return __check_bitmap(handle, slab, _gnix_find_first_zero_bit); -} - -/** - * Iterate over all slab bitmaps associated with an allocation handle and - * search for the first used piece of memory. - * - * @param[in] handle Handle to the allocator being used. - * @param[out] slab Contains slab which contained a free piece of memory. - * - * @return Index into slab which is used. - * @return -FI_EAGAIN Upon not finding any used memory. - */ -static int __find_used(struct gnix_mbox_alloc_handle *handle, - struct gnix_slab **slab) -{ - return __check_bitmap(handle, slab, _gnix_find_first_set_bit); -} - -/** - * Fill all of the fields of an mbox to be returned to the requester. - * - * @param[in] handle Handle to the allocator being used. - * @param[in] slab Slab which the mbox is allocated from. - * @param[in] position Position of the mbox in the slab. - * @param[out] ptr Contains the allocated mbox upon success. - * - * @return FI_SUCCESS Upon successfully filling an mbox with relevant data. - * @return -FI_EINVAL Upon receiving invalid input, or finding the bitmap in - * a corrupted state. - * @return -FI_ENOMEM Upon failure to create the mbox structure using calloc. 
- */ -static int __fill_mbox(struct gnix_mbox_alloc_handle *handle, - struct gnix_slab *slab, size_t position, - struct gnix_mbox **ptr) -{ - struct gnix_mbox *out; - int ret = FI_SUCCESS; - char error_buf[256]; - size_t mapped_size; - char *error; - - out = calloc(1, sizeof(*out)); - if (!out) { - error = strerror_r(errno, error_buf, sizeof(error_buf)); - GNIX_WARN(FI_LOG_EP_CTRL, - "Error allocating mbox: %s\n", - error); - ret = -FI_ENOMEM; - goto err_mbox_calloc; - } - - mapped_size = handle->page_size * __page_count(handle); - - out->slab = slab; - out->base = slab->base; - out->offset = (position * handle->mbox_size); - out->memory_handle = &slab->memory_handle; - - if (out->offset > mapped_size) { - GNIX_WARN(FI_LOG_EP_CTRL, "Mbox out of bounds.\n"); - ret = -FI_EINVAL; - goto err_invalid; - } - - /* On some systems, the page may not be zero'd from first use. - Memset it here */ - memset((void *) ((uint64_t) out->base + out->offset), - 0x0, handle->mbox_size); - - ret = _gnix_test_and_set_bit(slab->used, position); - if (ret != 0) { - GNIX_WARN(FI_LOG_EP_CTRL, - "Bit already set when creating mbox.\n"); - ret = -FI_EINVAL; - goto err_invalid; - } - - *ptr = out; - - return ret; - -err_invalid: - free(out); -err_mbox_calloc: - return ret; -} - -int _gnix_mbox_allocator_create(struct gnix_nic *nic, - gni_cq_handle_t cq_handle, - enum gnix_page_size page_size, - size_t mbox_size, - size_t mpmmap, - struct gnix_mbox_alloc_handle **alloc_handle) -{ - struct gnix_mbox_alloc_handle *handle; - char error_buf[256]; - char *error; - int ret; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (!nic || !mbox_size || !mpmmap || !alloc_handle) { - GNIX_WARN(FI_LOG_EP_CTRL, - "Invalid parameter to allocator_create.\n"); - return -FI_EINVAL; - } - - *alloc_handle = NULL; - - handle = calloc(1, sizeof(*handle)); - if (!handle) { - error = strerror_r(errno, error_buf, sizeof(error_buf)); - GNIX_WARN(FI_LOG_EP_CTRL, - "Error allocating alloc handle: %s\n", - error); - return 
-FI_ENOMEM; - } - - handle->page_size = page_size * 1024 * 1024; - handle->mbox_size = mbox_size; - handle->mpmmap = mpmmap; - handle->nic_handle = nic; - handle->cq_handle = cq_handle; - ofi_spin_init(&handle->lock); - - ret = __open_huge_page(handle); - if (ret == FI_SUCCESS) { - ret = __create_slab(handle); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, "Slab creation failed.\n"); - } - } else { - GNIX_WARN(FI_LOG_EP_CTRL, "Error opening huge page.\n"); - } - - /* - * try plan B - try to use anonymous mapping (base page size). - * If a file was successfully opened, close fd and free filename - * field in the handle. - */ - - if ((ret != FI_SUCCESS) && - (gnix_mbox_alloc_allow_fallback == true)) { - if (handle->filename != NULL) { - free(handle->filename); - handle->filename = NULL; - } - if (handle->fd != -1) { - ret = close(handle->fd); - handle->fd = -1; - if (ret) { - GNIX_WARN(FI_LOG_EP_CTRL, - "Error closing huge page - %d\n", - ret); - } - } - - ret = __create_slab(handle); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "Slab(anon) creation failed.\n"); - } - } - - if (ret == FI_SUCCESS) { - *alloc_handle = handle; - } else { - free(handle); - } - - return ret; -} - -int _gnix_mbox_allocator_destroy(struct gnix_mbox_alloc_handle *alloc_handle) -{ - struct slist_entry *entry; - struct gnix_slab *temp; - char error_buf[256]; - int position; - char *error; - int ret = FI_SUCCESS; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (!alloc_handle) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid alloc handle.\n"); - return -FI_EINVAL; - } - - position = __find_used(alloc_handle, &temp); - if (position >= 0) { - GNIX_WARN(FI_LOG_EP_CTRL, - "Can't destroy, not all mailboxes have been returned (pos = %d).\n", - position); - return -FI_EBUSY; - } - - while (!slist_empty(&alloc_handle->slab_list)) { - entry = slist_remove_head(&alloc_handle->slab_list); - - temp = container_of(entry, struct gnix_slab, list_entry); - - ret = __destroy_slab(alloc_handle, temp); - if 
(ret) - GNIX_WARN(FI_LOG_EP_CTRL, - "Error destroying slab.\n"); - } - - if (alloc_handle->filename != NULL) - free(alloc_handle->filename); - - if (alloc_handle->fd != -1) - ret = close(alloc_handle->fd); - - if (ret) { - error = strerror_r(errno, error_buf, sizeof(error_buf)); - GNIX_WARN(FI_LOG_EP_CTRL, - "Error closing map file: %s\n", - error); - } - - ofi_spin_destroy(&alloc_handle->lock); - - free(alloc_handle); - - return FI_SUCCESS; -} - -int _gnix_mbox_alloc(struct gnix_mbox_alloc_handle *alloc_handle, - struct gnix_mbox **ptr) -{ - struct gnix_slab *slab; - int position; - int ret; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (!alloc_handle || !ptr) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid alloc_handle or ptr.\n"); - ret = -FI_EINVAL; - goto err; - } - - ofi_spin_lock(&alloc_handle->lock); - position = __find_free(alloc_handle, &slab); - if (position < 0) { - GNIX_DEBUG(FI_LOG_EP_CTRL, "Creating new slab.\n"); - ret = __create_slab(alloc_handle); - if (ret) { - GNIX_WARN(FI_LOG_EP_CTRL, "Slab creation failed.\n"); - goto err; - } - - slab = container_of(alloc_handle->slab_list.tail, - struct gnix_slab, list_entry); - position = ret; - } - - ret = __fill_mbox(alloc_handle, slab, (size_t) position, ptr); - if (ret) - GNIX_WARN(FI_LOG_EP_CTRL, "Creating mbox failed.\n"); - - ofi_spin_unlock(&alloc_handle->lock); -err: - return ret; -} - -int _gnix_mbox_free(struct gnix_mbox *ptr) -{ - size_t position; - int ret; - ofi_spin_t *lock; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (!ptr || !ptr->slab || !ptr->slab->allocator) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid mbox given to free.\n"); - return -FI_EINVAL; - } - - lock = &ptr->slab->allocator->lock; - ofi_spin_lock(lock); - position = ptr->offset / ptr->slab->allocator->mbox_size; - - ret = _gnix_test_and_clear_bit(ptr->slab->used, position); - if (ret != 1) { - GNIX_WARN(FI_LOG_EP_CTRL, - "Bit already cleared while freeing mbox.\n"); - ofi_spin_unlock(lock); - return -FI_EINVAL; - } - - free(ptr); - 
ofi_spin_unlock(lock); - - return FI_SUCCESS; -} diff --git a/prov/gni/src/gnix_mr.c b/prov/gni/src/gnix_mr.c deleted file mode 100644 index a55b73a0e7d..00000000000 --- a/prov/gni/src/gnix_mr.c +++ /dev/null @@ -1,1269 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2015-2018 Los Alamos National Security, LLC. - * All rights reserved. - * - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include -#include - -#include "gnix.h" -#include "gnix_nic.h" -#include "gnix_util.h" -#include "gnix_mr.h" -#include "gnix_priv.h" - -/* forward declarations */ -static int __gnix_mr_refresh(struct gnix_fid_mem_desc *desc, - uint64_t addr, uint64_t len); -static int fi_gnix_mr_close(fid_t fid); -static int fi_gnix_mr_control(struct fid *fid, int command, void *arg); - -/* global declarations */ -/* memory registration operations */ -static struct fi_ops fi_gnix_mr_ops = { - .size = sizeof(struct fi_ops), - .close = fi_gnix_mr_close, - .bind = fi_no_bind, - .control = fi_gnix_mr_control, - .ops_open = fi_no_ops_open, -}; - -/** - * Sign extends the value passed into up to length parameter - * - * @param[in] val value to be sign extended - * @param[in] len length to sign extend the value - * @return sign extended value to length, len - */ -static inline int64_t __sign_extend( - uint64_t val, - int len) -{ - int64_t m = 1UL << (len - 1); - int64_t r = (val ^ m) - m; - - return r; -} - -static inline void __print_mhdl(gni_mem_handle_t *mhdl) -{ - GNIX_INFO(FI_LOG_DOMAIN, "PRINT_MHDL:\n" - "va=%016llx\n" - "mdh=%d\n" - "npages=%d\n" - "pgsize=%d\n" - "flags=%08llx\n" - "crc=%08llx\n", - GNI_MEMHNDL_GET_VA((*mhdl)), - GNI_MEMHNDL_GET_MDH((*mhdl)), - mhdl->qword2 & GNI_MEMHNDL_NPGS_MASK, - (mhdl->qword2 >> 28) & GNI_MEMHNDL_PSIZE_MASK, - GNI_MEMHNDL_GET_FLAGS((*mhdl)), - mhdl->qword2 >> 56); -} - -/** - * Converts a key to a gni memory handle without calculating crc - * - * @param key gnix memory registration key - * @param mhdl gni memory handle - */ -void _gnix_convert_key_to_mhdl_no_crc( - gnix_mr_key_t *key, - gni_mem_handle_t *mhdl) -{ - uint64_t va = key->pfn; - uint8_t flags = 0; - - va = (uint64_t) __sign_extend(va << GNIX_MR_PAGE_SHIFT, - GNIX_MR_VA_BITS); - - flags = (key->flags & GNIX_MR_FLAG_READONLY) ? 
- GNI_MEMHNDL_ATTR_READONLY : 0; - - GNI_MEMHNDL_INIT((*mhdl)); - GNI_MEMHNDL_SET_PAGESIZE((*mhdl), GNIX_MR_PAGE_SHIFT); - GNI_MEMHNDL_SET_NPAGES((*mhdl), GNI_MEMHNDL_NPGS_MASK); - - if (key->flags & GNIX_MR_FLAG_BASIC_REG) { - va = key->pfn; - va = (uint64_t) __sign_extend(va << GNIX_MR_PAGE_SHIFT, - GNIX_MR_VA_BITS); - - GNI_MEMHNDL_SET_VA((*mhdl), va); - GNI_MEMHNDL_SET_MDH((*mhdl), key->mdd); - } else { - GNI_MEMHNDL_SET_MDH((*mhdl), key->value); - flags |= GNI_MEMHNDL_ATTR_VMDH; - } - GNI_MEMHNDL_SET_FLAGS((*mhdl), flags); -} - -/** - * Converts a key to a gni memory handle - * - * @param key gnix memory registration key - * @param mhdl gni memory handle - */ -void _gnix_convert_key_to_mhdl( - gnix_mr_key_t *key, - gni_mem_handle_t *mhdl) -{ - _gnix_convert_key_to_mhdl_no_crc(key, mhdl); - compiler_barrier(); - GNI_MEMHNDL_SET_CRC((*mhdl)); -} - -/** - * Converts a gni memory handle to gnix memory registration key - * - * @param mhdl gni memory handle - * @return uint64_t representation of a gnix memory registration key - */ -uint64_t _gnix_convert_mhdl_to_key(gni_mem_handle_t *mhdl) -{ - gnix_mr_key_t key = {{{0}}}; - int flags = GNI_MEMHNDL_GET_FLAGS((*mhdl)); - - /* VMDH handles do not have an address set */ - if (flags & GNI_MEMHNDL_ATTR_VMDH) - return GNI_MEMHNDL_GET_MDH((*mhdl)); - - key.pfn = GNI_MEMHNDL_GET_VA((*mhdl)) >> GNIX_MR_PAGE_SHIFT; - key.mdd = GNI_MEMHNDL_GET_MDH((*mhdl)); - //key->format = GNI_MEMHNDL_NEW_FRMT((*mhdl)); - key.flags = GNIX_MR_FLAG_BASIC_REG; - - key.flags |= (flags & GNI_MEMHNDL_FLAG_READONLY) ? - GNIX_MR_FLAG_READONLY : 0; - - - return key.value; -} - -/** - * Helper function to calculate the length of a potential registration - * based on some rules of the registration cache. 
- * - * Registrations should be page aligned and contain all of page(s) - * - * @param address base address of the registration - * @param length length of the registration - * @param pagesize assumed page size of the registration - * @return length for the new registration - */ -static inline uint64_t __calculate_length( - uint64_t address, - uint64_t length, - uint64_t pagesize) -{ - uint64_t baseaddr = address & ~(pagesize - 1); - uint64_t reg_len = (address + length) - baseaddr; - uint64_t pages = reg_len / pagesize; - - if (reg_len % pagesize != 0) - pages += 1; - - return pages * pagesize; -} - -int _gnix_mr_reg(struct fid *fid, const void *buf, size_t len, - uint64_t access, uint64_t offset, - uint64_t requested_key, uint64_t flags, - struct fid_mr **mr_o, void *context, - struct gnix_auth_key *auth_key, - int reserved) -{ - struct gnix_fid_mem_desc *mr = NULL; - struct gnix_fid_domain *domain; - int rc; - uint64_t reg_addr, reg_len; - struct _gnix_fi_reg_context fi_reg_context = { - .access = access, - .offset = offset, - .requested_key = requested_key, - .flags = flags, - .context = context, - .auth_key = auth_key, - .reserved = reserved, - }; - struct gnix_mr_cache_info *info; - - GNIX_TRACE(FI_LOG_MR, "\n"); - - GNIX_INFO(FI_LOG_MR, "reg: buf=%p len=%llu\n", buf, len); - - /* Flags are reserved for future use and must be 0. */ - if (OFI_UNLIKELY(flags)) - return -FI_EBADFLAGS; - - /* The offset parameter is reserved for future use and must be 0. 
- * Additionally, check for invalid pointers, bad access flags and the - * correct fclass on associated fid - */ - if (offset || !buf || !mr_o || !access || - (access & ~(FI_READ | FI_WRITE | FI_RECV | FI_SEND | - FI_REMOTE_READ | - FI_REMOTE_WRITE)) || - (fid->fclass != FI_CLASS_DOMAIN)) - return -FI_EINVAL; - - domain = container_of(fid, struct gnix_fid_domain, domain_fid.fid); - - if (auth_key->using_vmdh && !reserved && - requested_key >= auth_key->attr.user_key_limit) - return -FI_EKEYREJECTED; - - if (!reserved && auth_key->using_vmdh) { - /* adjust requested key by rank offset */ - fi_reg_context.requested_key += auth_key->key_offset; - GNIX_DEBUG(FI_LOG_DOMAIN, - "user requested key %d, but adjusting by " - "rank offset as key %d\n", - requested_key, fi_reg_context.requested_key); - } - - if (auth_key->using_vmdh && !reserved && - requested_key < auth_key->attr.user_key_limit) { - rc = _gnix_test_and_set_bit(auth_key->user, - fi_reg_context.requested_key); - if (rc) { - GNIX_WARN(FI_LOG_DOMAIN, "key already in use, key=%d\n", - fi_reg_context.requested_key); - return -FI_ENOKEY; - } - } - - /* if this is a provider registration using VMDH and 0 was provided - * as the key, pick any available */ - if (auth_key->using_vmdh && reserved && !requested_key) { - requested_key = _gnix_get_next_reserved_key(auth_key); - if (requested_key <= 0) - return -FI_ENOKEY; - fi_reg_context.requested_key = requested_key; - } - - info = &domain->mr_cache_info[auth_key->ptag]; - - reg_addr = ((uint64_t) buf) & ~((1 << GNIX_MR_PAGE_SHIFT) - 1); - reg_len = __calculate_length((uint64_t) buf, len, - 1 << GNIX_MR_PAGE_SHIFT); - - /* call cache register op to retrieve the right entry */ - ofi_spin_lock(&info->mr_cache_lock); - if (OFI_UNLIKELY(!domain->mr_ops)) - _gnix_open_cache(domain, GNIX_DEFAULT_CACHE_TYPE); - - if (OFI_UNLIKELY(!domain->mr_ops->is_init(domain, auth_key))) { - rc = domain->mr_ops->init(domain, auth_key); - if (rc != FI_SUCCESS) { - 
ofi_spin_unlock(&info->mr_cache_lock); - goto err; - } - } - - rc = domain->mr_ops->reg_mr(domain, - (uint64_t) reg_addr, reg_len, &fi_reg_context, - (void **) &mr); - ofi_spin_unlock(&info->mr_cache_lock); - - /* check retcode */ - if (OFI_UNLIKELY(rc != FI_SUCCESS)) - goto err; - - /* md.mr_fid */ - mr->mr_fid.mem_desc = mr; - mr->mr_fid.fid.fclass = FI_CLASS_MR; - mr->mr_fid.fid.context = context; - mr->mr_fid.fid.ops = &fi_gnix_mr_ops; - - /* setup internal key structure */ - mr->mr_fid.key = _gnix_convert_mhdl_to_key(&mr->mem_hndl); - if (!reserved && auth_key->using_vmdh) { - /* When using scalable, the key is a virtual index to the - vmdh table */ - mr->mr_fid.key = requested_key; - } - mr->auth_key = auth_key; - - if (reserved && auth_key->using_vmdh) { - rc = __gnix_mr_refresh(mr, reg_addr, reg_len); - if (rc != FI_SUCCESS) - GNIX_FATAL(FI_LOG_MR, - "failed to enabled internal provider registration, ret=%d", - rc); - } - - _gnix_ref_get(mr->domain); - - /* set up mr_o out pointer */ - *mr_o = &mr->mr_fid; - return FI_SUCCESS; - -err: - return rc; -} - -DIRECT_FN int gnix_mr_reg(struct fid *fid, const void *buf, size_t len, - uint64_t access, uint64_t offset, - uint64_t requested_key, uint64_t flags, - struct fid_mr **mr, void *context) -{ - const struct iovec mr_iov = { - .iov_base = (void *) buf, - .iov_len = len, - }; - const struct fi_mr_attr attr = { - .mr_iov = &mr_iov, - .iov_count = 1, - .access = access, - .offset = offset, - .requested_key = requested_key, - .context = context, - .auth_key = NULL, - .auth_key_size = 0, - }; - - return gnix_mr_regattr(fid, &attr, flags, mr); -} - -DIRECT_FN int gnix_mr_regv(struct fid *fid, const struct iovec *iov, - size_t count, uint64_t access, - uint64_t offset, uint64_t requested_key, - uint64_t flags, struct fid_mr **mr, void *context) -{ - const struct fi_mr_attr attr = { - .mr_iov = iov, - .iov_count = count, - .access = access, - .offset = offset, - .requested_key = requested_key, - .context = context, - 
.auth_key = NULL, - .auth_key_size = 0, - }; - - return gnix_mr_regattr(fid, &attr, flags, mr); -} - -DIRECT_FN int gnix_mr_regattr(struct fid *fid, const struct fi_mr_attr *attr, - uint64_t flags, struct fid_mr **mr) -{ - struct gnix_fid_domain *domain = container_of(fid, - struct gnix_fid_domain, domain_fid.fid); - struct gnix_auth_key *auth_key; - - if (!attr) - return -FI_EINVAL; - if (!attr->mr_iov || !attr->iov_count) - return -FI_EINVAL; - - if (domain->mr_iov_limit < attr->iov_count) - return -FI_EOPNOTSUPP; - - if (FI_VERSION_LT(domain->fabric->fab_fid.api_version, - FI_VERSION(1, 5)) && - (attr->auth_key || attr->auth_key_size)) - return -FI_EINVAL; - - if (attr->auth_key_size) { - auth_key = GNIX_GET_AUTH_KEY(attr->auth_key, - attr->auth_key_size, - domain->using_vmdh); - if (!auth_key) - return -FI_EINVAL; - } else { - auth_key = domain->auth_key; - } - - if (attr->iov_count == 1) - return _gnix_mr_reg(fid, attr->mr_iov[0].iov_base, - attr->mr_iov[0].iov_len, attr->access, attr->offset, - attr->requested_key, flags, mr, attr->context, - auth_key, GNIX_USER_REG); - - /* regv limited to one iov at this time */ - return -FI_EOPNOTSUPP; -} - -static int __gnix_mr_refresh(struct gnix_fid_mem_desc *desc, - uint64_t addr, uint64_t len) -{ - gni_return_t grc; - - - ofi_spin_lock(&desc->nic->lock); - grc = GNI_MemRegister(desc->nic->gni_nic_hndl, addr, - len, NULL, GNI_MEM_UPDATE_REGION, - desc->mr_fid.key, &desc->mem_hndl); - if (grc != FI_SUCCESS) - GNIX_WARN(FI_LOG_MR, - "failed GNI_MemRegister with UPDATE REGION, " - " addr=%p len=%x key=%d grc=%d\n", - addr, len, desc->mr_fid.key, grc); - ofi_spin_unlock(&desc->nic->lock); - - return (grc != 0) ? 
-FI_EINVAL : FI_SUCCESS; -} - -static int __gnix_mr_refresh_iov(struct fid *fid, void *arg) -{ - struct fi_mr_modify *modify = (struct fi_mr_modify *) arg; - int ret = FI_SUCCESS; - struct gnix_fid_mem_desc *desc; - uint64_t aligned_addr; - uint64_t aligned_len; - uint64_t addr; - uint64_t len; - int i; - - GNIX_TRACE(FI_LOG_MR, "\n"); - - desc = container_of(fid, struct gnix_fid_mem_desc, mr_fid); - - /* assume that no one is going to attempt to update a MR at the - * same time that they might try to deregister a mr - * - * For the record, that would be REALLY silly application behavior - */ - if (!modify->attr.mr_iov || modify->attr.iov_count == 0) { - GNIX_DEBUG(FI_LOG_DOMAIN, - "cannot provide null iov or 0 iov_count\n"); - return -FI_EINVAL; - } - - if (!desc->auth_key->using_vmdh) { - GNIX_DEBUG(FI_LOG_DOMAIN, - "cannot use refresh with non-vmdh registrations\n"); - return -FI_EINVAL; - } - - for (i = 0; i < modify->attr.iov_count; i++) { - addr = (uint64_t) modify->attr.mr_iov[i].iov_base; - len = (uint64_t) modify->attr.mr_iov[i].iov_len; - - aligned_addr = addr & ~0xfff; - aligned_len = addr + len - aligned_addr; - aligned_len += (((addr + len) & 0xfff) ? 
- (0x1000 - ((addr + len) & 0xfff)) : 0); - - ret = __gnix_mr_refresh(desc, aligned_addr, aligned_len); - if (ret) { - GNIX_WARN(FI_LOG_DOMAIN, - "failed to refresh IOV %d, addr=%p len=%x\n", - i, aligned_addr, aligned_len); - return ret; - } - } - - return FI_SUCCESS; -} - -static int fi_gnix_mr_control(struct fid *fid, int command, void *arg) -{ - int ret; - struct gnix_fid_mem_desc *desc; - - desc = container_of(fid, struct gnix_fid_mem_desc, mr_fid); - if (desc->mr_fid.fid.fclass != FI_CLASS_MR) { - GNIX_WARN(FI_LOG_DOMAIN, "invalid fid\n"); - return -FI_EINVAL; - } - - switch (command) { - case FI_REFRESH: - ret = __gnix_mr_refresh_iov(fid, arg); - break; - default: - ret = -FI_EOPNOTSUPP; - break; - } - - return ret; -} - -/** - * Closes and deallocates a libfabric memory registration in the internal cache - * - * @param[in] fid libfabric memory registration fid - * - * @return FI_SUCCESS on success - * -FI_EINVAL on invalid fid - * -FI_NOENT when there isn't a matching registration for the - * provided fid - * Otherwise, GNI_RC_* ret codes converted to FI_* err codes - */ -static int fi_gnix_mr_close(fid_t fid) -{ - struct gnix_fid_mem_desc *mr; - gni_return_t ret; - struct gnix_fid_domain *domain; - struct gnix_mr_cache_info *info; - int requested_key; - struct gnix_auth_key *auth_key; - - GNIX_TRACE(FI_LOG_MR, "\n"); - - if (OFI_UNLIKELY(fid->fclass != FI_CLASS_MR)) - return -FI_EINVAL; - - mr = container_of(fid, struct gnix_fid_mem_desc, mr_fid.fid); - - auth_key = mr->auth_key; - domain = mr->domain; - requested_key = fi_mr_key(&mr->mr_fid); - info = &domain->mr_cache_info[mr->auth_key->ptag]; - - /* call cache deregister op */ - ofi_spin_lock(&info->mr_cache_lock); - ret = domain->mr_ops->dereg_mr(domain, mr); - ofi_spin_unlock(&info->mr_cache_lock); - - /* check retcode */ - if (OFI_LIKELY(ret == FI_SUCCESS)) { - /* release references to the domain and nic */ - _gnix_ref_put(domain); - if (auth_key->using_vmdh) { - if (requested_key < 
auth_key->attr.user_key_limit) - _gnix_test_and_clear_bit(auth_key->user, - requested_key); - else { - ret = _gnix_release_reserved_key(auth_key, - requested_key); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_DOMAIN, - "failed to release reserved key, " - "rc=%d key=%d\n", - ret, requested_key); - } - } - } - } else { - GNIX_INFO(FI_LOG_MR, "failed to deregister memory, " - "ret=%i\n", ret); - } - - return ret; -} - -static inline void *__gnix_generic_register( - struct gnix_fid_domain *domain, - struct gnix_fid_mem_desc *md, - void *address, - size_t length, - gni_cq_handle_t dst_cq_hndl, - int flags, - int vmdh_index, - struct gnix_auth_key *auth_key) -{ - struct gnix_nic *nic; - struct gnix_nic_attr nic_attr = {0}; - gni_return_t grc = GNI_RC_SUCCESS; - int rc; - struct gnix_auth_key *info; - - pthread_mutex_lock(&gnix_nic_list_lock); - - /* If the nic list is empty, create a nic */ - if (OFI_UNLIKELY((dlist_empty(&gnix_nic_list_ptag[auth_key->ptag])))) { - /* release the lock because we are not checking the list after - this point. Additionally, gnix_nic_alloc takes the - lock to add the nic. 
*/ - pthread_mutex_unlock(&gnix_nic_list_lock); - nic_attr.auth_key = auth_key; - - rc = gnix_nic_alloc(domain, &nic_attr, &nic); - if (rc) { - GNIX_INFO(FI_LOG_MR, - "could not allocate nic to do mr_reg," - " ret=%i\n", rc); - return NULL; - } - } else { - nic = dlist_first_entry(&gnix_nic_list_ptag[auth_key->ptag], - struct gnix_nic, ptag_nic_list); - if (OFI_UNLIKELY(nic == NULL)) { - GNIX_ERR(FI_LOG_MR, "Failed to find nic on " - "ptag list\n"); - pthread_mutex_unlock(&gnix_nic_list_lock); - return NULL; - } - _gnix_ref_get(nic); - pthread_mutex_unlock(&gnix_nic_list_lock); - } - - COND_ACQUIRE(nic->requires_lock, &nic->lock); - if (nic->using_vmdh && !nic->mdd_resources_set) { - info = auth_key; - assert(info); - - grc = GNI_SetMddResources(nic->gni_nic_hndl, - (info->attr.prov_key_limit + - info->attr.user_key_limit)); - assert(grc == GNI_RC_SUCCESS); - - nic->mdd_resources_set = 1; - } - - GNIX_DEBUG(FI_LOG_MR, - "Params: hndl=%p addr=%p length=%d dst_cq_hndl=%p flags=%08x " - "vmdh_index=%d mem_hndl=%p md=%p\n", - nic->gni_nic_hndl, address, length, dst_cq_hndl, flags, - vmdh_index, &md->mem_hndl, md); - - grc = GNI_MemRegister(nic->gni_nic_hndl, (uint64_t) address, - length, dst_cq_hndl, flags, - vmdh_index, &md->mem_hndl); - COND_RELEASE(nic->requires_lock, &nic->lock); - - if (OFI_UNLIKELY(grc != GNI_RC_SUCCESS)) { - GNIX_INFO(FI_LOG_MR, "failed to register memory with uGNI, " - "ret=%s\n", gni_err_str[grc]); - _gnix_ref_put(nic); - - return NULL; - } - - /* set up the mem desc */ - md->nic = nic; - md->domain = domain; - - /* take references on domain */ - _gnix_ref_get(md->domain); - - return md; -} - -static void *__gnix_register_region( - void *handle, - void *address, - size_t length, - struct _gnix_fi_reg_context *fi_reg_context, - void *context) -{ - struct gnix_fid_mem_desc *md = (struct gnix_fid_mem_desc *) handle; - struct gnix_fid_domain *domain = context; - gni_cq_handle_t dst_cq_hndl = NULL; - int flags = 0; - int vmdh_index = -1; - - /* If 
network would be able to write to this buffer, use read-write */ - if (fi_reg_context->access & (FI_RECV | FI_READ | FI_REMOTE_WRITE)) - flags |= GNI_MEM_READWRITE; - else - flags |= GNI_MEM_READ_ONLY; - - if (domain->using_vmdh) { - flags |= GNI_MEM_USE_VMDH | GNI_MEM_RESERVE_REGION; - vmdh_index = fi_reg_context->requested_key; - } - - GNIX_DEBUG(FI_LOG_MR, "addr %p len %d flags 0x%x\n", address, length, - flags); - return __gnix_generic_register(domain, md, address, length, dst_cq_hndl, - flags, vmdh_index, fi_reg_context->auth_key); -} - -static int __gnix_deregister_region( - void *handle, - void *context) -{ - struct gnix_fid_mem_desc *mr = (struct gnix_fid_mem_desc *) handle; - gni_return_t ret = GNI_RC_SUCCESS; - struct gnix_fid_domain *domain; - struct gnix_nic *nic; - - domain = mr->domain; - nic = mr->nic; - - GNIX_DEBUG(FI_LOG_MR, - "Params: deregister md=%p\n", handle); - COND_ACQUIRE(nic->requires_lock, &nic->lock); - ret = GNI_MemDeregister(nic->gni_nic_hndl, &mr->mem_hndl); - COND_RELEASE(nic->requires_lock, &nic->lock); - if (ret == GNI_RC_SUCCESS) { - /* release reference to domain */ - _gnix_ref_put(domain); - - /* release reference to nic */ - _gnix_ref_put(nic); - } else { - GNIX_INFO(FI_LOG_MR, "failed to deregister memory" - " region, entry=%p ret=%i\n", handle, ret); - } - - return ret; -} - -/** - * Associates a registered memory region with a completion counter. 
- * - * @param[in] fid the memory region - * @param[in] bfid the fabric identifier for the memory region - * @param[in] flags flags to apply to the registration - * - * @return FI_SUCCESS Upon successfully registering the memory region - * @return -FI_ENOSYS If binding of the memory region is not supported - * @return -FI_EBADFLAGS If the flags are not supported - * @return -FI_EKEYREJECTED If the key is not available - * @return -FI_ENOKEY If the key is already in use - */ -DIRECT_FN int gnix_mr_bind(fid_t fid, struct fid *bfid, uint64_t flags) -{ - return -FI_ENOSYS; -} - -static int __gnix_destruct_registration(void *context) -{ - return GNI_RC_SUCCESS; -} - -#ifdef HAVE_UDREG -void *__udreg_register(void *addr, uint64_t length, void *context) -{ - struct gnix_fid_mem_desc *md; - struct gnix_mr_cache_info *info = (struct gnix_mr_cache_info *) context; - struct gnix_auth_key *auth_key = info->auth_key; - struct gnix_fid_domain *domain = info->domain; - - /* Allocate an udreg info block for this registration. */ - md = calloc(1, sizeof(*md)); - if (!md) { - GNIX_WARN(FI_LOG_MR, - "failed to allocate memory for registration\n"); - return NULL; - } - - GNIX_INFO(FI_LOG_MR, "info=%p auth_key=%p\n", - info, auth_key); - GNIX_INFO(FI_LOG_MR, "ptag=%d\n", auth_key->ptag); - - return __gnix_generic_register(domain, md, addr, length, NULL, - GNI_MEM_READWRITE, -1, auth_key); -} - -uint32_t __udreg_deregister(void *registration, void *context) -{ - gni_return_t grc; - - grc = __gnix_deregister_region(registration, NULL); - - free(registration); - - return (grc == GNI_RC_SUCCESS) ? 0 : 1; -} - -/* Called via dreg when a cache is destroyed. */ -void __udreg_cache_destructor(void *context) -{ - /* Nothing needed here. 
*/ -} - -static int __udreg_init(struct gnix_fid_domain *domain, - struct gnix_auth_key *auth_key) -{ - int ret = FI_SUCCESS; - udreg_return_t urc; - struct gnix_mr_cache_info *info = - GNIX_GET_MR_CACHE_INFO(domain, auth_key); - - udreg_cache_attr_t udreg_cache_attr = { - .cache_name = {"gnix_app_cache"}, - .max_entries = domain->udreg_reg_limit, - .modes = UDREG_CC_MODE_USE_LARGE_PAGES, - .debug_mode = 0, - .debug_rank = 0, - .reg_context = (void *) info, - .dreg_context = (void *) domain, - .destructor_context = (void *) domain, - .device_reg_func = __udreg_register, - .device_dereg_func = __udreg_deregister, - .destructor_callback = __udreg_cache_destructor, - }; - - if (domain->mr_cache_attr.lazy_deregistration) - udreg_cache_attr.modes |= (UDREG_CC_MODE_USE_LAZY_DEREG | UDREG_CC_MODE_USE_KERNEL_CACHE); - - /* - * Create a udreg cache for application memory registrations. - */ - urc = UDREG_CacheCreate(&udreg_cache_attr); - if (urc != UDREG_RC_SUCCESS) { - GNIX_WARN(FI_LOG_MR, - "Could not initialize udreg application cache, urc=%d\n", - urc); - switch (urc) { - case UDREG_RC_INVALID_PARAM: - ret = -FI_EINVAL; - goto err; - break; - case UDREG_RC_ERROR_NO_DEVICE: - ret = -FI_ENODEV; - goto err; - break; - case UDREG_RC_NO_SPACE: - ret = -FI_ENOSPC; - goto err; - break; - default: - ret = -FI_EINVAL; - goto err; - break; - } - } - - urc = UDREG_CacheAccess(udreg_cache_attr.cache_name, - &info->udreg_cache); - if (urc != UDREG_RC_SUCCESS) { - GNIX_WARN(FI_LOG_MR, - "Could not access udreg application cache, urc=%d", - urc); - switch (urc) { - case UDREG_RC_INVALID_PARAM: - ret = -FI_EINVAL; - goto err; - break; - case UDREG_RC_NO_MATCH: - ret = -FI_ENODEV; - goto err; - break; - default: - ret = -FI_EINVAL; - goto err; - break; - } - } - - info->inuse = 1; - info->auth_key = auth_key; - GNIX_INFO(FI_LOG_MR, "info=%p auth_key=%p ptag=%d\n", - info, info->auth_key, auth_key->ptag); - -err: - return ret; -} - -static int __udreg_is_init(struct gnix_fid_domain 
*domain, - struct gnix_auth_key *auth_key) -{ - struct gnix_mr_cache_info *info = - GNIX_GET_MR_CACHE_INFO(domain, auth_key); - - return info->inuse; -} - -static int __udreg_reg_mr( - struct gnix_fid_domain *domain, - uint64_t address, - uint64_t length, - struct _gnix_fi_reg_context *fi_reg_context, - void **handle) { - - udreg_return_t urc; - udreg_entry_t *udreg_entry; - struct gnix_fid_mem_desc *md; - struct gnix_mr_cache_info *info = - GNIX_GET_MR_CACHE_INFO(domain, - fi_reg_context->auth_key); - - urc = UDREG_Register(info->udreg_cache, (void *) address, - length, &udreg_entry); - if (OFI_UNLIKELY(urc != UDREG_RC_SUCCESS)) - return -FI_EIO; - - md = udreg_entry->device_data; - md->entry = udreg_entry; - - *handle = md; - - return FI_SUCCESS; -} - -static int __udreg_dereg_mr(struct gnix_fid_domain *domain, - struct gnix_fid_mem_desc *md) -{ - udreg_return_t urc; - struct gnix_mr_cache_info *info = - GNIX_GET_MR_CACHE_INFO(domain, - md->auth_key); - - urc = UDREG_Unregister(info->udreg_cache, - (udreg_entry_t *) md->entry); - if (urc != UDREG_RC_SUCCESS) { - GNIX_WARN(FI_LOG_MR, "UDREG_Unregister() returned %d\n", urc); - return -FI_ENOENT; - } - - return urc; -} - -static int __udreg_close(struct gnix_fid_domain *domain, - struct gnix_mr_cache_info *info) -{ - udreg_return_t ret; - - if (!info->inuse) - return FI_SUCCESS; /* nothing to close */ - - if (info->udreg_cache) { - ret = UDREG_CacheRelease(info->udreg_cache); - if (OFI_UNLIKELY(ret != UDREG_RC_SUCCESS)) - GNIX_FATAL(FI_LOG_DOMAIN, "failed to release from " - "mr cache during domain destruct, dom=%p rc=%d\n", - domain, ret); - - ret = UDREG_CacheDestroy(info->udreg_cache); - if (OFI_UNLIKELY(ret != UDREG_RC_SUCCESS)) - GNIX_FATAL(FI_LOG_DOMAIN, "failed to destroy mr " - "cache during domain destruct, dom=%p rc=%d\n", - domain, ret); - } - - info->inuse = 0; - - return FI_SUCCESS; -} -#else -static int __udreg_init(struct gnix_fid_domain *domain, - struct gnix_auth_key *auth_key) -{ - return 
-FI_ENOSYS; -} - -static int __udreg_is_init(struct gnix_fid_domain *domain - struct gnix_auth_key *auth_key) -{ - return FI_SUCCESS; -} - -static int __udreg_reg_mr(struct gnix_fid_domain *domain, - uint64_t address, - uint64_t length, - struct _gnix_fi_reg_context *fi_reg_context, - void **handle) { - - return -FI_ENOSYS; -} - -static int __udreg_dereg_mr(struct gnix_fid_domain *domain, - struct gnix_fid_mem_desc *md) -{ - return -FI_ENOSYS; -} - -static int __udreg_close(struct gnix_fid_domain *domain, - struct gnix_mr_cache_info *info) -{ - return FI_SUCCESS; -} -#endif - -struct gnix_mr_ops udreg_mr_ops = { - .init = __udreg_init, - .is_init = __udreg_is_init, - .reg_mr = __udreg_reg_mr, - .dereg_mr = __udreg_dereg_mr, - .destroy_cache = __udreg_close, - .flush_cache = NULL, /* UDREG doesn't support cache flush */ -}; - -static int __cache_init(struct gnix_fid_domain *domain, - struct gnix_auth_key *auth_key) -{ - int ret; - struct gnix_mr_cache_info *info = - GNIX_GET_MR_CACHE_INFO(domain, auth_key); - -#if !HAVE_KDREG - domain->mr_cache_attr.lazy_deregistration = 0; -#endif - - ret = _gnix_mr_cache_init(&info->mr_cache_ro, - &domain->mr_cache_attr); - - if (ret) - return ret; - - ret = _gnix_mr_cache_init(&info->mr_cache_rw, - &domain->mr_cache_attr); - - if (ret == FI_SUCCESS) - info->inuse = 1; - - return ret; -} - -static int __cache_is_init(struct gnix_fid_domain *domain, - struct gnix_auth_key *auth_key) -{ - struct gnix_mr_cache_info *info = - GNIX_GET_MR_CACHE_INFO(domain, auth_key); - - return info->inuse; -} - -static int __cache_reg_mr( - struct gnix_fid_domain *domain, - uint64_t address, - uint64_t length, - struct _gnix_fi_reg_context *fi_reg_context, - void **handle) -{ - struct gnix_mr_cache *cache; - struct gnix_auth_key *auth_key = fi_reg_context->auth_key; - struct gnix_mr_cache_info *info = - GNIX_GET_MR_CACHE_INFO(domain, auth_key); - - if (fi_reg_context->access & (FI_RECV | FI_READ | FI_REMOTE_WRITE)) - cache = info->mr_cache_rw; - else 
- cache = info->mr_cache_ro; - - return _gnix_mr_cache_register(cache, address, length, - fi_reg_context, handle); -} - -static int __cache_dereg_mr(struct gnix_fid_domain *domain, - struct gnix_fid_mem_desc *md) -{ - gnix_mr_cache_t *cache; - struct gnix_mr_cache_info *info = GNIX_GET_MR_CACHE_INFO(domain, - md->auth_key); - - if (GNI_MEMHNDL_GET_FLAGS((md->mem_hndl)) & - GNI_MEMHNDL_FLAG_READONLY) - cache = info->mr_cache_ro; - else - cache = info->mr_cache_rw; - - return _gnix_mr_cache_deregister(cache, md); -} - -static int __cache_close(struct gnix_fid_domain *domain, - struct gnix_mr_cache_info *info) -{ - int ret; - - if (!info->inuse) - return FI_SUCCESS; - - if (info->mr_cache_ro) { - ret = _gnix_mr_cache_destroy(info->mr_cache_ro); - if (ret != FI_SUCCESS) - GNIX_FATAL(FI_LOG_DOMAIN, "failed to destroy ro mr " - "cache dom=%p ret=%d\n", - domain, ret); - } - - if (info->mr_cache_rw) { - ret = _gnix_mr_cache_destroy(info->mr_cache_rw); - if (ret != FI_SUCCESS) - GNIX_FATAL(FI_LOG_DOMAIN, "failed to destroy rw mr " - "cache dom=%p ret=%d\n", - domain, ret); - } - - info->inuse = 0; - return FI_SUCCESS; -} - -static int __cache_flush(struct gnix_fid_domain *domain) -{ - int ret = FI_SUCCESS; - int i; - struct gnix_mr_cache_info *info; - - ofi_spin_lock(&domain->mr_cache_lock); - - for (i = 0; i < 256; i++) { - info = &domain->mr_cache_info[i]; - - ofi_spin_lock(&info->mr_cache_lock); - if (!info->inuse) { - ofi_spin_unlock(&info->mr_cache_lock); - continue; - } - - ret = _gnix_mr_cache_flush(info->mr_cache_ro); - if (ret) { - ofi_spin_unlock(&info->mr_cache_lock); - break; - } - - ret = _gnix_mr_cache_flush(info->mr_cache_rw); - if (ret) { - ofi_spin_unlock(&info->mr_cache_lock); - break; - } - - ofi_spin_unlock(&info->mr_cache_lock); - } - - ofi_spin_unlock(&domain->mr_cache_lock); - - return ret; -} - -struct gnix_mr_ops cache_mr_ops = { - .init = __cache_init, - .is_init = __cache_is_init, - .reg_mr = __cache_reg_mr, - .dereg_mr = __cache_dereg_mr, - 
.destroy_cache = __cache_close, - .flush_cache = __cache_flush, -}; - -static int __basic_mr_init(struct gnix_fid_domain *domain, - struct gnix_auth_key *auth_key) -{ - struct gnix_mr_cache_info *info = domain->mr_cache_info; - - info->inuse = 1; - return FI_SUCCESS; -} - -static int __basic_mr_is_init(struct gnix_fid_domain *domain, - struct gnix_auth_key *auth_key) -{ - struct gnix_mr_cache_info *info = domain->mr_cache_info; - - return info->inuse; -} - -static int __basic_mr_reg_mr( - struct gnix_fid_domain *domain, - uint64_t address, - uint64_t length, - struct _gnix_fi_reg_context *fi_reg_context, - void **handle) -{ - struct gnix_fid_mem_desc *md, *ret; - - md = calloc(1, sizeof(*md)); - if (!md) { - GNIX_WARN(FI_LOG_MR, "failed to allocate memory"); - return -FI_ENOMEM; - } - - ret = __gnix_register_region((void *) md, (void *) address, length, - fi_reg_context, (void *) domain); - if (!ret) { - GNIX_WARN(FI_LOG_MR, "failed to register memory"); - free(md); - return -FI_ENOSPC; - } - - *handle = (void *) md; - - return FI_SUCCESS; -} - -static int __basic_mr_dereg_mr(struct gnix_fid_domain *domain, - struct gnix_fid_mem_desc *md) -{ - int ret; - - ret = __gnix_deregister_region((void *) md, NULL); - if (ret == FI_SUCCESS) - free((void *) md); - - return ret; -} - -struct gnix_mr_ops basic_mr_ops = { - .init = __basic_mr_init, - .is_init = __basic_mr_is_init, - .reg_mr = __basic_mr_reg_mr, - .dereg_mr = __basic_mr_dereg_mr, - .flush_cache = NULL, /* unsupported since there is no caching here */ -}; - -int _gnix_open_cache(struct gnix_fid_domain *domain, int type) -{ - if (type < 0 || type >= GNIX_MR_MAX_TYPE) - return -FI_EINVAL; - - if (domain->mr_ops && domain->mr_ops->is_init(domain, domain->auth_key)) - return -FI_EBUSY; - - switch(type) { - case GNIX_MR_TYPE_UDREG: - domain->mr_ops = &udreg_mr_ops; - break; - case GNIX_MR_TYPE_NONE: - domain->mr_ops = &basic_mr_ops; - break; - default: - domain->mr_ops = &cache_mr_ops; - break; - } - - 
domain->mr_cache_type = type; - - return FI_SUCCESS; -} - -int _gnix_flush_registration_cache(struct gnix_fid_domain *domain) -{ - if (domain->mr_ops && domain->mr_ops->flush_cache) - return domain->mr_ops->flush_cache(domain); - - return FI_SUCCESS; /* if no flush was present, silently pass */ -} - -int _gnix_close_cache(struct gnix_fid_domain *domain, - struct gnix_mr_cache_info *info) -{ - /* if the domain isn't being destructed by close, we need to check the - * cache again. This isn't a likely case. Destroy must succeed since we - * are in the destruct path */ - if (domain->mr_ops && domain->mr_ops->destroy_cache) - return domain->mr_ops->destroy_cache(domain, info); - - return FI_SUCCESS; -} - -gnix_mr_cache_attr_t _gnix_default_mr_cache_attr = { - .soft_reg_limit = 4096, - .hard_reg_limit = -1, - .hard_stale_limit = 128, -#if HAVE_KDREG - .lazy_deregistration = 1, -#else - .lazy_deregistration = 0, -#endif - .reg_callback = __gnix_register_region, - .dereg_callback = __gnix_deregister_region, - .destruct_callback = __gnix_destruct_registration, - .elem_size = sizeof(struct gnix_fid_mem_desc), -}; diff --git a/prov/gni/src/gnix_mr_cache.c b/prov/gni/src/gnix_mr_cache.c deleted file mode 100644 index 09f37f5cc79..00000000000 --- a/prov/gni/src/gnix_mr_cache.c +++ /dev/null @@ -1,1640 +0,0 @@ -/* - * Copyright (c) 2016-2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include - -typedef unsigned long long int cache_entry_state_t; -/* These are used for entry state and should be unique */ -#define GNIX_CES_INUSE (1ULL << 8) /* in use */ -#define GNIX_CES_STALE (2ULL << 8) /* cached for possible reuse */ -#define GNIX_CES_STATE_MASK (0xFULL << 8) - -typedef unsigned long long int cache_entry_flag_t; -/* One or more of these can be combined with the above */ -#define GNIX_CE_RETIRED (1ULL << 61) /* in use, but not to be reused */ -#define GNIX_CE_MERGED (1ULL << 62) /* merged entry, i.e., not - * an original request from - * fi_mr_reg */ -#define GNIX_CE_UNMAPPED (1ULL << 63) /* at least 1 page of the - * entry has been unmapped - * by the OS */ - -/** - * @brief structure for containing the fields relevant to the memory cache key - * - * @var address base address of the memory region - * @var address length of the memory region - */ -typedef struct gnix_mr_cache_key { - uint64_t address; - uint64_t length; -} gnix_mr_cache_key_t; - -/** - * @brief gnix memory registration cache entry - * - * @var state state of the memory registration cache entry - * @var mr gnix memory registration descriptor - * @var mem_hndl gni memory handle for the memory registration - * @var key gnix memory 
registration cache key - * @var domain gnix domain associated with the memory registration - * @var nic gnix nic associated with the memory registration - * @var ref_cnt reference counting for the cache - * @var lru_entry lru list entry - * @var siblings list of sibling entries - * @var children list of subsumed child entries - */ -typedef struct gnix_mr_cache_entry { - struct gnix_smrn_context context; - cache_entry_state_t state; - gnix_mr_cache_key_t key; - ofi_atomic32_t ref_cnt; - struct dlist_entry lru_entry; - struct dlist_entry siblings; - struct dlist_entry children; - uint64_t mr[0]; -} gnix_mr_cache_entry_t; - -/* forward declarations */ -int _gnix_mr_cache_register( - gnix_mr_cache_t *cache, - uint64_t address, - uint64_t length, - struct _gnix_fi_reg_context *fi_reg_context, - void **handle); - -int _gnix_mr_cache_deregister( - gnix_mr_cache_t *cache, - void *handle); - -static inline int __mr_cache_entry_put( - gnix_mr_cache_t *cache, - gnix_mr_cache_entry_t *entry); - -static inline int __mr_cache_entry_get( - gnix_mr_cache_t *cache, - gnix_mr_cache_entry_t *entry); - -static inline int __mr_cache_entry_destroy(gnix_mr_cache_t *cache, - gnix_mr_cache_entry_t *entry); - -static int __mr_cache_create_registration( - gnix_mr_cache_t *cache, - uint64_t address, - uint64_t length, - gnix_mr_cache_entry_t **entry, - gnix_mr_cache_key_t *key, - struct _gnix_fi_reg_context *fi_reg_context); - - -/* global declarations */ - -/* default attributes for new caches */ -gnix_mr_cache_attr_t __default_mr_cache_attr = { - .soft_reg_limit = 4096, - .hard_reg_limit = -1, - .hard_stale_limit = 128, -#if HAVE_KDREG - .lazy_deregistration = 1, -#else - .lazy_deregistration = 0, -#endif -}; - -/* Functions for using and manipulating cache entry state */ -static inline cache_entry_state_t __entry_get_state(gnix_mr_cache_entry_t *e) -{ - return e->state & GNIX_CES_STATE_MASK; -} - -static inline void __entry_set_state(gnix_mr_cache_entry_t *e, - cache_entry_state_t s) -{ - 
e->state = (e->state & ~GNIX_CES_STATE_MASK) | - (s & GNIX_CES_STATE_MASK); -} - -static inline void __entry_reset_state(gnix_mr_cache_entry_t *e) -{ - e->state = 0ULL; -} - -static inline bool __entry_is_flag(gnix_mr_cache_entry_t *e, - cache_entry_flag_t f) -{ - return (e->state & f) != 0; -} - -static inline void __entry_set_flag(gnix_mr_cache_entry_t *e, - cache_entry_flag_t f) -{ - e->state = e->state | f; -} - -static inline bool __entry_is_retired(gnix_mr_cache_entry_t *e) -{ - return __entry_is_flag(e, GNIX_CE_RETIRED); -} - -static inline bool __entry_is_merged(gnix_mr_cache_entry_t *e) -{ - return __entry_is_flag(e, GNIX_CE_MERGED); -} - -static inline bool __entry_is_unmapped(gnix_mr_cache_entry_t *e) -{ - return __entry_is_flag(e, GNIX_CE_UNMAPPED); -} - -static inline void __entry_set_retired(gnix_mr_cache_entry_t *e) -{ - __entry_set_flag(e, GNIX_CE_RETIRED); -} - -static inline void __entry_set_merged(gnix_mr_cache_entry_t *e) -{ - __entry_set_flag(e, GNIX_CE_MERGED); -} - -static inline void __entry_set_unmapped(gnix_mr_cache_entry_t *e) -{ - __entry_set_flag(e, GNIX_CE_UNMAPPED); -} - -/** - * Key comparison function for finding overlapping gnix memory - * registration cache entries - * - * @param[in] x key to be inserted or found - * @param[in] y key to be compared against - * - * @return -1 if it should be positioned at the left, 0 if it overlaps, - * 1 otherwise - */ -static int __find_overlapping_addr( - void *x, - void *y) -{ - gnix_mr_cache_key_t *to_find = (gnix_mr_cache_key_t *) x; - gnix_mr_cache_key_t *to_compare = (gnix_mr_cache_key_t *) y; - uint64_t to_find_end = to_find->address + to_find->length - 1; - uint64_t to_compare_end = to_compare->address + to_compare->length - 1; - - /* format: (x_addr, x_len) - (y_addr, y_len) truth_value - * - * case 1: (0x1000, 0x1000) - (0x1400, 0x0800) true - * case 2: (0x1000, 0x1000) - (0x0C00, 0x0800) true - * case 3: (0x1000, 0x1000) - (0x1C00, 0x0800) true - * case 4: (0x1000, 0x1000) - (0x0C00, 
0x2000) true - * case 5: (0x1000, 0x1000) - (0x0400, 0x0400) false - * case 6: (0x1000, 0x1000) - (0x2400, 0x0400) false - */ - if (!(to_find_end < to_compare->address || - to_compare_end < to_find->address)) - return 0; - - /* left */ - if (to_find->address < to_compare->address) - return -1; - - return 1; -} - -/** - * Key comparison function for gnix memory registration caches - * - * @param[in] x key to be inserted or found - * @param[in] y key to be compared against - * - * @return -1 if it should be positioned at the left, 0 if the same, - * 1 otherwise - */ -static inline int __mr_cache_key_comp( - void *x, - void *y) -{ - gnix_mr_cache_key_t *to_insert = (gnix_mr_cache_key_t *) x; - gnix_mr_cache_key_t *to_compare = (gnix_mr_cache_key_t *) y; - - if (to_compare->address == to_insert->address) - return 0; - - /* to the left */ - if (to_insert->address < to_compare->address) - return -1; - - /* to the right */ - return 1; -} - -/** - * Helper function for matching the exact key entry - * - * @param entry memory registration cache key - * @param to_match memory registration cache key - * @return 1 if match, otherwise 0 - */ -static inline int __match_exact_key( - gnix_mr_cache_key_t *entry, - gnix_mr_cache_key_t *to_match) -{ - return entry->address == to_match->address && - entry->length == to_match->length; -} - -/** - * dlist search function for matching the exact memory registration key - * - * @param entry memory registration cache entry - * @param match memory registration cache key - * @return 1 if match, otherwise 0 - */ -static inline int __mr_exact_key(struct dlist_entry *entry, - const void *match) -{ - gnix_mr_cache_entry_t *x = container_of(entry, - gnix_mr_cache_entry_t, - siblings); - - gnix_mr_cache_key_t *y = (gnix_mr_cache_key_t *) match; - - return __match_exact_key(&x->key, y); -} - - -/** - * Helper function to determine if one key subsumes another - * - * @param x gnix_mr_cache_key - * @param y gnix_mr_cache_key - * @return 1 if x 
subsumes y, 0 otherwise - */ -static inline int __can_subsume( - gnix_mr_cache_key_t *x, - gnix_mr_cache_key_t *y) -{ - return (x->address <= y->address) && - ((x->address + x->length) >= - (y->address + y->length)); -} - -static inline void __attach_retired_entries_to_registration( - gnix_mr_cache_t *cache, - struct dlist_entry *retired_entries, - gnix_mr_cache_entry_t *parent) -{ - gnix_mr_cache_entry_t *entry, *tmp; - - dlist_for_each_safe(retired_entries, entry, tmp, siblings) { - dlist_remove(&entry->siblings); - dlist_insert_tail(&entry->siblings, - &parent->children); - if (!dlist_empty(&entry->children)) { - /* move the entry's children to the sibling tree - * and decrement the reference count */ - dlist_splice_tail(&parent->children, - &entry->children); - __mr_cache_entry_put(cache, entry); - } - } - - if (!dlist_empty(retired_entries)) { - GNIX_FATAL(FI_LOG_MR, "retired_entries not empty\n"); - } - - __mr_cache_entry_get(cache, parent); -} - -static inline void __remove_sibling_entries_from_tree( - gnix_mr_cache_t *cache, - struct dlist_entry *list, - RbtHandle tree) -{ - RbtStatus rc; - RbtIterator iter; - gnix_mr_cache_entry_t *entry; - - dlist_for_each(list, entry, siblings) - { - GNIX_DEBUG(FI_LOG_MR, - "removing key from tree, key=%llx:%llx\n", - entry->key.address, entry->key.length); - iter = rbtFind(tree, &entry->key); - if (OFI_UNLIKELY(!iter)) { - GNIX_FATAL(FI_LOG_MR, "key not found\n"); - } - - rc = rbtErase(tree, iter); - if (OFI_UNLIKELY(rc != RBT_STATUS_OK)) { - GNIX_FATAL(FI_LOG_MR, - "could not remove entry from tree\n"); - } - } -} - -/** - * Pushes an entry into the LRU cache. 
No limits are maintained here as - * the hard_stale_limit attr value will directly limit the lru size - * - * @param[in] cache a memory registration cache object - * @param[in] entry a memory registration cache entry - * - * @return FI_SUCCESS, always - */ -static inline int __mr_cache_lru_enqueue( - gnix_mr_cache_t *cache, - gnix_mr_cache_entry_t *entry) -{ - dlist_insert_tail(&entry->lru_entry, &cache->lru_head); - - return FI_SUCCESS; -} - -/** - * Pops an registration cache entry from the lru cache. - * - * @param[in] cache a memory registration cache - * @param[in] entry a memory registration cache entry - * - * @return FI_SUCCESS, on success - * @return -FI_ENOENT, on empty LRU - */ -static inline int __mr_cache_lru_dequeue( - gnix_mr_cache_t *cache, - gnix_mr_cache_entry_t **entry) -{ - gnix_mr_cache_entry_t *ret; - - ret = dlist_first_entry(&cache->lru_head, - gnix_mr_cache_entry_t, lru_entry); - if (OFI_UNLIKELY(!ret)) { /* we check list_empty before calling */ - *entry = NULL; - return -FI_ENOENT; - } - - /* remove entry from the list */ - *entry = ret; - dlist_remove(&ret->lru_entry); - - return FI_SUCCESS; -} - -/** - * Removes a particular registration cache entry from the lru cache. 
- * - * @param[in] cache a memory registration cache - * @param[in] entry a memory registration cache entry - * - * @return FI_SUCCESS, on success - * @return -FI_ENOENT, on empty LRU - */ -static inline int __mr_cache_lru_remove( - gnix_mr_cache_t *cache, - gnix_mr_cache_entry_t *entry) -{ - /* Could do some error checking to see if in cache */ - - dlist_remove(&entry->lru_entry); - - return FI_SUCCESS; -} - -/** - * Remove entries that have been unmapped as indicated by the notifer - * - * @param[in] cache a memory registration cache - * - * @return nothing - */ -static bool __notifier_warned = false; -static void -__clear_notifier_events(gnix_mr_cache_t *cache) -{ - int ret; - gnix_mr_cache_entry_t *entry; - struct gnix_smrn_context *context; - RbtIterator iter; - - if (!cache->attr.smrn) { - return; - } - - if (!cache->attr.lazy_deregistration) { - return; - } - - ret = _gnix_smrn_get_event(cache->attr.smrn, - &cache->rq, &context); - while (ret == FI_SUCCESS) { - entry = container_of(context, - struct gnix_mr_cache_entry, context); - switch (__entry_get_state(entry)) { - case GNIX_CES_INUSE: - /* First, warn that this might be a - * problem.*/ - if ((__notifier_warned == false) && - !__entry_is_merged(entry)) { - GNIX_WARN(FI_LOG_MR, - "Registered memory region" - " includes unmapped pages." 
- " Have you freed memory" - " without closing the memory" - " region?\n"); - __notifier_warned = true; - } - - GNIX_DEBUG(FI_LOG_MR, - "Marking unmapped entry (%p)" - " as retired %llx:%llx\n", entry, - entry->key.address, - entry->key.length); - - __entry_set_unmapped(entry); - - if (__entry_is_retired(entry)) { - /* Nothing to do */ - break; - } - - /* Retire this entry (remove from - * inuse tree) */ - - __entry_set_retired(entry); - iter = rbtFind(cache->inuse.rb_tree, - &entry->key); - if (OFI_LIKELY(iter != NULL)) { - ret = rbtErase(cache->inuse.rb_tree, - iter); - if (ret != RBT_STATUS_OK) { - GNIX_FATAL(FI_LOG_MR, - "Unmapped entry" - " could not be" - " removed from" - " in usetree.\n"); - } - } else { - /* The only way we should get - * here is if we're in the - * middle of retiring this - * entry. Not sure if this - * is worth a separate - * warning from the one - * above. */ - } - - break; - case GNIX_CES_STALE: - __entry_set_unmapped(entry); - iter = rbtFind(cache->stale.rb_tree, - &entry->key); - if (!iter) { - break; - } - - ret = rbtErase(cache->stale.rb_tree, iter); - if (ret != RBT_STATUS_OK) { - GNIX_FATAL(FI_LOG_MR, - "Unmapped entry could" - " not be removed " - " from stale tree.\n"); - } - - GNIX_DEBUG(FI_LOG_MR, "Removed unmapped entry" - " (%p) from stale tree %llx:%llx\n", - entry, entry->key.address, - entry->key.length); - - if (__mr_cache_lru_remove(cache, entry) == FI_SUCCESS) { - GNIX_DEBUG(FI_LOG_MR, "Removed" - " unmapped entry (%p)" - " from lru list %llx:%llx\n", - entry, entry->key.address, - entry->key.length); - - ofi_atomic_dec32(&cache->stale.elements); - - } else { - GNIX_WARN(FI_LOG_MR, "Failed to remove" - " unmapped entry" - " from lru list (%p) %p\n", - entry, iter); - } - - __mr_cache_entry_destroy(cache, entry); - - break; - default: - GNIX_FATAL(FI_LOG_MR, - "Unmapped entry (%p) in incorrect" - " state: %d\n", - entry, entry->state); - } - - ret = _gnix_smrn_get_event(cache->attr.smrn, - &cache->rq, &context); - } - 
if (ret != -FI_EAGAIN) { - /* Should we do something else here? */ - GNIX_WARN(FI_LOG_MR, - "_gnix_smrn_get_event returned error: %s\n", - fi_strerror(-ret)); - } - - return; -} - -/** - * Start monitoring a memory region - * - * @param[in] cache a memory registration cache - * @param[in] entry a memory registration entry - * - * @return return code from _gnix_smrn_monitor - */ -static int -__notifier_monitor(gnix_mr_cache_t *cache, - gnix_mr_cache_entry_t *entry) -{ - - if (!cache->attr.lazy_deregistration) { - return FI_SUCCESS; - } - - if (cache->attr.smrn == NULL) { - return FI_SUCCESS; - } - - GNIX_DEBUG(FI_LOG_MR, "monitoring entry=%p %llx:%llx\n", entry, - entry->key.address, entry->key.length); - - return _gnix_smrn_monitor(cache->attr.smrn, - &cache->rq, - (void *) entry->key.address, - entry->key.length, - (uint64_t) &entry->context, - &entry->context); -} - -/** - * Stop monitoring a memory region - * - * @param[in] cache a memory registration cache - * @param[in] entry a memory registration entry - * - * @return nothing - */ -static void -__notifier_unmonitor(gnix_mr_cache_t *cache, - gnix_mr_cache_entry_t *entry) -{ - int rc; - - if (!cache->attr.lazy_deregistration) { - return; - } - - if (cache->attr.smrn == NULL) { - return; - } - - __clear_notifier_events(cache); - - if (!__entry_is_unmapped(entry)) { - GNIX_DEBUG(FI_LOG_MR, "unmonitoring entry=%p (state=%d)\n", - entry, entry->state); - rc = _gnix_smrn_unmonitor(cache->attr.smrn, - (uint64_t) &entry->context, - &entry->context); - if (rc != FI_SUCCESS) { - /* This probably is okay (ESRCH), because the - * memory could have been unmapped in the - * interim, so clear the notifier events - * again */ - GNIX_DEBUG(FI_LOG_MR, - "failed to unmonitor memory (entry=%p)," - " so clear notifier events again\n", - entry, fi_strerror(-rc)); - - __clear_notifier_events(cache); - } - } -} - -/** - * Destroys the memory registration cache entry and deregisters the memory - * region with uGNI - * - * @param[in] 
entry a memory registration cache entry - * - * @return return code from callbacks - */ -static inline int __mr_cache_entry_destroy(gnix_mr_cache_t *cache, - gnix_mr_cache_entry_t *entry) -{ - int rc; - - rc = cache->attr.dereg_callback(entry->mr, - cache->attr.destruct_context); - if (!rc) { - /* Should we bother with this check? If we don't, the - * only difference it that __clear_notifier_events - * will be called one additional time. */ - if (!__entry_is_unmapped(entry)) { - __notifier_unmonitor(cache, entry); - } - __entry_reset_state(entry); - - rc = cache->attr.destruct_callback(cache->attr.dereg_context); - if (!rc) - free(entry); - } else { - GNIX_INFO(FI_LOG_MR, "failed to deregister memory" - " region with callback, " - "cache_entry=%p ret=%i\n", entry, rc); - } - - return rc; -} - -static inline int __insert_entry_into_stale( - gnix_mr_cache_t *cache, - gnix_mr_cache_entry_t *entry) -{ - RbtStatus rc; - int ret = 0; - - if (__entry_is_unmapped(entry)) { - GNIX_DEBUG(FI_LOG_MR, "entry (%p) unmapped, not inserting" - " into stale %llx:%llx", entry, - entry->key.address, entry->key.length); - /* Should we return some other value? 
*/ - return ret; - } - - rc = rbtInsert(cache->stale.rb_tree, - &entry->key, - entry); - if (rc != RBT_STATUS_OK) { - GNIX_ERR(FI_LOG_MR, - "could not insert into stale rb tree," - " rc=%d key.address=%llx key.length=%llx entry=%p", - rc, - entry->key.address, - entry->key.length, - entry); - - ret = __mr_cache_entry_destroy(cache, entry); - } else { - GNIX_DEBUG(FI_LOG_MR, - "inserted key=%llx:%llx into stale\n", - entry->key.address, entry->key.length); - - __mr_cache_lru_enqueue(cache, entry); - ofi_atomic_inc32(&cache->stale.elements); - switch (__entry_get_state(entry)) { - case GNIX_CES_INUSE: - __entry_set_state(entry, GNIX_CES_STALE); - break; - default: - GNIX_FATAL(FI_LOG_MR, - "stale entry (%p) %llx:%llx in bad" - " state (%d)\n", entry, - entry->key.address, entry->key.length, - entry->state); - } - } - - return ret; -} - -static inline void __resolve_stale_entry_collision( - gnix_mr_cache_t *cache, - RbtIterator found, - gnix_mr_cache_entry_t *entry) -{ - RbtStatus __attribute__((unused)) rc; - gnix_mr_cache_entry_t *c_entry, *tmp; - gnix_mr_cache_key_t *c_key; - int ret; - DLIST_HEAD(to_destroy); - RbtIterator iter = found; - int add_new_entry = 1, cmp; - - GNIX_TRACE(FI_LOG_MR, "\n"); - - GNIX_DEBUG(FI_LOG_MR, - "resolving collisions with entry (%p) %llx:%llx\n", - entry, entry->key.address, entry->key.length); - - while (iter) { - rbtKeyValue(cache->stale.rb_tree, iter, (void **) &c_key, - (void **) &c_entry); - - cmp = __find_overlapping_addr(&entry->key, c_key); - if (cmp != 0) - break; - - if (__can_subsume(&entry->key, c_key) || - (entry->key.length > c_key->length)) { - GNIX_DEBUG(FI_LOG_MR, - "adding stale entry (%p) to destroy list," - " key=%llx:%llx\n", c_entry, - c_key->address, c_key->length); - dlist_insert_tail(&c_entry->siblings, &to_destroy); - } else { - add_new_entry = 0; - } - - iter = rbtNext(cache->stale.rb_tree, iter); - } - - /* TODO I can probably do this in a single sweep, avoiding a second - * pass and incurring n lg n 
removal time - */ - dlist_for_each_safe(&to_destroy, c_entry, tmp, siblings) - { - GNIX_DEBUG(FI_LOG_MR, "removing key from tree, entry %p" - " key=%llx:%llx\n", c_entry, - c_entry->key.address, c_entry->key.length); - iter = rbtFind(cache->stale.rb_tree, &c_entry->key); - if (OFI_UNLIKELY(!iter)) { - GNIX_FATAL(FI_LOG_MR, "key not found\n"); - } - - rc = rbtErase(cache->stale.rb_tree, - iter); - if (OFI_UNLIKELY(rc != RBT_STATUS_OK)) { - GNIX_FATAL(FI_LOG_MR, - "could not remove entry from tree\n"); - } - - if (__mr_cache_lru_remove(cache, c_entry) != FI_SUCCESS) { - GNIX_WARN(FI_LOG_MR, "Failed to remove entry" - " from lru list (%p)\n", - c_entry); - } - ofi_atomic_dec32(&cache->stale.elements); - dlist_remove(&c_entry->siblings); - __mr_cache_entry_destroy(cache, c_entry); - } - if (OFI_UNLIKELY(!dlist_empty(&to_destroy))) { - GNIX_FATAL(FI_LOG_MR, "to_destroy not empty\n"); - } - - if (add_new_entry) { - ret = __insert_entry_into_stale(cache, entry); - if (ret) { - GNIX_FATAL(FI_LOG_MR, - "Failed to insert subsumed MR " - " entry (%p) into stale list\n", - entry); - } - } else { - /* stale entry is larger than this one - * so lets just toss this entry out - */ - GNIX_DEBUG(FI_LOG_MR, - "larger entry already exists, " - "to_destroy=%llx:%llx\n", - entry->key.address, entry->key.length); - - ret = __mr_cache_entry_destroy(cache, entry); - if (ret) { - GNIX_ERR(FI_LOG_MR, - "failed to destroy a " - "registration, entry=%p grc=%d\n", - c_entry, ret); - } - } -} - -/** - * Increments the reference count on a memory registration cache entry - * - * @param[in] cache gnix memory registration cache - * @param[in] entry a memory registration cache entry - * - * @return reference count for the registration - */ -static inline int __mr_cache_entry_get( - gnix_mr_cache_t *cache, - gnix_mr_cache_entry_t *entry) -{ - GNIX_TRACE(FI_LOG_MR, "\n"); - - GNIX_DEBUG(FI_LOG_MR, - "Up ref cnt on entry %p\n", entry); - return ofi_atomic_inc32(&entry->ref_cnt); -} - -/** - * 
Decrements the reference count on a memory registration cache entry - * - * @param[in] cache gnix memory registration cache - * @param[in] entry a memory registration cache entry - * - * @return return code from dereg callback - */ -static inline int __mr_cache_entry_put( - gnix_mr_cache_t *cache, - gnix_mr_cache_entry_t *entry) -{ - RbtIterator iter; - int rc; - gni_return_t grc = GNI_RC_SUCCESS; - RbtIterator found; - gnix_mr_cache_entry_t *parent = NULL; - struct dlist_entry *next; - - GNIX_TRACE(FI_LOG_MR, "\n"); - - if (cache->attr.lazy_deregistration) { - __clear_notifier_events(cache); - } - - GNIX_DEBUG(FI_LOG_MR, - "Decrease ref cnt on entry %p\n", entry); - if (ofi_atomic_dec32(&entry->ref_cnt) == 0) { - next = entry->siblings.next; - dlist_remove(&entry->children); - dlist_remove(&entry->siblings); - - /* if this is the last child to deallocate, - * release the reference to the parent - */ - if (next != &entry->siblings && dlist_empty(next)) { - parent = container_of(next, gnix_mr_cache_entry_t, - children); - grc = __mr_cache_entry_put(cache, parent); - if (OFI_UNLIKELY(grc != GNI_RC_SUCCESS)) { - GNIX_ERR(FI_LOG_MR, - "failed to release reference to parent, " - "parent=%p refs=%d\n", - parent, - ofi_atomic_get32(&parent->ref_cnt)); - } - } - - ofi_atomic_dec32(&cache->inuse.elements); - - if (!__entry_is_retired(entry)) { - iter = rbtFind(cache->inuse.rb_tree, &entry->key); - if (OFI_UNLIKELY(!iter)) { - GNIX_ERR(FI_LOG_MR, - "failed to find entry in the inuse cache\n"); - } else { - rc = rbtErase(cache->inuse.rb_tree, iter); - if (OFI_UNLIKELY(rc != RBT_STATUS_OK)) { - GNIX_ERR(FI_LOG_MR, - "failed to erase lru entry" - " from stale tree\n"); - } - } - } - - /* if we are doing lazy dereg and the entry - * isn't retired, put it in the stale cache - */ - if (cache->attr.lazy_deregistration && - !(__entry_is_retired(entry))) { - GNIX_DEBUG(FI_LOG_MR, - "moving key %llx:%llx to stale\n", - entry->key.address, entry->key.length); - - found = 
rbtFindLeftmost(cache->stale.rb_tree, - &entry->key, __find_overlapping_addr); - if (found) { - /* one or more stale entries would overlap with this - * new entry. We need to resolve these collisions by dropping - * registrations - */ - __resolve_stale_entry_collision(cache, found, entry); - } else { - /* if not found, ... */ - grc = __insert_entry_into_stale(cache, entry); - } - } else { - /* if retired or not using lazy registration */ - GNIX_DEBUG(FI_LOG_MR, - "destroying entry, key=%llx:%llx\n", - entry->key.address, entry->key.length); - - grc = __mr_cache_entry_destroy(cache, entry); - } - - if (OFI_UNLIKELY(grc != GNI_RC_SUCCESS)) { - GNIX_INFO(FI_LOG_MR, "dereg callback returned '%s'\n", - gni_err_str[grc]); - } - } - - - return grc; -} - -/** - * Checks the sanity of cache attributes - * - * @param[in] attr attributes structure to be checked - * @return FI_SUCCESS if the attributes are valid - * -FI_EINVAL if the attributes are invalid - */ -static inline int __check_mr_cache_attr_sanity(gnix_mr_cache_attr_t *attr) -{ - /* 0 < attr->hard_reg_limit < attr->soft_reg_limit */ - if (attr->hard_reg_limit > 0 && - attr->hard_reg_limit < attr->soft_reg_limit) - return -FI_EINVAL; - - /* callbacks must be provided */ - if (!attr->reg_callback || !attr->dereg_callback || - !attr->destruct_callback) - return -FI_EINVAL; - - /* valid otherwise */ - return FI_SUCCESS; -} - -int _gnix_mr_cache_init( - gnix_mr_cache_t **cache, - gnix_mr_cache_attr_t *attr) -{ - gnix_mr_cache_attr_t *cache_attr = &__default_mr_cache_attr; - gnix_mr_cache_t *cache_p; - int rc; - - GNIX_TRACE(FI_LOG_MR, "\n"); - - /* if the provider asks us to use their attributes, are they sane? 
*/ - if (attr) { - if (__check_mr_cache_attr_sanity(attr) != FI_SUCCESS) - return -FI_EINVAL; - - cache_attr = attr; - } - - cache_p = (gnix_mr_cache_t *) calloc(1, sizeof(*cache_p)); - if (!cache_p) - return -FI_ENOMEM; - - /* save the attribute values */ - memcpy(&cache_p->attr, cache_attr, sizeof(*cache_attr)); - - /* list is used because entries can be removed from the stale list if - * a user might call register on a stale entry's memory region - */ - dlist_init(&cache_p->lru_head); - - /* set up inuse tree */ - cache_p->inuse.rb_tree = rbtNew(__mr_cache_key_comp); - if (!cache_p->inuse.rb_tree) { - rc = -FI_ENOMEM; - goto err_inuse; - } - - /* if using lazy deregistration, set up stale tree */ - if (cache_p->attr.lazy_deregistration) { - cache_p->stale.rb_tree = rbtNew(__mr_cache_key_comp); - if (!cache_p->stale.rb_tree) { - rc = -FI_ENOMEM; - goto err_stale; - } - } - - /* initialize the element counts. If we are reinitializing a dead cache, - * destroy will have already set the element counts - */ - if (cache_p->state == GNIX_MRC_STATE_UNINITIALIZED) { - ofi_atomic_initialize32(&cache_p->inuse.elements, 0); - ofi_atomic_initialize32(&cache_p->stale.elements, 0); - } - - cache_p->hits = 0; - cache_p->misses = 0; - - cache_p->state = GNIX_MRC_STATE_READY; - - dlist_init(&cache_p->rq.list); - dlist_init(&cache_p->rq.entry); - ofi_spin_init(&cache_p->rq.lock); - - *cache = cache_p; - - return FI_SUCCESS; - -err_stale: - rbtDelete(cache_p->inuse.rb_tree); - cache_p->inuse.rb_tree = NULL; -err_inuse: - free(cache_p); - - return rc; -} - -int _gnix_mr_cache_destroy(gnix_mr_cache_t *cache) -{ - if (cache->state != GNIX_MRC_STATE_READY) - return -FI_EINVAL; - - GNIX_TRACE(FI_LOG_MR, "\n"); - - /* - * Remove all of the stale entries from the cache - */ - _gnix_mr_cache_flush(cache); - - /* - * if there are still elements in the cache after the flush, - * then someone forgot to deregister memory. - * We probably shouldn't destroy the cache at this point. 
- */ - if (ofi_atomic_get32(&cache->inuse.elements) != 0) { - return -FI_EAGAIN; - } - - /* destroy the tree */ - rbtDelete(cache->inuse.rb_tree); - cache->inuse.rb_tree = NULL; - - /* stale will been flushed already, so just destroy the tree */ - if (cache->attr.lazy_deregistration) { - rbtDelete(cache->stale.rb_tree); - cache->stale.rb_tree = NULL; - } - - cache->state = GNIX_MRC_STATE_DEAD; - free(cache); - - return FI_SUCCESS; -} - -int __mr_cache_flush(gnix_mr_cache_t *cache, int flush_count) { - int rc; - RbtIterator iter; - gnix_mr_cache_entry_t *entry; - int destroyed = 0; - - GNIX_TRACE(FI_LOG_MR, "\n"); - - GNIX_DEBUG(FI_LOG_MR, "starting flush on memory registration cache\n"); - - /* flushes are unnecessary for caches without lazy deregistration */ - if (!cache->attr.lazy_deregistration) - return FI_SUCCESS; - - while (!dlist_empty(&cache->lru_head)) { - - if (flush_count >= 0 && flush_count == destroyed) - break; - - rc = __mr_cache_lru_dequeue(cache, &entry); - if (OFI_UNLIKELY(rc != FI_SUCCESS)) { - GNIX_ERR(FI_LOG_MR, - "list may be corrupt, no entries from lru pop\n"); - break; - } - - GNIX_DEBUG(FI_LOG_MR, "attempting to flush key %llx:%llx\n", - entry->key.address, entry->key.length); - iter = rbtFind(cache->stale.rb_tree, &entry->key); - if (OFI_UNLIKELY(!iter)) { - GNIX_ERR(FI_LOG_MR, - "lru entries MUST be present in the cache," - " could not find entry (%p) in stale tree" - " %llx:%llx\n", - entry, entry->key.address, entry->key.length); - break; - } - - rc = rbtErase(cache->stale.rb_tree, iter); - if (OFI_UNLIKELY(rc != RBT_STATUS_OK)) { - GNIX_ERR(FI_LOG_MR, - "failed to erase lru entry from stale tree\n"); - break; - } - - __mr_cache_entry_destroy(cache, entry); - entry = NULL; - ++destroyed; - } - - GNIX_DEBUG(FI_LOG_MR, "flushed %i of %i entries from memory " - "registration cache\n", destroyed, - ofi_atomic_get32(&cache->stale.elements)); - - if (destroyed > 0) { - ofi_atomic_sub32(&cache->stale.elements, destroyed); - } - - return 
FI_SUCCESS; -} - -int _gnix_mr_cache_flush(gnix_mr_cache_t *cache) -{ - - if (OFI_UNLIKELY(cache->state != GNIX_MRC_STATE_READY)) - return -FI_EINVAL; - - __mr_cache_flush(cache, cache->attr.hard_reg_limit); - - return FI_SUCCESS; -} - -static int __mr_cache_search_inuse( - gnix_mr_cache_t *cache, - uint64_t address, - uint64_t length, - gnix_mr_cache_entry_t **entry, - gnix_mr_cache_key_t *key, - struct _gnix_fi_reg_context *fi_reg_context) -{ - int ret = FI_SUCCESS, cmp; - RbtIterator iter; - gnix_mr_cache_key_t *found_key, new_key; - gnix_mr_cache_entry_t *found_entry; - uint64_t new_end, found_end; - DLIST_HEAD(retired_entries); - - if (cache->attr.lazy_deregistration) { - __clear_notifier_events(cache); - } - - /* first we need to find an entry that overlaps with this one. - * care should be taken to find the left most entry that overlaps - * with this entry since the entry we are searching for might overlap - * many entries and the tree may be left or right balanced - * at the head - */ - iter = rbtFindLeftmost(cache->inuse.rb_tree, (void *) key, - __find_overlapping_addr); - if (!iter) { - GNIX_DEBUG(FI_LOG_MR, - "could not find key in inuse, key=%llx:%llx\n", - key->address, key->length); - return -FI_ENOENT; - } - - rbtKeyValue(cache->inuse.rb_tree, iter, (void **) &found_key, - (void **) &found_entry); - - GNIX_DEBUG(FI_LOG_MR, - "found a key that matches the search criteria, " - "found=%llx:%llx key=%llx:%llx\n", - found_key->address, found_key->length, - key->address, key->length); - - /* if the entry that we've found completely subsumes - * the requested entry, just return a reference to - * that existing registration - */ - if (__can_subsume(found_key, key)) { - GNIX_DEBUG(FI_LOG_MR, - "found an entry that subsumes the request, " - "existing=%llx:%llx key=%llx:%llx entry %p\n", - found_key->address, found_key->length, - key->address, key->length, found_entry); - *entry = found_entry; - __mr_cache_entry_get(cache, found_entry); - - cache->hits++; - 
return FI_SUCCESS; - } - - /* otherwise, iterate from the existing entry until we can no longer - * find an entry that overlaps with the new registration and remove - * and retire each of those entries. - */ - new_key.address = MIN(found_key->address, key->address); - new_end = key->address + key->length; - while (iter) { - rbtKeyValue(cache->inuse.rb_tree, iter, (void **) &found_key, - (void **) &found_entry); - - - cmp = __find_overlapping_addr(found_key, key); - GNIX_DEBUG(FI_LOG_MR, - "candidate: key=%llx:%llx result=%d\n", - found_key->address, - found_key->length, cmp); - if (cmp != 0) - break; - - /* compute new ending address */ - found_end = found_key->address + found_key->length; - - /* mark the entry as retired */ - GNIX_DEBUG(FI_LOG_MR, "retiring entry, key=%llx:%llx entry %p\n", - found_key->address, found_key->length, found_entry); - __entry_set_retired(found_entry); - dlist_insert_tail(&found_entry->siblings, &retired_entries); - - iter = rbtNext(cache->inuse.rb_tree, iter); - } - /* Since our new key might fully overlap every other entry in the tree, - * we need to take the maximum of the last entry and the new entry - */ - new_key.length = MAX(found_end, new_end) - new_key.address; - - - /* remove retired entries from tree */ - GNIX_DEBUG(FI_LOG_MR, "removing retired entries from inuse tree\n"); - __remove_sibling_entries_from_tree(cache, - &retired_entries, cache->inuse.rb_tree); - - /* create new registration */ - GNIX_DEBUG(FI_LOG_MR, - "creating a new merged registration, key=%llx:%llx\n", - new_key.address, new_key.length); - ret = __mr_cache_create_registration(cache, - new_key.address, new_key.length, - entry, &new_key, fi_reg_context); - if (ret) { - /* If we get here, one of two things have happened. - * Either some part of the new merged registration was - * unmapped (i.e., freed by user) or the merged - * registration failed for some other reason (probably - * GNI_RC_ERROR_RESOURCE). 
The first case is a user - * error (which they should have been warned about by - * the notifier), and the second case is always - * possible. Neither case is a problem. The entries - * above have been retired, and here we return the - * error */ - GNIX_DEBUG(FI_LOG_MR, - "failed to create merged registration, key=", - new_key.address, new_key.length); - return ret; - } - GNIX_DEBUG(FI_LOG_MR, - "created a new merged registration, key=%llx:%llx entry %p\n", - new_key.address, new_key.length, *entry); - - __entry_set_merged(*entry); - - /* move retired entries to the head of the new entry's child list */ - if (!dlist_empty(&retired_entries)) { - __attach_retired_entries_to_registration(cache, - &retired_entries, *entry); - } - - cache->misses++; - - return ret; -} - -static int __mr_cache_search_stale( - gnix_mr_cache_t *cache, - uint64_t address, - uint64_t length, - gnix_mr_cache_entry_t **entry, - gnix_mr_cache_key_t *key, - struct _gnix_fi_reg_context *fi_reg_context) -{ - int ret; - RbtStatus rc; - RbtIterator iter; - gnix_mr_cache_key_t *mr_key; - gnix_mr_cache_entry_t *mr_entry, *tmp; - - if (cache->attr.lazy_deregistration) { - __clear_notifier_events(cache); - } - - GNIX_DEBUG(FI_LOG_MR, "searching for stale entry, key=%llx:%llx\n", - key->address, key->length); - - iter = rbtFindLeftmost(cache->stale.rb_tree, (void *) key, - __find_overlapping_addr); - if (!iter) - return -FI_ENOENT; - - rbtKeyValue(cache->stale.rb_tree, iter, (void **) &mr_key, - (void **) &mr_entry); - - GNIX_DEBUG(FI_LOG_MR, - "found a matching entry, found=%llx:%llx key=%llx:%llx\n", - mr_key->address, mr_key->length, - key->address, key->length); - - - /* if the entry that we've found completely subsumes - * the requested entry, just return a reference to - * that existing registration - */ - if (__can_subsume(mr_key, key)) { - ret = __mr_cache_search_inuse(cache, address, length, - &tmp, mr_key, fi_reg_context); - if (ret == FI_SUCCESS) { - /* if we found an entry in the inuse tree - 
* in this manner, it means that there was - * an entry either overlapping or contiguous - * with the stale entry in the inuse tree, and - * a new entry has been made and saved to tmp. - * The old entry (mr_entry) should be destroyed - * now as it is no longer needed. - */ - GNIX_DEBUG(FI_LOG_MR, - "removing entry from stale key=%llx:%llx\n", - mr_key->address, mr_key->length); - - rc = rbtErase(cache->stale.rb_tree, iter); - if (OFI_UNLIKELY(rc != RBT_STATUS_OK)) { - GNIX_ERR(FI_LOG_MR, - "failed to erase entry from stale tree\n"); - } else { - if (__mr_cache_lru_remove(cache, mr_entry) - == FI_SUCCESS) { - ofi_atomic_dec32(&cache->stale.elements); - } else { - GNIX_WARN(FI_LOG_MR, "Failed to remove" - " entry (%p) from lru list\n", - mr_entry); - } - __mr_cache_entry_destroy(cache, mr_entry); - } - - *entry = tmp; - } else { - GNIX_DEBUG(FI_LOG_MR, - "removing entry (%p) from stale and" - " migrating to inuse, key=%llx:%llx\n", - mr_entry, mr_key->address, mr_key->length); - rc = rbtErase(cache->stale.rb_tree, iter); - if (OFI_UNLIKELY(rc != RBT_STATUS_OK)) { - GNIX_FATAL(FI_LOG_MR, - "failed to erase entry (%p) from " - " stale tree\n", mr_entry); - } - - if (__mr_cache_lru_remove(cache, mr_entry) - != FI_SUCCESS) { - GNIX_WARN(FI_LOG_MR, "Failed to remove" - " entry (%p) from lru list\n", - mr_entry); - } - - ofi_atomic_dec32(&cache->stale.elements); - - /* if we made it to this point, there weren't - * any entries in the inuse tree that would - * have overlapped with this entry - */ - rc = rbtInsert(cache->inuse.rb_tree, - &mr_entry->key, mr_entry); - if (OFI_UNLIKELY(rc != RBT_STATUS_OK)) { - GNIX_FATAL(FI_LOG_MR, - "failed to insert entry into" - "inuse tree\n"); - } - - ofi_atomic_set32(&mr_entry->ref_cnt, 1); - ofi_atomic_inc32(&cache->inuse.elements); - - *entry = mr_entry; - } - - return FI_SUCCESS; - } - - GNIX_DEBUG(FI_LOG_MR, - "could not use matching entry, " - "found=%llx:%llx\n", - mr_key->address, mr_key->length); - - return -FI_ENOENT; -} - 
-static int __mr_cache_create_registration( - gnix_mr_cache_t *cache, - uint64_t address, - uint64_t length, - gnix_mr_cache_entry_t **entry, - gnix_mr_cache_key_t *key, - struct _gnix_fi_reg_context *fi_reg_context) -{ - int rc; - gnix_mr_cache_entry_t *current_entry; - void *handle; - - /* if we made it here, we didn't find the entry at all */ - current_entry = calloc(1, sizeof(*current_entry) + cache->attr.elem_size); - if (!current_entry) - return -FI_ENOMEM; - - handle = (void *) current_entry->mr; - - dlist_init(¤t_entry->lru_entry); - dlist_init(¤t_entry->children); - dlist_init(¤t_entry->siblings); - - handle = cache->attr.reg_callback(handle, (void *) address, length, - fi_reg_context, cache->attr.reg_context); - if (OFI_UNLIKELY(!handle)) { - GNIX_INFO(FI_LOG_MR, - "failed to register memory with callback\n"); - goto err; - } - - __entry_reset_state(current_entry); - - /* set up the entry's key */ - current_entry->key.address = address; - current_entry->key.length = length; - - rc = __notifier_monitor(cache, current_entry); - if (OFI_UNLIKELY(rc != FI_SUCCESS)) { - GNIX_INFO(FI_LOG_MR, - "failed to monitor memory with notifier\n"); - goto err_dereg; - } - - rc = rbtInsert(cache->inuse.rb_tree, ¤t_entry->key, - current_entry); - if (OFI_UNLIKELY(rc != RBT_STATUS_OK)) { - GNIX_ERR(FI_LOG_MR, "failed to insert registration " - "into cache, ret=%i\n", rc); - - goto err_dereg; - } - - GNIX_DEBUG(FI_LOG_MR, "inserted key %llx:%llx into inuse %p\n", - current_entry->key.address, current_entry->key.length, current_entry); - - - ofi_atomic_inc32(&cache->inuse.elements); - ofi_atomic_initialize32(¤t_entry->ref_cnt, 1); - - *entry = current_entry; - - return FI_SUCCESS; - -err_dereg: - rc = cache->attr.dereg_callback(current_entry->mr, - cache->attr.dereg_context); - if (OFI_UNLIKELY(rc)) { - GNIX_INFO(FI_LOG_MR, - "failed to deregister memory with " - "callback, ret=%d\n", rc); - } -err: - free(current_entry); - return -FI_ENOMEM; -} - -/** - * Function to register 
memory with the cache - * - * @param[in] cache gnix memory registration cache pointer - * @param[in] mr gnix memory region descriptor pointer - * @param[in] address base address of the memory region to be - * registered - * @param[in] length length of the memory region to be registered - * @param[in] fi_reg_context fi_reg_mr API call parameters - * @param[in,out] mem_hndl gni memory handle pointer to written to and - * returned - */ -int _gnix_mr_cache_register( - gnix_mr_cache_t *cache, - uint64_t address, - uint64_t length, - struct _gnix_fi_reg_context *fi_reg_context, - void **handle) -{ - int ret; - gnix_mr_cache_key_t key = { - .address = address, - .length = length, - }; - gnix_mr_cache_entry_t *entry; - - GNIX_TRACE(FI_LOG_MR, "\n"); - - /* fastpath inuse */ - ret = __mr_cache_search_inuse(cache, address, length, - &entry, &key, fi_reg_context); - if (ret == FI_SUCCESS) - goto success; - - /* if we shouldn't introduce any new elements, return -FI_ENOSPC */ - if (OFI_UNLIKELY(cache->attr.hard_reg_limit > 0 && - (ofi_atomic_get32(&cache->inuse.elements) >= - cache->attr.hard_reg_limit))) { - ret = -FI_ENOSPC; - goto err; - } - - if (cache->attr.lazy_deregistration) { - __clear_notifier_events(cache); - - /* if lazy deregistration is in use, we can check the - * stale tree - */ - ret = __mr_cache_search_stale(cache, address, length, - &entry, &key, fi_reg_context); - if (ret == FI_SUCCESS) { - cache->hits++; - goto success; - } - } - - /* If the cache is full, then flush one of the stale entries to make - * room for the new entry. 
This works because we check above to see if - * the number of inuse entries exceeds the hard reg limit - */ - if ((ofi_atomic_get32(&cache->inuse.elements) + - ofi_atomic_get32(&cache->stale.elements)) == cache->attr.hard_reg_limit) - __mr_cache_flush(cache, 1); - - ret = __mr_cache_create_registration(cache, address, length, - &entry, &key, fi_reg_context); - if (ret) { - goto err; - } - - cache->misses++; - -success: - __entry_set_state(entry, GNIX_CES_INUSE); - *handle = (void *) entry->mr; - - return FI_SUCCESS; - -err: - return ret; -} - -/** - * Function to deregister memory in the cache - * - * @param[in] mr gnix memory registration descriptor pointer - * - * @return FI_SUCCESS on success - * -FI_ENOENT if there isn't an active memory registration - * associated with the mr - * return codes associated with dereg callback - */ -int _gnix_mr_cache_deregister( - gnix_mr_cache_t *cache, - void *handle) -{ - gnix_mr_cache_entry_t *entry; - gni_return_t grc; - - GNIX_TRACE(FI_LOG_MR, "\n"); - - /* check to see if we can find the entry so that we can drop the - * held reference - */ - - entry = container_of(handle, gnix_mr_cache_entry_t, mr); - if (__entry_get_state(entry) != GNIX_CES_INUSE) { - GNIX_WARN(FI_LOG_MR, "entry (%p) in incorrect state (%d)\n", - entry, entry->state); - return -FI_EINVAL; - } - - GNIX_DEBUG(FI_LOG_MR, "entry found, entry=%p refs=%d\n", - entry, ofi_atomic_get32(&entry->ref_cnt)); - - grc = __mr_cache_entry_put(cache, entry); - - /* Since we check this on each deregistration, the amount of elements - * over the limit should always be 1 - */ - if (ofi_atomic_get32(&cache->stale.elements) > cache->attr.hard_stale_limit) - __mr_cache_flush(cache, 1); - - return gnixu_to_fi_errno(grc); -} - diff --git a/prov/gni/src/gnix_mr_notifier.c b/prov/gni/src/gnix_mr_notifier.c deleted file mode 100644 index ccb950fcae5..00000000000 --- a/prov/gni/src/gnix_mr_notifier.c +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Copyright (c) 2016 Cray Inc. 
All rights reserved. - * Copyright (c) 2017 Los Alamos National Security, LLC. - * All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - - -#include "gnix_mr_notifier.h" - -#if HAVE_KDREG - -struct gnix_mr_notifier global_mr_not; - -static inline int -notifier_verify_stuff(struct gnix_mr_notifier *mrn) { - /* Can someone confirm that these values are POSIX so we can - * be less pedantic? 
*/ - if (mrn->fd == STDIN_FILENO || - mrn->fd == STDOUT_FILENO || - mrn->fd == STDERR_FILENO || - mrn->fd < 0) { - // Be quiet here - return -FI_EBADF; - } - - if (mrn->cntr == NULL) { - // Be quiet here - return -FI_ENODATA; - } - - return FI_SUCCESS; -} - -int -_gnix_notifier_init(void) -{ - global_mr_not.fd = -1; - global_mr_not.cntr = NULL; - ofi_spin_init(&global_mr_not.lock); - global_mr_not.ref_cnt = 0; - - return FI_SUCCESS; -} - -int -_gnix_notifier_open(struct gnix_mr_notifier **mrn) -{ - int ret = FI_SUCCESS; - int kdreg_fd, ret_errno; - kdreg_get_user_delta_args_t get_user_delta_args; - - ofi_spin_lock(&global_mr_not.lock); - - if (!global_mr_not.ref_cnt) { - kdreg_fd = open(KDREG_DEV, O_RDWR | O_NONBLOCK); - if (kdreg_fd < 0) { - ret_errno = errno; - GNIX_WARN(FI_LOG_MR, - "kdreg device open failed: %s\n", - strerror(ret_errno)); - /* Not all of these map to fi_errno values */ - ret = -ret_errno; - goto err_exit; - } - - memset(&get_user_delta_args, 0, sizeof(get_user_delta_args)); - if (ioctl(kdreg_fd, KDREG_IOC_GET_USER_DELTA, - &get_user_delta_args) < 0) { - ret_errno = errno; - GNIX_WARN(FI_LOG_MR, - "kdreg get_user_delta failed: %s\n", - strerror(ret_errno)); - close(kdreg_fd); - /* Not all of these map to fi_errno values */ - ret = -ret_errno; - goto err_exit; - } - - if (get_user_delta_args.user_delta == NULL) { - GNIX_WARN(FI_LOG_MR, "kdreg get_user_delta is NULL\n"); - ret = -FI_ENODATA; - goto err_exit; - } - - global_mr_not.fd = kdreg_fd; - global_mr_not.cntr = (kdreg_user_delta_t *) - get_user_delta_args.user_delta; - } - - global_mr_not.ref_cnt++; - *mrn = &global_mr_not; - -err_exit: - ofi_spin_unlock(&global_mr_not.lock); - - return ret; -} - -int -_gnix_notifier_close(struct gnix_mr_notifier *mrn) -{ - int ret = FI_SUCCESS; - int ret_errno; - - ofi_spin_lock(&mrn->lock); - - ret = notifier_verify_stuff(mrn); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_MR, "Invalid MR notifier\n"); - goto err_exit; - } - - assert(mrn->ref_cnt > 0); - 
if (--mrn->ref_cnt) { - goto err_exit; - } - - if (close(mrn->fd) != 0) { - ret_errno = errno; - GNIX_WARN(FI_LOG_MR, "error closing kdreg device: %s\n", - strerror(ret_errno)); - /* Not all of these map to fi_errno values */ - ret = -ret_errno; - goto err_exit; - } - - mrn->fd = -1; - mrn->cntr = NULL; -err_exit: - ofi_spin_unlock(&mrn->lock); - - return ret; -} - -static inline int -kdreg_write(struct gnix_mr_notifier *mrn, void *buf, size_t len) -{ - int ret; - - ret = write(mrn->fd, buf, len); - if ((ret < 0) || (ret != len)) { - // Not all of these map to fi_errno values - ret = -errno; - GNIX_WARN(FI_LOG_MR, "kdreg_write failed: %s\n", - strerror(errno)); - return ret; - } - - return FI_SUCCESS; -} - -int -_gnix_notifier_monitor(struct gnix_mr_notifier *mrn, - void *addr, uint64_t len, uint64_t cookie) -{ - int ret; - struct registration_monitor rm; - - ofi_spin_lock(&mrn->lock); - - ret = notifier_verify_stuff(mrn); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_MR, "Invalid MR notifier\n"); - goto err_exit; - } - - if (ret == 0) { - GNIX_DEBUG(FI_LOG_MR, "monitoring %p (len=%lu) cookie=%lu\n", - addr, len, cookie); - - memset(&rm, 0, sizeof(rm)); - rm.type = REGISTRATION_MONITOR; - rm.u.mon.addr = (uint64_t) addr; - rm.u.mon.len = len; - rm.u.mon.user_cookie = cookie; - - ret = kdreg_write(mrn, &rm, sizeof(rm)); - } - -err_exit: - ofi_spin_unlock(&mrn->lock); - - return ret; -} - -int -_gnix_notifier_unmonitor(struct gnix_mr_notifier *mrn, uint64_t cookie) -{ - int ret; - struct registration_monitor rm; - - ofi_spin_lock(&mrn->lock); - - ret = notifier_verify_stuff(mrn); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_MR, "Invalid MR notifier\n"); - goto err_exit; - } - - GNIX_DEBUG(FI_LOG_MR, "unmonitoring cookie=%lu\n", cookie); - - memset(&rm, 0, sizeof(rm)); - - rm.type = REGISTRATION_UNMONITOR; - rm.u.unmon.user_cookie = cookie; - - ret = kdreg_write(mrn, &rm, sizeof(rm)); - -err_exit: - ofi_spin_unlock(&mrn->lock); - - return ret; -} - -int 
-_gnix_notifier_get_event(struct gnix_mr_notifier *mrn, void* buf, size_t len) -{ - int ret, ret_errno; - - if ((mrn == NULL) || (buf == NULL) || (len <= 0)) { - GNIX_WARN(FI_LOG_MR, - "Invalid argument to _gnix_notifier_get_event\n"); - return -FI_EINVAL; - } - - ofi_spin_lock(&mrn->lock); - - if (*(mrn->cntr) > 0) { - GNIX_DEBUG(FI_LOG_MR, "reading kdreg event\n"); - ret = read(mrn->fd, buf, len); - if (ret < 0) { - ret_errno = errno; - if (ret_errno != EAGAIN) { - GNIX_WARN(FI_LOG_MR, - "kdreg event read failed: %s\n", - strerror(ret_errno)); - } - /* Not all of these map to fi_errno values */ - ret = -ret_errno; - } - } else { - GNIX_DEBUG(FI_LOG_MR, "nothing to read from kdreg :(\n"); - ret = -FI_EAGAIN; - } - - ofi_spin_unlock(&mrn->lock); - - return ret; -} - -#endif /* HAVE_KDREG */ diff --git a/prov/gni/src/gnix_msg.c b/prov/gni/src/gnix_msg.c deleted file mode 100644 index 4b24d8702fb..00000000000 --- a/prov/gni/src/gnix_msg.c +++ /dev/null @@ -1,3777 +0,0 @@ -/* - * Copyright (c) 2015-2019 Cray Inc. All rights reserved. - * Copyright (c) 2015-2018 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2019-2020 Triad National Security, LLC. - * All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include - -#include "gnix.h" -#include "gnix_cm_nic.h" -#include "gnix_nic.h" -#include "gnix_util.h" -#include "gnix_ep.h" -#include "gnix_hashtable.h" -#include "gnix_vc.h" -#include "gnix_cntr.h" -#include "gnix_av.h" -#include "gnix_rma.h" -#include "gnix_atomic.h" -#include "gnix_cm.h" - -#define INVALID_PEEK_FORMAT(fmt) \ - ((fmt) == FI_CQ_FORMAT_CONTEXT || (fmt) == FI_CQ_FORMAT_MSG) - -#define GNIX_TAGGED_PCD_COMPLETION_FLAGS (FI_MSG | FI_RECV | FI_TAGGED) - -smsg_completer_fn_t gnix_ep_smsg_completers[]; - -/******************************************************************************* - * helper functions - ******************************************************************************/ -/** - * This function return the receiver's address given that we are at offset cur_len - * within the send_iov. - * - * @param req the fabric request - * @param cur_len the current offset at which we found an unaligned head or tail - * within the sender's iov entries. 
- */ -static inline uint8_t *__gnix_msg_iov_unaligned_recv_addr(struct gnix_fab_req *req, - size_t cur_len) -{ - int i; - - /* Find the recv_address for the given head data */ - for (i = 0; i < req->msg.recv_iov_cnt; i++) { - if ((int64_t) cur_len - (int64_t) req->msg.recv_info[i].recv_len < 0) { - return (uint8_t *) (req->msg.recv_info[i].recv_addr + cur_len); - } else { - cur_len -= req->msg.recv_info[i].recv_len; - } - } - - return NULL; -} - -static inline void __gnix_msg_send_alignment(struct gnix_fab_req *req) -{ - int i; - - /* Ensure all head and tail fields are initialized properly */ - for (i = 0; i < req->msg.send_iov_cnt; i++) { - /* Check head for four byte alignment, if not aligned store - * the unaligned bytes (<=3) in head */ - if (req->msg.send_info[i].send_addr & GNI_READ_ALIGN_MASK) { - req->msg.send_info[i].head = - *(uint32_t *)(req->msg.send_info[i].send_addr & - ~GNI_READ_ALIGN_MASK); - - GNIX_DEBUG(FI_LOG_EP_DATA, - "Sending %d unaligned head bytes (%x)\n", - GNI_READ_ALIGN - - (req->msg.send_info[i].send_addr & - GNI_READ_ALIGN_MASK), - req->msg.send_info[i].head); - } - - /* Check tail for four byte alignment. 
*/ - if ((req->msg.send_info[i].send_addr + - req->msg.send_info[i].send_len) & - GNI_READ_ALIGN_MASK) { - req->msg.send_info[i].tail = - *(uint32_t *)((req->msg.send_info[i].send_addr + - req->msg.send_info[i].send_len) & - ~GNI_READ_ALIGN_MASK); - - GNIX_DEBUG(FI_LOG_EP_DATA, - "Sending %d unaligned tail bytes (%x)\n", - (req->msg.send_info[i].send_addr + - req->msg.send_info[i].send_len) & - GNI_READ_ALIGN_MASK, - req->msg.send_info[i].tail); - } - } -} -static inline void __gnix_msg_free_rma_txd(struct gnix_fab_req *req, - struct gnix_tx_descriptor *txd) -{ - gni_ct_get_post_descriptor_t *cur_ct, *nxt_ct; - - if (txd->gni_desc.type & GNI_POST_FMA_GET) { - for (cur_ct = txd->gni_desc.next_descr; - cur_ct != NULL; - cur_ct = nxt_ct) { - nxt_ct = cur_ct->next_descr; - free(cur_ct); - } - } - _gnix_nic_tx_free(req->gnix_ep->nic, txd); -} - -static inline void __gnix_msg_free_iov_txds(struct gnix_fab_req *req, - size_t txd_cnt) -{ - int i; - - for (i = 0; i < txd_cnt; i++) { - __gnix_msg_free_rma_txd(req, req->iov_txds[i]); - } -} - -static void __gnix_msg_unpack_data_into_iov(const struct recv_info_t *dest, - size_t dest_cnt, - uint64_t src, size_t src_len) -{ - static int i; - static size_t cum_len; - static size_t offset; - size_t bwrite; - - GNIX_DEBUG(FI_LOG_EP_DATA, "Unpacking data for recvv count (%lu)" - " src = %p, dest = %p," - " i = %d, cum_len = %lu, offset = %lu\n", - dest_cnt, src, dest, i, cum_len, offset); - - /* Pull out each iov len and base field into the iov */ - while (i < dest_cnt) { - bwrite = dest[i].recv_len - offset < src_len - cum_len ? 
- dest[i].recv_len - offset : src_len - cum_len; - - memcpy((uint8_t *) dest[i].recv_addr + offset, - (void *) (src + cum_len), bwrite); - - cum_len += bwrite; - - /* Src exhausted */ - if (cum_len == src_len) { - offset = dest[i].recv_len - bwrite; - cum_len = 0; - - /* dest exhausted too, reset the counters */ - if (!offset) { - i = cum_len = 0; - } - - return; - } - - i++; - } -} - -static void __gnix_msg_pack_data_from_iov(uint64_t dest, size_t dest_len, - const struct iovec *src, - size_t src_cnt) -{ - int i; - size_t cum_len = 0; - - GNIX_DEBUG(FI_LOG_EP_CTRL, "Packing data for sendv count (%d)\n", - src_cnt); - - /* Pull out each iov len and base field into the dest buffer */ - for (i = 0; i < src_cnt && cum_len < dest_len; i++) { - memcpy((void *) (dest + cum_len), src[i].iov_base, - src[i].iov_len < dest_len - cum_len ? - src[i].iov_len : dest_len - cum_len); - cum_len += src[i].iov_len; - } -} - - -/* - * TODO: this can eventually be used in more places in this - * but the msg component of the fab request structure would - * need to be refactored. 
- */ -static inline int __gnix_msg_register_iov(struct gnix_fid_ep *ep, - const struct iovec *iov, - size_t count, - struct gnix_fid_mem_desc **md_vec) -{ - int i, ret = FI_SUCCESS; - struct fid_mr *auto_mr = NULL; - struct gnix_fid_domain *dom = ep->domain; - - for (i = 0; i < count; i++) { - - ret = _gnix_mr_reg(&dom->domain_fid.fid, - iov[i].iov_base, - iov[i].iov_len, - FI_READ | FI_WRITE, 0, - 0, 0, &auto_mr, NULL, - ep->auth_key, GNIX_PROV_REG); - - if (ret != FI_SUCCESS) { - GNIX_DEBUG(FI_LOG_EP_DATA, - "Failed to " - "auto-register" - " local buffer: %s\n", - fi_strerror(-ret)); - - for (i--; i >= 0; i--) { - ret = fi_close(&md_vec[i]->mr_fid.fid); - if (ret != FI_SUCCESS) { - GNIX_FATAL(FI_LOG_DOMAIN, - "failed to release auto-registered region, " - "rc=%d\n", ret); - } - } - - goto fn_exit; - } - - md_vec[i] = container_of((void *) auto_mr, - struct gnix_fid_mem_desc, - mr_fid); - } - -fn_exit: - return ret; -} - -static void __gnix_msg_copy_data_to_recv_addr(struct gnix_fab_req *req, - void *data) -{ - size_t len; - - GNIX_DBG_TRACE(FI_LOG_EP_DATA, "\n"); - - len = MIN(req->msg.cum_send_len, req->msg.cum_recv_len); - - switch(req->type) { - case GNIX_FAB_RQ_RECV: - memcpy((void *)req->msg.recv_info[0].recv_addr, data, len); - break; - - case GNIX_FAB_RQ_RECVV: - case GNIX_FAB_RQ_TRECVV: - __gnix_msg_unpack_data_into_iov(req->msg.recv_info, - req->msg.recv_iov_cnt, - (uint64_t) data, len); - break; - - default: - GNIX_FATAL(FI_LOG_EP_DATA, "Invalid request type: %d", req->type); - return; - } -} - -static void __gnix_msg_queues(struct gnix_fid_ep *ep, - int tagged, - struct gnix_tag_storage **posted_queue, - struct gnix_tag_storage **unexp_queue) -{ - if (tagged) { - *posted_queue = &ep->tagged_posted_recv_queue; - *unexp_queue = &ep->tagged_unexp_recv_queue; - } else { - *posted_queue = &ep->posted_recv_queue; - *unexp_queue = &ep->unexp_recv_queue; - } -} - -static void __gnix_msg_send_fr_complete(struct gnix_fab_req *req, - struct gnix_tx_descriptor 
*txd) -{ - struct gnix_vc *vc = req->vc; - - ofi_atomic_dec32(&vc->outstanding_tx_reqs); - _gnix_nic_tx_free(req->gnix_ep->nic, txd); - - _gnix_fr_free(req->gnix_ep, req); - - /* Schedule VC TX queue in case the VC is 'fenced'. */ - _gnix_vc_tx_schedule(vc); -} - -static int __recv_err(struct gnix_fid_ep *ep, void *context, uint64_t flags, - size_t len, void *addr, uint64_t data, uint64_t tag, - size_t olen, int err, int prov_errno, void *err_data, - size_t err_data_size) -{ - int rc; - - if (ep->recv_cq) { - rc = _gnix_cq_add_error(ep->recv_cq, context, flags, len, - addr, data, tag, olen, err, - prov_errno, err_data, err_data_size); - if (rc != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cq_add_error returned %d\n", - rc); - } - } - - if (ep->recv_cntr) { - rc = _gnix_cntr_inc_err(ep->recv_cntr); - if (rc != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cntr_inc_err() failed: %d\n", - rc); - } - - return FI_SUCCESS; -} - -static int __gnix_msg_recv_err(struct gnix_fid_ep *ep, struct gnix_fab_req *req) -{ - uint64_t flags = FI_RECV | FI_MSG; - - flags |= req->msg.send_flags & FI_TAGGED; - - return __recv_err(ep, req->user_context, flags, req->msg.cum_recv_len, - (void *)req->msg.recv_info[0].recv_addr, req->msg.imm, - req->msg.tag, 0, FI_ECANCELED, - gnixu_to_fi_errno(GNI_RC_TRANSACTION_ERROR), NULL, 0); -} - -static int __recv_completion( - struct gnix_fid_ep *ep, - struct gnix_fab_req *req, - uint64_t flags, - size_t len, - void *addr) -{ - ssize_t rc; - - if ((req->msg.recv_flags & FI_COMPLETION) && ep->recv_cq) { - rc = _gnix_cq_add_event(ep->recv_cq, - ep, - req->user_context, - flags, - len, - addr, - req->msg.imm, - req->msg.tag, - FI_ADDR_NOTAVAIL); - if (rc != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cq_add_event returned %d\n", - rc); - } - } - - if (ep->recv_cntr) { - rc = _gnix_cntr_inc(ep->recv_cntr); - if (rc != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cntr_inc() failed: %d\n", - rc); - } - - return FI_SUCCESS; -} - 
-static int __recv_completion_src( - struct gnix_fid_ep *ep, - struct gnix_fab_req *req, - uint64_t flags, - size_t len, - void *addr, - fi_addr_t src_addr) -{ - ssize_t rc; - char *buffer; - size_t buf_len; - - GNIX_DBG_TRACE(FI_LOG_EP_DATA, "\n"); - - if ((req->msg.recv_flags & FI_COMPLETION) && ep->recv_cq) { - if ((src_addr == FI_ADDR_NOTAVAIL) && - (ep->caps & FI_SOURCE_ERR) != 0) { - if (ep->domain->addr_format == FI_ADDR_STR) { - buffer = malloc(GNIX_FI_ADDR_STR_LEN); - rc = _gnix_ep_name_to_str(req->vc->gnix_ep_name, (char **)&buffer); - assert(rc == FI_SUCCESS); - buf_len = GNIX_FI_ADDR_STR_LEN; - } else { - buffer = malloc(GNIX_CQ_MAX_ERR_DATA_SIZE); - assert(buffer != NULL); - memcpy(buffer, req->vc->gnix_ep_name, - sizeof(struct gnix_ep_name)); - buf_len = sizeof(struct gnix_ep_name); - } - rc = _gnix_cq_add_error(ep->recv_cq, - req->user_context, - flags, - len, - addr, - req->msg.imm, - req->msg.tag, - 0, - FI_EADDRNOTAVAIL, - 0, - buffer, - buf_len); - } else { - rc = _gnix_cq_add_event(ep->recv_cq, - ep, - req->user_context, - flags, - len, - addr, - req->msg.imm, - req->msg.tag, - src_addr); - } - if (rc != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cq_add_event returned %d\n", - rc); - } - } - - if (ep->recv_cntr) { - rc = _gnix_cntr_inc(ep->recv_cntr); - if (rc != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cntr_inc() failed: %d\n", - rc); - } - - return FI_SUCCESS; -} - -/* - * GNI provider generates a separate CQE to indicate - * its releasing a FI_MULTI_RECV receive buffer back to - * the application. 
- */ -static void __gnix_msg_mrecv_completion(void *obj) -{ - int ret; - struct gnix_fab_req *req = (struct gnix_fab_req *)obj; - - if (req->msg.recv_flags & FI_LOCAL_MR) { - GNIX_DEBUG(FI_LOG_EP_DATA, "freeing auto-reg MR: %p\n", - req->msg.recv_md[0]); - ret = fi_close(&req->msg.recv_md[0]->mr_fid.fid); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_DOMAIN, - "failed to close auto-registered region, " - "ret %s\n", fi_strerror(-ret)); - } - } - - ret = __recv_completion(req->gnix_ep, - req, - FI_MULTI_RECV, - 0UL, - NULL); - if (ret != FI_SUCCESS) { - GNIX_DEBUG(FI_LOG_EP_DATA, - "__recv_completion failed for multi recv" - " complete: %d\n", - ret); - } - - _gnix_fr_free(req->gnix_ep, req); -} - -static inline int __gnix_msg_recv_completion(struct gnix_fid_ep *ep, - struct gnix_fab_req *req) -{ - int ret; - uint64_t flags = FI_RECV | FI_MSG; - size_t len; - void *recv_addr = NULL; - fi_addr_t src_addr; - - flags |= req->msg.send_flags & (FI_TAGGED | FI_REMOTE_CQ_DATA); - flags |= req->msg.recv_flags & (FI_PEEK | FI_CLAIM | FI_DISCARD); - - len = MIN(req->msg.cum_send_len, req->msg.cum_recv_len); - - - if (OFI_UNLIKELY(req->msg.recv_flags & FI_MULTI_RECV)) - recv_addr = (void *)req->msg.recv_info[0].recv_addr; - - /* - * Deal with possible truncation - */ - if (OFI_LIKELY(req->msg.cum_send_len <= req->msg.cum_recv_len)) { - - if (OFI_LIKELY(!(ep->caps & FI_SOURCE))) { - ret = __recv_completion(ep, - req, - flags, - len, - recv_addr); - } else { - src_addr = _gnix_vc_peer_fi_addr(req->vc); - ret = __recv_completion_src(ep, - req, - flags, - len, - recv_addr, - src_addr); - } - - } else { - - ret = __recv_err(ep, - req->user_context, - flags, - len, - recv_addr, - req->msg.imm, - req->msg.tag, - req->msg.cum_send_len - req->msg.cum_recv_len, - FI_ETRUNC, - FI_ETRUNC, NULL, 0); - }; - - /* - * if i'm child of a FI_MULTI_RECV request, decrement - * ref count - */ - - if (req->msg.parent != NULL) { - _gnix_ref_put(req->msg.parent); - } - return ret; -} - -static int 
__gnix_msg_send_err(struct gnix_fid_ep *ep, struct gnix_fab_req *req) -{ - uint64_t flags = FI_SEND | FI_MSG; - int rc; - - flags |= req->msg.send_flags & FI_TAGGED; - - if (ep->send_cq) { - rc = _gnix_cq_add_error(ep->send_cq, req->user_context, - flags, 0, 0, 0, 0, 0, FI_ECANCELED, - gnixu_to_fi_errno(GNI_RC_TRANSACTION_ERROR), - NULL, 0); - if (rc != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cq_add_error() returned %d\n", - rc); - } - } - - if (ep->send_cntr) { - rc = _gnix_cntr_inc_err(ep->send_cntr); - if (rc != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cntr_inc() failed: %d\n", - rc); - } - - return FI_SUCCESS; -} - -static int __gnix_msg_send_completion(struct gnix_fid_ep *ep, - struct gnix_fab_req *req) -{ - uint64_t flags = FI_SEND | FI_MSG; - int rc; - - flags |= req->msg.send_flags & FI_TAGGED; - - GNIX_DEBUG(FI_LOG_EP_DATA, "send_cq = %p\n", ep->send_cq); - - if ((req->msg.send_flags & FI_COMPLETION) && ep->send_cq) { - rc = _gnix_cq_add_event(ep->send_cq, ep, req->user_context, - flags, 0, 0, 0, 0, FI_ADDR_NOTAVAIL); - if (rc != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cq_add_event returned %d\n", - rc); - } - } - - if (ep->send_cntr) { - rc = _gnix_cntr_inc(ep->send_cntr); - if (rc != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cntr_inc() failed: %d\n", - rc); - } - - return FI_SUCCESS; -} - -static int __gnix_rndzv_req_send_fin(void *arg) -{ - struct gnix_fab_req *req = (struct gnix_fab_req *)arg; - struct gnix_nic *nic; - struct gnix_fid_ep *ep; - struct gnix_tx_descriptor *txd; - gni_return_t status; - int rc; - - GNIX_DBG_TRACE(FI_LOG_EP_DATA, "\n"); - - ep = req->gnix_ep; - assert(ep != NULL); - - nic = ep->nic; - assert(nic != NULL); - - rc = _gnix_nic_tx_alloc(nic, &txd); - if (rc) { - GNIX_DEBUG(FI_LOG_EP_DATA, - "_gnix_nic_tx_alloc() failed: %d\n", - rc); - return -FI_ENOSPC; - } - - txd->rndzv_fin_hdr.req_addr = req->msg.rma_id; - txd->rndzv_fin_hdr.status = req->msg.status; - - GNIX_DEBUG(FI_LOG_EP_DATA, 
"req_addr = %p\n", - (void *) txd->rndzv_fin_hdr.req_addr); - - txd->req = req; - - txd->completer_fn = gnix_ep_smsg_completers[GNIX_SMSG_T_RNDZV_FIN]; - - COND_ACQUIRE(nic->requires_lock, &nic->lock); - status = GNI_SmsgSendWTag(req->vc->gni_ep, - &txd->rndzv_fin_hdr, sizeof(txd->rndzv_fin_hdr), - NULL, 0, txd->id, GNIX_SMSG_T_RNDZV_FIN); - if ((status == GNI_RC_SUCCESS) && - (ep->domain->data_progress == FI_PROGRESS_AUTO)) - _gnix_rma_post_irq(req->vc); - - COND_RELEASE(nic->requires_lock, &nic->lock); - - if (status == GNI_RC_NOT_DONE) { - _gnix_nic_tx_free(nic, txd); - GNIX_DEBUG(FI_LOG_EP_DATA, - "GNI_SmsgSendWTag returned %s\n", - gni_err_str[status]); - } else if (status != GNI_RC_SUCCESS) { - _gnix_nic_tx_free(nic, txd); - GNIX_WARN(FI_LOG_EP_DATA, - "GNI_SmsgSendWTag returned %s\n", - gni_err_str[status]); - } - - GNIX_DEBUG(FI_LOG_EP_DATA, "Initiated RNDZV_FIN, req: %p\n", req); - - return gnixu_to_fi_errno(status); -} - -static void __gnix_msg_copy_unaligned_get_data(struct gnix_fab_req *req) -{ - int head_off, head_len, tail_len; - void *addr; - - head_off = req->msg.send_info[0].send_addr & GNI_READ_ALIGN_MASK; - head_len = head_off ? 
GNI_READ_ALIGN - head_off : 0; - tail_len = (req->msg.send_info[0].send_addr + req->msg.send_info[0].send_len) & - GNI_READ_ALIGN_MASK; - - if (head_off) { - addr = (uint8_t *)&req->msg.send_info[0].head + head_off; - - GNIX_DEBUG(FI_LOG_EP_DATA, - "writing %d bytes to head (%p, %hxx)\n", - head_len, req->msg.recv_info[0].recv_addr, - *(uint32_t *)addr); - memcpy((void *)req->msg.recv_info[0].recv_addr, addr, head_len); - } - - if (tail_len) { - addr = (void *)(req->msg.recv_info[0].recv_addr + - req->msg.send_info[0].send_len - - tail_len); - - GNIX_DEBUG(FI_LOG_EP_DATA, - "writing %d bytes to tail (%p, %hxx)\n", - tail_len, addr, req->msg.send_info[0].tail); - memcpy((void *)addr, &req->msg.send_info[0].tail, tail_len); - } -} - -static inline void __gnix_msg_iov_cpy_unaligned_head_tail_data(struct gnix_fab_req *req) -{ - int i, head_off, head_len, tail_len; - void *addr, *recv_addr; - size_t cur_len = 0; - -#if ENABLE_DEBUG - for (i = 0; i < req->msg.send_iov_cnt; i++) { - GNIX_DEBUG(FI_LOG_EP_DATA, "req->msg.send_info[%d].head = 0x%x\n" - "req->msg.send_info[%d].tail = 0x%x\n", i, - req->msg.send_info[i].head, i, req->msg.send_info[i].tail); - } -#endif - - /* Copy out the original head/tail data sent across in the control message */ - for (i = 0; i < req->msg.send_iov_cnt; i++) { - head_off = req->msg.send_info[i].send_addr & GNI_READ_ALIGN_MASK; - head_len = head_off ? 
GNI_READ_ALIGN - head_off : 0; - - if (head_off) { - recv_addr = __gnix_msg_iov_unaligned_recv_addr(req, cur_len); - - if (recv_addr) { - addr = (uint8_t *)&req->msg.send_info[i].head + head_off; - - GNIX_DEBUG(FI_LOG_EP_DATA, - "writing %d bytes to head (%p, 0x%x) for i = %d\n", - head_len, recv_addr, - *(uint32_t *)addr, i); - memcpy(recv_addr, addr, head_len); - } - } - - tail_len = (req->msg.send_info[i].send_addr + req->msg.send_info[i].send_len) & - GNI_READ_ALIGN_MASK; - - if (tail_len) { - recv_addr = __gnix_msg_iov_unaligned_recv_addr(req, - cur_len + - req->msg.send_info[i].send_len - tail_len); - - if (recv_addr) { - GNIX_DEBUG(FI_LOG_EP_DATA, - "writing %d bytes to tail (%p, 0x%x)\n", - tail_len, recv_addr, req->msg.send_info[i].tail); - memcpy((void *)recv_addr, &req->msg.send_info[i].tail, tail_len); - } - } - - cur_len += req->msg.send_info[i].send_len; - } - -#if ENABLE_DEBUG - for (i = 0; i < req->msg.recv_iov_cnt; i++) { - GNIX_DEBUG(FI_LOG_EP_DATA, "req->msg.recv_info[%d].tail_len = %d\n" - "req->msg.recv_info[%d].head_len = %d\n", - i, req->msg.recv_info[i].tail_len, - i, req->msg.recv_info[i].head_len); - } -#endif - - /* Copy out the "middle" head and tail data found when building the iov request */ - for (i = 0; i < req->msg.recv_iov_cnt; i++) { - if (req->msg.recv_info[i].tail_len) { - addr = (void *) ((uint8_t *) req->int_tx_buf + - (GNI_READ_ALIGN * i)); - - recv_addr = (void *) (req->msg.recv_info[i].recv_addr + - req->msg.recv_info[i].recv_len - - req->msg.recv_info[i].tail_len); - - GNIX_DEBUG(FI_LOG_EP_DATA, - "writing %d bytes to mid-tail (%p, 0x%x)\n", - req->msg.recv_info[i].tail_len, - recv_addr, *(uint32_t *)addr); - - memcpy(recv_addr, addr, req->msg.recv_info[i].tail_len); - } - - if (req->msg.recv_info[i].head_len) { - /* Since we move the remote addr backwards to a four - * byte address and read four bytes, ensure that - * what we read from the int_tx_buf is just the actual - * head data we are interested in. 
- */ - addr = (void *) ((uint8_t *) req->int_tx_buf + - (GNI_READ_ALIGN * (i + GNIX_MAX_MSG_IOV_LIMIT)) + - GNI_READ_ALIGN - req->msg.recv_info[i].head_len); - recv_addr = (void *) req->msg.recv_info[i].recv_addr; - - GNIX_DEBUG(FI_LOG_EP_DATA, - "writing %d bytes to mid-head (%p, 0x%x)\n", - req->msg.recv_info[i].head_len, - recv_addr, *(uint32_t *)addr); - - memcpy(recv_addr, addr, req->msg.recv_info[i].head_len); - } - } -} - -static int __gnix_rndzv_req_complete(void *arg, gni_return_t tx_status) -{ - struct gnix_tx_descriptor *txd = (struct gnix_tx_descriptor *)arg; - struct gnix_fab_req *req = txd->req; - int ret, rc; - - if (req->msg.recv_flags & GNIX_MSG_GET_TAIL) { - /* There are two TXDs involved with this request, an RDMA - * transfer to move the middle block and an FMA transfer to - * move unaligned tail data. If this is the FMA TXD, store the - * unaligned bytes. Bytes are copied from the request to the - * user buffer once both TXDs arrive. */ - if (txd->gni_desc.type == GNI_POST_FMA_GET) - req->msg.send_info[0].tail = - *(uint32_t *)req->int_tx_buf; - - /* Remember any failure. Retransmit both TXDs once both are - * complete. 
*/ - req->msg.status |= tx_status; - - if (ofi_atomic_dec32(&req->msg.outstanding_txds) == 1) { - _gnix_nic_tx_free(req->gnix_ep->nic, txd); - GNIX_DEBUG(FI_LOG_EP_DATA, - "Received first RDMA chain TXD, req: %p\n", - req); - return FI_SUCCESS; - } - - tx_status = req->msg.status; - } - - _gnix_nic_tx_free(req->gnix_ep->nic, txd); - - if (tx_status != GNI_RC_SUCCESS) { - if (GNIX_EP_RDM(req->gnix_ep->type) && - _gnix_req_replayable(req)) { - req->tx_failures++; - GNIX_DEBUG(FI_LOG_EP_DATA, - "Requeueing failed request: %p\n", req); - return _gnix_vc_requeue_work_req(req); - } - - if (!GNIX_EP_DGM(req->gnix_ep->type)) { - GNIX_WARN(FI_LOG_EP_DATA, - "Dropping failed request: %p\n", req); - ret = __gnix_msg_recv_err(req->gnix_ep, - req); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_msg_send_err() failed: %s\n", - fi_strerror(-ret)); - __gnix_msg_send_fr_complete(req, txd); - return ret; - } - - req->msg.status = tx_status; - req->work_fn = __gnix_rndzv_req_send_fin; - return _gnix_vc_requeue_work_req(req); - } - - __gnix_msg_copy_unaligned_get_data(req); - - GNIX_DEBUG(FI_LOG_EP_DATA, "Completed RNDZV GET, req: %p\n", req); - - if ((req->msg.recv_flags & FI_LOCAL_MR) && - (req->msg.parent == NULL)) { - GNIX_DEBUG(FI_LOG_EP_DATA, "freeing auto-reg MR: %p\n", - req->msg.recv_md[0]); - rc = fi_close(&req->msg.recv_md[0]->mr_fid.fid); - if (rc != FI_SUCCESS) { - GNIX_FATAL(FI_LOG_DOMAIN, - "failed to close auto-registered region, " - "rc=%d\n", rc); - } - } - - req->work_fn = __gnix_rndzv_req_send_fin; - return _gnix_vc_requeue_work_req(req); -} - -/* - * As the completer fn is called in the nic's progress loop, the remote - * side's fabric request will keep track of the state of this rndzv iov - * transaction, once the outstanding_txds reaches zero for this fabric - * request, we are either ready to generate CQEs and send the fin msg - * back to the sender or rebuild and retransmit the iov txds. 
- */ -static int __gnix_rndzv_iov_req_build(void *arg); -static int __gnix_rndzv_iov_req_complete(void *arg, gni_return_t tx_status) -{ - struct gnix_tx_descriptor *txd = (struct gnix_tx_descriptor *)arg; - struct gnix_fab_req *req = txd->req; - int i, ret = FI_SUCCESS; - struct gnix_auth_key *info __attribute__ ((unused)); - int rc; - - GNIX_DBG_TRACE(FI_LOG_EP_DATA, "\n"); - - GNIX_DEBUG(FI_LOG_EP_DATA, "req->msg.outstanding_txds = %d\n", - ofi_atomic_get32(&req->msg.outstanding_txds)); - - req->msg.status |= tx_status; - - __gnix_msg_free_rma_txd(req, txd); - - if (ofi_atomic_dec32(&req->msg.outstanding_txds) == 0) { - - /* All the txd's are complete, we just need our unaligned heads - * and tails now - */ - __gnix_msg_iov_cpy_unaligned_head_tail_data(req); - GNIX_DEBUG(FI_LOG_EP_DATA, "req->msg.recv_flags == FI_LOCAL_MR " - "is %s, req->msg.recv_iov_cnt = %lu\n", - req->msg.recv_flags & FI_LOCAL_MR ? "true" : "false", - req->msg.recv_iov_cnt); - - if (req->msg.status != FI_SUCCESS) { - - if (GNIX_EP_RDM(req->gnix_ep->type) && - _gnix_req_replayable(req)) { - req->tx_failures++; - /* Build and re-tx the entire iov request if the - * ep type is "reliable datagram" */ - req->work_fn = __gnix_rndzv_iov_req_build; - return _gnix_vc_requeue_work_req(req); - } - - if (!GNIX_EP_DGM(req->gnix_ep->type)) { - GNIX_WARN(FI_LOG_EP_DATA, - "Dropping failed request: %p\n", req); - ret = __gnix_msg_send_err(req->gnix_ep, - req); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_msg_send_err() failed: %s\n", - fi_strerror(-ret)); - __gnix_msg_send_fr_complete(req, txd); - return ret; - } - } else { - if (req->msg.recv_flags & FI_LOCAL_MR) { - - if (req->gnix_ep->domain->using_vmdh) { - info = req->gnix_ep->auth_key; - assert(info); - } - - for (i = 0; i < req->msg.recv_iov_cnt; i++) { - GNIX_DEBUG(FI_LOG_EP_DATA, "freeing auto" - "-reg MR: %p\n", - req->msg.recv_md[i]); - rc = fi_close(&req->msg.recv_md[i]->mr_fid.fid); - if (rc != FI_SUCCESS) { - 
GNIX_FATAL(FI_LOG_DOMAIN, - "failed to close auto-registration, " - "rc=%d\n", rc); - } - } - } - } - - /* Generate remote CQE and send fin msg back to sender */ - req->work_fn = __gnix_rndzv_req_send_fin; - return _gnix_vc_requeue_work_req(req); - } - - /* - * Successful tx, continue until the txd counter reaches zero - * or we can't recover from the error. - */ - return ret; -} - -static int __gnix_rndzv_req_xpmem(struct gnix_fab_req *req) -{ - int ret = FI_SUCCESS, send_idx = 0, recv_idx = 0, i; - size_t cpy_len, recv_len; - uint64_t recv_ptr = 0UL; - struct gnix_xpmem_access_handle *access_hndl; - - recv_len = req->msg.recv_info[0].recv_len; - recv_ptr = req->msg.recv_info[0].recv_addr; - - /* to avoid small xpmem cross maps first get/release - * xpmem handles for all of the send_iov_cnt bufs - */ - - for (i = 0; i < req->msg.send_iov_cnt; i++) { - ret = _gnix_xpmem_access_hndl_get(req->gnix_ep->xpmem_hndl, - req->vc->peer_apid, - req->msg.send_info[i].send_addr, - req->msg.send_info[i].send_len, - &access_hndl); - ret = _gnix_xpmem_access_hndl_put(access_hndl); - } - - /* Copy data from/to (>=1) iovec entries */ - while (send_idx < req->msg.send_iov_cnt) { - cpy_len = MIN(recv_len, req->msg.send_info[send_idx].send_len); - - /* - * look up mapping from other EP - */ - ret = _gnix_xpmem_access_hndl_get(req->gnix_ep->xpmem_hndl, - req->vc->peer_apid, - req->msg.send_info[send_idx].send_addr, - cpy_len, - &access_hndl); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, "_gnix_xpmem_access_hndl_get failed %s\n", - fi_strerror(-ret)); - req->msg.status = GNI_RC_TRANSACTION_ERROR; - return ret; - } - - /* - * pull the data from the other process' address space - */ - ret = _gnix_xpmem_copy(access_hndl, - (void *)recv_ptr, - (void *)req->msg.send_info[send_idx].send_addr, - cpy_len); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, "_gnix_xpmem_vaddr_copy failed %s\n", - fi_strerror(-ret)); - req->msg.status = GNI_RC_TRANSACTION_ERROR; - 
_gnix_xpmem_access_hndl_put(access_hndl); - return ret; - } - - ret = _gnix_xpmem_access_hndl_put(access_hndl); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, "_gnix_xpmem_access_hndl_put failed %s\n", - fi_strerror(-ret)); - } - - /* Update the local and remote addresses */ - recv_len -= cpy_len; - - /* We have exhausted the current recv (and possibly send) - * buffer */ - if (recv_len == 0) { - recv_idx++; - - /* We cannot receive any more. */ - if (recv_idx == req->msg.recv_iov_cnt) - break; - - recv_ptr = req->msg.recv_info[recv_idx].recv_addr; - recv_len = req->msg.recv_info[recv_idx].recv_len; - - /* Also exhausted send buffer */ - if (cpy_len == req->msg.send_info[send_idx].send_len) { - send_idx++; - } else { - req->msg.send_info[send_idx].send_addr += cpy_len; - req->msg.send_info[send_idx].send_len -= cpy_len; - } - } else { /* Just exhausted current send buffer. */ - send_idx++; - recv_ptr += cpy_len; - } - GNIX_DEBUG(FI_LOG_EP_DATA, "send_idx = %d, recv_idx = %d\n", - send_idx, recv_idx); - } - - /* - * set the req send fin and reschedule req - */ - - req->msg.status = GNI_RC_SUCCESS; /* hmph */ - req->work_fn = __gnix_rndzv_req_send_fin; - return _gnix_vc_queue_work_req(req); -} - -static int __gnix_rndzv_req(void *arg) -{ - struct gnix_fab_req *req = (struct gnix_fab_req *)arg; - struct gnix_fid_ep *ep = req->gnix_ep; - struct gnix_nic *nic = ep->nic; - struct gnix_tx_descriptor *txd, *tail_txd = NULL; - gni_return_t status; - int rc; - int use_tx_cq_blk = 0; - struct fid_mr *auto_mr = NULL; - int inject_err = _gnix_req_inject_err(req); - int head_off, head_len, tail_len; - void *tail_data = NULL; - - GNIX_DBG_TRACE(FI_LOG_EP_DATA, "\n"); - - if (req->vc->modes & GNIX_VC_MODE_XPMEM) - return __gnix_rndzv_req_xpmem(req); - - if (!req->msg.recv_md[0]) { - rc = _gnix_mr_reg(&ep->domain->domain_fid.fid, - (void *)req->msg.recv_info[0].recv_addr, - req->msg.recv_info[0].recv_len, - FI_READ | FI_WRITE, 0, 0, 0, - &auto_mr, NULL, ep->auth_key, 
GNIX_PROV_REG); - if (rc != FI_SUCCESS) { - GNIX_DEBUG(FI_LOG_EP_DATA, - "Failed to auto-register local buffer: %d\n", - rc); - - return -FI_EAGAIN; - } - req->msg.recv_flags |= FI_LOCAL_MR; - req->msg.recv_md[0] = container_of(auto_mr, - struct gnix_fid_mem_desc, - mr_fid); - req->msg.recv_info[0].mem_hndl = req->msg.recv_md[0]->mem_hndl; - GNIX_DEBUG(FI_LOG_EP_DATA, "auto-reg MR: %p\n", auto_mr); - } - - rc = _gnix_nic_tx_alloc(nic, &txd); - if (rc) { - GNIX_DEBUG(FI_LOG_EP_DATA, "_gnix_nic_tx_alloc() failed: %d\n", - rc); - return -FI_ENOSPC; - } - - txd->completer_fn = __gnix_rndzv_req_complete; - txd->req = req; - - - use_tx_cq_blk = (ep->domain->data_progress == FI_PROGRESS_AUTO) ? 1 : 0; - - txd->gni_desc.type = GNI_POST_RDMA_GET; - txd->gni_desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT; - txd->gni_desc.dlvr_mode = GNI_DLVMODE_PERFORMANCE; - txd->gni_desc.local_mem_hndl = req->msg.recv_info[0].mem_hndl; - txd->gni_desc.remote_mem_hndl = req->msg.rma_mdh; - txd->gni_desc.rdma_mode = 0; - txd->gni_desc.src_cq_hndl = (use_tx_cq_blk) ? - nic->tx_cq_blk : nic->tx_cq; - - head_off = req->msg.send_info[0].send_addr & GNI_READ_ALIGN_MASK; - head_len = head_off ? GNI_READ_ALIGN - head_off : 0; - tail_len = (req->msg.send_info[0].send_addr + req->msg.send_info[0].send_len) & - GNI_READ_ALIGN_MASK; - - txd->gni_desc.local_addr = (uint64_t)req->msg.recv_info[0].recv_addr + head_len; - txd->gni_desc.remote_addr = (uint64_t)req->msg.send_info[0].send_addr + head_len; - txd->gni_desc.length = req->msg.send_info[0].send_len - head_len - tail_len; - - if (req->msg.recv_flags & GNIX_MSG_GET_TAIL) { - /* The user ended up with a send matching a receive with a - * buffer that is too short and unaligned... what a way to - * behave. We could not have forseen which unaligned data to - * send across with the rndzv_start request, so we do an extra - * TX here to pull the random unaligned bytes. 
*/ - rc = _gnix_nic_tx_alloc(nic, &tail_txd); - if (rc) { - _gnix_nic_tx_free(nic, txd); - GNIX_DEBUG(FI_LOG_EP_DATA, - "_gnix_nic_tx_alloc() failed (tail): %d\n", - rc); - return -FI_ENOSPC; - } - - if (req->int_tx_buf_e == NULL) { - req->int_tx_buf_e = _gnix_ep_get_int_tx_buf(ep); - if (req->int_tx_buf_e == NULL) { - GNIX_FATAL(FI_LOG_EP_DATA, - "RAN OUT OF INT_TX_BUFS"); - /* TODO return error */ - } - } - - req->int_tx_buf = ((struct gnix_int_tx_buf *) - req->int_tx_buf_e)->buf; - req->int_tx_mdh = _gnix_ep_get_int_tx_mdh(req->int_tx_buf_e); - - tail_txd->completer_fn = __gnix_rndzv_req_complete; - tail_txd->req = req; - - tail_data = (void *)((req->msg.send_info[0].send_addr + - req->msg.send_info[0].send_len) & - ~GNI_READ_ALIGN_MASK); - - tail_txd->gni_desc.type = GNI_POST_FMA_GET; - tail_txd->gni_desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT; - tail_txd->gni_desc.dlvr_mode = GNI_DLVMODE_PERFORMANCE; - tail_txd->gni_desc.local_mem_hndl = req->int_tx_mdh; - tail_txd->gni_desc.remote_mem_hndl = req->msg.rma_mdh; - tail_txd->gni_desc.rdma_mode = 0; - tail_txd->gni_desc.src_cq_hndl = nic->tx_cq; - tail_txd->gni_desc.local_addr = (uint64_t)req->int_tx_buf; - tail_txd->gni_desc.remote_addr = (uint64_t)tail_data; - tail_txd->gni_desc.length = GNI_READ_ALIGN; - - GNIX_DEBUG(FI_LOG_EP_DATA, "Using two GETs\n"); - } - - COND_ACQUIRE(nic->requires_lock, &nic->lock); - - if (inject_err) { - _gnix_nic_txd_err_inject(nic, txd); - status = GNI_RC_SUCCESS; - } else { - status = GNI_PostRdma(req->vc->gni_ep, &txd->gni_desc); - } - - if (status != GNI_RC_SUCCESS) { - COND_RELEASE(nic->requires_lock, &nic->lock); - if (tail_txd) - _gnix_nic_tx_free(nic, tail_txd); - _gnix_nic_tx_free(nic, txd); - GNIX_DEBUG(FI_LOG_EP_DATA, "GNI_PostRdma failed: %s\n", - gni_err_str[status]); - - GNIX_DEBUG(FI_LOG_EP_DATA, "\n"); - return gnixu_to_fi_errno(status); - } - - if (req->msg.recv_flags & GNIX_MSG_GET_TAIL) { - if (OFI_UNLIKELY(inject_err)) { - _gnix_nic_txd_err_inject(nic, tail_txd); - 
status = GNI_RC_SUCCESS; - } else { - status = GNI_PostFma(req->vc->gni_ep, - &tail_txd->gni_desc); - } - - if (status != GNI_RC_SUCCESS) { - COND_RELEASE(nic->requires_lock, &nic->lock); - _gnix_nic_tx_free(nic, tail_txd); - - /* Wait for the first TX to complete, then retransmit - * the entire thing. */ - ofi_atomic_set32(&req->msg.outstanding_txds, 1); - req->msg.status = GNI_RC_TRANSACTION_ERROR; - - GNIX_DEBUG(FI_LOG_EP_DATA, "GNI_PostFma() failed: %s\n", - gni_err_str[status]); - return FI_SUCCESS; - } - - /* Wait for both TXs to complete, then process the request. */ - ofi_atomic_set32(&req->msg.outstanding_txds, 2); - req->msg.status = GNI_RC_SUCCESS; - - } - - COND_RELEASE(nic->requires_lock, &nic->lock); - - GNIX_DEBUG(FI_LOG_EP_DATA, "Initiated RNDZV GET, req: %p\n", req); - - return gnixu_to_fi_errno(status); -} - -/* Attempt to post the iov txds */ -static int __gnix_rndzv_iov_req_post(void *arg) -{ - struct gnix_fab_req *req = (struct gnix_fab_req *)arg; - struct gnix_tx_descriptor *txd; - gni_return_t status; - struct gnix_nic *nic = req->gnix_ep->nic; - int i, iov_txd_cnt = ofi_atomic_get32(&req->msg.outstanding_txds); - - assert(nic != NULL); - - GNIX_DBG_TRACE(FI_LOG_EP_DATA, "\n"); - - if (OFI_UNLIKELY(iov_txd_cnt == 0)) - return -FI_EAGAIN; - - COND_ACQUIRE(nic->requires_lock, &nic->lock); - - for (i = 0, txd = req->iov_txds[0]; - i < iov_txd_cnt; - txd = req->iov_txds[++i]) { - if (txd->gni_desc.type & GNI_POST_RDMA_GET) { - status = GNI_PostRdma(req->vc->gni_ep, - &txd->gni_desc); - } else { - status = GNI_CtPostFma(req->vc->gni_ep, - &txd->gni_desc); - } - - if (status != GNI_RC_SUCCESS) { - COND_RELEASE(nic->requires_lock, &nic->lock); - - GNIX_WARN(FI_LOG_EP_DATA, "%s failed on txd %p: with " - "%s\n", - txd->gni_desc.type & GNI_POST_RDMA_GET ? 
- "GNI_PostRdma" : "GNI_CtPostFma", txd, - gni_err_str[status]); - - return gnixu_to_fi_errno(status); - } - } - - COND_RELEASE(nic->requires_lock, &nic->lock); - - return FI_SUCCESS; -} - -/* - * Dequeued a iov work req on the remote endpoint, initiate the pull of data. - */ -static int __gnix_rndzv_iov_req_build(void *arg) -{ - int ret = FI_SUCCESS, send_idx, recv_idx, use_tx_cq_blk; - struct gnix_fab_req *req = (struct gnix_fab_req *)arg; - struct gnix_fid_ep *ep = req->gnix_ep; - struct gnix_nic *nic = ep->nic; - gni_ep_handle_t gni_ep = req->vc->gni_ep; - struct gnix_tx_descriptor *txd = NULL, *ct_txd = NULL; - size_t recv_len, send_len, get_len, send_cnt, recv_cnt, txd_cnt, ht_len; - uint64_t recv_ptr = 0UL, send_ptr = 0UL; - /* TODO: Should this be the sender's rndzv thresh instead? */ - size_t rndzv_thresh = ep->domain->params.msg_rendezvous_thresh; - gni_ct_get_post_descriptor_t *cur_ct = NULL; - void **next_ct = NULL; - int head_off, head_len, tail_len; - - GNIX_DBG_TRACE(FI_LOG_EP_DATA, "\n"); - - if (req->vc->modes & GNIX_VC_MODE_XPMEM) - return __gnix_rndzv_req_xpmem(req); - - txd_cnt = 0; - send_cnt = req->msg.send_iov_cnt; - - recv_ptr = req->msg.recv_info[0].recv_addr; - recv_len = req->msg.recv_info[0].recv_len; - recv_cnt = req->msg.recv_iov_cnt; - - send_ptr = req->msg.send_info[0].send_addr; - send_len = req->msg.send_info[0].send_len; - - use_tx_cq_blk = (ep->domain->data_progress == FI_PROGRESS_AUTO); - - GNIX_DEBUG(FI_LOG_EP_DATA, "send_cnt = %lu, recv_cnt = %lu\n", - send_cnt, recv_cnt); - - /* Ensure the user's recv buffer is registered for recv/recvv */ - if (!req->msg.recv_md[0]) { - struct fid_mr *auto_mr; - - for (recv_idx = 0; recv_idx < recv_cnt; recv_idx++) { - auto_mr = NULL; - - ret = _gnix_mr_reg(&ep->domain->domain_fid.fid, - (void *) - req->msg.recv_info[recv_idx].recv_addr, - req->msg.recv_info[recv_idx].recv_len, - FI_READ | FI_WRITE, 0, 0, 0, - &auto_mr, NULL, ep->auth_key, GNIX_PROV_REG); - - if (ret != FI_SUCCESS) { - 
GNIX_DEBUG(FI_LOG_EP_DATA, - "Failed to auto-register" - " local buffer: %s\n", - fi_strerror(-ret)); - - for (recv_idx--; recv_idx >= 0; recv_idx--) - fi_close(&req->msg.recv_md[recv_idx]->mr_fid.fid); - - return ret; - } - - req->msg.recv_md[recv_idx] = container_of( - (void *) auto_mr, - struct gnix_fid_mem_desc, - mr_fid); - - req->msg.recv_info[recv_idx].mem_hndl = - req->msg.recv_md[recv_idx]->mem_hndl; - - GNIX_DEBUG(FI_LOG_EP_DATA, "auto-reg MR: %p\n", - req->msg.recv_md[recv_idx]); - - } - req->msg.recv_flags |= FI_LOCAL_MR; - } - - recv_idx = send_idx = 0; - - /* Iterate through the buffers and build the Fma and Rdma requests! */ - while (send_idx < send_cnt) { - get_len = MIN(recv_len, send_len); - - /* Begin alignment checks - * - * Each "mid-head" and "mid-tail" (resulting from the send pointer - * and length being adjusted to match the smaller posted recv buf in - * this loop) will be added to one or more chained transactions - * below. - * - * The original heads and tails (sent across in the control - * message) must be accounted for below in order to GET the - * correct, now four byte aligned, "body" section of the - * message. - */ - if (send_ptr & GNI_READ_ALIGN_MASK || - (send_ptr + get_len) & GNI_READ_ALIGN_MASK) { - if (req->int_tx_buf_e == NULL) { - req->int_tx_buf_e = _gnix_ep_get_int_tx_buf(ep); - - /* There are no available int_tx bufs */ - if (req->int_tx_buf_e == NULL) { - ofi_atomic_set32(&req->msg.outstanding_txds, 0); - req->work_fn = __gnix_rndzv_iov_req_post; - return _gnix_vc_queue_work_req(req); - } - - req->int_tx_buf = ((struct gnix_int_tx_buf *) - req->int_tx_buf_e)->buf; - req->int_tx_mdh = _gnix_ep_get_int_tx_mdh( - req->int_tx_buf_e); - GNIX_DEBUG(FI_LOG_EP_DATA, - "req->int_tx_buf = %p\n", req->int_tx_buf); - } - - head_off = send_ptr & GNI_READ_ALIGN_MASK; - head_len = head_off ? 
GNI_READ_ALIGN - head_off : 0; - tail_len = (send_ptr + get_len) & GNI_READ_ALIGN_MASK; - - ht_len = (size_t) (head_len + tail_len); - - /* TODO: handle this. */ - if (ht_len > recv_len) { - GNIX_FATAL(FI_LOG_EP_DATA, "The head tail data " - "length exceeds the matching receive" - "buffer length.\n"); - } - - /* found a mid-head? (see "Begin alignment" comment block) */ - req->msg.recv_info[recv_idx].head_len = (send_ptr != req->msg.send_info[send_idx].send_addr) ? - head_len : 0; - - /* found a mid-tail? (see "Begin alignment" comment block) */ - req->msg.recv_info[recv_idx].tail_len = (send_len > recv_len) ? - tail_len : 0; - - /* Update the local and remote addresses */ - get_len -= ht_len; - send_len -= ht_len; - recv_len -= ht_len; - - send_ptr += head_len; - recv_ptr += head_len; - - /* Add to existing ct */ - if (ct_txd) { - if (req->msg.recv_info[recv_idx].tail_len) { - cur_ct = *next_ct = malloc(sizeof(gni_ct_get_post_descriptor_t)); - - if (cur_ct == NULL) { - GNIX_DEBUG(FI_LOG_EP_DATA, - "Failed to allocate " - "gni FMA get chained " - "descriptor."); - - /* +1 to ensure we free the - * current chained txd */ - __gnix_msg_free_iov_txds(req, txd_cnt + 1); - return -FI_ENOSPC; - } - - cur_ct->ep_hndl = gni_ep; - cur_ct->remote_mem_hndl = req->msg.send_info[send_idx].mem_hndl; - cur_ct->local_mem_hndl = req->int_tx_mdh; - cur_ct->length = GNI_READ_ALIGN; - cur_ct->remote_addr = (send_ptr + get_len + tail_len) & ~GNI_READ_ALIGN_MASK; - cur_ct->local_addr = (uint64_t) (((uint8_t *) req->int_tx_buf) + (GNI_READ_ALIGN * recv_idx)); - next_ct = &cur_ct->next_descr; - } - - if (req->msg.recv_info[recv_idx].head_len) { - cur_ct = *next_ct = malloc(sizeof(gni_ct_get_post_descriptor_t)); - - if (cur_ct == NULL) { - GNIX_DEBUG(FI_LOG_EP_DATA, - "Failed to allocate " - "gni FMA get chained " - "descriptor."); - - /* +1 to ensure we free the - * current chained txd */ - __gnix_msg_free_iov_txds(req, txd_cnt + 1); - return -FI_ENOSPC; - } - - cur_ct->ep_hndl = gni_ep; - 
cur_ct->remote_mem_hndl = req->msg.send_info[send_idx].mem_hndl; - cur_ct->local_mem_hndl = req->int_tx_mdh; - cur_ct->length = GNI_READ_ALIGN; - cur_ct->remote_addr = send_ptr - GNI_READ_ALIGN; - cur_ct->local_addr = (uint64_t) (((uint8_t *) req->int_tx_buf) + - (GNI_READ_ALIGN * (recv_idx + GNIX_MAX_MSG_IOV_LIMIT))); - next_ct = &cur_ct->next_descr; - } - } else { /* Start a new ct */ - if (req->msg.recv_info[recv_idx].tail_len) { - GNIX_DEBUG(FI_LOG_EP_DATA, "New FMA" - " CT\n"); - ret = _gnix_nic_tx_alloc(nic, &ct_txd); - - if (ret != FI_SUCCESS) { - /* We'll try again. */ - GNIX_DEBUG(FI_LOG_EP_DATA, - "_gnix_nic_tx_alloc()" - " returned %s\n", - fi_strerror(-ret)); - - __gnix_msg_free_iov_txds(req, txd_cnt); - return -FI_ENOSPC; - } - - ct_txd->completer_fn = __gnix_rndzv_iov_req_complete; - ct_txd->req = req; - - ct_txd->gni_desc.type = GNI_POST_FMA_GET; - ct_txd->gni_desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT; - ct_txd->gni_desc.dlvr_mode = GNI_DLVMODE_PERFORMANCE; - ct_txd->gni_desc.rdma_mode = 0; - ct_txd->gni_desc.src_cq_hndl = (use_tx_cq_blk) ? 
nic->tx_cq_blk : nic->tx_cq; - - ct_txd->gni_desc.remote_addr = (send_ptr + get_len + tail_len) & ~GNI_READ_ALIGN_MASK; - ct_txd->gni_desc.remote_mem_hndl = req->msg.send_info[send_idx].mem_hndl; - - ct_txd->gni_desc.local_addr = (uint64_t) ((uint8_t *) req->int_tx_buf + (GNI_READ_ALIGN * recv_idx)); - ct_txd->gni_desc.local_mem_hndl = req->int_tx_mdh; - - ct_txd->gni_desc.length = GNI_READ_ALIGN; - - next_ct = &ct_txd->gni_desc.next_descr; - } - - if (req->msg.recv_info[recv_idx].head_len) { - if (req->msg.recv_info[recv_idx].tail_len) { /* Existing FMA CT */ - cur_ct = *next_ct = malloc(sizeof(gni_ct_get_post_descriptor_t)); - if (cur_ct == NULL) { - GNIX_DEBUG(FI_LOG_EP_DATA, - "Failed to allocate " - "gni FMA get chained " - "descriptor."); - - /* +1 to ensure we free the - * current chained txd */ - __gnix_msg_free_iov_txds(req, txd_cnt + 1); - return -FI_ENOSPC; - } - - cur_ct->ep_hndl = gni_ep; - cur_ct->remote_mem_hndl = req->msg.send_info[send_idx].mem_hndl; - cur_ct->local_mem_hndl = req->int_tx_mdh; - cur_ct->length = GNI_READ_ALIGN; - cur_ct->remote_addr = send_ptr - GNI_READ_ALIGN; - cur_ct->local_addr = (uint64_t) (((uint8_t *) req->int_tx_buf) + - (GNI_READ_ALIGN * (recv_idx + GNIX_MAX_MSG_IOV_LIMIT))); - next_ct = &cur_ct->next_descr; - } else { /* New FMA ct */ - GNIX_DEBUG(FI_LOG_EP_DATA, "New FMA" - " CT\n"); - ret = _gnix_nic_tx_alloc(nic, &ct_txd); - - if (ret != FI_SUCCESS) { - /* We'll try again. */ - GNIX_DEBUG(FI_LOG_EP_DATA, - "_gnix_nic_tx_alloc()" - " returned %s\n", - fi_strerror(-ret)); - - __gnix_msg_free_iov_txds(req, txd_cnt); - return -FI_ENOSPC; - } - - ct_txd->completer_fn = __gnix_rndzv_iov_req_complete; - ct_txd->req = req; - - ct_txd->gni_desc.type = GNI_POST_FMA_GET; - ct_txd->gni_desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT; - ct_txd->gni_desc.dlvr_mode = GNI_DLVMODE_PERFORMANCE; - ct_txd->gni_desc.rdma_mode = 0; - ct_txd->gni_desc.src_cq_hndl = (use_tx_cq_blk) ? 
nic->tx_cq_blk : nic->tx_cq; - - ct_txd->gni_desc.remote_addr = send_ptr - GNI_READ_ALIGN; - ct_txd->gni_desc.remote_mem_hndl = req->msg.send_info[send_idx].mem_hndl; - - ct_txd->gni_desc.local_addr = (uint64_t) ((uint8_t *) req->int_tx_buf + - (GNI_READ_ALIGN * (recv_idx + GNIX_MAX_MSG_IOV_LIMIT))); - ct_txd->gni_desc.local_mem_hndl = req->int_tx_mdh; - - ct_txd->gni_desc.length = GNI_READ_ALIGN; - - next_ct = &ct_txd->gni_desc.next_descr; - } - } - } - } else { /* no head/tail found */ - head_len = tail_len = 0; - req->msg.recv_info[recv_idx].head_len = req->msg.recv_info[recv_idx].tail_len = 0; - } - /* End alignment checks */ - - GNIX_DEBUG(FI_LOG_EP_DATA, "send_info[%d].send_len = %lu," - " recv_len = %lu, get_len = %lu, head_len = %d," - " tail_len = %d, req->msg.recv_info[%d].tail_len = %u\n" - "req->msg.recv_info[%d].head_len = %u, " - "recv_ptr(head) = %p, recv_ptr(tail) = %p\n", send_idx, - send_len, recv_len, get_len, head_len, tail_len, recv_idx, - req->msg.recv_info[recv_idx].tail_len, recv_idx, - req->msg.recv_info[recv_idx].head_len, (void *) (recv_ptr - head_len), - (void *) (recv_ptr + get_len)); - - GNIX_DEBUG(FI_LOG_EP_DATA, "txd = %p, send_ptr = %p, " - "send_ptr + get_len = %p, recv_ptr = %p\n", - txd, (void *) send_ptr, (void *)(send_ptr + get_len), - recv_ptr); - - if (get_len >= rndzv_thresh) { /* Build the rdma txd */ - ret = _gnix_nic_tx_alloc(nic, &txd); - - if (ret != FI_SUCCESS) { - /* We'll try again. 
*/ - GNIX_DEBUG(FI_LOG_EP_DATA, "_gnix_nic_tx_alloc()" - " returned %s\n", - fi_strerror(-ret)); - - __gnix_msg_free_iov_txds(req, txd_cnt); - return -FI_ENOSPC; - } - - txd->completer_fn = __gnix_rndzv_iov_req_complete; - txd->req = req; - - txd->gni_desc.type = GNI_POST_RDMA_GET; - txd->gni_desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT; - txd->gni_desc.dlvr_mode = GNI_DLVMODE_PERFORMANCE; - txd->gni_desc.local_mem_hndl = req->msg.recv_info[recv_idx].mem_hndl; - txd->gni_desc.remote_mem_hndl = req->msg.send_info[send_idx].mem_hndl; - txd->gni_desc.rdma_mode = 0; - txd->gni_desc.src_cq_hndl = (use_tx_cq_blk) ? nic->tx_cq_blk : nic->tx_cq; - - txd->gni_desc.local_addr = recv_ptr; - txd->gni_desc.remote_addr = send_ptr; - txd->gni_desc.length = get_len; - - req->iov_txds[txd_cnt++] = txd; - txd = NULL; - } else if (get_len) { /* Build the Ct txd */ - if (!ct_txd) { - GNIX_DEBUG(FI_LOG_EP_DATA, "New FMA" - " CT\n"); - ret = _gnix_nic_tx_alloc(nic, &ct_txd); - if (ret != FI_SUCCESS) { - /* We'll try again. */ - GNIX_DEBUG(FI_LOG_EP_DATA, - "_gnix_nic_tx_alloc()" - " returned %s\n", - fi_strerror(-ret)); - - __gnix_msg_free_iov_txds(req, txd_cnt); - return -FI_ENOSPC; - } - - ct_txd->completer_fn = __gnix_rndzv_iov_req_complete; - ct_txd->req = req; - - ct_txd->gni_desc.type = GNI_POST_FMA_GET; - ct_txd->gni_desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT; - ct_txd->gni_desc.dlvr_mode = GNI_DLVMODE_PERFORMANCE; - ct_txd->gni_desc.local_mem_hndl = req->msg.recv_info[recv_idx]. mem_hndl; - - ct_txd->gni_desc.remote_mem_hndl = req->msg.send_info[send_idx].mem_hndl; - ct_txd->gni_desc.rdma_mode = 0; - ct_txd->gni_desc.src_cq_hndl = (use_tx_cq_blk) ? 
nic->tx_cq_blk : nic->tx_cq; - - ct_txd->gni_desc.local_addr = recv_ptr; - ct_txd->gni_desc.remote_addr = send_ptr; - ct_txd->gni_desc.length = get_len; - - next_ct = &ct_txd->gni_desc.next_descr; - } else { - cur_ct = *next_ct = malloc(sizeof(gni_ct_get_post_descriptor_t)); - - if (cur_ct == NULL) { - GNIX_DEBUG(FI_LOG_EP_DATA, - "Failed to allocate " - "gni FMA get chained " - "descriptor."); - - /* +1 to ensure we free the - * current chained txd */ - __gnix_msg_free_iov_txds(req, txd_cnt + 1); - return -FI_ENOSPC; - } - - cur_ct->ep_hndl = gni_ep; - cur_ct->length = get_len; - cur_ct->remote_addr = send_ptr; - cur_ct->remote_mem_hndl = req->msg.send_info[send_idx].mem_hndl; - cur_ct->local_addr = (uint64_t) recv_ptr; - cur_ct->local_mem_hndl = req->msg.recv_info[recv_idx].mem_hndl; - - next_ct = &cur_ct->next_descr; - } - } - - /* Update the recv len */ - recv_len -= get_len; - - /* We have exhausted the current recv (and possibly send) - * buffer */ - if (recv_len == 0) { - recv_idx++; - - /* We cannot receive any more. */ - if (recv_idx == recv_cnt) - break; - - recv_ptr = req->msg.recv_info[recv_idx].recv_addr; - recv_len = req->msg.recv_info[recv_idx].recv_len; - - /* Also exhausted send buffer */ - if (get_len == send_len) { - send_idx++; - send_ptr = req->msg.send_info[send_idx].send_addr; - send_len = req->msg.send_info[send_idx].send_len; - } else { - send_ptr += (get_len + tail_len); - send_len -= get_len; - } - } else { /* Just exhausted current send buffer. */ - send_idx++; - send_ptr = req->msg.send_info[send_idx].send_addr; - send_len = req->msg.send_info[send_idx].send_len; - recv_ptr += (get_len + tail_len); - } - GNIX_DEBUG(FI_LOG_EP_DATA, "send_idx = %d, recv_idx = %d\n", - send_idx, recv_idx); - } - - /* - * If we ran out of buffer space on the sender's/receiver's side in the - * middle of building the ct, we must terminate and add that ct to the - * queue. 
Note that if the last txd built was a rdma txd then the txd - * will have been queued and txd will have a NULL value. - */ - if (ct_txd) { - *next_ct = NULL; - req->iov_txds[txd_cnt++] = ct_txd; - } - - GNIX_DEBUG(FI_LOG_EP_DATA, "txd_cnt = %lu\n", txd_cnt); - ofi_atomic_set32(&req->msg.outstanding_txds, txd_cnt); - - /* All the txd's are built, update the work_fn */ - req->work_fn = __gnix_rndzv_iov_req_post; - - /* Put this request back on work Q. - * TODO: Should we put it at the beginning of the work Q? */ - ret = _gnix_vc_queue_work_req(req); - return ret; -} - -/******************************************************************************* - * GNI SMSG callbacks invoked upon completion of an SMSG message at the sender. - ******************************************************************************/ - -static int __comp_eager_msg_w_data(void *data, gni_return_t tx_status) -{ - struct gnix_tx_descriptor *tdesc = (struct gnix_tx_descriptor *)data; - struct gnix_fab_req *req = tdesc->req; - int ret = FI_SUCCESS; - - if (tx_status != GNI_RC_SUCCESS) { - GNIX_DEBUG(FI_LOG_EP_DATA, "Failed transaction: %p\n", req); - ret = __gnix_msg_send_err(req->gnix_ep, req); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_msg_send_err() failed: %s\n", - fi_strerror(-ret)); - } else { - /* Successful delivery. Generate completions. */ - ret = __gnix_msg_send_completion(req->gnix_ep, req); - - /* - * For fi_sendv we must free the temporary buf used to flatten - * the user's iovec. 
- */ - if (req->type == GNIX_FAB_RQ_SENDV || - req->type == GNIX_FAB_RQ_TSENDV) { - free((void *) req->msg.send_info[0].send_addr); - } - - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_msg_send_completion() failed: %d\n", - ret); - } - - __gnix_msg_send_fr_complete(req, tdesc); - - return FI_SUCCESS; -} - -static int __comp_eager_msg_w_data_ack(void *data, gni_return_t tx_status) -{ - return -FI_ENOSYS; -} - -static int __comp_eager_msg_data_at_src(void *data, gni_return_t tx_status) -{ - return -FI_ENOSYS; -} - -static int __comp_eager_msg_data_at_src_ack(void *data, gni_return_t tx_status) -{ - return -FI_ENOSYS; -} - -static int __comp_rndzv_msg_rts(void *data, gni_return_t tx_status) -{ - return -FI_ENOSYS; -} - -static int __comp_rndzv_msg_rtr(void *data, gni_return_t tx_status) -{ - return -FI_ENOSYS; -} - -static int __comp_rndzv_msg_cookie(void *data, gni_return_t tx_status) -{ - return -FI_ENOSYS; -} - -static int __comp_rndzv_msg_send_done(void *data, gni_return_t tx_status) -{ - return -FI_ENOSYS; -} - -static int __comp_rndzv_msg_recv_done(void *data, gni_return_t tx_status) -{ - return -FI_ENOSYS; -} - -/* Completed request to start rendezvous send. */ -static int __comp_rndzv_start(void *data, gni_return_t tx_status) -{ - struct gnix_tx_descriptor *txd = (struct gnix_tx_descriptor *)data; - struct gnix_fab_req *req = txd->req; - int ret; - - if (tx_status != GNI_RC_SUCCESS) { - GNIX_DEBUG(FI_LOG_EP_DATA, "Failed transaction: %p\n", txd->req); - ret = __gnix_msg_send_err(req->gnix_ep, req); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_msg_send_err() failed: %s\n", - fi_strerror(-ret)); - __gnix_msg_send_fr_complete(req, txd); - } else { - /* Just free the TX descriptor for now. The request remains - * active until the remote peer notifies us that they're done - * with the send buffer. 
*/ - _gnix_nic_tx_free(txd->req->gnix_ep->nic, txd); - - GNIX_DEBUG(FI_LOG_EP_DATA, "Completed RNDZV_START, req: %p\n", - txd->req); - } - - return FI_SUCCESS; -} - -/* Notified sender that rendezvous data has been moved. Rendezvous send - * complete. Generate Completions. */ -static int __comp_rndzv_fin(void *data, gni_return_t tx_status) -{ - int ret = FI_SUCCESS; - struct gnix_tx_descriptor *tdesc = (struct gnix_tx_descriptor *)data; - struct gnix_fab_req *req = tdesc->req; - - if (tx_status != GNI_RC_SUCCESS || req->msg.status != GNI_RC_SUCCESS) { - /* TODO should this be fatal? A request will sit waiting at the - * peer. */ - GNIX_WARN(FI_LOG_EP_DATA, "Failed transaction: %p\n", req); - ret = __gnix_msg_recv_err(req->gnix_ep, req); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_msg_recv_err() failed: %d\n", - ret); - } else { - GNIX_DEBUG(FI_LOG_EP_DATA, "Completed RNDZV_FIN, req: %p\n", - req); - - ret = __gnix_msg_recv_completion(req->gnix_ep, req); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_msg_recv_completion() failed: %d\n", - ret); - } - - _gnix_nic_tx_free(req->gnix_ep->nic, tdesc); - _gnix_fr_free(req->gnix_ep, req); - - return FI_SUCCESS; -} - -smsg_completer_fn_t gnix_ep_smsg_completers[] = { - [GNIX_SMSG_T_EGR_W_DATA] = __comp_eager_msg_w_data, - [GNIX_SMSG_T_EGR_W_DATA_ACK] = __comp_eager_msg_w_data_ack, - [GNIX_SMSG_T_EGR_GET] = __comp_eager_msg_data_at_src, - [GNIX_SMSG_T_EGR_GET_ACK] = __comp_eager_msg_data_at_src_ack, - [GNIX_SMSG_T_RNDZV_RTS] = __comp_rndzv_msg_rts, - [GNIX_SMSG_T_RNDZV_RTR] = __comp_rndzv_msg_rtr, - [GNIX_SMSG_T_RNDZV_COOKIE] = __comp_rndzv_msg_cookie, - [GNIX_SMSG_T_RNDZV_SDONE] = __comp_rndzv_msg_send_done, - [GNIX_SMSG_T_RNDZV_RDONE] = __comp_rndzv_msg_recv_done, - [GNIX_SMSG_T_RNDZV_START] = __comp_rndzv_start, - [GNIX_SMSG_T_RNDZV_FIN] = __comp_rndzv_fin, - [GNIX_SMSG_T_RNDZV_IOV_START] = __comp_rndzv_start -}; - - 
-/******************************************************************************* - * GNI SMSG callbacks invoked upon receipt of an SMSG message. - * These callback functions are invoked with the lock for the nic - * associated with the vc already held. - ******************************************************************************/ -/* - * Handle SMSG message with tag GNIX_SMSG_T_EGR_W_DATA - */ - -static inline struct gnix_fab_req * - __handle_mrecv_req(struct gnix_fab_req *mrecv_req, - struct gnix_fid_ep *ep, - uint64_t len, - struct gnix_tag_storage *queue) -{ - struct gnix_fab_req *req = NULL; - - req = _gnix_fr_alloc(ep); - if (req == NULL) { - return NULL; - } - - /* - * set recv related fields in request, - * and adjust mrecv buffer pointer and - * space remaining - */ - - req->type = GNIX_FAB_RQ_RECV; - ofi_atomic_initialize32(&req->msg.outstanding_txds, 0); - req->msg.recv_flags = mrecv_req->msg.recv_flags; - req->msg.recv_flags |= FI_MULTI_RECV; - req->msg.recv_info[0].recv_addr = - mrecv_req->msg.mrecv_buf_addr; - req->msg.recv_info[0].recv_len = - mrecv_req->msg.mrecv_space_left; - req->msg.recv_md[0] = mrecv_req->msg.recv_md[0]; - req->msg.recv_info[0].mem_hndl = mrecv_req->msg.recv_info[0].mem_hndl; - req->user_context = mrecv_req->user_context; - req->msg.cum_recv_len = mrecv_req->msg.mrecv_space_left; - - req->msg.parent = mrecv_req; - if (req->msg.parent) - _gnix_ref_get(req->msg.parent); - - mrecv_req->msg.mrecv_space_left -= len; - mrecv_req->msg.mrecv_buf_addr += len; - - if ((int64_t)mrecv_req->msg.mrecv_space_left < - ep->min_multi_recv) { - _gnix_remove_tag(queue, mrecv_req); - _gnix_ref_put(mrecv_req); - } else { - GNIX_DEBUG(FI_LOG_EP_DATA, - "Re-using multi-recv req: %p\n", mrecv_req); - } - - return req; -} - -static int __smsg_eager_msg_w_data(void *data, void *msg) -{ - int ret = FI_SUCCESS; - gni_return_t status; - struct gnix_vc *vc = (struct gnix_vc *)data; - struct gnix_smsg_eager_hdr *hdr = (struct gnix_smsg_eager_hdr *)msg; - 
struct gnix_fid_ep *ep; - struct gnix_fab_req *req = NULL; - void *data_ptr; - struct gnix_tag_storage *unexp_queue; - struct gnix_tag_storage *posted_queue; - int tagged; - - GNIX_DBG_TRACE(FI_LOG_EP_DATA, "\n"); - - ep = vc->ep; - assert(ep); - - data_ptr = (void *)((char *)msg + sizeof(*hdr)); - - tagged = !!(hdr->flags & FI_TAGGED); - __gnix_msg_queues(ep, tagged, &posted_queue, &unexp_queue); - - /* Lookup a matching posted request. */ - req = _gnix_match_tag(posted_queue, hdr->msg_tag, 0, FI_PEEK, NULL, - &vc->peer_addr); - if (req) { - if (req->type == GNIX_FAB_RQ_MRECV) { - req = __handle_mrecv_req(req, ep, hdr->len, posted_queue); - if (req == NULL) { - return -FI_ENOMEM; - } - } - - req->addr = vc->peer_addr; - req->gnix_ep = ep; - req->vc = vc; - - req->msg.cum_send_len = hdr->len; - req->msg.send_flags = hdr->flags; - req->msg.send_iov_cnt = 1; - req->msg.tag = hdr->msg_tag; - req->msg.imm = hdr->imm; - - GNIX_DEBUG(FI_LOG_EP_DATA, "Matched req: %p (%p, %u)\n", - req, req->msg.recv_info[0].recv_addr, - req->msg.cum_send_len); - - __gnix_msg_copy_data_to_recv_addr(req, data_ptr); - - __gnix_msg_recv_completion(ep, req); - - GNIX_DEBUG(FI_LOG_EP_DATA, "Freeing req: %p\n", req); - - /* - * Dequeue and free the request. - */ - _gnix_remove_tag(posted_queue, req); - _gnix_fr_free(ep, req); - } else { - /* Add new unexpected receive request. 
*/ - req = _gnix_fr_alloc(ep); - if (req == NULL) { - return -FI_ENOMEM; - } - - /* TODO: Buddy alloc */ - req->msg.send_info[0].send_addr = (uint64_t)malloc(hdr->len); - if (OFI_UNLIKELY(req->msg.send_info[0].send_addr == 0ULL)) { - _gnix_fr_free(ep, req); - return -FI_ENOMEM; - } - - req->type = GNIX_FAB_RQ_RECV; - req->addr = vc->peer_addr; - req->gnix_ep = ep; - req->vc = vc; - - req->msg.cum_send_len = hdr->len; - req->msg.send_info[0].send_len = hdr->len; - req->msg.send_iov_cnt = 1; - req->msg.send_flags = hdr->flags; - req->msg.tag = hdr->msg_tag; - req->msg.imm = hdr->imm; - req->msg.parent = NULL; - - memcpy((void *)req->msg.send_info[0].send_addr, data_ptr, hdr->len); - req->addr = vc->peer_addr; - - _gnix_insert_tag(unexp_queue, req->msg.tag, req, ~0); - - GNIX_DEBUG(FI_LOG_EP_DATA, "New req: %p (%u)\n", - req, req->msg.cum_send_len); - } - - status = GNI_SmsgRelease(vc->gni_ep); - if (OFI_UNLIKELY(status != GNI_RC_SUCCESS)) { - GNIX_WARN(FI_LOG_EP_DATA, - "GNI_SmsgRelease returned %s\n", - gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - } - - return ret; -} - -/* - * this function will probably not be used unless we need - * some kind of explicit flow control to handle unexpected - * receives - */ - -static int __smsg_eager_msg_w_data_ack(void *data, void *msg) -{ - return -FI_ENOSYS; -} - -/* - * Handle SMSG message with tag GNIX_SMSG_T_EGR_GET - */ -static int __smsg_eager_msg_data_at_src(void *data, void *msg) -{ - return -FI_ENOSYS; -} - -/* - * Handle SMSG message with tag GNIX_SMSG_T_EGR_GET_ACK - */ -static int __smsg_eager_msg_data_at_src_ack(void *data, void *msg) -{ - return -FI_ENOSYS; -} - -static int __smsg_rndzv_msg_rts(void *data, void *msg) -{ - return -FI_ENOSYS; -} - -static int __smsg_rndzv_msg_rtr(void *data, void *msg) -{ - return -FI_ENOSYS; -} - -static int __smsg_rndzv_msg_cookie(void *data, void *msg) -{ - return -FI_ENOSYS; -} - -static int __smsg_rndzv_msg_send_done(void *data, void *msg) -{ - return -FI_ENOSYS; -} 
- -static int __smsg_rndzv_msg_recv_done(void *data, void *msg) -{ - return -FI_ENOSYS; -} - -/* Received SMSG rendezvous start message. Try to match a posted receive and - * start pulling data. */ -static int __smsg_rndzv_start(void *data, void *msg) -{ - int ret = FI_SUCCESS; - gni_return_t status; - struct gnix_vc *vc = (struct gnix_vc *)data; - struct gnix_smsg_rndzv_start_hdr *hdr = - (struct gnix_smsg_rndzv_start_hdr *)msg; - struct gnix_fid_ep *ep; - struct gnix_fab_req *req = NULL; - struct gnix_tag_storage *unexp_queue; - struct gnix_tag_storage *posted_queue; - int tagged; - - GNIX_DBG_TRACE(FI_LOG_EP_DATA, "\n"); - - ep = vc->ep; - assert(ep); - - tagged = !!(hdr->flags & FI_TAGGED); - __gnix_msg_queues(ep, tagged, &posted_queue, &unexp_queue); - - req = _gnix_match_tag(posted_queue, hdr->msg_tag, 0, FI_PEEK, NULL, - &vc->peer_addr); - - if (req) { - if (req->type == GNIX_FAB_RQ_MRECV) { - req = __handle_mrecv_req(req, ep, hdr->len, - posted_queue); - if (req == NULL) { - return -FI_ENOMEM; - } - } - - req->addr = vc->peer_addr; - req->gnix_ep = ep; - req->vc = vc; - req->tx_failures = 0; - - /* Check if a second GET for unaligned data is needed. 
*/ - if (hdr->len > req->msg.recv_info[0].recv_len && - ((hdr->addr + req->msg.recv_info[0].recv_len) & GNI_READ_ALIGN_MASK)) { - req->msg.recv_flags |= GNIX_MSG_GET_TAIL; - } - - req->msg.send_info[0].send_addr = hdr->addr; - req->msg.send_info[0].send_len = - MIN(hdr->len, req->msg.cum_recv_len); - req->msg.send_info[0].mem_hndl = hdr->mdh; - req->msg.cum_send_len = hdr->len; - req->msg.send_iov_cnt = 1; - req->msg.send_flags = hdr->flags; - req->msg.tag = hdr->msg_tag; - req->msg.imm = hdr->imm; - req->msg.rma_mdh = hdr->mdh; - req->msg.rma_id = hdr->req_addr; - req->msg.send_info[0].head = hdr->head; - req->msg.send_info[0].tail = hdr->tail; - - if (req->type == GNIX_FAB_RQ_RECV) { - /* fi_send is rndzv with recv */ - - GNIX_DEBUG(FI_LOG_EP_DATA, "recv rndzv start - POSTED," - " req = %p\n", req); - req->work_fn = __gnix_rndzv_req; - } else if (req->type == GNIX_FAB_RQ_RECVV || - req->type == GNIX_FAB_RQ_TRECVV) { - /* fi_send is rndzv with recvv */ - - GNIX_DEBUG(FI_LOG_EP_DATA, "recvv rndzv start - POSTED," - " req = %p\n", req); - - req->work_fn = __gnix_rndzv_iov_req_build; - } - - GNIX_DEBUG(FI_LOG_EP_DATA, "Matched req: %p (%p, %u)\n", - req, req->msg.recv_info[0].recv_addr, - req->msg.send_info[0].send_len); - - _gnix_remove_tag(posted_queue, req); - - /* Queue request to initiate pull of source data. */ - ret = _gnix_vc_queue_work_req(req); - - GNIX_DEBUG(FI_LOG_EP_DATA, - "_gnix_vc_queue_work_req returned %s\n", - fi_strerror(-ret)); - } else { - /* Add new unexpected receive request. 
*/ - req = _gnix_fr_alloc(ep); - if (req == NULL) { - return -FI_ENOMEM; - } - - req->type = GNIX_FAB_RQ_RECV; - req->addr = vc->peer_addr; - req->gnix_ep = ep; - req->vc = vc; - - req->msg.send_info[0].send_addr = hdr->addr; - req->msg.send_info[0].send_len = hdr->len; - req->msg.send_info[0].mem_hndl = hdr->mdh; - req->msg.send_iov_cnt = 1; - req->msg.cum_send_len = req->msg.send_info[0].send_len; - req->msg.send_flags = hdr->flags; - req->msg.tag = hdr->msg_tag; - req->msg.imm = hdr->imm; - req->msg.rma_mdh = hdr->mdh; - req->msg.rma_id = hdr->req_addr; - req->msg.send_info[0].head = hdr->head; - req->msg.send_info[0].tail = hdr->tail; - ofi_atomic_initialize32(&req->msg.outstanding_txds, 0); - req->msg.parent = NULL; - - _gnix_insert_tag(unexp_queue, req->msg.tag, req, ~0); - - GNIX_DEBUG(FI_LOG_EP_DATA, "New req: %p (%u)\n", - req, req->msg.send_info[0].send_len); - } - - status = GNI_SmsgRelease(vc->gni_ep); - if (OFI_UNLIKELY(status != GNI_RC_SUCCESS)) { - GNIX_WARN(FI_LOG_EP_DATA, - "GNI_SmsgRelease returned %s\n", - gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - } - - return ret; -} - -static int __smsg_rndzv_iov_start(void *data, void *msg) -{ - int ret = FI_SUCCESS; - gni_return_t status; - struct gnix_vc *vc = (struct gnix_vc *) data; - struct gnix_smsg_rndzv_iov_start_hdr *hdr = msg; - void *data_ptr = (void *) ((uint8_t *) msg + sizeof(*hdr)); - struct gnix_fid_ep *ep; - struct gnix_fab_req *req = NULL; - struct gnix_tag_storage *unexp_queue; - struct gnix_tag_storage *posted_queue; - char is_req_posted = 0; - - GNIX_DBG_TRACE(FI_LOG_EP_DATA, "\n"); - -#if ENABLE_DEBUG - int i; - - for (i = 0; i < hdr->iov_cnt; i++) { - GNIX_DEBUG(FI_LOG_EP_DATA, "send_addr[%d] = %p, send_len[%d] = %lu\n", i, - ((struct send_info_t *)data_ptr)[i].send_addr, i, - ((struct send_info_t *)data_ptr)[i].send_len); - } -#endif - ep = vc->ep; - assert(ep != NULL); - - __gnix_msg_queues(ep, hdr->flags & FI_TAGGED, - &posted_queue, &unexp_queue); - - req = 
_gnix_match_tag(posted_queue, hdr->msg_tag, 0, FI_PEEK, NULL, - &vc->peer_addr); - - if (req) { /* Found a request in the posted queue */ - is_req_posted = 1; - req->tx_failures = 0; - req->msg.cum_send_len = hdr->send_len; - - GNIX_DEBUG(FI_LOG_EP_DATA, "Matched req: %p (%p, %u)\n", - req, req->msg.recv_info[0].recv_addr, hdr->send_len); - _gnix_remove_tag(posted_queue, req); - } else { /* Unexpected receive, enqueue it */ - req = _gnix_fr_alloc(ep); - if (req == NULL) { - return -FI_ENOMEM; - } - - ofi_atomic_initialize32(&req->msg.outstanding_txds, 0); - - GNIX_DEBUG(FI_LOG_EP_DATA, "New req: %p (%u)\n", - req, hdr->send_len); - - req->msg.cum_send_len = hdr->send_len; - } - - req->addr = vc->peer_addr; - req->gnix_ep = ep; - req->vc = vc; - req->work_fn = __gnix_rndzv_iov_req_build; - - req->msg.send_flags = hdr->flags; - req->msg.imm = hdr->imm; - req->msg.tag = hdr->msg_tag; - req->msg.send_iov_cnt = hdr->iov_cnt; - req->msg.rma_id = hdr->req_addr; - req->msg.parent = NULL; - memcpy(req->msg.send_info, data_ptr, - sizeof(struct send_info_t) * hdr->iov_cnt); - - if (is_req_posted) - ret = _gnix_vc_queue_work_req(req); - else - _gnix_insert_tag(unexp_queue, req->msg.tag, req, ~0); - - /* - * Release the message buffer on the nic, need to copy the data - * section out before this. 
- */ - status = GNI_SmsgRelease(vc->gni_ep); - - if (OFI_UNLIKELY(status != GNI_RC_SUCCESS)) { - GNIX_WARN(FI_LOG_EP_DATA, - "GNI_SmsgRelease returned %s\n", - gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - } - - return ret; -} - -static int __gnix_rndzv_fin_cleanup(void *arg) -{ - int i; - struct gnix_fab_req *req = (struct gnix_fab_req *)arg; - int rc; - - GNIX_DBG_TRACE(FI_LOG_EP_DATA, "\n"); - - for (i = 0; i < req->msg.send_iov_cnt; i++) { - GNIX_DEBUG(FI_LOG_EP_DATA, "freeing auto-reg MR: %p\n", - req->msg.send_md[i]); - - GNIX_DEBUG(FI_LOG_EP_DATA, "req->msg.send_md[%d] =" - " %p\n", i, req->msg.send_md[i]); - - rc = fi_close(&req->msg.send_md[i]->mr_fid.fid); - if (rc != FI_SUCCESS) { - GNIX_FATAL(FI_LOG_DOMAIN, - "failed to release internal memory registration, " - "rc=%d\n", rc); - } - - req->flags &= ~FI_LOCAL_MR; - } - - _gnix_fr_free(req->gnix_ep, req); - - return FI_SUCCESS; -} - -/* Received SMSG rendezvous fin message. The peer has finished pulling send - * data. Free the send request and generate completions. */ -static int __smsg_rndzv_fin(void *data, void *msg) -{ - int ret = FI_SUCCESS; - gni_return_t status; - struct gnix_vc *vc = (struct gnix_vc *)data; - struct gnix_smsg_rndzv_fin_hdr *hdr = - (struct gnix_smsg_rndzv_fin_hdr *)msg; - struct gnix_fab_req *req; - struct gnix_fid_ep *ep; - - GNIX_DBG_TRACE(FI_LOG_EP_DATA, "\n"); - - req = (struct gnix_fab_req *)hdr->req_addr; - assert(req); - - GNIX_DEBUG(FI_LOG_EP_DATA, "Received RNDZV_FIN, req: %p\n", req); - - ep = req->gnix_ep; - assert(ep != NULL); - - if (hdr->status == GNI_RC_SUCCESS) { - __gnix_msg_send_completion(ep, req); - } else { - ret = __gnix_msg_send_err(ep, req); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_msg_send_err() failed: %s\n", - fi_strerror(-ret)); - } - } - - ofi_atomic_dec32(&req->vc->outstanding_tx_reqs); - - /* Schedule VC TX queue in case the VC is 'fenced'. 
*/ - _gnix_vc_tx_schedule(req->vc); - - if (req->msg.send_flags & FI_LOCAL_MR) { - /* Defer freeing the MR and request. */ - req->work_fn = __gnix_rndzv_fin_cleanup; - ret = _gnix_vc_queue_work_req(req); - } else { - _gnix_fr_free(ep, req); - } - - status = GNI_SmsgRelease(vc->gni_ep); - if (OFI_UNLIKELY(status != GNI_RC_SUCCESS)) { - GNIX_WARN(FI_LOG_EP_DATA, - "GNI_SmsgRelease returned %s\n", - gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - } - - return ret; -} - -smsg_callback_fn_t gnix_ep_smsg_callbacks[] = { - [GNIX_SMSG_T_EGR_W_DATA] = __smsg_eager_msg_w_data, - [GNIX_SMSG_T_EGR_W_DATA_ACK] = __smsg_eager_msg_w_data_ack, - [GNIX_SMSG_T_EGR_GET] = __smsg_eager_msg_data_at_src, - [GNIX_SMSG_T_EGR_GET_ACK] = __smsg_eager_msg_data_at_src_ack, - [GNIX_SMSG_T_RNDZV_RTS] = __smsg_rndzv_msg_rts, - [GNIX_SMSG_T_RNDZV_RTR] = __smsg_rndzv_msg_rtr, - [GNIX_SMSG_T_RNDZV_COOKIE] = __smsg_rndzv_msg_cookie, - [GNIX_SMSG_T_RNDZV_SDONE] = __smsg_rndzv_msg_send_done, - [GNIX_SMSG_T_RNDZV_RDONE] = __smsg_rndzv_msg_recv_done, - [GNIX_SMSG_T_RNDZV_START] = __smsg_rndzv_start, - [GNIX_SMSG_T_RNDZV_FIN] = __smsg_rndzv_fin, - [GNIX_SMSG_T_RMA_DATA] = __smsg_rma_data, /* defined in gnix_rma.c */ - [GNIX_SMSG_T_AMO_CNTR] = __smsg_amo_cntr, /* defined in gnix_amo.c */ - [GNIX_SMSG_T_RNDZV_IOV_START] = __smsg_rndzv_iov_start -}; - -static int __gnix_peek_request(struct gnix_fab_req *req) -{ - struct gnix_fid_cq *recv_cq = req->gnix_ep->recv_cq; - int rendezvous = !!(req->msg.send_flags & GNIX_MSG_RENDEZVOUS); - int ret, send_idx, recv_idx, copy_len; - uint64_t send_ptr, recv_ptr, send_len, recv_len; - - /* All claim work is performed by the tag storage, so nothing special - * here. If no CQ, no data is to be returned. Just inform the user - * that a message is present. */ - GNIX_DEBUG(FI_LOG_EP_DATA, "peeking req=%p\n", req); - if (!recv_cq) - return FI_SUCCESS; - - /* Rendezvous messages on the unexpected queue won't have data. 
- * Additionally, if the CQ format doesn't support passing a buffer - * location and length, then data will not be copied. */ - if (!rendezvous && req->msg.recv_info[0].recv_addr && - !INVALID_PEEK_FORMAT(recv_cq->attr.format)) { - send_len = req->msg.send_info[0].send_len; - send_ptr = req->msg.send_info[0].send_addr; - recv_len = req->msg.recv_info[0].recv_len; - recv_ptr = req->msg.recv_info[0].recv_addr; - send_idx = recv_idx = 0; - - while (1) { - copy_len = MIN(send_len, recv_len); - memcpy((void *)recv_ptr, (void *)send_ptr, copy_len); - - /* Update lengths/addresses */ - send_len -= copy_len; - recv_len -= copy_len; - - if (send_len == 0) { - send_idx++; - - if (send_idx == req->msg.send_iov_cnt) - break; - - send_ptr = req->msg.send_info[send_idx].send_addr; - send_len = req->msg.send_info[send_idx].send_len; - } else { - send_ptr += copy_len; - } - - if (recv_len == 0) { - recv_idx++; - - if (recv_idx == req->msg.recv_iov_cnt) - break; - - recv_ptr = req->msg.recv_info[recv_idx].recv_addr; - recv_len = req->msg.recv_info[recv_idx].recv_len; - } else { - recv_ptr += copy_len; - } - - } - } else { - /* The CQE should not contain a valid buffer. */ - req->msg.recv_info[0].recv_addr = 0; - } - - ret = __gnix_msg_recv_completion(req->gnix_ep, req); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_msg_recv_completion() failed: %d\n", - ret); - - return ret; -} - -static int __gnix_discard_request(struct gnix_fab_req *req) -{ - int ret = FI_SUCCESS; - int rendezvous = !!(req->msg.send_flags & GNIX_MSG_RENDEZVOUS); - - /* The CQE should not contain a valid buffer. */ - req->msg.recv_info[0].recv_addr = 0; - req->msg.cum_send_len = req->msg.send_info[0].send_len = 0; - - GNIX_DEBUG(FI_LOG_EP_DATA, "discarding req=%p\n", req); - if (rendezvous) { - GNIX_DEBUG(FI_LOG_EP_DATA, - "returning rndzv completion for req, %p", req); - - /* Complete rendezvous request, skipping data transfer. 
*/ - req->work_fn = __gnix_rndzv_req_send_fin; - ret = _gnix_vc_queue_work_req(req); - } else { - /* Data has already been delivered, so just discard it and - * generate a CQE. */ - ret = __gnix_msg_recv_completion(req->gnix_ep, req); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_msg_recv_completion() failed: %d\n", - ret); - - /* Free unexpected eager receive buffer. */ - free((void *)req->msg.send_info[0].send_addr); - _gnix_fr_free(req->gnix_ep, req); - } - - return ret; -} - -static int __gnix_msg_addr_lookup(struct gnix_fid_ep *ep, uint64_t src_addr, - struct gnix_address *gnix_addr) -{ - int ret; - struct gnix_fid_av *av; - struct gnix_av_addr_entry av_entry; - - /* Translate source address. */ - if (GNIX_EP_RDM_DGM(ep->type)) { - if ((ep->caps & FI_DIRECTED_RECV) && - (src_addr != FI_ADDR_UNSPEC)) { - av = ep->av; - assert(av != NULL); - ret = _gnix_av_lookup(av, src_addr, &av_entry); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_AV, - "_gnix_av_lookup returned %d\n", - ret); - return ret; - } - *gnix_addr = av_entry.gnix_addr; - } else { - *(uint64_t *)gnix_addr = FI_ADDR_UNSPEC; - } - } - /* NOP for MSG EPs. 
*/ - - return FI_SUCCESS; -} - -/******************************************************************************* - * Generic EP recv handling - ******************************************************************************/ - -ssize_t _gnix_recv(struct gnix_fid_ep *ep, uint64_t buf, size_t len, - void *mdesc, uint64_t src_addr, void *context, - uint64_t flags, uint64_t tag, uint64_t ignore, - struct gnix_fab_req *mrecv_req) -{ - int ret = FI_SUCCESS; - struct gnix_fab_req *req = NULL; - struct gnix_address gnix_addr; - struct gnix_tag_storage *posted_queue = NULL; - struct gnix_tag_storage *unexp_queue = NULL; - uint64_t match_flags; - struct gnix_fid_mem_desc *md = NULL; - int tagged = !!(flags & FI_TAGGED); - - if (!ep->recv_cq && !ep->recv_cntr) { - return -FI_ENOCQ; - } - - if (!tagged) { - if (!ep->ep_ops.msg_recv_allowed) - return -FI_EOPNOTSUPP; - } else { - if (!ep->ep_ops.tagged_recv_allowed) - return -FI_EOPNOTSUPP; - } - - ret = __gnix_msg_addr_lookup(ep, src_addr, &gnix_addr); - if (ret != FI_SUCCESS) - return ret; - - match_flags = flags & (FI_CLAIM | FI_DISCARD | FI_PEEK); - - __gnix_msg_queues(ep, tagged, &posted_queue, &unexp_queue); - - GNIX_DEBUG(FI_LOG_EP_DATA, "posted_queue = %p\n", posted_queue); - - if (!tagged) { - tag = 0; - ignore = ~0; - } - - COND_ACQUIRE(ep->requires_lock, &ep->vc_lock); - - /* Look for a matching unexpected receive request. */ - req = _gnix_match_tag(unexp_queue, tag, ignore, - match_flags, context, &gnix_addr); - if (req) { - /* - * if we posted a multi-recv buffer and we can't - * hold the matched message, stop dequeuing and - * return. - */ - if (OFI_UNLIKELY(mrecv_req != NULL)) { - - mrecv_req->msg.mrecv_space_left -= - req->msg.cum_send_len; - mrecv_req->msg.mrecv_buf_addr += - req->msg.cum_send_len; - req->msg.parent = mrecv_req; - _gnix_ref_get(mrecv_req); - } - - /* Found matching request, populate local fields. 
*/ - - req->gnix_ep = ep; - req->user_context = context; - req->msg.recv_info[0].recv_addr = (uint64_t)buf; - req->msg.recv_info[0].recv_len = len; - req->msg.cum_recv_len = len; - - if (mdesc) { - md = container_of(mdesc, - struct gnix_fid_mem_desc, - mr_fid); - req->msg.recv_info[0].mem_hndl = md->mem_hndl; - } - req->msg.recv_md[0] = md; - req->msg.recv_iov_cnt = 1; - req->msg.recv_flags = flags; - req->msg.ignore = ignore; - - if ((flags & GNIX_SUPPRESS_COMPLETION) || - (ep->recv_selective_completion && - !(flags & FI_COMPLETION))) { - req->msg.recv_flags &= ~FI_COMPLETION; - } else { - req->msg.recv_flags |= FI_COMPLETION; - } - - /* Check to see if we are using P/C/D matching flags. */ - if (match_flags & FI_DISCARD) { - ret = __gnix_discard_request(req); - goto pdc_exit; - } else if (match_flags & FI_PEEK) { - ret = __gnix_peek_request(req); - goto pdc_exit; - } - - if (req->msg.send_flags & GNIX_MSG_RENDEZVOUS) { - /* Matched rendezvous request. Start data movement. */ - GNIX_DEBUG(FI_LOG_EP_DATA, "matched RNDZV, req: %p\n", - req); - - /* - * this shouldn't happen - */ - if (OFI_UNLIKELY(req->vc == NULL)) { - GNIX_ERR(FI_LOG_EP_DATA, - "fab req vc field NULL"); - } - - /* Check if second GET for unaligned data is needed. */ - if (req->msg.send_info[0].send_len > req->msg.recv_info[0].recv_len && - ((req->msg.send_info[0].send_addr + req->msg.recv_info[0].recv_len) & - GNI_READ_ALIGN_MASK)) { - req->msg.recv_flags |= GNIX_MSG_GET_TAIL; - } - - /* Initiate pull of source data. */ - req->work_fn = req->msg.send_iov_cnt == 1 ? - __gnix_rndzv_req : __gnix_rndzv_iov_req_build; - - _gnix_remove_tag(unexp_queue, req); - ret = _gnix_vc_queue_work_req(req); - - } else { - /* Matched eager request. Copy data and generate - * completions. */ - GNIX_DEBUG(FI_LOG_EP_DATA, "Matched recv, req: %p\n", - req); - - req->msg.cum_send_len = req->msg.send_info[0].send_len; - - /* Send length is truncated to receive buffer size. 
*/ - req->msg.send_info[0].send_len = - MIN(req->msg.send_info[0].send_len, - req->msg.recv_info[0].recv_len); - - /* Copy data from unexpected eager receive buffer. */ - memcpy((void *)buf, (void *)req->msg.send_info[0].send_addr, - req->msg.send_info[0].send_len); - free((void *)req->msg.send_info[0].send_addr); - - _gnix_remove_tag(unexp_queue, req); - __gnix_msg_recv_completion(ep, req); - - _gnix_fr_free(ep, req); - } - } else { - - /* - * if handling a multi receive request, - * just return - */ - if (mrecv_req) - goto mrecv_exit; - - /* if peek/claim/discard, we didn't find what we - * were looking for, return FI_ENOMSG - */ - if (match_flags) { - __recv_err(ep, context, flags, len, - (void *)buf, 0, tag, len, FI_ENOMSG, - FI_ENOMSG, NULL, 0); - - /* if handling trecvmsg flags, return here - * Never post a receive request from this type of context - */ - ret = FI_SUCCESS; - goto pdc_exit; - } - - req = _gnix_fr_alloc(ep); - if (req == NULL) { - ret = -FI_EAGAIN; - goto err; - } - - GNIX_DEBUG(FI_LOG_EP_DATA, "New recv, req: %p\n", req); - - req->type = GNIX_FAB_RQ_RECV; - - req->addr = gnix_addr; - req->gnix_ep = ep; - req->user_context = context; - - req->msg.recv_info[0].recv_addr = (uint64_t)buf; - req->msg.recv_info[0].recv_len = len; - req->msg.cum_recv_len = len; - - if (mdesc) { - md = container_of(mdesc, - struct gnix_fid_mem_desc, - mr_fid); - - req->msg.recv_info[0].mem_hndl = md->mem_hndl; - } - - req->msg.recv_md[0] = md; - req->msg.send_iov_cnt = req->msg.recv_iov_cnt = 1; - req->msg.recv_flags = flags; - req->msg.tag = tag; - req->msg.ignore = ignore; - req->msg.parent = NULL; - ofi_atomic_initialize32(&req->msg.outstanding_txds, 0); - - if ((flags & GNIX_SUPPRESS_COMPLETION) || - (ep->recv_selective_completion && - !(flags & FI_COMPLETION))) { - req->msg.recv_flags &= ~FI_COMPLETION; - } else { - req->msg.recv_flags |= FI_COMPLETION; - } - _gnix_insert_tag(posted_queue, tag, req, ignore); - } - -mrecv_exit: -pdc_exit: -err: - 
COND_RELEASE(ep->requires_lock, &ep->vc_lock); - - return ret; -} - -ssize_t _gnix_recv_mr(struct gnix_fid_ep *ep, uint64_t buf, size_t len, - void *mdesc, uint64_t src_addr, void *context, - uint64_t flags, uint64_t tag, uint64_t ignore) -{ - int ret; - int tagged = !!(flags & FI_TAGGED); - struct gnix_fab_req *mrecv_req = NULL; - struct gnix_address gnix_addr; - struct gnix_fid_mem_desc *md = NULL; - struct fid_mr *auto_mr = NULL; - struct gnix_tag_storage *posted_queue = NULL; - struct gnix_tag_storage *unexp_queue = NULL; - uint64_t last_space_left; - - assert(flags & FI_MULTI_RECV); - - mrecv_req = _gnix_fr_alloc_w_cb(ep, __gnix_msg_mrecv_completion); - if (mrecv_req == NULL) { - return -FI_ENOMEM; - } - - mrecv_req->type = GNIX_FAB_RQ_MRECV; - - ret = __gnix_msg_addr_lookup(ep, src_addr, &gnix_addr); - if (ret != FI_SUCCESS) - return ret; - - mrecv_req->addr = gnix_addr; - mrecv_req->gnix_ep = ep; - mrecv_req->user_context = context; - - mrecv_req->msg.mrecv_buf_addr = (uint64_t)buf; - mrecv_req->msg.mrecv_space_left = len; - mrecv_req->msg.recv_flags = flags; - - if (mdesc) { - md = container_of(mdesc, - struct gnix_fid_mem_desc, - mr_fid); - - mrecv_req->msg.recv_info[0].mem_hndl = md->mem_hndl; - mrecv_req->msg.recv_md[0] = md; - } else { - ret = _gnix_mr_reg(&ep->domain->domain_fid.fid, - (void *)mrecv_req->msg.mrecv_buf_addr, - mrecv_req->msg.mrecv_space_left, - FI_READ | FI_WRITE, 0, 0, 0, - &auto_mr, NULL, ep->auth_key, GNIX_PROV_REG); - if (ret != FI_SUCCESS) { - GNIX_DEBUG(FI_LOG_EP_DATA, - "Failed to auto-register local buffer: %s\n", - fi_strerror(-ret)); - - return -FI_EAGAIN; - } - mrecv_req->msg.recv_flags |= FI_LOCAL_MR; - mrecv_req->msg.recv_md[0] = container_of(auto_mr, - struct gnix_fid_mem_desc, - mr_fid); - mrecv_req->msg.recv_info[0].mem_hndl = - mrecv_req->msg.recv_md[0]->mem_hndl; - GNIX_DEBUG(FI_LOG_EP_DATA, "auto-reg MR: %p\n", auto_mr); - } - - if (!tagged) { - mrecv_req->msg.tag = 0; - mrecv_req->msg.ignore = ~0; - } else { - 
mrecv_req->msg.tag = tag; - mrecv_req->msg.ignore = ignore; - } - - if ((flags & GNIX_SUPPRESS_COMPLETION) || - (ep->recv_selective_completion && - !(flags & FI_COMPLETION))) { - mrecv_req->msg.recv_flags &= ~FI_COMPLETION; - } else { - mrecv_req->msg.recv_flags |= FI_COMPLETION; - } - - last_space_left = mrecv_req->msg.mrecv_space_left; - - do { - ret = _gnix_recv(ep, - mrecv_req->msg.mrecv_buf_addr, - mrecv_req->msg.mrecv_space_left, - mdesc, - src_addr, - context, - mrecv_req->msg.recv_flags, - mrecv_req->msg.tag, - mrecv_req->msg.ignore, - mrecv_req); - if (ret != FI_SUCCESS) { - _gnix_fr_free(ep, mrecv_req); - return ret; - } - if ((last_space_left == mrecv_req->msg.mrecv_space_left) || - (mrecv_req->msg.mrecv_space_left < ep->min_multi_recv)) - break; - last_space_left = mrecv_req->msg.mrecv_space_left; - } while (1); - - /* - * if space left in multi receive request - * add to posted receive queue. - * Otherwise free request via put ref. - */ - if ((int64_t)mrecv_req->msg.mrecv_space_left > ep->min_multi_recv) { - __gnix_msg_queues(ep, tagged, &posted_queue, &unexp_queue); - _gnix_insert_tag(posted_queue, tag, mrecv_req, ignore); - } else { - _gnix_ref_put(mrecv_req); - } - - return ret; -} - -/******************************************************************************* - * Generic EP send handling - ******************************************************************************/ - -static int _gnix_send_req(void *arg) -{ - struct gnix_fab_req *req = (struct gnix_fab_req *)arg; - struct gnix_nic *nic; - struct gnix_fid_ep *ep; - struct gnix_tx_descriptor *tdesc; - gni_return_t status; - int rc; - int rendezvous = !!(req->msg.send_flags & GNIX_MSG_RENDEZVOUS); - int hdr_len, data_len; - void *hdr, *data; - int tag; - int inject_err = _gnix_req_inject_smsg_err(req); - - ep = req->gnix_ep; - assert(ep != NULL); - - nic = ep->nic; - assert(nic != NULL); - - rc = _gnix_nic_tx_alloc(nic, &tdesc); - if (rc != FI_SUCCESS) { - GNIX_DEBUG(FI_LOG_EP_DATA, 
"_gnix_nic_tx_alloc() failed: %d\n", - rc); - return -FI_ENOSPC; - } - assert(rc == FI_SUCCESS); - - if (OFI_UNLIKELY(rendezvous)) { - switch(req->type) { - - case GNIX_FAB_RQ_SEND: - case GNIX_FAB_RQ_TSEND: - assert(req->msg.send_md); - tag = GNIX_SMSG_T_RNDZV_START; - tdesc->rndzv_start_hdr.flags = req->msg.send_flags; - tdesc->rndzv_start_hdr.imm = req->msg.imm; - tdesc->rndzv_start_hdr.msg_tag = req->msg.tag; - tdesc->rndzv_start_hdr.mdh = req->msg.send_info[0].mem_hndl; - tdesc->rndzv_start_hdr.addr = req->msg.send_info[0].send_addr; - tdesc->rndzv_start_hdr.len = req->msg.send_info[0].send_len; - tdesc->rndzv_start_hdr.req_addr = (uint64_t)req; - - if (req->msg.send_info[0].send_addr & GNI_READ_ALIGN_MASK) { - tdesc->rndzv_start_hdr.head = - *(uint32_t *)(req->msg.send_info[0].send_addr & - ~GNI_READ_ALIGN_MASK); - GNIX_DEBUG(FI_LOG_EP_DATA, - "Sending %d unaligned head bytes (%x)\n", - GNI_READ_ALIGN - - (req->msg.send_info[0].send_addr & - GNI_READ_ALIGN_MASK), - tdesc->rndzv_start_hdr.head); - } - - if ((req->msg.send_info[0].send_addr + - req->msg.send_info[0].send_len) & - GNI_READ_ALIGN_MASK) { - tdesc->rndzv_start_hdr.tail = - *(uint32_t *)((req->msg.send_info[0].send_addr + - req->msg.send_info[0].send_len) & - ~GNI_READ_ALIGN_MASK); - GNIX_DEBUG(FI_LOG_EP_DATA, - "Sending %d unaligned tail bytes (%x)\n", - (req->msg.send_info[0].send_addr + - req->msg.send_info[0].send_len) & - GNI_READ_ALIGN_MASK, - tdesc->rndzv_start_hdr.tail); - } - - hdr = &tdesc->rndzv_start_hdr; - hdr_len = sizeof(tdesc->rndzv_start_hdr); - /* TODO: Unify send&sendv/recv&recvv, so data will be - * req->msg.send_info */ - data = NULL; - data_len = 0; - - break; - - case GNIX_FAB_RQ_SENDV: - case GNIX_FAB_RQ_TSENDV: - assert(req->msg.send_md[0]); - tag = GNIX_SMSG_T_RNDZV_IOV_START; - tdesc->rndzv_iov_start_hdr.flags = req->msg.send_flags; - tdesc->rndzv_iov_start_hdr.imm = req->msg.imm; - tdesc->rndzv_iov_start_hdr.msg_tag = req->msg.tag; - tdesc->rndzv_iov_start_hdr.iov_cnt = - 
req->msg.send_iov_cnt; - tdesc->rndzv_iov_start_hdr.req_addr = (uint64_t) req; - tdesc->rndzv_iov_start_hdr.send_len = req->msg.cum_send_len; - - /* Send data at unaligned bytes in the iov addresses - * within the data section of the control message so - * that the remote peer can pull from four byte aligned - * addresses and still transfer all the data. */ - __gnix_msg_send_alignment(req); - - data_len = sizeof(struct send_info_t) * req->msg.send_iov_cnt; - data = (void *) req->msg.send_info; - hdr_len = sizeof(tdesc->rndzv_iov_start_hdr); - hdr = &tdesc->rndzv_iov_start_hdr; - -#if ENABLE_DEBUG - int i; - for (i = 0; i < req->msg.send_iov_cnt; i++) { - GNIX_DEBUG(FI_LOG_EP_DATA, - "data[%d].send_addr = 0x%ux, " - "data[%d].send_len = %lu, " - "data[%d].mem_hndl = %hxx\n", i, - ((struct send_info_t *)data)[i].send_addr, - i, - ((struct send_info_t *)data)[i].send_len, - i, - ((struct send_info_t *)data)[i].mem_hndl); - - } -#endif - break; - default: - GNIX_FATAL(FI_LOG_EP_DATA, "Invalid request type: %d\n", req->type); - return -FI_EINVAL; - } - } else { - tag = GNIX_SMSG_T_EGR_W_DATA; - - tdesc->eager_hdr.flags = req->msg.send_flags; - tdesc->eager_hdr.imm = req->msg.imm; - tdesc->eager_hdr.msg_tag = req->msg.tag; - tdesc->eager_hdr.len = req->msg.cum_send_len; - - hdr = &tdesc->eager_hdr; - hdr_len = sizeof(tdesc->eager_hdr); - data = (void *)req->msg.send_info[0].send_addr; - /* If this is not rndzv the send length should always be the - * cumulative length of all the send_info lengths */ - data_len = req->msg.cum_send_len; - } - tdesc->req = req; - tdesc->completer_fn = gnix_ep_smsg_completers[tag]; - - COND_ACQUIRE(nic->requires_lock, &nic->lock); - - if (OFI_UNLIKELY(inject_err)) { - _gnix_nic_txd_err_inject(nic, tdesc); - status = GNI_RC_SUCCESS; - } else { - status = GNI_SmsgSendWTag(req->vc->gni_ep, - hdr, hdr_len, data, data_len, - tdesc->id, tag); - } - - /* - * if this is a rendezvous message, we want to generate - * IRQ at remote peer. 
- * TODO: Do we need to do this for sendv? - */ - if ((status == GNI_RC_SUCCESS) && - (tag == GNIX_SMSG_T_RNDZV_START || - tag == GNIX_SMSG_T_RNDZV_IOV_START)) - _gnix_rma_post_irq(req->vc); - - COND_RELEASE(nic->requires_lock, &nic->lock); - - if (status == GNI_RC_NOT_DONE) { - _gnix_nic_tx_free(nic, tdesc); - GNIX_DEBUG(FI_LOG_EP_DATA, - "GNI_SmsgSendWTag returned %s\n", - gni_err_str[status]); - } else if (status != GNI_RC_SUCCESS) { - _gnix_nic_tx_free(nic, tdesc); - GNIX_WARN(FI_LOG_EP_DATA, - "GNI_SmsgSendWTag returned %s\n", - gni_err_str[status]); - } - - return gnixu_to_fi_errno(status); -} - -ssize_t _gnix_send(struct gnix_fid_ep *ep, uint64_t loc_addr, size_t len, - void *mdesc, uint64_t dest_addr, void *context, - uint64_t flags, uint64_t data, uint64_t tag) -{ - int ret = FI_SUCCESS; - struct gnix_vc *vc = NULL; - struct gnix_fab_req *req; - struct gnix_fid_mem_desc *md = NULL; - int rendezvous; - struct fid_mr *auto_mr = NULL; - int connected; - - if (!ep->send_cq && !ep->send_cntr) { - return -FI_ENOCQ; - } - - if (flags & FI_TRIGGER) { - struct fi_triggered_context *trigger_context = - (struct fi_triggered_context *)context; - if ((trigger_context->event_type != FI_TRIGGER_THRESHOLD) || - (flags & FI_INJECT)) { - return -FI_EINVAL; - } - } - - if ((flags & FI_INJECT) && (len > GNIX_INJECT_SIZE)) { - GNIX_DEBUG(FI_LOG_EP_DATA, - "Send length %d exceeds inject max size: %d\n", - len, GNIX_INJECT_SIZE); - return -FI_EINVAL; - } - - if (!(flags & FI_TAGGED)) { - if (!ep->ep_ops.msg_send_allowed) - return -FI_EOPNOTSUPP; - } else { - if (!ep->ep_ops.tagged_send_allowed) - return -FI_EOPNOTSUPP; - } - - rendezvous = len >= ep->domain->params.msg_rendezvous_thresh; - - /* need a memory descriptor for large sends */ - if (rendezvous && !mdesc) { - ret = _gnix_mr_reg(&ep->domain->domain_fid.fid, (void *)loc_addr, - len, FI_READ | FI_WRITE, 0, - 0, 0, - &auto_mr, NULL, ep->auth_key, - GNIX_PROV_REG); - if (ret != FI_SUCCESS) { - GNIX_DEBUG(FI_LOG_EP_DATA, - 
"Failed to auto-register local buffer: %s\n", - fi_strerror(-ret)); - return ret; - } - flags |= FI_LOCAL_MR; - mdesc = (void *)auto_mr; - GNIX_DEBUG(FI_LOG_EP_DATA, "auto-reg MR: %p\n", auto_mr); - } - - req = _gnix_fr_alloc(ep); - if (req == NULL) { - return -FI_ENOSPC; - } - - req->type = GNIX_FAB_RQ_SEND; - req->gnix_ep = ep; - req->user_context = context; - req->work_fn = _gnix_send_req; - - if (flags & FI_TAGGED) { - req->msg.tag = tag; - } else { - /* Make sure zeroed tag ends up in the send CQE. */ - req->msg.tag = 0; - } - - if (mdesc) { - md = container_of(mdesc, struct gnix_fid_mem_desc, mr_fid); - req->msg.send_info[0].mem_hndl = md->mem_hndl; - } - req->msg.send_md[0] = md; - req->msg.send_iov_cnt = 1; - req->msg.send_flags = flags; - req->msg.send_info[0].send_len = len; - req->msg.cum_send_len = len; - req->msg.imm = data; - req->flags = flags; - - if (flags & FI_INJECT) { - memcpy(req->inject_buf, (void *)loc_addr, len); - req->msg.send_info[0].send_addr = (uint64_t)req->inject_buf; - } else { - req->msg.send_info[0].send_addr = loc_addr; - } - - if ((flags & GNIX_SUPPRESS_COMPLETION) || - (ep->send_selective_completion && - !(flags & FI_COMPLETION))) { - req->msg.send_flags &= ~FI_COMPLETION; - } else { - req->msg.send_flags |= FI_COMPLETION; - } - - if (rendezvous) { - /* - * this initialization is not necessary currently - * but is a place holder in the event a RDMA write - * path is implemented for rendezvous - */ - ofi_atomic_initialize32(&req->msg.outstanding_txds, 0); - req->msg.send_flags |= GNIX_MSG_RENDEZVOUS; - } - - GNIX_DEBUG(FI_LOG_EP_DATA, "Queuing (%p %d)\n", - (void *)loc_addr, len); - - COND_ACQUIRE(ep->requires_lock, &ep->vc_lock); - - ret = _gnix_vc_ep_get_vc(ep, dest_addr, &vc); - if (ret) { - goto err_get_vc; - } - - req->vc = vc; - - ret = _gnix_vc_queue_tx_req(req); - connected = (vc->conn_state == GNIX_VC_CONNECTED); - - COND_RELEASE(ep->requires_lock, &ep->vc_lock); - - /* - * If a new VC was allocated, progress CM before 
returning. - * If the VC is connected and there's a backlog, poke - * the nic progress engine befure returning. - */ - if (!connected) { - _gnix_cm_nic_progress(ep->cm_nic); - } else if (!dlist_empty(&vc->tx_queue)) { - _gnix_nic_progress(vc->ep->nic); - } - - return ret; - -err_get_vc: - COND_RELEASE(ep->requires_lock, &ep->vc_lock); - _gnix_fr_free(ep, req); - if (flags & FI_LOCAL_MR) - fi_close(&auto_mr->fid); - return ret; -} - -ssize_t _gnix_recvv(struct gnix_fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, uint64_t src_addr, void *context, - uint64_t flags, uint64_t ignore, uint64_t tag) -{ - int i, ret = FI_SUCCESS; - size_t cum_len = 0; - struct gnix_fab_req *req = NULL; - struct gnix_address gnix_addr; - struct gnix_tag_storage *posted_queue = NULL; - struct gnix_tag_storage *unexp_queue = NULL; - uint64_t match_flags; - int tagged = flags & FI_TAGGED; - - if (!ep->recv_cq && !ep->recv_cntr) { - return -FI_ENOCQ; - } - - if (!tagged) { - if (!ep->ep_ops.msg_send_allowed) - return -FI_EOPNOTSUPP; - - tag = 0; - ignore = ~0; - } else { - if (!ep->ep_ops.tagged_send_allowed) - return -FI_EOPNOTSUPP; - } - - match_flags = flags & (FI_CLAIM | FI_DISCARD | FI_PEEK); - - /* - * Lookup the gni addr in the av_table or av_hashtable. - * If the gni addr doesn't exist the addr is FI_ADDR_UNSPEC, - * meaning this remote node wants to receive from all senders? - */ - ret = __gnix_msg_addr_lookup(ep, src_addr, &gnix_addr); - if (ret != FI_SUCCESS) - return ret; - - /* calculate cumulative size of the iovec buf lens */ - for (i = 0; i < count; i++) { - cum_len += iov[i].iov_len; - } - - /* - * Initialize the tag storage objects. - * The posted_queue holds information about receives that have - * been posted on the remote endpoint. - * - * The unexp_queue holds information about data that has arrived - * prior to posting a receive on the remote endpoint. 
- * - * Both {unexp,posted}_queue objects have two sets, one for tagged - * messages and the other for untagged messages. - * - * The untagged queues match based off the source address. - * - * The tagged queues match based off the tag and source address (when - * the ep is created with FI_DIRECTED_RECV). - * - * A "message" is added to the unexpected queue when it arrives at a - * remote endpoint and the completer_fn doesn't find an existing request - * in the posted queue (i.e. no fi_recvs have been called (or posted) - * on the remote endpoint). - */ - __gnix_msg_queues(ep, tagged, &posted_queue, &unexp_queue); - - COND_ACQUIRE(ep->requires_lock, &ep->vc_lock); - - /* - * Posting a recv, look for an existing request in the - * unexpected queue. - */ - req = _gnix_match_tag(unexp_queue, tag, ignore, - match_flags, context, &gnix_addr); - - if (req) { - GNIX_DEBUG(FI_LOG_EP_DATA, "UNEXPECTED, req = %p\n", req); - /* Found a matching request in the unexpected queue. */ - - /* - * reset ep, it might be different than the ep the message came - * in on. - */ - req->gnix_ep = ep; - req->user_context = context; - req->flags = 0; - req->msg.recv_flags = flags; - req->msg.recv_iov_cnt = count; - req->msg.cum_recv_len = cum_len; - /* req->msg.cum_send_len = MIN(req->msg.cum_send_len, cum_len); */ - - if (tagged) { - req->type = GNIX_FAB_RQ_TRECVV; - } else { - req->type = GNIX_FAB_RQ_RECVV; - } - - if ((flags & GNIX_SUPPRESS_COMPLETION) || - (ep->recv_selective_completion && - !(flags & FI_COMPLETION))) { - req->msg.recv_flags &= ~FI_COMPLETION; - } else { - req->msg.recv_flags |= FI_COMPLETION; - } - - /* Check to see if we are using P/C/D matching flags. 
*/ - if (match_flags & FI_DISCARD) { - ret = __gnix_discard_request(req); - goto pdc_exit; - } else if (match_flags & FI_PEEK) { - ret = __gnix_peek_request(req); - goto pdc_exit; - } - - for (i = 0; i < count; i++) { - req->msg.recv_info[i].recv_addr = (uint64_t) iov[i].iov_base; - req->msg.recv_info[i].recv_len = iov[i].iov_len; - } - - if (req->msg.send_flags & GNIX_MSG_RENDEZVOUS) { - req->work_fn = __gnix_rndzv_iov_req_build; - if (!(req->vc->modes & GNIX_VC_MODE_XPMEM)) { - if (!desc) { - ret = __gnix_msg_register_iov(ep, - iov, - count, - req->msg.recv_md); - if (ret != FI_SUCCESS) { - GNIX_DEBUG(FI_LOG_EP_DATA, - "Failed to " - "auto-register" - " local buffer: %s\n" - , fi_strerror(-ret)); - goto err; - } - req->msg.send_flags |= FI_LOCAL_MR; - - } else { /* User registered their memory */ - - for (i = 0; i < count; i++) { - if (!desc[i]) { - GNIX_WARN(FI_LOG_EP_DATA, - "invalid memory reg" - "istration (%p).\n", - desc[i]); - ret = -FI_EINVAL; - goto err; - } - - req->msg.recv_md[i] = - container_of(desc[i], - struct gnix_fid_mem_desc, - mr_fid); - } - } - - for (i = 0; i < count; i++) - req->msg.recv_info[i].mem_hndl = - req->msg.recv_md[i]->mem_hndl; - } - - ret = _gnix_vc_queue_work_req(req); - } else { - - /* - * This request is associate with a regular eager smsg, - * the rndzv threshold on the sender was not reached or - * exceeded. 
- */ - __gnix_msg_unpack_data_into_iov(req->msg.recv_info, - count, - req->msg.send_info[0].send_addr, - req->msg.send_info[0].send_len); - - __gnix_msg_recv_completion(ep, req); - _gnix_fr_free(ep, req); - } - } else { - /* if peek/claim/discard, we didn't find what we - * were looking for, return FI_ENOMSG - */ - if (match_flags) { - __recv_err(ep, context, flags, cum_len, - (void *) iov, 0, tag, cum_len, FI_ENOMSG, - FI_ENOMSG, NULL, 0); - - /* if handling trecvmsg flags, return here - * Never post a receive request from this type of - * context - */ - ret = FI_SUCCESS; - goto pdc_exit; - } - - /* - * No matching requests found, create a new one and enqueue - * it in the posted queue. - */ - req = _gnix_fr_alloc(ep); - if (req == NULL) { - ret = -FI_EAGAIN; - goto err; - } - - GNIX_DEBUG(FI_LOG_EP_DATA, "EXPECTED, req = %p\n", req); - - if (tagged) { - req->type = GNIX_FAB_RQ_TRECVV; - } else { - req->type = GNIX_FAB_RQ_RECVV; - } - - if ((flags & GNIX_SUPPRESS_COMPLETION) || - (ep->recv_selective_completion && - !(flags & FI_COMPLETION))) { - req->msg.recv_flags &= ~FI_COMPLETION; - } else { - req->msg.recv_flags |= FI_COMPLETION; - } - - req->addr = gnix_addr; - req->gnix_ep = ep; - req->user_context = context; - req->flags = 0; - - for (i = 0; i < count; i++) { - req->msg.recv_info[i].recv_addr = (uint64_t) iov[i].iov_base; - req->msg.recv_info[i].recv_len = iov[i].iov_len; - req->msg.recv_md[i] = NULL; - } - - req->msg.recv_iov_cnt = count; - req->msg.recv_flags = flags; - req->msg.cum_recv_len = cum_len; - req->msg.tag = tag; - req->msg.ignore = ignore; - req->msg.parent = NULL; - ofi_atomic_initialize32(&req->msg.outstanding_txds, 0); - - - if ((flags & GNIX_SUPPRESS_COMPLETION) || - (ep->recv_selective_completion && - !(flags & FI_COMPLETION))) { - req->msg.recv_flags &= ~FI_COMPLETION; - } else { - req->msg.recv_flags |= FI_COMPLETION; - } - - _gnix_insert_tag(posted_queue, tag, req, ignore); - } - -pdc_exit: -err: - COND_RELEASE(ep->requires_lock, 
&ep->vc_lock); - - return ret; -} - -ssize_t _gnix_sendv(struct gnix_fid_ep *ep, const struct iovec *iov, - void **mdesc, size_t count, uint64_t dest_addr, - void *context, uint64_t flags, uint64_t tag) -{ - int i, ret = FI_SUCCESS; - unsigned long long cum_len = 0; - void *tmp = NULL; - struct gnix_vc *vc = NULL; - struct gnix_fab_req *req = NULL; - struct fid_mr *auto_mr; - int connected; - - GNIX_DEBUG(FI_LOG_EP_DATA, "iov_count = %lu\n", count); - - if (!ep->send_cq && !ep->send_cntr) { - return -FI_ENOCQ; - } - - if (!(flags & FI_TAGGED)) { - if (!ep->ep_ops.msg_send_allowed) - return -FI_EOPNOTSUPP; - } else { - if (!ep->ep_ops.tagged_send_allowed) - return -FI_EOPNOTSUPP; - } - - req = _gnix_fr_alloc(ep); - if (req == NULL) { - return -FI_ENOSPC; - } - - GNIX_DEBUG(FI_LOG_EP_DATA, "Created req - %p\n", req); - - /* calculate cumulative size of the iovec buf lens */ - for (i = 0; i < count; i++) { - /* TODO: handle possible overflow */ - cum_len += iov[i].iov_len; - - GNIX_DEBUG(FI_LOG_EP_DATA, "iov[%d].iov_len = %lu\n", i, iov[i].iov_len); - } - - /* Fill out fabric request */ - if (flags & FI_TAGGED) { - req->type = GNIX_FAB_RQ_TSENDV; - req->msg.tag = tag; - req->msg.ignore = 0; - } else { - req->type = GNIX_FAB_RQ_SENDV; - req->msg.tag = 0; - req->msg.ignore = ~0; - } - - req->gnix_ep = ep; - req->user_context = context; - req->work_fn = _gnix_send_req; - req->flags = flags; - req->msg.send_flags = flags; - req->msg.imm = 0; - req->msg.parent = NULL; - - /* - * If the cum_len is >= ep->domain->params.msg_rendezvous_thresh - * transfer the iovec entries individually. - * - * For this case, use CtPostFma for iovec lengths that are smaller than - * the rendezvous thresh. For CtPostFma: - * the sum of the iov lens must be either <= 1GB or <= 1MB if the comm - * dom is configured with FmaSharing. - * otherwise use PostRdma. 
- */ - if (cum_len >= ep->domain->params.msg_rendezvous_thresh) { - if (!mdesc) { /* Register the memory for the user */ - for (i = 0; i < count; i++) { - auto_mr = NULL; - - ret = _gnix_mr_reg(&ep->domain->domain_fid.fid, - iov[i].iov_base, - iov[i].iov_len, - FI_READ | FI_WRITE, 0, 0, 0, - &auto_mr, NULL, ep->auth_key, GNIX_PROV_REG); - - if (ret != FI_SUCCESS) { - GNIX_DEBUG(FI_LOG_EP_DATA, - "Failed to auto-register" - " local buffer: %s\n", - fi_strerror(-ret)); - - for (i--; i >= 0; i--) { - ret = fi_close(&req->msg.send_md[i]->mr_fid.fid); - if (ret != FI_SUCCESS) { - GNIX_FATAL(FI_LOG_DOMAIN, - "failed to release auto-registered region, " - "rc=%d\n", ret); - } - } - - goto err_mr_reg; - } - - req->msg.send_md[i] = container_of( - (void *) auto_mr, - struct gnix_fid_mem_desc, - mr_fid); - - req->msg.send_info[i].send_addr = (uint64_t) iov[i].iov_base; - req->msg.send_info[i].send_len = iov[i].iov_len; - req->msg.send_info[i].mem_hndl = - req->msg.send_md[i]->mem_hndl; - - GNIX_DEBUG(FI_LOG_EP_DATA, "iov[%d].iov_len = %lu," - " req->msg.send_info[%d].send_addr = " - "%p, req->msg.send_info[%d].send_len " - "= %lu\n", i, iov[i].iov_len, i, - (void *) req->msg.send_info[i].send_addr, - i, req->msg.send_info[i].send_len); - - GNIX_DEBUG(FI_LOG_EP_DATA, "req->msg.send_md[%d] " - "= %p\n", i, - req->msg.send_md[i]); - - GNIX_DEBUG(FI_LOG_EP_DATA, "auto-reg MR: %p\n", - req->msg.send_md[i]); - - } - - req->msg.send_flags |= FI_LOCAL_MR; - } else { /* User registered their memory */ - for (i = 0; i < count; i++) { - if (!mdesc[i]) { - GNIX_WARN(FI_LOG_EP_DATA, - "invalid memory reg" - "istration (%p).\n", - mdesc[i]); - ret = -FI_EINVAL; - goto err_mr_reg; - } - - req->msg.send_md[i] = - container_of(mdesc[i], - struct gnix_fid_mem_desc, - mr_fid); - - req->msg.send_info[i].send_addr = (uint64_t) iov[i].iov_base; - req->msg.send_info[i].send_len = iov[i].iov_len; - req->msg.send_info[i].mem_hndl = - req->msg.send_md[i]->mem_hndl; - } - } - - req->msg.send_iov_cnt = 
count; - req->msg.send_flags |= GNIX_MSG_RENDEZVOUS; - } else { - /* - * TODO: Use buddy allocator with max alloc lim of - * ep->domain->params.msg_rendezvous_thresh - */ - /* This is freed in __comp_eager_msg_w_data */ - tmp = malloc(cum_len); - assert(tmp != NULL); - - __gnix_msg_pack_data_from_iov((uint64_t) tmp, cum_len, - iov, count); - req->msg.send_info[0].send_addr = (uint64_t) tmp; - req->msg.send_info[0].send_len = cum_len; - } - - if ((flags & GNIX_SUPPRESS_COMPLETION) || - (ep->send_selective_completion && - !(flags & FI_COMPLETION))) { - req->msg.send_flags &= ~FI_COMPLETION; - } else { - req->msg.send_flags |= FI_COMPLETION; - } - - req->msg.cum_send_len = (size_t) cum_len; - - COND_ACQUIRE(ep->requires_lock, &ep->vc_lock); - - ret = _gnix_vc_ep_get_vc(ep, dest_addr, &vc); - if (ret != FI_SUCCESS) { - goto err_get_vc; - } - - req->vc = vc; - - ret = _gnix_vc_queue_tx_req(req); - connected = (vc->conn_state == GNIX_VC_CONNECTED); - - COND_RELEASE(ep->requires_lock, &ep->vc_lock); - - /* - * If a new VC was allocated, progress CM before returning. - * If the VC is connected and there's a backlog, poke - * the nic progress engine befure returning. - */ - if (!connected) { - _gnix_cm_nic_progress(ep->cm_nic); - } else if (!dlist_empty(&vc->tx_queue)) { - _gnix_nic_progress(vc->ep->nic); - } - - return ret; - -err_get_vc: - COND_RELEASE(ep->requires_lock, &ep->vc_lock); - if (req->msg.send_flags & FI_LOCAL_MR) { - for (i = 0; i < count; i++) { - fi_close(&req->msg.send_md[i]->mr_fid.fid); - } - } -err_mr_reg: - _gnix_fr_free(ep, req); - - return ret; -} diff --git a/prov/gni/src/gnix_nameserver.c b/prov/gni/src/gnix_nameserver.c deleted file mode 100644 index 94c90f72ee7..00000000000 --- a/prov/gni/src/gnix_nameserver.c +++ /dev/null @@ -1,437 +0,0 @@ -/* - * Copyright (c) 2014 Intel Corporation, Inc. All rights reserved. - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. - * Allrights reserved. - * Copyright (c) 2015 Cray Inc. 
All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#if HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gnix.h" -#include "gnix_util.h" - -#define BUF_SIZE 256 - -static int __gnix_ipaddr_from_iface(const char *iface, struct sockaddr_in *sin) -{ - int ret = FI_SUCCESS; - struct ifreq ifr = { { { 0 } } }; - int sock = -1; - - GNIX_TRACE(FI_LOG_FABRIC, "\n"); - - sock = socket(AF_INET, SOCK_DGRAM, 0); - if (sock == -1) { - GNIX_WARN(FI_LOG_FABRIC, "Socket creation failed: %s\n", - strerror(errno)); - return -FI_EIO; - } - - ifr.ifr_addr.sa_family = AF_INET; - snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", iface); - - ret = ioctl(sock, SIOCGIFADDR, &ifr); - if (ret == -1) { - GNIX_WARN(FI_LOG_FABRIC, - "Failed to get address for ipogif0: %s\n", - strerror(errno)); - goto exit_w_sock; - ret = -FI_EIO; - } - - *sin = *(struct sockaddr_in *) &ifr.ifr_addr; - -exit_w_sock: - - if (close(sock) == -1) { - GNIX_WARN(FI_LOG_FABRIC, "Unable to close socket: %s\n", - strerror(errno)); - } - - return ret; - -} - -int _gnix_local_ipaddr(struct sockaddr_in *sin) -{ - int ret; - - /* - * Get the address for the ipogif0 interface. On nodes with KNC - * accelerators, the iface is br0. 
- */ - - ret = __gnix_ipaddr_from_iface("ipogif0", sin); - if (ret != FI_SUCCESS) - ret = __gnix_ipaddr_from_iface("br0", sin); - - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_FABRIC, - "Unable to obtain local iface addr\n"); - } - - return ret; -} - -static inline uint64_t __gnix_pe_to_mac(const uint32_t pe) -{ - return ((pe & 0x3ffff) | 0x000101000000); -} - -union mac_addr { - uint8_t octets[8]; - uint64_t u64; -}; - -/* - * IP address HW type Flags HW address Mask Device - * 10.128.0.9 0x1 0x6 00:01:01:00:00:08 * ipogif0 - */ -#define ARP_TABLE_FILE "/proc/net/arp" -#define ARP_TABLE_FORMAT "%s %*s %*s %s %*s %*s" - -int _gnix_pe_to_ip(const struct gnix_ep_name *ep_name, - struct sockaddr_in *saddr) -{ - int ret = -FI_EIO; - FILE *arp_table; - char buf[1024]; - char ip_str[128], mac_str[128]; - union mac_addr mac; - union mac_addr tmp_mac = {0}; - gni_return_t status; - uint32_t pe, cpu_id; - - status = GNI_CdmGetNicAddress(0, &pe, &cpu_id); - if (status == GNI_RC_SUCCESS && - ep_name->gnix_addr.device_addr == pe) { - ret = _gnix_local_ipaddr(saddr); - saddr->sin_port = ep_name->gnix_addr.cdm_id; - return ret; - } - - arp_table = fopen(ARP_TABLE_FILE, "r"); - if (!arp_table) { - GNIX_WARN(FI_LOG_FABRIC, "Failed to fopen(): %s\n", - ARP_TABLE_FILE); - return -FI_EIO; - } - - /* Eat header line. 
*/ - if (!fgets(buf, sizeof(buf), arp_table)) { - GNIX_WARN(FI_LOG_FABRIC, "Failed to fgets(): %s\n", - ARP_TABLE_FILE); - return -FI_EIO; - } - - mac.u64 = __gnix_pe_to_mac(ep_name->gnix_addr.device_addr); - - while (fscanf(arp_table, ARP_TABLE_FORMAT, ip_str, mac_str) == 2) { - ret = sscanf(mac_str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", - &tmp_mac.octets[5], &tmp_mac.octets[4], - &tmp_mac.octets[3], &tmp_mac.octets[2], - &tmp_mac.octets[1], &tmp_mac.octets[0]); - if (ret == 6) { - GNIX_DEBUG(FI_LOG_FABRIC, - "Comparing 0x%llx, 0x%llx\n", - mac.u64, tmp_mac.u64); - if (mac.u64 == tmp_mac.u64) { - saddr->sin_family = AF_INET; - saddr->sin_port = ep_name->gnix_addr.cdm_id; - saddr->sin_addr.s_addr = inet_addr(ip_str); - ret = FI_SUCCESS; - GNIX_DEBUG(FI_LOG_FABRIC, - "Translated %s->%s\n", - ip_str, mac_str); - break; - } - } else { - GNIX_WARN(FI_LOG_FABRIC, "Parse error: %d : %s\n", - ret, mac_str); - break; - } - } - - fclose(arp_table); - - return ret; -} - -/* - * get gni nic addr from AF_INET ip addr, also return local device id on same - *subnet - * as the input ip_addr. 
- * - * returns 0 if ipogif entry found - * otherwise -errno - */ -static int __gnix_get_pe_from_ip(const char *iface_name, const char *ip_addr, - uint32_t *gni_nic_addr) -{ - int scount; - /* return this if no ipgogif for this ip-addr found */ - int ret = -FI_ENODATA; - FILE *fd = NULL; - char line[BUF_SIZE], *tmp; - char dummy[64], iface[64], fnd_ip_addr[64]; - char mac_str[64]; - int w, x, y; - - GNIX_TRACE(FI_LOG_FABRIC, "\n"); - - fd = fopen("/proc/net/arp", "r"); - if (fd == NULL) { - return -errno; - } - - if (fd == NULL) { - return -errno; - } - - while (1) { - tmp = fgets(line, BUF_SIZE, fd); - if (!tmp) { - break; - } - - /* - * check for a match - */ - if ((strstr(line, ip_addr) != NULL) && - (strstr(line, iface_name) != NULL)) { - ret = 0; - scount = sscanf(line, "%s%s%s%s%s%s", fnd_ip_addr, - dummy, dummy, mac_str, dummy, iface); - if (scount != 6) { - ret = -EIO; - goto err; - } - - /* - * check exact match of ip addr - */ - if (!strcmp(fnd_ip_addr, ip_addr)) { - scount = - sscanf(mac_str, "00:01:01:%02x:%02x:%02x", - &w, &x, &y); - if (scount != 3) { - ret = -EIO; - goto err; - } - - /* - * mysteries of XE/XC mac to nid mapping, see - * nid2mac in xt sysutils - */ - *gni_nic_addr = (w << 16) | (x << 8) | y; - ret = FI_SUCCESS; - break; - } - } - } - -err: - fclose(fd); - return ret; -} - -/* - * gnix_resolve_name: given a node hint and a valid pointer to a gnix_ep_name - * will resolve the gnix specific address of node and fill the provided - * gnix_ep_name pointer with the information. - * - * node (IN) : Node name being resolved to gnix specific address - * service (IN) : Port number being resolved to gnix specific address - * resolved_addr (IN/OUT) : Pointer that must be provided to contain the - * resolved address. 
- */ -int _gnix_resolve_name(IN const char *node, IN const char *service, - IN uint64_t flags, - INOUT struct gnix_ep_name *resolved_addr) -{ - uint32_t pe = -1; - uint32_t cpu_id = -1; - struct addrinfo *result = NULL; - struct addrinfo *rp = NULL; - - struct sockaddr_in *sa = NULL; - struct sockaddr_in sin; - - int ret = FI_SUCCESS; - gni_return_t status = GNI_RC_SUCCESS; - - struct addrinfo hints = { - .ai_family = AF_INET, - .ai_socktype = SOCK_DGRAM, - }; - - GNIX_TRACE(FI_LOG_FABRIC, "\n"); - - if (flags & FI_SOURCE) - hints.ai_flags |= AI_PASSIVE; - - if (flags & FI_NUMERICHOST) - hints.ai_flags |= AI_NUMERICHOST; - - if (!resolved_addr) { - GNIX_WARN(FI_LOG_FABRIC, - "Resolved_addr must be a valid pointer.\n"); - ret = -FI_EINVAL; - goto err; - } - - ret = _gnix_local_ipaddr(&sin); - if (ret != FI_SUCCESS) - goto err; - - ret = getaddrinfo(node, service, &hints, &result); - if (ret != 0) { - GNIX_WARN(FI_LOG_FABRIC, - "Failed to get address for node provided: %s\n", - gai_strerror(ret)); - ret = -FI_EINVAL; - goto err; - } - - for (rp = result; rp != NULL; rp = rp->ai_next) { - assert(rp->ai_addr->sa_family == AF_INET); - sa = (struct sockaddr_in *) rp->ai_addr; - - /* - * If we are trying to resolve localhost then use - * CdmGetNicAddress. - */ - if (sa->sin_addr.s_addr == sin.sin_addr.s_addr) { - status = GNI_CdmGetNicAddress(0, &pe, &cpu_id); - if(status == GNI_RC_SUCCESS) { - break; - } else { - GNIX_WARN(FI_LOG_FABRIC, - "Unable to get NIC address."); - ret = gnixu_to_fi_errno(status); - goto err; - } - } else { - ret = __gnix_get_pe_from_ip("ipogif0", - inet_ntoa(sa->sin_addr), &pe); - if (ret == 0) { - break; - } else { - ret = __gnix_get_pe_from_ip("br0", - inet_ntoa(sa->sin_addr), &pe); - } - if (ret == 0) - break; - } - } - - /* - * Make sure address is valid. 
- */ - if (pe == -1) { - GNIX_WARN(FI_LOG_FABRIC, - "Unable to acquire valid address for node %s\n", - node); - ret = -FI_EADDRNOTAVAIL; - goto err; - } - - /* - * Fill the INOUT parameter resolved_addr with the address information - * acquired for the provided node parameter. - */ - memset(resolved_addr, 0, sizeof(struct gnix_ep_name)); - - resolved_addr->gnix_addr.device_addr = pe; - if (service) { - /* use resolved service/port */ - resolved_addr->gnix_addr.cdm_id = ntohs(sa->sin_port); - resolved_addr->name_type = GNIX_EPN_TYPE_BOUND; - resolved_addr->cm_nic_cdm_id = resolved_addr->gnix_addr.cdm_id; - } else { - /* generate port internally */ - resolved_addr->name_type = GNIX_EPN_TYPE_UNBOUND; - } - GNIX_INFO(FI_LOG_FABRIC, "Resolved: %s:%s to gnix_addr: 0x%lx\n", - node ?: "", service ?: "", resolved_addr->gnix_addr); -err: - if (result != NULL) { - freeaddrinfo(result); - } - return ret; -} - -int _gnix_src_addr(struct gnix_ep_name *resolved_addr) -{ - gni_return_t status; - uint32_t pe = -1; - uint32_t cpu_id = -1; - - assert(resolved_addr); - memset(resolved_addr, 0, sizeof(*resolved_addr)); - - status = GNI_CdmGetNicAddress(0, &pe, &cpu_id); - if (status != GNI_RC_SUCCESS) { - GNIX_WARN(FI_LOG_FABRIC, - "Unable to get NIC address."); - return -FI_ENODATA; - } - - resolved_addr->gnix_addr.device_addr = pe; - resolved_addr->name_type = GNIX_EPN_TYPE_UNBOUND; - - return FI_SUCCESS; -} diff --git a/prov/gni/src/gnix_nic.c b/prov/gni/src/gnix_nic.c deleted file mode 100644 index 195e9a62ad2..00000000000 --- a/prov/gni/src/gnix_nic.c +++ /dev/null @@ -1,1430 +0,0 @@ -/* - * Copyright (c) 2015-2018 Cray Inc. All rights reserved. - * Copyright (c) 2015-2018 Los Alamos National Security, LLC. - * All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include -#include -#include -#include -#include - -#include "gnix.h" -#include "gnix_nic.h" -#include "gnix_cm_nic.h" -#include "gnix_vc.h" -#include "gnix_mbox_allocator.h" -#include "gnix_util.h" -#include "fi_ext_gni.h" - -/* - * TODO: make this a domain parameter - */ -#define GNIX_VC_FL_MIN_SIZE 128 -#define GNIX_VC_FL_INIT_REFILL_SIZE 10 - -static int gnix_nics_per_ptag[GNI_PTAG_MAX]; -struct dlist_entry gnix_nic_list_ptag[GNI_PTAG_MAX]; -DLIST_HEAD(gnix_nic_list); -pthread_mutex_t gnix_nic_list_lock = PTHREAD_MUTEX_INITIALIZER; - -/* - * globals - */ - -uint32_t gnix_max_nics_per_ptag = GNIX_DEF_MAX_NICS_PER_PTAG; - -/* - * local variables - */ - -static struct gnix_nic_attr default_attr = { - .gni_cdm_hndl = NULL, - .gni_nic_hndl = NULL -}; - -/******************************************************************************* - * Helper functions. - ******************************************************************************/ - -/* - * this function is intended to be invoked as an argument to pthread_create, - */ -static void *__gnix_nic_prog_thread_fn(void *the_arg) -{ - int ret = FI_SUCCESS, prev_state; - int retry = 0; - uint32_t which; - struct gnix_nic *nic = (struct gnix_nic *)the_arg; - sigset_t sigmask; - gni_cq_handle_t cqv[2]; - gni_return_t status; - gni_cq_entry_t cqe; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - /* - * temporarily disable cancelability while we set up - * some stuff - */ - - pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &prev_state); - - /* - * help out Cray core-spec, say we're not an app thread - * and can be run on core-spec cpus. 
- */ - - ret = _gnix_task_is_not_app(); - if (ret) - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_task_is_not_app call returned %d\n", - ret); - - /* - * block all signals, don't want this thread to catch - * signals that may be for app threads - */ - - memset(&sigmask, 0, sizeof(sigset_t)); - ret = sigfillset(&sigmask); - if (ret) { - GNIX_WARN(FI_LOG_EP_CTRL, - "sigfillset call returned %d\n", ret); - } else { - - ret = pthread_sigmask(SIG_SETMASK, - &sigmask, NULL); - if (ret) - GNIX_WARN(FI_LOG_EP_CTRL, - "pthread_sigmask call returned %d\n", ret); - } - - /* - * okay now we're ready to be cancelable. - */ - - pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &prev_state); - - pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - - cqv[0] = nic->tx_cq_blk; - cqv[1] = nic->rx_cq_blk; - -try_again: - status = GNI_CqVectorMonitor(cqv, - 2, - -1, - &which); - - switch (status) { - case GNI_RC_SUCCESS: - - /* - * first dequeue RX CQEs - */ - if (nic->rx_cq_blk != nic->rx_cq && which == 1) { - do { - status = GNI_CqGetEvent(nic->rx_cq_blk, - &cqe); - } while (status == GNI_RC_SUCCESS); - } - pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &prev_state); - _gnix_nic_progress(nic); - pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &prev_state); - retry = 1; - break; - case GNI_RC_TIMEOUT: - case GNI_RC_NOT_DONE: - /* Invalid state indicates call interrupted by signal using various tools */ - case GNI_RC_INVALID_STATE: - retry = 1; - break; - case GNI_RC_INVALID_PARAM: - case GNI_RC_ERROR_RESOURCE: - case GNI_RC_ERROR_NOMEM: - retry = 0; - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_CqGetEvent returned %s\n", - gni_err_str[status]); - break; - default: - retry = 0; - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_CqGetEvent returned unexpected code %s\n", - gni_err_str[status]); - break; - } - - if (retry) - goto try_again; - - return NULL; -} - -/* - * setup memory registration for remote GNI_PostCqWrite's to target - */ - -static int __nic_setup_irq_cq(struct gnix_nic *nic) -{ - int ret = FI_SUCCESS; - 
size_t len; - gni_return_t status; - int fd = -1; - void *mmap_addr; - int vmdh_index = -1; - int flags = GNI_MEM_READWRITE; - struct gnix_auth_key *info; - struct fi_gni_auth_key key; - - len = (size_t)sysconf(_SC_PAGESIZE); - - mmap_addr = mmap(NULL, len, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANON, fd, 0); - if (mmap_addr == MAP_FAILED) { - GNIX_WARN(FI_LOG_EP_CTRL, "mmap failed - %s\n", - strerror(errno)); - ret = -errno; - goto err; - } - - nic->irq_mmap_addr = mmap_addr; - nic->irq_mmap_len = len; - - /* On some systems, the page may not be zero'd from first use. - Memset it here */ - memset(mmap_addr, 0x0, len); - - if (nic->using_vmdh) { - key.type = GNIX_AKT_RAW; - key.raw.protection_key = nic->cookie; - - info = _gnix_auth_key_lookup((uint8_t *) &key, sizeof(key)); - assert(info); - - if (!nic->mdd_resources_set) { - /* check to see if the ptag registration limit was set - yet or not -- becomes read-only after success */ - ret = _gnix_auth_key_enable(info); - if (ret != FI_SUCCESS && ret != -FI_EBUSY) { - GNIX_WARN(FI_LOG_DOMAIN, - "failed to enable authorization key, " - "unexpected error rc=%d\n", ret); - } - - status = GNI_SetMddResources(nic->gni_nic_hndl, - (info->attr.prov_key_limit + - info->attr.user_key_limit)); - if (status != GNI_RC_SUCCESS) { - GNIX_FATAL(FI_LOG_DOMAIN, - "failed to set MDD resources, rc=%d\n", - status); - } - - nic->mdd_resources_set = 1; - } - vmdh_index = _gnix_get_next_reserved_key(info); - if (vmdh_index <= 0) { - GNIX_FATAL(FI_LOG_DOMAIN, - "failed to get next reserved key, " - "rc=%d\n", vmdh_index); - } - - flags |= GNI_MEM_USE_VMDH; - } - - status = GNI_MemRegister(nic->gni_nic_hndl, - (uint64_t) nic->irq_mmap_addr, - len, - nic->rx_cq_blk, - flags, - vmdh_index, - &nic->irq_mem_hndl); - if (status != GNI_RC_SUCCESS) { - ret = gnixu_to_fi_errno(status); - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_MemRegister returned %s\n", - gni_err_str[status]); - goto err_w_mmap; - } - -#if 0 - fprintf(stderr,"registered ireq memhndl 
0x%016lx 0x%016lx\n", - nic->irq_mem_hndl.qword1, - nic->irq_mem_hndl.qword2); -#endif - - - return ret; - -err_w_mmap: - munmap(mmap_addr, len); -err: - return ret; -} - -/* - * release resources previously set up for remote - * GNI_PostCqWrite's to target - */ -static int __nic_teardown_irq_cq(struct gnix_nic *nic) -{ - int ret = FI_SUCCESS; - gni_return_t status; - - if (nic == NULL) - return ret; - - if (nic->irq_mmap_addr == NULL) - return ret; - - if ((nic->irq_mem_hndl.qword1) || - (nic->irq_mem_hndl.qword2)) { - status = GNI_MemDeregister(nic->gni_nic_hndl, - &nic->irq_mem_hndl); - if (status != GNI_RC_SUCCESS) { - ret = gnixu_to_fi_errno(status); - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_MemDeregister returned %s\n", - gni_err_str[status]); - } - } - - munmap(nic->irq_mmap_addr, - nic->irq_mmap_len); - return ret; -} - - -/* - * place holder for better attributes checker - */ -static int __gnix_nic_check_attr_sanity(struct gnix_nic_attr *attr) -{ - return FI_SUCCESS; -} - -static inline struct gnix_tx_descriptor * -__desc_lkup_by_id(struct gnix_nic *nic, int desc_id) -{ - struct gnix_tx_descriptor *tx_desc; - - assert((desc_id >= 0) && (desc_id <= nic->max_tx_desc_id)); - tx_desc = &nic->tx_desc_base[desc_id]; - return tx_desc; -} - -static int __nic_rx_overrun(struct gnix_nic *nic) -{ - int i, max_id, ret; - struct gnix_vc *vc; - gni_return_t status; - gni_cq_entry_t cqe; - - GNIX_WARN(FI_LOG_EP_DATA, "\n"); - - /* clear out the CQ */ - /* - * TODO: really need to process CQEs better for error reporting, - * etc. - */ - while ((status = GNI_CqGetEvent(nic->rx_cq, &cqe)) == GNI_RC_SUCCESS); - assert(status == GNI_RC_NOT_DONE); - - COND_ACQUIRE(nic->requires_lock, &nic->vc_id_lock); - max_id = nic->vc_id_table_count; - COND_RELEASE(nic->requires_lock, &nic->vc_id_lock); - /* - * TODO: optimization would - * be to keep track of last time - * this happened and where smsg msgs. - * were found. 
- */ - for (i = 0; i < max_id; i++) { - ret = _gnix_test_bit(&nic->vc_id_bitmap, i); - if (ret) { - vc = __gnix_nic_elem_by_rem_id(nic, i); - ret = _gnix_vc_rx_schedule(vc); - assert(ret == FI_SUCCESS); - } - } - - return FI_SUCCESS; -} - -static int __process_rx_cqe(struct gnix_nic *nic, gni_cq_entry_t cqe) -{ - int ret = FI_SUCCESS, vc_id = 0; - struct gnix_vc *vc; - - vc_id = GNI_CQ_GET_INST_ID(cqe); - - /* - * its possible this vc has been destroyed, so may get NULL - * back. - */ - - vc = __gnix_nic_elem_by_rem_id(nic, vc_id); - if (vc != NULL) { - switch (vc->conn_state) { - case GNIX_VC_CONNECTING: - GNIX_DEBUG(FI_LOG_EP_DATA, - "Scheduling VC for RX processing (%p)\n", - vc); - ret = _gnix_vc_rx_schedule(vc); - assert(ret == FI_SUCCESS); - break; - case GNIX_VC_CONNECTED: - GNIX_DEBUG(FI_LOG_EP_DATA, - "Processing VC RX (%p)\n", - vc); - ret = _gnix_vc_rx_schedule(vc); - assert(ret == FI_SUCCESS); - break; - default: - break; /* VC not in a state for scheduling or - SMSG processing */ - } - } - - return ret; -} - -static int __nic_rx_progress(struct gnix_nic *nic) -{ - int ret = FI_SUCCESS; - gni_return_t status = GNI_RC_NOT_DONE; - gni_cq_entry_t cqe; - - status = GNI_CqTestEvent(nic->rx_cq); - if (status == GNI_RC_NOT_DONE) - return FI_SUCCESS; - - COND_ACQUIRE(nic->requires_lock, &nic->lock); - - do { - status = GNI_CqGetEvent(nic->rx_cq, &cqe); - if (OFI_UNLIKELY(status == GNI_RC_NOT_DONE)) { - ret = FI_SUCCESS; - break; - } - - if (OFI_LIKELY(status == GNI_RC_SUCCESS)) { - /* Find and schedule the associated VC. */ - ret = __process_rx_cqe(nic, cqe); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, - "process_rx_cqe() failed: %d\n", - ret); - } - } else if (status == GNI_RC_ERROR_RESOURCE) { - /* The remote CQ was overrun. Events related to any VC - * could have been missed. Schedule each VC to be sure - * all messages are processed. 
*/ - assert(GNI_CQ_OVERRUN(cqe)); - __nic_rx_overrun(nic); - } else { - GNIX_WARN(FI_LOG_EP_DATA, - "GNI_CqGetEvent returned %s\n", - gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - break; - } - } while (1); - - COND_RELEASE(nic->requires_lock, &nic->lock); - - return ret; -} - -void _gnix_nic_txd_err_inject(struct gnix_nic *nic, - struct gnix_tx_descriptor *txd) -{ - slist_insert_tail(&txd->err_list, &nic->err_txds); -} - -static int __gnix_nic_txd_err_get(struct gnix_nic *nic, - struct gnix_tx_descriptor **txd) -{ - struct slist_entry *list_entry; - struct gnix_tx_descriptor *txd_p; - - list_entry = slist_remove_head(&nic->err_txds); - if (list_entry) { - txd_p = container_of(list_entry, - struct gnix_tx_descriptor, - err_list); - *txd = txd_p; - return 1; - } - - return 0; -} - -static void __nic_get_completed_txd(struct gnix_nic *nic, - gni_cq_handle_t hw_cq, - struct gnix_tx_descriptor **txd, - gni_return_t *tx_status) -{ - gni_post_descriptor_t *gni_desc; - struct gnix_tx_descriptor *txd_p = NULL; - struct gnix_fab_req *req; - gni_return_t status; - int msg_id; - gni_cq_entry_t cqe; - uint32_t recov = 1; - - if (__gnix_nic_txd_err_get(nic, &txd_p)) { - *txd = txd_p; - *tx_status = GNI_RC_TRANSACTION_ERROR; - return; - } - - status = GNI_CqGetEvent(hw_cq, &cqe); - if (status == GNI_RC_NOT_DONE) { - *txd = NULL; - *tx_status = GNI_RC_NOT_DONE; - return; - } - - assert(status == GNI_RC_SUCCESS || - status == GNI_RC_TRANSACTION_ERROR); - - if (OFI_UNLIKELY(status == GNI_RC_TRANSACTION_ERROR)) { - status = GNI_CqErrorRecoverable(cqe, &recov); - if (status == GNI_RC_SUCCESS) { - if (!recov) { - char ebuf[512]; - - GNI_CqErrorStr(cqe, ebuf, sizeof(ebuf)); - GNIX_WARN(FI_LOG_EP_DATA, - "CQ error status: %s\n", - ebuf); - } - } else { - GNIX_WARN(FI_LOG_EP_DATA, - "GNI_CqErrorRecover returned: %s\n", - gni_err_str[status]); - recov = 0; /* assume something bad has happened */ - } - } - - if (GNI_CQ_GET_TYPE(cqe) == GNI_CQ_EVENT_TYPE_POST) { - status = 
GNI_GetCompleted(hw_cq, cqe, &gni_desc); - - assert(status == GNI_RC_SUCCESS || - status == GNI_RC_TRANSACTION_ERROR); - - txd_p = container_of(gni_desc, - struct gnix_tx_descriptor, - gni_desc); - } else if (GNI_CQ_GET_TYPE(cqe) == GNI_CQ_EVENT_TYPE_SMSG) { - msg_id = GNI_CQ_GET_MSG_ID(cqe); - txd_p = __desc_lkup_by_id(nic, msg_id); - } - - if (OFI_UNLIKELY(txd_p == NULL)) - GNIX_FATAL(FI_LOG_EP_DATA, "Unexpected CQE: 0x%lx", cqe); - - /* - * set retry count on the request to max to force - * delivering error'd CQ event to application - */ - if (!recov) { - status = GNI_RC_TRANSACTION_ERROR; - req = txd_p->req; - if (req) - req->tx_failures = UINT_MAX; - } - - *tx_status = status; - *txd = txd_p; - -} - -static int __nic_tx_progress(struct gnix_nic *nic, gni_cq_handle_t cq) -{ - int ret = FI_SUCCESS; - gni_return_t tx_status; - struct gnix_tx_descriptor *txd; - - do { - txd = NULL; - - COND_ACQUIRE(nic->requires_lock, &nic->lock); - __nic_get_completed_txd(nic, cq, &txd, - &tx_status); - COND_RELEASE(nic->requires_lock, &nic->lock); - - if (txd && txd->completer_fn) { - ret = txd->completer_fn(txd, tx_status); - if (ret != FI_SUCCESS) { - /* - * TODO: need to post error to CQ - */ - GNIX_WARN(FI_LOG_EP_DATA, - "TXD completer failed: %d", ret); - } - } - - if ((txd == NULL) || ret != FI_SUCCESS) - break; - } while (1); - - return ret; -} - -int _gnix_nic_progress(void *arg) -{ - struct gnix_nic *nic = (struct gnix_nic *)arg; - int ret = FI_SUCCESS; - - ret = __nic_tx_progress(nic, nic->tx_cq); - if (OFI_UNLIKELY(ret != FI_SUCCESS)) - return ret; - - if (nic->tx_cq_blk && nic->tx_cq_blk != nic->tx_cq) { - ret = __nic_tx_progress(nic, nic->tx_cq_blk); - if (OFI_UNLIKELY(ret != FI_SUCCESS)) - return ret; - } - - ret = __nic_rx_progress(nic); - if (ret != FI_SUCCESS) - return ret; - - ret = _gnix_vc_nic_progress(nic); - if (ret != FI_SUCCESS) - return ret; - - return ret; -} - -int _gnix_nic_free_rem_id(struct gnix_nic *nic, int remote_id) -{ - assert(nic); - - if 
((remote_id < 0) || (remote_id > nic->vc_id_table_count)) - return -FI_EINVAL; - - _gnix_clear_bit(&nic->vc_id_bitmap, remote_id); - - return FI_SUCCESS; -} - -/* - * this function is needed to allow for quick lookup of a vc based on - * the contents of the GNI CQE coming off of the GNI RX CQ associated - * with GNI nic being used by this VC. Using a bitmap to expedite - * scanning vc's in the case of a GNI CQ overrun. - */ - -int _gnix_nic_get_rem_id(struct gnix_nic *nic, int *remote_id, void *entry) -{ - int ret = FI_SUCCESS; - void **table_base; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - /* - * TODO: really need to search bitmap for clear - * bit before resizing the table - */ - - COND_ACQUIRE(nic->requires_lock, &nic->vc_id_lock); - if (nic->vc_id_table_capacity == nic->vc_id_table_count) { - table_base = realloc(nic->vc_id_table, - 2 * nic->vc_id_table_capacity * - sizeof(void *)); - if (table_base == NULL) { - ret = -FI_ENOMEM; - goto err; - } - nic->vc_id_table_capacity *= 2; - nic->vc_id_table = table_base; - - ret = _gnix_realloc_bitmap(&nic->vc_id_bitmap, - nic->vc_id_table_capacity); - if (ret != FI_SUCCESS) { - assert(ret == -FI_ENOMEM); - goto err; - } - } - - nic->vc_id_table[nic->vc_id_table_count] = entry; - *remote_id = nic->vc_id_table_count; - - /* - * set bit in the bitmap - */ - - _gnix_set_bit(&nic->vc_id_bitmap, nic->vc_id_table_count); - - ++(nic->vc_id_table_count); -err: - COND_RELEASE(nic->requires_lock, &nic->vc_id_lock); - return ret; -} - -/* - * allocate a free list of tx descs for a gnix_nic struct. - */ - -static int __gnix_nic_tx_freelist_init(struct gnix_nic *nic, int n_descs) -{ - int i, ret = FI_SUCCESS; - struct gnix_tx_descriptor *desc_base, *desc_ptr; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - /* - * set up free list of tx descriptors. 
- */ - - desc_base = calloc(n_descs, sizeof(struct gnix_tx_descriptor)); - if (desc_base == NULL) { - ret = -FI_ENOMEM; - goto err; - } - - dlist_init(&nic->tx_desc_free_list); - dlist_init(&nic->tx_desc_active_list); - - for (i = 0, desc_ptr = desc_base; i < n_descs; i++, desc_ptr++) { - desc_ptr->id = i; - dlist_insert_tail(&desc_ptr->list, - &nic->tx_desc_free_list); - } - - nic->max_tx_desc_id = n_descs - 1; - nic->tx_desc_base = desc_base; - - ofi_spin_init(&nic->tx_desc_lock); - - return ret; - -err: - return ret; - -} - -/* - * clean up the tx descs free list - */ -static void __gnix_nic_tx_freelist_destroy(struct gnix_nic *nic) -{ - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - free(nic->tx_desc_base); - ofi_spin_destroy(&nic->tx_desc_lock); -} - -/* - * free a gnix nic and associated resources if refcnt drops to 0 - */ - -static void __nic_destruct(void *obj) -{ - int ret = FI_SUCCESS; - gni_return_t status = GNI_RC_SUCCESS; - struct gnix_nic *nic = (struct gnix_nic *) obj; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - /* Get us out of the progression tables we are destroying the nic - * and we don't want the wait progression thread to progress us - * after our structures are destroyed. 
- */ - pthread_mutex_lock(&gnix_nic_list_lock); - - dlist_remove(&nic->gnix_nic_list); - --gnix_nics_per_ptag[nic->ptag]; - dlist_remove(&nic->ptag_nic_list); - - pthread_mutex_unlock(&gnix_nic_list_lock); - __gnix_nic_tx_freelist_destroy(nic); - - /* - *free irq cq related resources - */ - - ret = __nic_teardown_irq_cq(nic); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_CTRL, - "__nic_teardown_irq_cq returned %s\n", - fi_strerror(-ret)); - - /* - * kill off progress thread, if any - */ - - if (nic->progress_thread) { - - ret = pthread_cancel(nic->progress_thread); - if ((ret != 0) && (ret != ESRCH)) { - GNIX_WARN(FI_LOG_EP_CTRL, - "pthread_cancel returned %d\n", ret); - goto err; - } - - ret = pthread_join(nic->progress_thread, - NULL); - if ((ret != 0) && (ret != ESRCH)) { - GNIX_WARN(FI_LOG_EP_CTRL, - "pthread_join returned %d\n", ret); - goto err; - } - - GNIX_INFO(FI_LOG_EP_CTRL, "pthread_join returned %d\n", ret); - nic->progress_thread = 0; - } - - /* Must free mboxes first, because the MR has a pointer to the - * nic handles below */ - ret = _gnix_mbox_allocator_destroy(nic->mbox_hndl); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_mbox_allocator_destroy returned %s\n", - fi_strerror(-ret)); - - /* - * see comments in the nic constructor about why - * the following code section is currently stubbed out. 
- */ -#if 0 - ret = _gnix_mbox_allocator_destroy(nic->s_rdma_buf_hndl); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_mbox_allocator_destroy returned %s\n", - fi_strerror(-ret)); - - ret = _gnix_mbox_allocator_destroy(nic->r_rdma_buf_hndl); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_mbox_allocator_destroy returned %s\n", - fi_strerror(-ret)); -#endif - - if (!nic->gni_cdm_hndl) { - GNIX_WARN(FI_LOG_EP_CTRL, "No CDM attached to nic, nic=%p"); - } - - assert(nic->gni_cdm_hndl != NULL); - - if (nic->rx_cq != NULL && nic->rx_cq != nic->rx_cq_blk) { - status = GNI_CqDestroy(nic->rx_cq); - if (status != GNI_RC_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_CqDestroy returned %s\n", - gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - goto err; - } - } - - if (nic->rx_cq_blk != NULL) { - status = GNI_CqDestroy(nic->rx_cq_blk); - if (status != GNI_RC_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_CqDestroy returned %s\n", - gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - goto err; - } - } - - if (nic->tx_cq != NULL && nic->tx_cq != nic->tx_cq_blk) { - status = GNI_CqDestroy(nic->tx_cq); - if (status != GNI_RC_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_CqDestroy returned %s\n", - gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - goto err; - } - } - - if (nic->tx_cq_blk != NULL) { - status = GNI_CqDestroy(nic->tx_cq_blk); - if (status != GNI_RC_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_CqDestroy returned %s\n", - gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - goto err; - } - } - - if (nic->allocd_gni_res & GNIX_NIC_CDM_ALLOCD) { - status = GNI_CdmDestroy(nic->gni_cdm_hndl); - if (status != GNI_RC_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_CdmDestroy returned %s\n", - gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - goto err; - } - } - - if (nic->vc_id_table != NULL) { - free(nic->vc_id_table); - } else { - GNIX_WARN(FI_LOG_EP_CTRL, "vc_id_table was NULL\n"); - } - - /* - * destroy VC 
free list associated with this nic - */ - - _gnix_fl_destroy(&nic->vc_freelist); - - /* - * remove the nic from the linked lists - * for the domain and the global nic list - */ - -err: - _gnix_free_bitmap(&nic->vc_id_bitmap); - - free(nic); -} - -int _gnix_nic_free(struct gnix_nic *nic) -{ - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (nic == NULL) - return -FI_EINVAL; - - _gnix_ref_put(nic); - - return FI_SUCCESS; -} - -/* - * allocate a gnix_nic struct using attributes of the domain - */ - -int gnix_nic_alloc(struct gnix_fid_domain *domain, - struct gnix_nic_attr *attr, - struct gnix_nic **nic_ptr) -{ - int ret = FI_SUCCESS; - struct gnix_nic *nic = NULL; - uint32_t device_addr; - gni_return_t status; - uint32_t fake_cdm_id = GNIX_CREATE_CDM_ID; - gni_smsg_attr_t smsg_mbox_attr; - struct gnix_nic_attr *nic_attr = &default_attr; - uint32_t num_corespec_cpus = 0; - bool must_alloc_nic = false; - bool free_list_inited = false; - struct gnix_auth_key *auth_key; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - *nic_ptr = NULL; - nic_attr->gni_cdm_modes = gnix_cdm_modes; - - if (attr) { - ret = __gnix_nic_check_attr_sanity(attr); - if (ret != FI_SUCCESS) - return ret; - nic_attr = attr; - must_alloc_nic = nic_attr->must_alloc; - } - - auth_key = nic_attr->auth_key; - - /* - * If we've maxed out the number of nics for this domain/ptag, - * search the list of existing nics. Take the gnix_nic_list_lock - * here since the gnix_nic_list will be manipulated whether or - * not we attach to an existing nic or create a new one. 
- * - * Should not matter much that this is a pretty fat critical section - * since endpoint setup for RDM type will typically occur near - * app startup, likely in a single threaded region, and for the - * case of MSG, where there will likely be many 100s of EPs, after - * a few initial slow times through this section when nics are created, - * max nic count for the ptag will be reached and only the first part - * of the critical section - iteration over existing nics - will be - * happening. - */ - - pthread_mutex_lock(&gnix_nic_list_lock); - - /* - * we can reuse previously allocated nics as long as a - * must_alloc is not specified in the nic_attr arg. - */ - - if ((must_alloc_nic == false) && - (gnix_nics_per_ptag[auth_key->ptag] >= gnix_max_nics_per_ptag)) { - assert(!dlist_empty(&gnix_nic_list_ptag[auth_key->ptag])); - - nic = dlist_first_entry(&gnix_nic_list_ptag[auth_key->ptag], - struct gnix_nic, ptag_nic_list); - dlist_remove(&nic->ptag_nic_list); - dlist_insert_tail(&nic->ptag_nic_list, - &gnix_nic_list_ptag[auth_key->ptag]); - _gnix_ref_get(nic); - - GNIX_INFO(FI_LOG_EP_CTRL, "Reusing NIC:%p\n", nic); - } - - /* - * no nic found create a cdm and attach - */ - - if (!nic) { - - nic = calloc(1, sizeof(struct gnix_nic)); - if (nic == NULL) { - ret = -FI_ENOMEM; - goto err; - } - - nic->using_vmdh = domain->using_vmdh; - - if (nic_attr->use_cdm_id == false) { - ret = _gnix_cm_nic_create_cdm_id(domain, &fake_cdm_id); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_cm_nic_create_cdm_id returned %s\n", - fi_strerror(-ret)); - goto err; - } - } else - fake_cdm_id = nic_attr->cdm_id; - - if (nic_attr->gni_cdm_hndl == NULL) { - status = GNI_CdmCreate(fake_cdm_id, - auth_key->ptag, - auth_key->cookie, - gnix_cdm_modes, - &nic->gni_cdm_hndl); - if (status != GNI_RC_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, "GNI_CdmCreate returned %s\n", - gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - goto err1; - } - nic->allocd_gni_res |= 
GNIX_NIC_CDM_ALLOCD; - } else { - nic->gni_cdm_hndl = nic_attr->gni_cdm_hndl; - } - - /* - * Okay, now go for the attach - */ - - if (nic_attr->gni_nic_hndl == NULL) { - status = GNI_CdmAttach(nic->gni_cdm_hndl, - 0, - &device_addr, - &nic->gni_nic_hndl); - if (status != GNI_RC_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, "GNI_CdmAttach returned %s\n", - gni_err_str[status]); - _gnix_dump_gni_res(auth_key->ptag); - ret = gnixu_to_fi_errno(status); - goto err1; - } - } else - nic->gni_nic_hndl = nic_attr->gni_nic_hndl; - - /* - * create TX CQs - first polling, then blocking - */ - - status = GNI_CqCreate(nic->gni_nic_hndl, - domain->params.tx_cq_size, - 0, /* no delay count */ - GNI_CQ_BLOCKING | - domain->gni_cq_modes, - NULL, /* useless handler */ - NULL, /* useless handler - context */ - &nic->tx_cq_blk); - if (status != GNI_RC_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_CqCreate returned %s\n", - gni_err_str[status]); - _gnix_dump_gni_res(auth_key->ptag); - ret = gnixu_to_fi_errno(status); - goto err1; - } - - /* Use blocking CQs for all operations if eager_auto_progress - * is used. 
*/ - if (domain->params.eager_auto_progress) { - nic->tx_cq = nic->tx_cq_blk; - } else { - status = GNI_CqCreate(nic->gni_nic_hndl, - domain->params.tx_cq_size, - 0, /* no delay count */ - domain->gni_cq_modes, - NULL, /* useless handler */ - NULL, /* useless handler ctx */ - &nic->tx_cq); - if (status != GNI_RC_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_CqCreate returned %s\n", - gni_err_str[status]); - _gnix_dump_gni_res(auth_key->ptag); - ret = gnixu_to_fi_errno(status); - goto err1; - } - } - - - /* - * create RX CQs - first polling, then blocking - */ - - status = GNI_CqCreate(nic->gni_nic_hndl, - domain->params.rx_cq_size, - 0, - GNI_CQ_BLOCKING | - domain->gni_cq_modes, - NULL, - NULL, - &nic->rx_cq_blk); - if (status != GNI_RC_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_CqCreate returned %s\n", - gni_err_str[status]); - _gnix_dump_gni_res(auth_key->ptag); - ret = gnixu_to_fi_errno(status); - goto err1; - } - - /* Use blocking CQs for all operations if eager_auto_progress - * is used. 
*/ - if (domain->params.eager_auto_progress) { - nic->rx_cq = nic->rx_cq_blk; - } else { - status = GNI_CqCreate(nic->gni_nic_hndl, - domain->params.rx_cq_size, - 0, - domain->gni_cq_modes, - NULL, - NULL, - &nic->rx_cq); - if (status != GNI_RC_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_CqCreate returned %s\n", - gni_err_str[status]); - _gnix_dump_gni_res(auth_key->ptag); - ret = gnixu_to_fi_errno(status); - goto err1; - } - } - - nic->device_addr = device_addr; - nic->ptag = auth_key->ptag; - nic->cookie = auth_key->cookie; - - nic->vc_id_table_capacity = domain->params.vc_id_table_capacity; - nic->vc_id_table = malloc(sizeof(void *) * - nic->vc_id_table_capacity); - if (nic->vc_id_table == NULL) { - GNIX_WARN(FI_LOG_EP_CTRL, - "malloc of vc_id_table failed\n"); - ret = -FI_ENOMEM; - goto err1; - } - - ret = _gnix_alloc_bitmap(&nic->vc_id_bitmap, - nic->vc_id_table_capacity, NULL); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "alloc_bitmap returned %d\n", ret); - goto err1; - } - ofi_spin_init(&nic->vc_id_lock); - - /* - * initialize free list for VC's - * In addition to hopefully allowing for a more compact - * allocation of VC structs, the free list is also import - * because there is a window of time when using auto progress - * that a thread may be going through the progress engine - * while one of the application threads is actively tearing - * down an endpoint (and hence its associated VCs) before the - * rem_id for the vc is removed from the vector. - * As a consequence, it is important that - * the memory allocated within the freelist allocator not be - * returned to the system prior to the freelist being destroyed - * as part of the nic destructor procedure. The freelist is - * destroyed in that procedure after the progress thread - * has been joined. 
- */ - - ret = _gnix_fl_init_ts(sizeof(struct gnix_vc), - offsetof(struct gnix_vc, fr_list), - GNIX_VC_FL_MIN_SIZE, - GNIX_VC_FL_INIT_REFILL_SIZE, - 0, - 0, - &nic->vc_freelist); - if (ret == FI_SUCCESS) { - free_list_inited = true; - } else { - GNIX_DEBUG(FI_LOG_EP_DATA, "_gnix_fl_init returned: %s\n", - fi_strerror(-ret)); - goto err1; - } - - ofi_spin_init(&nic->lock); - - ret = __gnix_nic_tx_freelist_init(nic, - domain->params.tx_cq_size); - if (ret != FI_SUCCESS) - goto err1; - - ofi_spin_init(&nic->prog_vcs_lock); - dlist_init(&nic->prog_vcs); - - _gnix_ref_init(&nic->ref_cnt, 1, __nic_destruct); - - smsg_mbox_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT; - smsg_mbox_attr.mbox_maxcredit = domain->params.mbox_maxcredit; - smsg_mbox_attr.msg_maxsize = domain->params.mbox_msg_maxsize; - - status = GNI_SmsgBufferSizeNeeded(&smsg_mbox_attr, - &nic->mem_per_mbox); - if (status != GNI_RC_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_SmsgBufferSizeNeeded returned %s\n", - gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - goto err1; - } - - /* - * set up mailbox allocator for SMSG mailboxes - */ - - ret = _gnix_mbox_allocator_create(nic, - nic->rx_cq, - domain->params.mbox_page_size, - (size_t)nic->mem_per_mbox, - domain->params.mbox_num_per_slab, - &nic->mbox_hndl); - - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_mbox_alloc returned %s\n", - fi_strerror(-ret)); - goto err1; - } - - /* - * use the mailbox allocator system to set up an - * pre-pinned RDMA bounce buffers for longer eager - * messages and other cases where zero-copy - * can't be safely used. - * - * One set of blocks is used for the send side. - * A second set of blocks is used for the receive - * side. Both sets of blocks are registered against - * the blocking RX CQ for this nic. 
- * - * TODO: hardwired constants, uff - * TODO: better to use a buddy allocator or some other - * allocator - * Disable these for now as we're not using and they - * chew up a lot of IOMMU space per nic. - */ - -#if 0 - ret = _gnix_mbox_allocator_create(nic, - NULL, - GNIX_PAGE_2MB, - 65536, - 512, - &nic->s_rdma_buf_hndl); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_mbox_alloc returned %s\n", - fi_strerror(-ret)); - _gnix_dump_gni_res(domain->ptag); - goto err1; - } - - ret = _gnix_mbox_allocator_create(nic, - NULL, - GNIX_PAGE_2MB, - 65536, - 512, - &nic->r_rdma_buf_hndl); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_mbox_alloc returned %s\n", - fi_strerror(-ret)); - _gnix_dump_gni_res(domain->ptag); - goto err1; - } -#endif - - ret = __nic_setup_irq_cq(nic); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "__nic_setup_irq_cq returned %s\n", - fi_strerror(-ret)); - _gnix_dump_gni_res(auth_key->ptag); - goto err1; - } - - /* - * if the domain is using PROGRESS_AUTO for data, set up - * a progress thread. - */ - - if (domain->data_progress == FI_PROGRESS_AUTO) { - - /* - * tell CLE job container that next thread should be - * runnable anywhere in the cpuset, don't treat as - * an error if one is returned, may have perf issues - * though... 
- */ - - ret = _gnix_get_num_corespec_cpus(&num_corespec_cpus); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "failed to get num corespec cpus\n"); - } - if (num_corespec_cpus > 0) { - ret = _gnix_job_disable_affinity_apply(); - } else { - ret = _gnix_job_enable_unassigned_cpus(); - } - if (ret != 0) - GNIX_WARN(FI_LOG_EP_CTRL, - "job_disable/unassigned cpus returned %d\n", - ret); - - ret = pthread_create(&nic->progress_thread, - NULL, - __gnix_nic_prog_thread_fn, - (void *)nic); - if (ret) - GNIX_WARN(FI_LOG_EP_CTRL, - "pthread_create call returned %d\n", ret); - } - - dlist_insert_tail(&nic->gnix_nic_list, &gnix_nic_list); - dlist_insert_tail(&nic->ptag_nic_list, - &gnix_nic_list_ptag[auth_key->ptag]); - - nic->smsg_callbacks = gnix_ep_smsg_callbacks; - - ++gnix_nics_per_ptag[auth_key->ptag]; - - GNIX_INFO(FI_LOG_EP_CTRL, "Allocated NIC:%p\n", nic); - } - - if (nic) { - nic->requires_lock = domain->thread_model != FI_THREAD_COMPLETION; - nic->using_vmdh = domain->using_vmdh; - } - - *nic_ptr = nic; - goto out; - -err1: - ofi_atomic_dec32(&gnix_id_counter); -err: - if (nic != NULL) { - __nic_teardown_irq_cq(nic); - if (nic->r_rdma_buf_hndl != NULL) - _gnix_mbox_allocator_destroy(nic->r_rdma_buf_hndl); - if (nic->s_rdma_buf_hndl != NULL) - _gnix_mbox_allocator_destroy(nic->s_rdma_buf_hndl); - if (nic->mbox_hndl != NULL) - _gnix_mbox_allocator_destroy(nic->mbox_hndl); - if (nic->rx_cq != NULL && nic->rx_cq != nic->rx_cq_blk) - GNI_CqDestroy(nic->rx_cq); - if (nic->rx_cq_blk != NULL) - GNI_CqDestroy(nic->rx_cq_blk); - if (nic->tx_cq != NULL && nic->tx_cq != nic->tx_cq_blk) - GNI_CqDestroy(nic->tx_cq); - if (nic->tx_cq_blk != NULL) - GNI_CqDestroy(nic->tx_cq_blk); - if ((nic->gni_cdm_hndl != NULL) && (nic->allocd_gni_res & - GNIX_NIC_CDM_ALLOCD)) - GNI_CdmDestroy(nic->gni_cdm_hndl); - if (free_list_inited == true) - _gnix_fl_destroy(&nic->vc_freelist); - free(nic); - } - -out: - pthread_mutex_unlock(&gnix_nic_list_lock); - return ret; -} - -void 
_gnix_nic_init(void) -{ - int i, rc; - - for (i = 0; i < GNI_PTAG_MAX; i++) { - dlist_init(&gnix_nic_list_ptag[i]); - } - - rc = _gnix_nics_per_rank(&gnix_max_nics_per_ptag); - if (rc == FI_SUCCESS) { - GNIX_DEBUG(FI_LOG_FABRIC, "gnix_max_nics_per_ptag: %u\n", - gnix_max_nics_per_ptag); - } else { - GNIX_WARN(FI_LOG_FABRIC, "_gnix_nics_per_rank failed: %d\n", - rc); - } - - if (getenv("GNIX_MAX_NICS") != NULL) - gnix_max_nics_per_ptag = atoi(getenv("GNIX_MAX_NICS")); - - /* - * Well if we didn't get 1 nic, that means we must really be doing - * FMA sharing. - */ - - if (gnix_max_nics_per_ptag == 0) { - gnix_max_nics_per_ptag = 1; - GNIX_WARN(FI_LOG_FABRIC, "Using inter-procss FMA sharing\n"); - } -} - diff --git a/prov/gni/src/gnix_poll.c b/prov/gni/src/gnix_poll.c deleted file mode 100644 index f134bc81102..00000000000 --- a/prov/gni/src/gnix_poll.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All - * rights reserved. - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "gnix.h" -#include "gnix_poll.h" - -/******************************************************************************* - * API Functionality. - ******************************************************************************/ -DIRECT_FN int gnix_poll_open(struct fid_domain *domain, - struct fi_poll_attr *attr, - struct fid_poll **pollset) -{ - return -FI_ENOSYS; -} - -/** - * Poll progress and events across multiple completion queues and counters. - * - * @param[in] pollset the pollset - * @param[in/out] context user context values associated with completion - * queues or counters - * @param[in] count number of entries in context - * - * @return FI_SUCCESS upon successfully polling progress - * @return -FI_ERRNO upon an error - * @return -FI_ENOSYS if this operation is not supported - */ -DIRECT_FN int gnix_poll_poll(struct fid_poll *pollset, void **context, - int count) -{ - return -FI_ENOSYS; -} - -/** - * Associates a completions queue or counter with a poll set - * - * @param[in] pollset the pollset - * @param[in] event_fid the queue or counter - * @param[in] flags flags for the requests - * - * @return FI_SUCCESS upon adding the completion queue or counter - * @return -FI_ERRNO upon an error - * @return -FI_ENOSYS if this operation is not supported - */ -DIRECT_FN int gnix_poll_add(struct fid_poll *pollset, struct fid *event_fid, - uint64_t flags) -{ - return -FI_ENOSYS; -} - -/** - * Removes a completion queue or counter from a poll set. 
- * - * @param[in] pollset the pollset - * @param[in] event_fid the queue or counter - * @param[in] flags flags for the requests - * - * @return FI_SUCCESS upon removing the completion queue or counter - * @return -FI_ERRNO upon an error - * @return -FI_ENOSYS if this operation is not supported - */ -DIRECT_FN int gnix_poll_del(struct fid_poll *pollset, struct fid *event_fid, - uint64_t flags) -{ - return -FI_ENOSYS; -} diff --git a/prov/gni/src/gnix_progress.c b/prov/gni/src/gnix_progress.c deleted file mode 100644 index 523e3740ad5..00000000000 --- a/prov/gni/src/gnix_progress.c +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (c) 2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -/* - * Progress common code - */ - -#include - -#include "gnix_progress.h" - -struct gnix_prog_obj { - struct dlist_entry list; - int ref_cnt; - void *obj; - int (*prog_fn)(void *data); -}; - - -int _gnix_prog_progress(struct gnix_prog_set *set) -{ - struct gnix_prog_obj *pobj, *tmp; - int rc; - - COND_READ_ACQUIRE(set->requires_lock, &set->lock); - - dlist_for_each_safe(&set->prog_objs, pobj, tmp, list) { - rc = pobj->prog_fn(pobj->obj); - if (rc) { - GNIX_WARN(FI_LOG_EP_CTRL, - "Obj(%p) prog function failed: %d\n", - pobj, rc); - } - } - - COND_RW_RELEASE(set->requires_lock, &set->lock); - - return FI_SUCCESS; -} - -int _gnix_prog_obj_add(struct gnix_prog_set *set, void *obj, - int (*prog_fn)(void *data)) -{ - struct gnix_prog_obj *pobj, *tmp; - - COND_WRITE_ACQUIRE(set->requires_lock, &set->lock); - - dlist_for_each_safe(&set->prog_objs, pobj, tmp, list) { - if (obj == pobj->obj && prog_fn == pobj->prog_fn) { - pobj->ref_cnt++; - COND_RW_RELEASE(set->requires_lock, &set->lock); - return FI_SUCCESS; - } - } - - pobj = malloc(sizeof(struct gnix_prog_obj)); - if (!pobj) { - GNIX_WARN(FI_LOG_EP_CTRL, "Failed to add OBJ to prog set.\n"); - COND_RW_RELEASE(set->requires_lock, &set->lock); - return -FI_ENOMEM; - } - - pobj->obj = obj; - pobj->prog_fn = prog_fn; - pobj->ref_cnt = 1; - dlist_init(&pobj->list); - dlist_insert_tail(&pobj->list, &set->prog_objs); - - COND_RW_RELEASE(set->requires_lock, &set->lock); - - GNIX_INFO(FI_LOG_EP_CTRL, "Added obj(%p) to set(%p)\n", - obj, set); - - return FI_SUCCESS; -} - -int _gnix_prog_obj_rem(struct gnix_prog_set *set, void *obj, - int (*prog_fn)(void *data)) -{ - struct gnix_prog_obj *pobj, *tmp; - - COND_WRITE_ACQUIRE(set->requires_lock, &set->lock); - - dlist_for_each_safe(&set->prog_objs, pobj, tmp, list) { - if (obj == pobj->obj && prog_fn == pobj->prog_fn) { - if (!--pobj->ref_cnt) { - dlist_remove(&pobj->list); - free(pobj); - GNIX_INFO(FI_LOG_EP_CTRL, - "Removed obj(%p) from set(%p)\n", - obj, set); - } - 
COND_RW_RELEASE(set->requires_lock, &set->lock); - return FI_SUCCESS; - } - } - - COND_RW_RELEASE(set->requires_lock, &set->lock); - - GNIX_WARN(FI_LOG_EP_CTRL, "Object not found on prog set.\n"); - return -FI_EINVAL; -} - -int _gnix_prog_init(struct gnix_prog_set *set) -{ - dlist_init(&set->prog_objs); - rwlock_init(&set->lock); - set->requires_lock = 1; - - return FI_SUCCESS; -} - -int _gnix_prog_fini(struct gnix_prog_set *set) -{ - struct gnix_prog_obj *pobj, *tmp; - - COND_WRITE_ACQUIRE(set->requires_lock, &set->lock); - - dlist_for_each_safe(&set->prog_objs, pobj, tmp, list) { - dlist_remove(&pobj->list); - free(pobj); - } - - COND_RW_RELEASE(set->requires_lock, &set->lock); - - rwlock_destroy(&set->lock); - - return FI_SUCCESS; -} - diff --git a/prov/gni/src/gnix_queue.c b/prov/gni/src/gnix_queue.c deleted file mode 100644 index ac2ea72d995..00000000000 --- a/prov/gni/src/gnix_queue.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include "gnix.h" -#include "gnix_queue.h" - -int _gnix_queue_create(struct gnix_queue **queue, alloc_func alloc_item, - free_func free_item, size_t entry_size, - size_t entry_count) -{ - struct gnix_queue *q; - struct slist_entry *temp; - int ret = FI_SUCCESS; - - if (!alloc_item || !free_item) { - ret = -FI_EINVAL; - goto err; - } - - q = calloc(1, sizeof(*q)); - if (!q) { - ret = -FI_ENOMEM; - goto err; - } - - q->alloc_item = alloc_item; - q->free_item = free_item; - - q->entry_size = entry_size; - - slist_init(&q->item_list); - slist_init(&q->free_list); - - for (size_t count = 0; count < entry_count; count++) { - temp = q->alloc_item(entry_size); - if (!temp) { - ret = -FI_ENOMEM; - goto err1; - } - - _gnix_queue_enqueue_free(q, temp); - } - - *queue = q; - - return ret; - -err1: - _gnix_queue_destroy(q); - *queue = NULL; -err: - return ret; -} - -void _gnix_queue_destroy(struct gnix_queue *queue) -{ - struct slist_entry *temp; - - while ((temp = _gnix_queue_dequeue(queue))) - queue->free_item(temp); - - while ((temp = _gnix_queue_dequeue_free(queue))) - queue->free_item(temp); - - free(queue); -} - -struct slist_entry *_gnix_queue_peek(struct gnix_queue *queue) -{ - return queue->item_list.head; -} - -struct slist_entry *_gnix_queue_get_free(struct gnix_queue *queue) -{ - struct slist_entry *ret; - - ret = _gnix_queue_dequeue_free(queue); - if (!ret) - ret = queue->alloc_item(queue->entry_size); - - return ret; -} - -struct slist_entry 
*_gnix_queue_dequeue(struct gnix_queue *queue) -{ - return slist_remove_head(&queue->item_list); -} - -struct slist_entry *_gnix_queue_dequeue_free(struct gnix_queue *queue) -{ - return slist_remove_head(&queue->free_list); -} - -void _gnix_queue_enqueue(struct gnix_queue *queue, struct slist_entry *item) -{ - gnix_slist_insert_tail(item, &queue->item_list); -} - -void _gnix_queue_enqueue_free(struct gnix_queue *queue, - struct slist_entry *item) -{ - slist_insert_head(item, &queue->free_list); -} diff --git a/prov/gni/src/gnix_rma.c b/prov/gni/src/gnix_rma.c deleted file mode 100644 index 518439abf04..00000000000 --- a/prov/gni/src/gnix_rma.c +++ /dev/null @@ -1,1586 +0,0 @@ -/* - * Copyright (c) 2015-2019 Cray Inc. All rights reserved. - * Copyright (c) 2015-2018 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include - -#include "gnix.h" -#include "gnix_nic.h" -#include "gnix_vc.h" -#include "gnix_ep.h" -#include "gnix_mr.h" -#include "gnix_cm_nic.h" -#include "gnix_mbox_allocator.h" -#include "gnix_cntr.h" - -#include - -/* Threshold to switch from indirect transfer to chained transfer to move - * unaligned read data. */ -#define GNIX_RMA_UREAD_CHAINED_THRESH 60 -#define slist_entry_foreach(head, item)\ - for (item = head; item; item = item->next) - -static int __gnix_rma_send_err(struct gnix_fid_ep *ep, - struct gnix_fab_req *req, - int error) -{ - struct gnix_fid_cntr *cntr = NULL; - int rc = FI_SUCCESS; - uint64_t flags = req->flags & GNIX_RMA_COMPLETION_FLAGS; - - if (ep->send_cq) { - rc = _gnix_cq_add_error(ep->send_cq, req->user_context, - flags, 0, 0, 0, 0, 0, error, - gnixu_to_fi_errno(GNI_RC_TRANSACTION_ERROR), - NULL, 0); - if (rc) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cq_add_error() failed: %d\n", rc); - } - } - - if ((req->type == GNIX_FAB_RQ_RDMA_WRITE) && - ep->write_cntr) - cntr = ep->write_cntr; - - if ((req->type == GNIX_FAB_RQ_RDMA_READ) && - ep->read_cntr) - cntr = ep->read_cntr; - - if (cntr) { - rc = _gnix_cntr_inc_err(cntr); - if (rc) - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cntr_inc_err() failed: %d\n", rc); - } - - return rc; -} - -static int __gnix_rma_send_completion(struct gnix_fid_ep *ep, - struct gnix_fab_req *req) -{ - struct gnix_fid_cntr *cntr = NULL; - int rc; - uint64_t flags = req->flags & GNIX_RMA_COMPLETION_FLAGS; - - if ((req->flags & FI_COMPLETION) && ep->send_cq) { - rc = _gnix_cq_add_event(ep->send_cq, ep, req->user_context, - flags, 0, 0, 0, 0, FI_ADDR_NOTAVAIL); - if (rc) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cq_add_event() 
failed: %d\n", rc); - } - } - - if ((req->type == GNIX_FAB_RQ_RDMA_WRITE) && - ep->write_cntr) { - cntr = ep->write_cntr; - } - - if ((req->type == GNIX_FAB_RQ_RDMA_READ) && - ep->read_cntr) { - cntr = ep->read_cntr; - } - - if (cntr) { - rc = _gnix_cntr_inc(cntr); - if (rc) - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cntr_inc() failed: %d\n", rc); - } - - return FI_SUCCESS; -} - -static inline void __gnix_rma_copy_indirect_get_data(struct gnix_fab_req *req) -{ - int head_off = req->rma.rem_addr & GNI_READ_ALIGN_MASK; - memcpy((void *)req->rma.loc_addr, - (void *) ((uint8_t *) req->int_tx_buf + head_off), - req->rma.len); -} - -static void __gnix_rma_copy_chained_get_data(struct gnix_fab_req *req) -{ - int head_off, head_len, tail_len; - void *addr; - - head_off = req->rma.rem_addr & GNI_READ_ALIGN_MASK; - head_len = GNI_READ_ALIGN - head_off; - tail_len = (req->rma.rem_addr + req->rma.len) & GNI_READ_ALIGN_MASK; - - if (head_off) { - GNIX_INFO(FI_LOG_EP_DATA, "writing %d bytes to %p\n", - head_len, req->rma.loc_addr); - memcpy((void *)req->rma.loc_addr, - (void *) ((uint8_t *) req->int_tx_buf + head_off), - head_len); - } - - if (tail_len) { - addr = (void *) ((uint8_t *) req->rma.loc_addr + - req->rma.len - - tail_len); - - GNIX_INFO(FI_LOG_EP_DATA, "writing %d bytes to %p\n", - tail_len, addr); - memcpy((void *)addr, - (void *) ((uint8_t *) req->int_tx_buf + GNI_READ_ALIGN), - tail_len); - } -} - -static void __gnix_rma_more_fr_complete(struct gnix_fab_req *req) -{ - int rc; - - if (req->flags & FI_LOCAL_MR) { - GNIX_INFO(FI_LOG_EP_DATA, "freeing auto-reg MR: %p\n", - req->rma.loc_md); - rc = fi_close(&req->rma.loc_md->mr_fid.fid); - if (rc != FI_SUCCESS) - GNIX_FATAL(FI_LOG_DOMAIN, - "failed to close auto-registration, " - "rc=%d\n", rc); - - req->flags &= ~FI_LOCAL_MR; - } - - /* Schedule VC TX queue in case the VC is 'fenced'. 
*/ - _gnix_vc_tx_schedule(req->vc); - - _gnix_fr_free(req->vc->ep, req); -} - -static void __gnix_rma_fr_complete(struct gnix_fab_req *req) -{ - int rc; - - if (req->flags & FI_LOCAL_MR) { - GNIX_INFO(FI_LOG_EP_DATA, "freeing auto-reg MR: %p\n", - req->rma.loc_md); - rc = fi_close(&req->rma.loc_md->mr_fid.fid); - if (rc != FI_SUCCESS) - GNIX_FATAL(FI_LOG_DOMAIN, - "failed to close auto-registration, " - "rc=%d\n", rc); - - req->flags &= ~FI_LOCAL_MR; - } - - ofi_atomic_dec32(&req->vc->outstanding_tx_reqs); - - /* Schedule VC TX queue in case the VC is 'fenced'. */ - _gnix_vc_tx_schedule(req->vc); - - _gnix_fr_free(req->vc->ep, req); -} - -static int __gnix_rma_post_err_no_retrans(struct gnix_fab_req *req, int error) -{ - int rc; - - rc = __gnix_rma_send_err(req->vc->ep, req, error); - if (rc != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_rma_send_err() failed: %d\n", - rc); - - __gnix_rma_fr_complete(req); - return FI_SUCCESS; -} - -static int __gnix_rma_post_err(struct gnix_fab_req *req, int error) -{ - if (GNIX_EP_RDM(req->gnix_ep->type) && - _gnix_req_replayable(req)) { - req->tx_failures++; - GNIX_INFO(FI_LOG_EP_DATA, - "Requeueing failed request: %p\n", req); - return _gnix_vc_requeue_work_req(req); - } - - GNIX_WARN(FI_LOG_EP_DATA, "Failed %u transmits: %p error: %d\n", - req->tx_failures, req, error); - - __gnix_rma_post_err_no_retrans(req, error); - - return FI_SUCCESS; -} - -/* - * completer for a GNI_PostCqWrite, for now ignore error status - */ - -static int __gnix_rma_post_irq_complete(void *arg, gni_return_t tx_status) -{ - struct gnix_tx_descriptor *txd = (struct gnix_tx_descriptor *)arg; - struct gnix_vc *vc; - - vc = (struct gnix_vc *)txd->req; - _gnix_nic_tx_free(vc->ep->nic, txd); - - return FI_SUCCESS; -} - -/* SMSG callback for RMA data control message. 
*/ -int __smsg_rma_data(void *data, void *msg) -{ - int ret = FI_SUCCESS; - struct gnix_vc *vc = (struct gnix_vc *)data; - struct gnix_smsg_rma_data_hdr *hdr = - (struct gnix_smsg_rma_data_hdr *)msg; - struct gnix_fid_ep *ep = vc->ep; - gni_return_t status; - - if (GNIX_ALLOW_FI_REMOTE_CQ_DATA(hdr->flags, ep->caps) && ep->recv_cq) { - ret = _gnix_cq_add_event(ep->recv_cq, ep, NULL, hdr->user_flags, - 0, 0, hdr->user_data, 0, - FI_ADDR_NOTAVAIL); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cq_add_event returned %d\n", - ret); - } - } - - if (hdr->flags & FI_REMOTE_WRITE && ep->rwrite_cntr) { - ret = _gnix_cntr_inc(ep->rwrite_cntr); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cntr_inc() failed: %d\n", - ret); - } - - if (hdr->flags & FI_REMOTE_READ && ep->rread_cntr) { - ret = _gnix_cntr_inc(ep->rread_cntr); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_cntr_inc() failed: %d\n", - ret); - } - - status = GNI_SmsgRelease(vc->gni_ep); - if (OFI_UNLIKELY(status != GNI_RC_SUCCESS)) { - GNIX_WARN(FI_LOG_EP_DATA, - "GNI_SmsgRelease returned %s\n", - gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - } - - return ret; -} - -/* __gnix_rma_txd_data_complete() should match __gnix_rma_txd_complete() except - * for checking whether to send immediate data. */ -static int __gnix_rma_txd_data_complete(void *arg, gni_return_t tx_status) -{ - struct gnix_tx_descriptor *txd = (struct gnix_tx_descriptor *)arg; - struct gnix_fab_req *req = txd->req; - int rc; - - _gnix_nic_tx_free(req->gnix_ep->nic, txd); - - if (tx_status != GNI_RC_SUCCESS) - return __gnix_rma_post_err(req, FI_ECANCELED); - - /* Successful data delivery. Generate local completion. 
*/ - rc = __gnix_rma_send_completion(req->gnix_ep, req); - if (rc != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_rma_send_completion() failed: %d\n", - rc); - - __gnix_rma_fr_complete(req); - - return FI_SUCCESS; -} - -static int __gnix_rma_send_data_req(void *arg) -{ - struct gnix_fab_req *req = (struct gnix_fab_req *)arg; - struct gnix_fid_ep *ep = req->gnix_ep; - struct gnix_nic *nic = ep->nic; - struct gnix_tx_descriptor *txd; - gni_return_t status; - int rc; - int inject_err = _gnix_req_inject_err(req); - - rc = _gnix_nic_tx_alloc(nic, &txd); - if (rc) { - GNIX_INFO(FI_LOG_EP_DATA, - "_gnix_nic_tx_alloc() failed: %d\n", - rc); - return -FI_ENOSPC; - } - - txd->req = req; - txd->completer_fn = __gnix_rma_txd_data_complete; - txd->rma_data_hdr.flags = 0; - - if (GNIX_ALLOW_FI_REMOTE_CQ_DATA(req->flags, ep->caps)) { - txd->rma_data_hdr.flags |= FI_REMOTE_CQ_DATA; - txd->rma_data_hdr.user_flags = FI_RMA | FI_REMOTE_CQ_DATA; - if (req->type == GNIX_FAB_RQ_RDMA_WRITE) { - txd->rma_data_hdr.user_flags |= FI_REMOTE_WRITE; - } else { - txd->rma_data_hdr.user_flags |= FI_REMOTE_READ; - } - txd->rma_data_hdr.user_data = req->rma.imm; - } - - if (req->vc->peer_caps & FI_RMA_EVENT) { - if (req->type == GNIX_FAB_RQ_RDMA_WRITE) { - txd->rma_data_hdr.flags |= FI_REMOTE_WRITE; - } else { - txd->rma_data_hdr.flags |= FI_REMOTE_READ; - } - } - - COND_ACQUIRE(nic->requires_lock, &nic->lock); - if (inject_err) { - _gnix_nic_txd_err_inject(nic, txd); - status = GNI_RC_SUCCESS; - } else { - status = GNI_SmsgSendWTag(req->vc->gni_ep, - &txd->rma_data_hdr, - sizeof(txd->rma_data_hdr), - NULL, 0, txd->id, - GNIX_SMSG_T_RMA_DATA); - } - COND_RELEASE(nic->requires_lock, &nic->lock); - - if (status == GNI_RC_NOT_DONE) { - _gnix_nic_tx_free(nic, txd); - GNIX_INFO(FI_LOG_EP_DATA, - "GNI_SmsgSendWTag returned %s\n", - gni_err_str[status]); - } else if (status != GNI_RC_SUCCESS) { - _gnix_nic_tx_free(nic, txd); - GNIX_WARN(FI_LOG_EP_DATA, - "GNI_SmsgSendWTag returned %s\n", - 
gni_err_str[status]); - } else { - GNIX_INFO(FI_LOG_EP_DATA, "Sent RMA CQ data, req: %p\n", req); - } - - return gnixu_to_fi_errno(status); -} - -static int __gnix_rma_more_txd_complete(void *arg, gni_return_t tx_status) -{ - struct gnix_tx_descriptor *txd = (struct gnix_tx_descriptor *)arg; - struct gnix_fab_req *req = txd->req; - struct gnix_fab_req *more_req; - struct slist_entry *item; - int rc = FI_SUCCESS; - - if (tx_status != GNI_RC_SUCCESS) { - return __gnix_rma_post_err(req, FI_ECANCELED); - } - - free(txd->gni_more_ct_descs); - _gnix_nic_tx_free(req->gnix_ep->nic, txd); - - /* Successful Delivery. Copy any unaligned data if read req. */ - if ((req->flags & FI_MORE) && (req->type == GNIX_FAB_RQ_RDMA_READ)) { - if (req->flags & GNIX_RMA_INDIRECT) { - GNIX_DEBUG(FI_LOG_EP_DATA, - "Top TXD is Indirect. Copying\n"); - __gnix_rma_copy_indirect_get_data(req); - } else { - __gnix_rma_copy_chained_get_data(req); - } - slist_entry_foreach(req->rma.sle.next, item) { - more_req = container_of(item, struct gnix_fab_req, - rma.sle); - if (more_req->flags & GNIX_RMA_INDIRECT) { - GNIX_DEBUG(FI_LOG_EP_DATA, - "Chain Element is Indirect. 
Copying\n"); - __gnix_rma_copy_indirect_get_data(more_req); - } else { - __gnix_rma_copy_chained_get_data(more_req); - } - } - } - /* Completion event for top level request */ - rc = __gnix_rma_send_completion(req->vc->ep, req); - if (rc != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_rma_send_completion() failed: %d\n", - rc); - __gnix_rma_fr_complete(req); - - /* Create completion events for each fab request */ - slist_entry_foreach(req->rma.sle.next, item) { - more_req = container_of(item, struct gnix_fab_req, rma.sle); - - rc = __gnix_rma_send_completion(more_req->vc->ep, more_req); - if (rc != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_rma_send_completion() failed: %d\n", - rc); - __gnix_rma_more_fr_complete(more_req); - } - return FI_SUCCESS; -} - -static int __gnix_rma_txd_complete(void *arg, gni_return_t tx_status) -{ - struct gnix_tx_descriptor *txd = (struct gnix_tx_descriptor *)arg; - struct gnix_fab_req *req = txd->req; - int rc = FI_SUCCESS; - - /* Wait for both TXDs before processing RDMA chained requests. */ - if (req->flags & GNIX_RMA_CHAINED && req->flags & GNIX_RMA_RDMA) { - /* There are two TXDs involved with this request, an RDMA - * transfer to move the middle block and an FMA transfer to - * move unaligned head and/or tail. If this is the FMA TXD, - * copy the unaligned data to the user buffer. */ - if (txd->gni_desc.type == GNI_POST_FMA_GET) - __gnix_rma_copy_chained_get_data(req); - - /* Remember any failure. Retransmit both TXDs once both are - * complete. */ - req->rma.status |= tx_status; - - if (ofi_atomic_dec32(&req->rma.outstanding_txds) == 1) { - _gnix_nic_tx_free(req->gnix_ep->nic, txd); - GNIX_INFO(FI_LOG_EP_DATA, - "Received first RDMA chain TXD, req: %p\n", - req); - return FI_SUCCESS; - } - - tx_status = req->rma.status; - } - - if (tx_status != GNI_RC_SUCCESS) { - _gnix_nic_tx_free(req->gnix_ep->nic, txd); - return __gnix_rma_post_err(req, FI_ECANCELED); - } - - /* Successful delivery. Progress request. 
*/ - if (req->flags & GNIX_RMA_INDIRECT) { - __gnix_rma_copy_indirect_get_data(req); - } else if (req->flags & GNIX_RMA_CHAINED && - !(req->flags & GNIX_RMA_RDMA)) { - __gnix_rma_copy_chained_get_data(req); - } - - _gnix_nic_tx_free(req->gnix_ep->nic, txd); - - if (GNIX_ALLOW_FI_REMOTE_CQ_DATA(req->flags, req->gnix_ep->caps) || - req->vc->peer_caps & FI_RMA_EVENT) { - /* control message needed for imm. data or a counter event. */ - req->tx_failures = 0; - req->work_fn = __gnix_rma_send_data_req; - _gnix_vc_requeue_work_req(req); - } else { - /* complete request */ - rc = __gnix_rma_send_completion(req->vc->ep, req); - if (rc != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_rma_send_completion() failed: %d\n", - rc); - - __gnix_rma_fr_complete(req); - } - - return FI_SUCCESS; -} - -static gni_post_type_t __gnix_fr_post_type(int fr_type, int rdma) -{ - switch (fr_type) { - case GNIX_FAB_RQ_RDMA_WRITE: - return rdma ? GNI_POST_RDMA_PUT : GNI_POST_FMA_PUT; - case GNIX_FAB_RQ_RDMA_READ: - return rdma ? GNI_POST_RDMA_GET : GNI_POST_FMA_GET; - default: - break; - } - - GNIX_FATAL(FI_LOG_EP_DATA, "Unsupported post type: %d", fr_type); - return -FI_ENOSYS; -} - -static void __gnix_rma_fill_pd_chained_get(struct gnix_fab_req *req, - struct gnix_tx_descriptor *txd, - gni_mem_handle_t *rem_mdh) -{ - int head_off, head_len, tail_len, desc_idx = 0; - struct gnix_fid_mem_desc *loc_md; - struct gnix_fid_ep *ep = req->gnix_ep; - - if (req->int_tx_buf_e == NULL) { - req->int_tx_buf_e = _gnix_ep_get_int_tx_buf(ep); - if (req->int_tx_buf_e == NULL) { - GNIX_FATAL(FI_LOG_EP_DATA, "RAN OUT OF INT_TX_BUFS"); - /* TODO return error */ - } - } - - req->int_tx_buf = ((struct gnix_int_tx_buf *) req->int_tx_buf_e)->buf; - req->int_tx_mdh = _gnix_ep_get_int_tx_mdh(req->int_tx_buf_e); - - /* Copy head and tail through intermediate buffer. Copy - * aligned data directly to user buffer. */ - head_off = req->rma.rem_addr & GNI_READ_ALIGN_MASK; - head_len = head_off ? 
GNI_READ_ALIGN - head_off : 0; - tail_len = (req->rma.rem_addr + req->rma.len) & GNI_READ_ALIGN_MASK; - - /* Use full post descriptor for aligned data */ - loc_md = (struct gnix_fid_mem_desc *)req->rma.loc_md; - txd->gni_desc.local_mem_hndl = loc_md->mem_hndl; - txd->gni_desc.local_addr = (uint64_t)req->rma.loc_addr + head_len; - txd->gni_desc.remote_addr = (uint64_t)req->rma.rem_addr + head_len; - txd->gni_desc.length = req->rma.len - head_len - tail_len; - assert(txd->gni_desc.length); - txd->gni_desc.next_descr = &txd->gni_ct_descs[0]; - - if (head_off) { - txd->gni_ct_descs[0].ep_hndl = req->vc->gni_ep; - txd->gni_ct_descs[0].length = GNI_READ_ALIGN; - txd->gni_ct_descs[0].remote_addr = - req->rma.rem_addr & ~GNI_READ_ALIGN_MASK; - txd->gni_ct_descs[0].remote_mem_hndl = *rem_mdh; - txd->gni_ct_descs[0].local_addr = (uint64_t)req->int_tx_buf; - txd->gni_ct_descs[0].local_mem_hndl = req->int_tx_mdh; - - if (tail_len) - txd->gni_ct_descs[0].next_descr = - &txd->gni_ct_descs[1]; - else - txd->gni_ct_descs[0].next_descr = NULL; - - desc_idx++; - } - - if (tail_len) { - txd->gni_ct_descs[desc_idx].ep_hndl = req->vc->gni_ep; - txd->gni_ct_descs[desc_idx].length = GNI_READ_ALIGN; - txd->gni_ct_descs[desc_idx].remote_addr = - (req->rma.rem_addr + - req->rma.len) & ~GNI_READ_ALIGN_MASK; - txd->gni_ct_descs[desc_idx].remote_mem_hndl = *rem_mdh; - txd->gni_ct_descs[desc_idx].local_addr = - (uint64_t)req->int_tx_buf + GNI_READ_ALIGN; - txd->gni_ct_descs[desc_idx].local_mem_hndl = req->int_tx_mdh; - txd->gni_ct_descs[desc_idx].next_descr = NULL; - } - - GNIX_INFO(FI_LOG_EP_DATA, - "ct_rem_addr[0] = %p %p, ct_rem_addr[1] = %p %p\n", - txd->gni_ct_descs[0].remote_addr, - txd->gni_ct_descs[0].local_addr, - txd->gni_ct_descs[1].remote_addr, - txd->gni_ct_descs[1].local_addr); -} - -static void __gnix_rma_fill_pd_indirect_get(struct gnix_fab_req *req, - struct gnix_tx_descriptor *txd) -{ - int head_off = req->rma.rem_addr & GNI_READ_ALIGN_MASK; - struct gnix_fid_ep *ep = 
req->gnix_ep; - - if (req->int_tx_buf_e == NULL) { - req->int_tx_buf_e = _gnix_ep_get_int_tx_buf(ep); - if (req->int_tx_buf_e == NULL) { - GNIX_FATAL(FI_LOG_EP_DATA, "RAN OUT OF INT_TX_BUFS"); - /* TODO return error */ - } - } - - req->int_tx_buf = ((struct gnix_int_tx_buf *) req->int_tx_buf_e)->buf; - req->int_tx_mdh = _gnix_ep_get_int_tx_mdh(req->int_tx_buf_e); - - /* Copy all data through an intermediate buffer. */ - txd->gni_desc.local_addr = (uint64_t)req->int_tx_buf; - txd->gni_desc.local_mem_hndl = req->int_tx_mdh; - txd->gni_desc.length = CEILING(req->rma.len + head_off, GNI_READ_ALIGN); - txd->gni_desc.remote_addr = - (uint64_t)req->rma.rem_addr & ~GNI_READ_ALIGN_MASK; -} - -static void __gnix_rma_more_fill_pd_indirect_get(struct gnix_fab_req *req, - gni_ct_get_post_descriptor_t - *more_get, int idx) -{ - int head_off = req->rma.rem_addr & GNI_READ_ALIGN_MASK; - struct gnix_fid_ep *ep = req->gnix_ep; - - GNIX_DEBUG(FI_LOG_EP_DATA, "FI_MORE: filling indirect get\n"); - if (req->int_tx_buf_e == NULL) { - req->int_tx_buf_e = _gnix_ep_get_int_tx_buf(ep); - if (req->int_tx_buf_e == NULL) { - GNIX_FATAL(FI_LOG_EP_DATA, "RAN OUT OF INT_TX_BUFS"); - /* TODO return error */ - } - } - - req->int_tx_buf = ((struct gnix_int_tx_buf *) req->int_tx_buf_e)->buf; - req->int_tx_mdh = _gnix_ep_get_int_tx_mdh(req->int_tx_buf_e); - - /* Copy all data through an intermediate buffer. 
*/ - more_get[idx].local_addr = (uint64_t)req->int_tx_buf; - more_get[idx].local_mem_hndl = req->int_tx_mdh; - more_get[idx].length = CEILING(req->rma.len + head_off, GNI_READ_ALIGN); - more_get[idx].remote_addr = - (uint64_t)req->rma.rem_addr & ~GNI_READ_ALIGN_MASK; -} - -int _gnix_rma_post_irq(struct gnix_vc *vc) -{ - int rc = FI_SUCCESS; - struct gnix_fid_ep *ep; - struct gnix_nic *nic; - struct gnix_tx_descriptor *txd; - gni_return_t status; - -#if 1 - if (vc->conn_state != GNIX_VC_CONNECTED) - return -FI_EINVAL; - - ep = vc->ep; - assert(ep != NULL); - - nic = ep->nic; - assert(nic != NULL); - - rc = _gnix_nic_tx_alloc(nic, &txd); - if (rc != FI_SUCCESS) - return rc; - - txd->completer_fn = __gnix_rma_post_irq_complete; - txd->req = (void *)vc; - txd->gni_desc.type = GNI_POST_CQWRITE; - txd->gni_desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT; - /* - * try to send the cq write request through the network - * on the slow path - */ - txd->gni_desc.dlvr_mode = GNI_DLVMODE_IN_ORDER; - txd->gni_desc.remote_mem_hndl = vc->peer_irq_mem_hndl; - txd->gni_desc.cqwrite_value = vc->peer_id; - txd->gni_desc.rdma_mode = 0; - txd->gni_desc.src_cq_hndl = nic->tx_cq; /* check flags */ - - status = GNI_PostCqWrite(vc->gni_ep, - &txd->gni_desc); - if (OFI_UNLIKELY(status != GNI_RC_SUCCESS)) { - rc = gnixu_to_fi_errno(status); - _gnix_nic_tx_free(nic, txd); - } -#endif - - return rc; -} - -int _gnix_rma_post_rdma_chain_req(void *data) -{ - struct gnix_fab_req *req = (struct gnix_fab_req *)data; - struct gnix_fid_ep *ep = req->gnix_ep; - struct gnix_nic *nic = ep->nic; - struct gnix_tx_descriptor *bte_txd, *ct_txd; - gni_mem_handle_t mdh; - gni_return_t status; - int rc; - int inject_err = _gnix_req_inject_err(req); - int head_off, head_len, tail_len; - int fma_chain = 0; - - if (req->int_tx_buf_e == NULL) { - req->int_tx_buf_e = _gnix_ep_get_int_tx_buf(ep); - if (req->int_tx_buf_e == NULL) { - GNIX_WARN(FI_LOG_EP_DATA, "RAN OUT OF INT_TX_BUFS"); - return -FI_ENOSPC; - } - } - - 
req->int_tx_buf = ((struct gnix_int_tx_buf *) req->int_tx_buf_e)->buf; - req->int_tx_mdh = _gnix_ep_get_int_tx_mdh(req->int_tx_buf_e); - - if (!gnix_ops_allowed(ep, req->vc->peer_caps, req->flags)) { - rc = __gnix_rma_post_err_no_retrans(req, FI_EOPNOTSUPP); - if (rc != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_rma_post_err_no_retrans() failed: %d\n", - rc); - return -FI_ECANCELED; - } - - rc = _gnix_nic_tx_alloc(nic, &bte_txd); - if (rc) { - GNIX_INFO(FI_LOG_EP_DATA, - "BTE _gnix_nic_tx_alloc() failed: %d\n", - rc); - return -FI_ENOSPC; - } - - rc = _gnix_nic_tx_alloc(nic, &ct_txd); - if (rc) { - _gnix_nic_tx_free(nic, bte_txd); - GNIX_INFO(FI_LOG_EP_DATA, - "CT _gnix_nic_tx_alloc() failed: %d\n", - rc); - return -FI_ENOSPC; - } - - _GNIX_CONVERT_MR_KEY(ep->auth_key->using_vmdh, - req->vc->peer_key_offset, - _gnix_convert_key_to_mhdl, - &req->rma.rem_mr_key, &mdh); - - /* BTE TXD */ - bte_txd->completer_fn = __gnix_rma_txd_complete; - bte_txd->req = req; - bte_txd->gni_desc.type = GNI_POST_RDMA_GET; - bte_txd->gni_desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT; /* check flags */ - bte_txd->gni_desc.dlvr_mode = GNI_DLVMODE_PERFORMANCE; /* check flags */ - - head_off = req->rma.rem_addr & GNI_READ_ALIGN_MASK; - head_len = head_off ? 
GNI_READ_ALIGN - head_off : 0; - tail_len = (req->rma.rem_addr + req->rma.len) & GNI_READ_ALIGN_MASK; - - bte_txd->gni_desc.local_addr = (uint64_t)req->rma.loc_addr + head_len; - bte_txd->gni_desc.remote_addr = (uint64_t)req->rma.rem_addr + head_len; - bte_txd->gni_desc.length = req->rma.len - head_len - tail_len; - - bte_txd->gni_desc.remote_mem_hndl = mdh; - bte_txd->gni_desc.rdma_mode = 0; /* check flags */ - bte_txd->gni_desc.src_cq_hndl = nic->tx_cq; /* check flags */ - bte_txd->gni_desc.local_mem_hndl = req->rma.loc_md->mem_hndl; - - GNIX_LOG_DUMP_TXD(bte_txd); - - /* FMA TXD */ - ct_txd->completer_fn = __gnix_rma_txd_complete; - ct_txd->req = req; - ct_txd->gni_desc.type = GNI_POST_FMA_GET; - ct_txd->gni_desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT; /* check flags */ - ct_txd->gni_desc.dlvr_mode = GNI_DLVMODE_PERFORMANCE; /* check flags */ - - ct_txd->gni_desc.remote_mem_hndl = mdh; - ct_txd->gni_desc.rdma_mode = 0; /* check flags */ - ct_txd->gni_desc.src_cq_hndl = nic->tx_cq; /* check flags */ - ct_txd->gni_desc.local_mem_hndl = req->int_tx_mdh; - ct_txd->gni_desc.length = GNI_READ_ALIGN; - - if (head_off) { - ct_txd->gni_desc.remote_addr = - req->rma.rem_addr & ~GNI_READ_ALIGN_MASK; - ct_txd->gni_desc.local_addr = (uint64_t)req->int_tx_buf; - - if (tail_len) { - ct_txd->gni_desc.next_descr = &ct_txd->gni_ct_descs[0]; - ct_txd->gni_ct_descs[0].ep_hndl = req->vc->gni_ep; - ct_txd->gni_ct_descs[0].length = GNI_READ_ALIGN; - ct_txd->gni_ct_descs[0].remote_addr = - (req->rma.rem_addr + - req->rma.len) & ~GNI_READ_ALIGN_MASK; - ct_txd->gni_ct_descs[0].remote_mem_hndl = mdh; - ct_txd->gni_ct_descs[0].local_addr = - (uint64_t)req->int_tx_buf + - GNI_READ_ALIGN; - ct_txd->gni_ct_descs[0].local_mem_hndl = - req->int_tx_mdh; - ct_txd->gni_ct_descs[0].next_descr = NULL; - fma_chain = 1; - } - } else { - ct_txd->gni_desc.remote_addr = - (req->rma.rem_addr + - req->rma.len) & ~GNI_READ_ALIGN_MASK; - ct_txd->gni_desc.local_addr = - (uint64_t)req->int_tx_buf + GNI_READ_ALIGN; 
- } - - GNIX_LOG_DUMP_TXD(ct_txd); - - COND_ACQUIRE(nic->requires_lock, &nic->lock); - - /* - * TODO: need work here too! - */ - - if (OFI_UNLIKELY(inject_err)) { - _gnix_nic_txd_err_inject(nic, bte_txd); - status = GNI_RC_SUCCESS; - } else { - status = GNI_PostRdma(req->vc->gni_ep, - &bte_txd->gni_desc); - } - - if (status != GNI_RC_SUCCESS) { - COND_RELEASE(nic->requires_lock, &nic->lock); - _gnix_nic_tx_free(nic, ct_txd); - _gnix_nic_tx_free(nic, bte_txd); - - GNIX_INFO(FI_LOG_EP_DATA, "GNI_Post*() failed: %s\n", - gni_err_str[status]); - return gnixu_to_fi_errno(status); - } - - if (OFI_UNLIKELY(inject_err)) { - _gnix_nic_txd_err_inject(nic, ct_txd); - status = GNI_RC_SUCCESS; - } else if (fma_chain) { - status = GNI_CtPostFma(req->vc->gni_ep, - &ct_txd->gni_desc); - } else { - status = GNI_PostFma(req->vc->gni_ep, - &ct_txd->gni_desc); - } - - if (status != GNI_RC_SUCCESS) { - COND_RELEASE(nic->requires_lock, &nic->lock); - _gnix_nic_tx_free(nic, ct_txd); - - /* Wait for the first TX to complete, then retransmit the - * entire thing. */ - ofi_atomic_set32(&req->rma.outstanding_txds, 1); - req->rma.status = GNI_RC_TRANSACTION_ERROR; - - GNIX_INFO(FI_LOG_EP_DATA, "GNI_Post*() failed: %s\n", - gni_err_str[status]); - return FI_SUCCESS; - } - - COND_RELEASE(nic->requires_lock, &nic->lock); - - /* Wait for both TXs to complete, then process the request. 
*/ - ofi_atomic_set32(&req->rma.outstanding_txds, 2); - req->rma.status = 0; - - return FI_SUCCESS; -} - -/* Fill head and tail descriptors for chained gets */ -static void __gnix_rma_more_fill_sub_htd(struct gnix_fab_req *req, - gni_ct_get_post_descriptor_t *more_get, - gni_mem_handle_t *rem_mdh, - int *idx, int *entries) -{ - int head_off, tail_len; - struct gnix_fid_ep *ep = req->gnix_ep; - int indirect = !!(req->flags & GNIX_RMA_INDIRECT); - - head_off = req->rma.rem_addr & GNI_READ_ALIGN_MASK; - tail_len = (req->rma.rem_addr + req->rma.len) - & GNI_READ_ALIGN_MASK; - - /* Get int_tx_buf if unaligned and not indirect size. */ - if ((req->int_tx_buf_e == NULL) && - (head_off || tail_len) && !(indirect)) { - req->int_tx_buf_e = _gnix_ep_get_int_tx_buf(ep); - if (req->int_tx_buf_e == NULL) { - GNIX_FATAL(FI_LOG_EP_DATA, "RAN OUT OF INT_TX_BUFS"); - /* TODO return error */ - } - } else { - return; - } - - req->int_tx_buf = ((struct gnix_int_tx_buf *) - req->int_tx_buf_e)->buf; - req->int_tx_mdh = _gnix_ep_get_int_tx_mdh(req->int_tx_buf_e); - - if (head_off) { - assert(*idx < *entries); - GNIX_DEBUG(FI_LOG_EP_DATA, "FI_MORE: Chain Head Off\n"); - more_get[*idx].ep_hndl = req->vc->gni_ep; - more_get[*idx].length = GNI_READ_ALIGN; - more_get[*idx].remote_addr = - req->rma.rem_addr & ~GNI_READ_ALIGN_MASK; - more_get[*idx].remote_mem_hndl = *rem_mdh; - more_get[*idx].local_addr = (uint64_t)req->int_tx_buf; - more_get[*idx].local_mem_hndl = req->int_tx_mdh; - - if (*idx < (*entries) - 1) - more_get[*idx].next_descr = &more_get[(*idx) + 1]; - else - more_get[*idx].next_descr = NULL; - - (*idx)++; - } - if (tail_len) { - assert(*idx < *entries); - GNIX_DEBUG(FI_LOG_EP_DATA, "FI_MORE: Chain Tail Off\n"); - more_get[*idx].ep_hndl = req->vc->gni_ep; - more_get[*idx].length = GNI_READ_ALIGN; - more_get[*idx].remote_addr = - (req->rma.rem_addr + - req->rma.len) & ~GNI_READ_ALIGN_MASK; - more_get[*idx].remote_mem_hndl = *rem_mdh; - more_get[*idx].local_addr = - 
(uint64_t)req->int_tx_buf + GNI_READ_ALIGN; - more_get[*idx].local_mem_hndl = req->int_tx_mdh; - more_get[*idx].next_descr = NULL; - - if (*idx < (*entries) - 1) - more_get[*idx].next_descr = &more_get[(*idx) + 1]; - else - more_get[*idx].next_descr = NULL; - (*idx)++; - } -} - -static void __gnix_rma_more_fill_pd(struct gnix_fab_req *req, - struct gnix_tx_descriptor *txd) -{ - gni_ct_put_post_descriptor_t *more_put = NULL; - gni_ct_get_post_descriptor_t *more_get = NULL; - gni_mem_handle_t mdh; - struct gnix_fab_req *more_req; - struct gnix_fid_mem_desc *loc_md; - struct slist_entry *item; - int head_off, head_len = 0, tail_len = 0, entries = 0, idx = 0; - int indirect = !!(req->flags & GNIX_RMA_INDIRECT); - int sub_indirect; - - GNIX_DEBUG(FI_LOG_EP_DATA, "FI_MORE: Filling PD\n"); - /* If top level fab_req is unaligned, increment entries. */ - if (req->type == GNIX_FAB_RQ_RDMA_READ) { - head_off = req->rma.rem_addr & GNI_READ_ALIGN_MASK; - head_len = head_off ? GNI_READ_ALIGN - head_off : 0; - tail_len = (req->rma.rem_addr + req->rma.len) - & GNI_READ_ALIGN_MASK; - if (!indirect) { - if (head_off) { - entries++; - GNIX_DEBUG(FI_LOG_EP_DATA, - "FI_MORE: Txd Head Off\n"); - } - if (tail_len) { - entries++; - GNIX_DEBUG(FI_LOG_EP_DATA, - "FI_MORE: Txd Tail Off\n"); - } - } - /* Populate txd */ - if (indirect) { - GNIX_DEBUG(FI_LOG_EP_DATA, "FI_MORE: Txd Indirect\n"); - __gnix_rma_fill_pd_indirect_get(req, txd); - } else { - txd->gni_desc.local_addr = (uint64_t)req->rma.loc_addr + head_len; - txd->gni_desc.remote_addr = (uint64_t)req->rma.rem_addr + head_len; - txd->gni_desc.length = req->rma.len - head_len - tail_len; - assert(txd->gni_desc.length); - } - } - - /* Count number of sub post-descriptors to be chained. 
*/ - slist_entry_foreach(req->rma.sle.next, item) { - entries++; /* Increment regardless of type */ - /* Get fab_req pointer */ - if (req->type == GNIX_FAB_RQ_RDMA_READ) { - more_req = container_of(item, struct gnix_fab_req, - rma.sle); - sub_indirect = !!(more_req->flags & GNIX_RMA_INDIRECT); - head_off = more_req->rma.rem_addr & GNI_READ_ALIGN_MASK; - head_len = head_off ? GNI_READ_ALIGN - head_off : 0; - tail_len = (more_req->rma.rem_addr + more_req->rma.len) - & GNI_READ_ALIGN_MASK; - - /* Additional increments if unaligned */ - if (!sub_indirect) { - if (head_off) { - entries++; - GNIX_DEBUG(FI_LOG_EP_DATA, - "FI_MORE: Chain Req Head off++\n"); - } - if (tail_len) { - entries++; - GNIX_DEBUG(FI_LOG_EP_DATA, - "FI_MORE: Chaing Req Tail off++\n"); - } - } - } - } - - GNIX_INFO(FI_LOG_EP_DATA, "FI_MORE: %d sub descs to be populated\n", - entries); - - /* Allocate space for sub descriptors */ - if (entries > 0) { - if (req->type == GNIX_FAB_RQ_RDMA_WRITE) { - txd->gni_more_ct_descs = malloc(entries * - sizeof(gni_ct_put_post_descriptor_t)); - more_put = (gni_ct_put_post_descriptor_t *) - txd->gni_more_ct_descs; - } else { - txd->gni_more_ct_descs = malloc(entries * - sizeof(gni_ct_get_post_descriptor_t)); - more_get = (gni_ct_get_post_descriptor_t *) - txd->gni_more_ct_descs; - /* Populate Head/Tail of TOP Req if req is unaligned */ - __gnix_rma_more_fill_sub_htd(req, more_get, - &txd->gni_desc.remote_mem_hndl, &idx, &entries); - } - } - - /* Populate sub descriptors */ - slist_entry_foreach(req->rma.sle.next, item) { - /* Get fab_req pointer */ - more_req = container_of(item, struct gnix_fab_req, rma.sle); - sub_indirect = !!(more_req->flags & GNIX_RMA_INDIRECT); - - /* Populate based on type */ - if (req->type == GNIX_FAB_RQ_RDMA_WRITE) { - assert(more_put); - more_put[idx].ep_hndl = more_req->vc->gni_ep; - more_put[idx].length = more_req->rma.len; - more_put[idx].remote_addr = more_req->rma.rem_addr; - more_put[idx].local_addr = (uint64_t)more_req-> - 
rma.loc_addr; - - _GNIX_CONVERT_MR_KEY(more_req->vc->ep->auth_key->using_vmdh, - more_req->vc->peer_key_offset, - _gnix_convert_key_to_mhdl_no_crc, - &more_req->rma.rem_mr_key, &mdh); - more_put[idx].remote_mem_hndl = mdh; - - if (idx < entries - 1) - more_put[idx].next_descr = &more_put[idx + 1]; - else - more_put[idx].next_descr = NULL; - idx++; - } else { - assert(more_get); - _GNIX_CONVERT_MR_KEY(more_req->vc->ep->auth_key->using_vmdh, - more_req->vc->peer_key_offset, - _gnix_convert_key_to_mhdl_no_crc, - &more_req->rma.rem_mr_key, &mdh); - more_get[idx].remote_mem_hndl = mdh; - more_get[idx].ep_hndl = more_req->vc->gni_ep; - - if (sub_indirect) { - __gnix_rma_more_fill_pd_indirect_get(more_req, more_get, idx); - if (idx < entries - 1) { - more_get[idx].next_descr = &more_get[idx + 1]; - } else { - more_get[idx].next_descr = NULL; - } - idx++; - - } else { - /* get main aligned data in first transaction */ - loc_md = (struct gnix_fid_mem_desc *)more_req->rma.loc_md; - more_get[idx].local_mem_hndl = loc_md->mem_hndl; - more_get[idx].local_addr = (uint64_t)more_req->rma.loc_addr + head_len; - more_get[idx].remote_addr = (uint64_t)more_req->rma.rem_addr + head_len; - more_get[idx].length = more_req->rma.len - head_len - tail_len; - GNIX_DEBUG(FI_LOG_EP_DATA, "rma.len: %d - head_len:%d - tail_len:%d\n", - more_req->rma.len, head_len, tail_len); - assert(more_get[idx].length); - - if (idx < entries - 1) { - more_get[idx].next_descr = &more_get[idx + 1]; - } else { - more_get[idx].next_descr = NULL; - } - idx++; - /* head/tail function */ - __gnix_rma_more_fill_sub_htd(more_req, more_get, &mdh, - &idx, &entries); - } - } - } - if (entries > 0) { - if (req->type == GNIX_FAB_RQ_RDMA_WRITE) { - txd->gni_desc.next_descr = &more_put[0]; - } else { - txd->gni_desc.next_descr = &more_get[0]; - } - } -} - -int _gnix_rma_more_post_req(void *data) -{ - struct gnix_fab_req *fab_req = (struct gnix_fab_req *)data; - struct gnix_fid_ep *ep = fab_req->gnix_ep; - struct gnix_nic 
*nic = ep->nic; - struct gnix_fid_mem_desc *loc_md; - struct gnix_tx_descriptor *txd; - gni_mem_handle_t mdh; - gni_return_t status; - int rc; - - if (!gnix_ops_allowed(ep, fab_req->vc->peer_caps, fab_req->flags)) { - rc = __gnix_rma_post_err_no_retrans(fab_req, FI_EOPNOTSUPP); - if (rc != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_rma_post_err_no_retrans() failed: %d\n", - rc); - return -FI_ECANCELED; - } - - rc = _gnix_nic_tx_alloc(nic, &txd); - if (rc) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_nic_tx_alloc() failed: %d\n", - rc); - return -FI_ENOSPC; - } - - txd->completer_fn = __gnix_rma_more_txd_complete; - txd->req = fab_req; - - _GNIX_CONVERT_MR_KEY(ep->auth_key->using_vmdh, - fab_req->vc->peer_key_offset, - _gnix_convert_key_to_mhdl_no_crc, - &fab_req->rma.rem_mr_key, &mdh); - - txd->gni_desc.type = __gnix_fr_post_type(fab_req->type, 0); - txd->gni_desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT; /* check flags */ - txd->gni_desc.dlvr_mode = GNI_DLVMODE_PERFORMANCE; /* check flags */ - - if (fab_req->type == GNIX_FAB_RQ_RDMA_WRITE) { - txd->gni_desc.local_addr = (uint64_t)fab_req->rma.loc_addr; - txd->gni_desc.length = fab_req->rma.len; - assert(txd->gni_desc.length); - txd->gni_desc.remote_addr = (uint64_t)fab_req->rma.rem_addr; - } - - loc_md = (struct gnix_fid_mem_desc *)fab_req->rma.loc_md; - txd->gni_desc.local_mem_hndl = loc_md->mem_hndl; /* assert? 
*/ - txd->gni_desc.remote_mem_hndl = mdh; - txd->gni_desc.rdma_mode = 0; /* check flags */ - txd->gni_desc.src_cq_hndl = nic->tx_cq; /* check flags */ - txd->gni_desc.next_descr = NULL; /*Assume no sub descs */ - txd->gni_desc.prev_descr = NULL; - - __gnix_rma_more_fill_pd(fab_req, txd); - - GNIX_LOG_DUMP_TXD(txd); - - COND_ACQUIRE(nic->requires_lock, &nic->lock); - status = GNI_CtPostFma(fab_req->vc->gni_ep, &txd->gni_desc); - GNIX_DEBUG(FI_LOG_EP_DATA, "FI_MORE: Returned from GNI_CtPostFma\n"); - - COND_RELEASE(nic->requires_lock, &nic->lock); - - if (status != GNI_RC_SUCCESS) { - free(txd->gni_more_ct_descs); - _gnix_nic_tx_free(nic, txd); - GNIX_WARN(FI_LOG_EP_DATA, "GNI_Post*() failed: %s\n", - gni_err_str[status]); - } - - return gnixu_to_fi_errno(status); -} -int _gnix_rma_post_req(void *data) -{ - struct gnix_fab_req *fab_req = (struct gnix_fab_req *)data; - struct gnix_fid_ep *ep = fab_req->gnix_ep; - struct gnix_nic *nic = ep->nic; - struct gnix_fid_mem_desc *loc_md; - struct gnix_tx_descriptor *txd; - gni_mem_handle_t mdh; - gni_return_t status; - int rc; - int rdma = !!(fab_req->flags & GNIX_RMA_RDMA); - int indirect = !!(fab_req->flags & GNIX_RMA_INDIRECT); - int chained = !!(fab_req->flags & GNIX_RMA_CHAINED); - int inject_err = _gnix_req_inject_err(fab_req); - - if (!gnix_ops_allowed(ep, fab_req->vc->peer_caps, fab_req->flags)) { - rc = __gnix_rma_post_err_no_retrans(fab_req, FI_EOPNOTSUPP); - if (rc != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_rma_post_err_no_retrans() failed: %d\n", - rc); - return -FI_ECANCELED; - } - - rc = _gnix_nic_tx_alloc(nic, &txd); - if (rc) { - GNIX_INFO(FI_LOG_EP_DATA, - "_gnix_nic_tx_alloc() failed: %d\n", - rc); - return -FI_ENOSPC; - } - - txd->completer_fn = __gnix_rma_txd_complete; - txd->req = fab_req; - - if (rdma) { - _GNIX_CONVERT_MR_KEY(ep->auth_key, - fab_req->vc->peer_key_offset, - _gnix_convert_key_to_mhdl, - &fab_req->rma.rem_mr_key, &mdh); - } else { - /* Mem handle CRC is not validated during FMA 
operations. Skip - * this costly calculation. */ - _GNIX_CONVERT_MR_KEY(ep->auth_key, - fab_req->vc->peer_key_offset, - _gnix_convert_key_to_mhdl_no_crc, - &fab_req->rma.rem_mr_key, &mdh); - } - - txd->gni_desc.type = __gnix_fr_post_type(fab_req->type, rdma); - txd->gni_desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT; /* check flags */ - txd->gni_desc.dlvr_mode = GNI_DLVMODE_PERFORMANCE; /* check flags */ - - if (OFI_UNLIKELY(indirect)) { - __gnix_rma_fill_pd_indirect_get(fab_req, txd); - } else if (OFI_UNLIKELY(chained)) { - __gnix_rma_fill_pd_chained_get(fab_req, txd, &mdh); - } else { - txd->gni_desc.local_addr = (uint64_t)fab_req->rma.loc_addr; - txd->gni_desc.length = fab_req->rma.len; - txd->gni_desc.remote_addr = (uint64_t)fab_req->rma.rem_addr; - - loc_md = (struct gnix_fid_mem_desc *)fab_req->rma.loc_md; - if (loc_md) { - txd->gni_desc.local_mem_hndl = loc_md->mem_hndl; - } - } - - txd->gni_desc.remote_mem_hndl = mdh; - txd->gni_desc.rdma_mode = 0; /* check flags */ - txd->gni_desc.src_cq_hndl = nic->tx_cq; /* check flags */ - - GNIX_LOG_DUMP_TXD(txd); - - COND_ACQUIRE(nic->requires_lock, &nic->lock); - - if (OFI_UNLIKELY(inject_err)) { - _gnix_nic_txd_err_inject(nic, txd); - status = GNI_RC_SUCCESS; - } else if (chained) { - status = GNI_CtPostFma(fab_req->vc->gni_ep, &txd->gni_desc); - } else if (rdma) { - status = GNI_PostRdma(fab_req->vc->gni_ep, &txd->gni_desc); - } else { - status = GNI_PostFma(fab_req->vc->gni_ep, &txd->gni_desc); - } - - COND_RELEASE(nic->requires_lock, &nic->lock); - - if (status != GNI_RC_SUCCESS) { - _gnix_nic_tx_free(nic, txd); - GNIX_WARN(FI_LOG_EP_DATA, "GNI_Post*() failed: %s\n", - gni_err_str[status]); - } - - return gnixu_to_fi_errno(status); -} - -/** - * @brief Create an RMA request. - * - * Creates a new RMA request. Reads and writes are supported. GNI supports - * writing to local and remote addresses with any alignment and length. GNI - * requires reads to use four byte aligned remote address and length. 
For - * reads smaller than one cacheline, aligned data is read into an intermediate - * buffer, then partially copied to the user buffer (the code terms this an - * 'INDIRECT' transfer). For larger unaligned reads, the interior, aligned - * portion of remote data is pulled directly into the user provided buffer. - * The four bytes at the head and tail of an unaliged read are pulled into an - * intermediate buffer, then partially copied into the user buffer. This - * method is termed a 'CHAINED' transfer in the code. Unaligned reads smaller - * than the RDMA threshold can perform these 3 distinct transactions (head, - * middle, tail) in a single GNI chained FMA operation (resulting in a single - * GNI CQE). For unaligned reads larger than the RDMA threshold, two GNI posts - * are used, one RDMA TX to transfer the bulk of the data, another FMA TX to - * transfer the head and/or tail data. - * - * @param ep The endpiont to use for the RMA request. - * @param fr_type RMA request type. - * @param loc_addr Local address for the RMA request. - * @param len Length of the RMA request. - * @param mdesc Local memory descriptor for the RMA request. - * @param dest_addr Remote endpiont address for the RMA request. - * @param rem_addr Remote address for the RMA request. - * @param mkey Remote memory key for the RMA request. - * @param context Event context for the RMA request. - * @param flags Flags for the RMA request - * @param data Remote event data for the RMA request. - * - * @return FI_SUCCESS on success. FI_EINVAL for invalid parameter. -FI_ENOSPC - * for low memory. 
- */ -ssize_t _gnix_rma(struct gnix_fid_ep *ep, enum gnix_fab_req_type fr_type, - uint64_t loc_addr, size_t len, void *mdesc, - uint64_t dest_addr, uint64_t rem_addr, uint64_t mkey, - void *context, uint64_t flags, uint64_t data) -{ - struct gnix_vc *vc; - struct gnix_fab_req *req; - struct gnix_fid_mem_desc *md = NULL; - int rc; - int rdma; - struct fid_mr *auto_mr = NULL; - struct gnix_fab_req *more_req; - struct slist_entry *sle; - int connected; - struct gnix_auth_key *info; - - if (!(flags & FI_INJECT) && !ep->send_cq && - (((fr_type == GNIX_FAB_RQ_RDMA_WRITE) && !ep->write_cntr) || - ((fr_type == GNIX_FAB_RQ_RDMA_READ) && !ep->read_cntr))) { - return -FI_ENOCQ; - } - - if (flags & FI_TRIGGER) { - struct fi_triggered_context *trigger_context = - (struct fi_triggered_context *)context; - if ((trigger_context->event_type != FI_TRIGGER_THRESHOLD) || - (flags & FI_INJECT)) { - return -FI_EINVAL; - } - } - - if ((flags & FI_INJECT) && (len > GNIX_INJECT_SIZE)) { - GNIX_INFO(FI_LOG_EP_DATA, - "RMA length %d exceeds inject max size: %d\n", - len, GNIX_INJECT_SIZE); - return -FI_EINVAL; - } - - /* setup fabric request */ - req = _gnix_fr_alloc(ep); - if (!req) { - GNIX_INFO(FI_LOG_EP_DATA, "_gnix_fr_alloc() failed\n"); - return -FI_ENOSPC; - } - - rdma = len >= ep->domain->params.rma_rdma_thresh; - - req->type = fr_type; - req->gnix_ep = ep; - req->user_context = context; - req->work_fn = _gnix_rma_post_req; - req->rma.sle.next = NULL; - ofi_atomic_initialize32(&req->rma.outstanding_txds, 0); - - if (fr_type == GNIX_FAB_RQ_RDMA_READ && - (rem_addr & GNI_READ_ALIGN_MASK || len & GNI_READ_ALIGN_MASK)) { - if (len >= GNIX_RMA_UREAD_CHAINED_THRESH) { - GNIX_INFO(FI_LOG_EP_DATA, - "Using CT for unaligned GET, req: %p\n", - req); - flags |= GNIX_RMA_CHAINED; - } else { - GNIX_INFO(FI_LOG_EP_DATA, - "Using tmp buf for unaligned GET, req: %p\n", - req); - flags |= GNIX_RMA_INDIRECT; - } - - if (rdma) - req->work_fn = _gnix_rma_post_rdma_chain_req; - } - - if (!(flags & 
(GNIX_RMA_INDIRECT | FI_INJECT)) && !mdesc && - (rdma || fr_type == GNIX_FAB_RQ_RDMA_READ)) { - uint64_t requested_key; - - info = ep->auth_key; - assert(info); - - if (info->using_vmdh) - requested_key = _gnix_get_next_reserved_key(info); - else - requested_key = 0; - - /* We need to auto-register the source buffer. */ - rc = _gnix_mr_reg(&ep->domain->domain_fid.fid, (void *)loc_addr, - len, FI_READ | FI_WRITE, 0, requested_key, - 0, &auto_mr, NULL, ep->auth_key, - GNIX_PROV_REG); - if (rc != FI_SUCCESS) { - GNIX_INFO(FI_LOG_EP_DATA, - "Failed to auto-register local buffer: %d\n", - rc); - goto err_auto_reg; - } - flags |= FI_LOCAL_MR; - mdesc = (void *)auto_mr; - GNIX_INFO(FI_LOG_EP_DATA, "auto-reg MR: %p\n", auto_mr); - } - - if (mdesc) - md = container_of(mdesc, struct gnix_fid_mem_desc, mr_fid); - req->rma.loc_md = (void *)md; - - req->rma.rem_addr = rem_addr; - req->rma.rem_mr_key = mkey; - req->rma.len = len; - req->rma.imm = data; - req->flags = flags; - - if (req->flags & FI_INJECT) { - memcpy(req->inject_buf, (void *)loc_addr, len); - req->rma.loc_addr = (uint64_t)req->inject_buf; - } else { - req->rma.loc_addr = loc_addr; - } - - /* Inject interfaces always suppress completions. If - * SELECTIVE_COMPLETION is set, honor any setting. Otherwise, always - * deliver a completion. */ - if ((flags & GNIX_SUPPRESS_COMPLETION) || - (ep->send_selective_completion && !(flags & FI_COMPLETION))) { - req->flags &= ~FI_COMPLETION; - } else { - req->flags |= FI_COMPLETION; - } - - if (rdma) { - req->flags |= GNIX_RMA_RDMA; - } - - COND_ACQUIRE(ep->requires_lock, &ep->vc_lock); - - /* find VC for target */ - rc = _gnix_vc_ep_get_vc(ep, dest_addr, &vc); - if (rc) { - GNIX_INFO(FI_LOG_EP_DATA, - "_gnix_vc_ep_get_vc() failed, addr: %lx, rc:%d\n", - dest_addr, rc); - goto err_get_vc; - } - - req->vc = vc; - connected = (vc->conn_state == GNIX_VC_CONNECTED); - - /* Adding FI_FENCE to an FI_MORE list will break the FI_MORE Chain. 
- * Current FI_MORE implementation does not create remote CQ events. - * Remove FI_MORE flag when FI_FENCE or REMOTE EP requirements are - * present. We will also only allow FI_MORE if a prior connection has - * been established, so that the peer capabilities can be determined.*/ - if ((flags & FI_FENCE) || (flags & FI_REMOTE_CQ_DATA) || - !connected || (req->vc->peer_caps & FI_RMA_EVENT)) { - flags &= ~FI_MORE; - } - - /* Add reads/writes to slist when FI_MORE is present, Or - * if this is the first message in the chain without FI_MORE. - * When FI_MORE is not present, if the slists are not empty - * it is the first message without FI_MORE. - * Do not add reqs with FI_FENCE or REMOTE EP requirements requirements - * to the fi_more list. */ - if ((flags & FI_MORE) || - (!(flags & FI_MORE) && connected && - (!slist_empty(&ep->more_write) || !slist_empty(&ep->more_read)) && - !(flags & FI_FENCE || flags & FI_REMOTE_CQ_DATA || - req->vc->peer_caps & FI_RMA_EVENT))) { - if (fr_type == GNIX_FAB_RQ_RDMA_WRITE) { - slist_insert_tail(&req->rma.sle, &ep->more_write); - req->work_fn = _gnix_rma_more_post_req; - } else if (fr_type == GNIX_FAB_RQ_RDMA_READ) { - slist_insert_tail(&req->rma.sle, &ep->more_read); - req->work_fn = _gnix_rma_more_post_req; - } - - if (flags & FI_MORE) { - COND_RELEASE(ep->requires_lock, &ep->vc_lock); - return FI_SUCCESS; - } - } - - /* Initiate read/write chains on first message without FI_MORE. */ - if (!(flags & FI_MORE) && - (!(slist_empty(&ep->more_write)) || - !(slist_empty(&ep->more_read)))) { - if (!(slist_empty(&ep->more_write))) { - sle = ep->more_write.head; - more_req = container_of(sle, struct gnix_fab_req, - rma.sle); - GNIX_DEBUG(FI_LOG_EP_DATA, - "FI_MORE: got fab_request from more_write. 
Queuing Request\n"); - _gnix_vc_queue_tx_req(more_req); - slist_init(&ep->more_write); /* For future reqs */ - } - if (!(slist_empty(&ep->more_read))) { - sle = ep->more_read.head; - more_req = container_of(sle, struct gnix_fab_req, - rma.sle); - GNIX_DEBUG(FI_LOG_EP_DATA, - "FI_MORE: got fab_request from more_read. Queuing Request\n"); - _gnix_vc_queue_tx_req(more_req); - slist_init(&ep->more_read); - } - - /* Requests with FI_FENCE or REMOTE EP requirements are not - * added to the FI_MORE List. They must be queued separately. */ - if ((flags & FI_FENCE) || (flags & FI_REMOTE_CQ_DATA) || - (req->vc->peer_caps & FI_RMA_EVENT)) { - rc = _gnix_vc_queue_tx_req(req); - COND_RELEASE(ep->requires_lock, &ep->vc_lock); - return rc; - } - COND_RELEASE(ep->requires_lock, &ep->vc_lock); - return FI_SUCCESS; - } - - GNIX_DEBUG(FI_LOG_EP_DATA, "Queuing (%p %p %d)\n", - (void *)loc_addr, (void *)rem_addr, len); - - rc = _gnix_vc_queue_tx_req(req); - connected = (vc->conn_state == GNIX_VC_CONNECTED); - - COND_RELEASE(ep->requires_lock, &ep->vc_lock); - - /* - * If a new VC was allocated, progress CM before returning. - * If the VC is connected and there's a backlog, poke - * the nic progress engine befure returning. - */ - if (!connected) { - _gnix_cm_nic_progress(ep->cm_nic); - } else if (!dlist_empty(&vc->tx_queue)) { - _gnix_nic_progress(vc->ep->nic); - } - - return rc; - -err_get_vc: - COND_RELEASE(ep->requires_lock, &ep->vc_lock); - if (flags & FI_LOCAL_MR) { - fi_close(&auto_mr->fid); - flags &= ~FI_LOCAL_MR; - } -err_auto_reg: - _gnix_fr_free(req->vc->ep, req); - return rc; -} - diff --git a/prov/gni/src/gnix_sep.c b/prov/gni/src/gnix_sep.c deleted file mode 100644 index f8f9182b347..00000000000 --- a/prov/gni/src/gnix_sep.c +++ /dev/null @@ -1,1680 +0,0 @@ -/* - * Copyright (c) 2016-2018 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* - * Endpoint common code - */ -#include -#include -#include - -#include "gnix.h" -#include "gnix_cm_nic.h" -#include "gnix_ep.h" -#include "gnix_vc.h" -#include "gnix_util.h" -#include "gnix_msg.h" -#include "gnix_cntr.h" -#include "gnix_rma.h" -#include "gnix_atomic.h" - -/****************************************************************************** - * Forward declaration for ops structures. 
- ******************************************************************************/ - -static struct fi_ops gnix_sep_fi_ops; -static struct fi_ops_ep gnix_sep_ops; -/* -static struct fi_ops gnix_tx_fi_ops; -static struct fi_ops_ep gnix_tx_ops; -*/ -static struct fi_ops_cm gnix_sep_rxtx_cm_ops; -static struct fi_ops_msg gnix_sep_msg_ops; -static struct fi_ops_rma gnix_sep_rma_ops; -static struct fi_ops_tagged gnix_sep_tagged_ops; -static struct fi_ops_atomic gnix_sep_atomic_ops; - -/******************************************************************************* - * SEP(EP) OPS API function implementations. - ******************************************************************************/ -/* TODO: - initialize capabilities for tx_priv? - initialize attr? -*/ - -static void __trx_destruct(void *obj) -{ - int index, n_ids; - int __attribute__((unused)) ret; - struct gnix_fid_trx *trx = (struct gnix_fid_trx *) obj; - struct gnix_fid_ep *ep_priv; - struct gnix_fid_sep *sep_priv; - int refs_held; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - ep_priv = trx->ep; - assert(ep_priv != NULL); - sep_priv = trx->sep; - assert(sep_priv != NULL); - - /* These assignments must be done before we free ep_priv */ - index = ep_priv->src_addr.gnix_addr.cdm_id - - sep_priv->cdm_id_base; - - n_ids = MAX(sep_priv->info->ep_attr->tx_ctx_cnt, - sep_priv->info->ep_attr->rx_ctx_cnt); - - refs_held = _gnix_ref_put(ep_priv); - - if (refs_held == 0) { - _gnix_ref_put(sep_priv->cm_nic); - /* Remove the context from the sep's list */ - if ((index >= 0) && (index < n_ids)) { - if (sep_priv->ep_table[index]) { - sep_priv->ep_table[index] = NULL; - } else { - GNIX_WARN(FI_LOG_EP_CTRL, - "rx/tx context already freed\n"); - } - } else { - GNIX_WARN(FI_LOG_EP_CTRL, - "rx/tx context index out of range\n"); - } - } - - _gnix_ref_put(sep_priv); - - free(trx); -} - -static int gnix_sep_tx_ctx(struct fid_ep *sep, int index, - struct fi_tx_attr *attr, - struct fid_ep **tx_ep, void *context) -{ - int ret = FI_SUCCESS; 
- struct gnix_fid_sep *sep_priv; - struct gnix_fid_ep *ep_priv = NULL; - struct gnix_fid_trx *tx_priv = NULL; - struct fid_ep *ep_ptr; - struct gnix_ep_attr ep_attr = {0}; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - sep_priv = container_of(sep, struct gnix_fid_sep, ep_fid); - - if (!sep_priv) { - GNIX_WARN(FI_LOG_EP_CTRL, "endpoint is not initialized\n"); - return -FI_EINVAL; - } - - if ((sep_priv->ep_fid.fid.fclass != FI_CLASS_SEP) || - (index >= sep_priv->info->ep_attr->tx_ctx_cnt)) - return -FI_EINVAL; - - if (attr && attr->op_flags & ~GNIX_EP_OP_FLAGS) { - GNIX_WARN(FI_LOG_EP_CTRL, "invalid op_flags\n"); - return -FI_EINVAL; - } - - /* caps and mode bits are required to be a subset of info */ - if (attr && attr->caps && (attr->caps & ~sep_priv->info->caps)) { - GNIX_WARN(FI_LOG_EP_CTRL, "invalid capabilities\n"); - return -FI_EINVAL; - } - - if (attr && attr->mode && (attr->mode & ~sep_priv->info->mode)) { - GNIX_WARN(FI_LOG_EP_CTRL, "invalid mode\n"); - return -FI_EINVAL; - } - - /* - * check to see if the tx context was already - * allocated - */ - - ofi_spin_lock(&sep_priv->sep_lock); - - if (sep_priv->tx_ep_table[index] != NULL) { - ret = -FI_EBUSY; - goto err; - } - - tx_priv = calloc(1, sizeof(struct gnix_fid_trx)); - if (!tx_priv) { - ret = -FI_ENOMEM; - goto err; - } - - tx_priv->ep_fid.fid.fclass = FI_CLASS_TX_CTX; - tx_priv->ep_fid.fid.context = context; - tx_priv->ep_fid.fid.ops = &gnix_sep_fi_ops; - tx_priv->ep_fid.ops = &gnix_sep_ops; - tx_priv->ep_fid.msg = &gnix_sep_msg_ops; - tx_priv->ep_fid.rma = &gnix_sep_rma_ops; - tx_priv->ep_fid.tagged = &gnix_sep_tagged_ops; - tx_priv->ep_fid.atomic = &gnix_sep_atomic_ops; - tx_priv->ep_fid.cm = &gnix_sep_rxtx_cm_ops; - tx_priv->index = index; - - /* if an EP already allocated for this index, use it */ - if (sep_priv->ep_table[index] != NULL) { - ep_priv = container_of(sep_priv->ep_table[index], - struct gnix_fid_ep, ep_fid); - sep_priv->tx_ep_table[index] = sep_priv->ep_table[index]; - 
_gnix_ref_get(ep_priv); - - } else { - - /* - * allocate the underlying gnix_fid_ep struct - */ - - ep_attr.use_cdm_id = true; - ep_attr.cdm_id = sep_priv->cdm_id_base + index; - ep_attr.cm_nic = sep_priv->cm_nic; - ep_attr.cm_ops = &gnix_sep_rxtx_cm_ops; - /* TODO: clean up this cm_nic */ - _gnix_ref_get(sep_priv->cm_nic); - ret = _gnix_ep_alloc(sep_priv->domain, - sep_priv->info, - &ep_attr, - &ep_ptr, context); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "gnix_ep_alloc returned %s\n", - fi_strerror(-ret)); - goto err; - } - - sep_priv->ep_table[index] = ep_ptr; - sep_priv->tx_ep_table[index] = ep_ptr; - ep_priv = container_of(ep_ptr, struct gnix_fid_ep, ep_fid); - if (sep_priv->av != NULL) { - ep_priv->av = sep_priv->av; - _gnix_ref_get(ep_priv->av); - _gnix_ep_init_vc(ep_priv); - } - } - - _gnix_ref_init(&tx_priv->ref_cnt, 1, __trx_destruct); - tx_priv->ep = ep_priv; - tx_priv->sep = sep_priv; - _gnix_ref_get(sep_priv); - tx_priv->caps = ep_priv->caps; - *tx_ep = &tx_priv->ep_fid; - tx_priv->op_flags = ep_priv->op_flags; - - if (attr) { - tx_priv->op_flags |= attr->op_flags; - memcpy(attr, sep_priv->info->tx_attr, - sizeof(struct fi_tx_attr)); - attr->op_flags = tx_priv->op_flags; - } -err: - ofi_spin_unlock(&sep_priv->sep_lock); - - return ret; -} - -static int gnix_sep_rx_ctx(struct fid_ep *sep, int index, - struct fi_rx_attr *attr, - struct fid_ep **rx_ep, void *context) -{ - int ret = FI_SUCCESS; - struct gnix_fid_sep *sep_priv; - struct gnix_fid_ep *ep_priv = NULL; - struct gnix_fid_trx *rx_priv = NULL; - struct fid_ep *ep_ptr; - struct gnix_ep_attr ep_attr = {0}; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - sep_priv = container_of(sep, struct gnix_fid_sep, ep_fid); - - if (!sep_priv) { - GNIX_WARN(FI_LOG_EP_CTRL, "endpoint is not initialized\n"); - return -FI_EINVAL; - } - - if ((sep_priv->ep_fid.fid.fclass != FI_CLASS_SEP) || - (index >= sep_priv->info->ep_attr->rx_ctx_cnt)) - return -FI_EINVAL; - - if (attr && attr->op_flags & 
~GNIX_EP_OP_FLAGS) { - GNIX_WARN(FI_LOG_EP_CTRL, "invalid op_flags\n"); - return -FI_EINVAL; - } - - /* caps and mode bits are required to be a subset of info */ - if (attr && attr->caps && (attr->caps & ~sep_priv->info->caps)) { - GNIX_WARN(FI_LOG_EP_CTRL, "invalid capabilities\n"); - return -FI_EINVAL; - } - - if (attr && attr->mode && (attr->mode & ~sep_priv->info->mode)) { - GNIX_WARN(FI_LOG_EP_CTRL, "invalid mode\n"); - return -FI_EINVAL; - } - - /* - * check to see if the rx context was already - * allocated - */ - - ofi_spin_lock(&sep_priv->sep_lock); - - if (sep_priv->rx_ep_table[index] != NULL) { - ret = -FI_EBUSY; - goto err; - } - - rx_priv = calloc(1, sizeof(struct gnix_fid_trx)); - if (!rx_priv) { - ret = -FI_ENOMEM; - goto err; - } - - rx_priv->ep_fid.fid.fclass = FI_CLASS_RX_CTX; - rx_priv->ep_fid.fid.context = context; - rx_priv->ep_fid.fid.ops = &gnix_sep_fi_ops; - rx_priv->ep_fid.ops = &gnix_sep_ops; - rx_priv->ep_fid.msg = &gnix_sep_msg_ops; - rx_priv->ep_fid.rma = &gnix_sep_rma_ops; - rx_priv->ep_fid.tagged = &gnix_sep_tagged_ops; - rx_priv->ep_fid.atomic = &gnix_sep_atomic_ops; - rx_priv->ep_fid.cm = &gnix_sep_rxtx_cm_ops; - rx_priv->index = index; - - /* if an EP already allocated for this index, use it */ - if (sep_priv->ep_table[index] != NULL) { - ep_priv = container_of(sep_priv->ep_table[index], - struct gnix_fid_ep, ep_fid); - sep_priv->rx_ep_table[index] = sep_priv->ep_table[index]; - _gnix_ref_get(ep_priv); - } else { - - /* - * compute cdm_id and allocate an EP. 
- */ - - ep_attr.use_cdm_id = true; - ep_attr.cdm_id = sep_priv->cdm_id_base + index; - ep_attr.cm_nic = sep_priv->cm_nic; - ep_attr.cm_ops = &gnix_sep_rxtx_cm_ops; - _gnix_ref_get(sep_priv->cm_nic); - ret = _gnix_ep_alloc(sep_priv->domain, - sep_priv->info, - &ep_attr, - &ep_ptr, context); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "gnix_ep_alloc returned %s\n", - fi_strerror(-ret)); - goto err; - } - - sep_priv->ep_table[index] = ep_ptr; - sep_priv->rx_ep_table[index] = ep_ptr; - ep_priv = container_of(ep_ptr, struct gnix_fid_ep, ep_fid); - if (sep_priv->av != NULL) { - ep_priv->av = sep_priv->av; - _gnix_ref_get(ep_priv->av); - _gnix_ep_init_vc(ep_priv); - } - } - - _gnix_ref_init(&rx_priv->ref_cnt, 1, __trx_destruct); - rx_priv->ep = ep_priv; - rx_priv->sep = sep_priv; - _gnix_ref_get(sep_priv); - rx_priv->caps = ep_priv->caps; - *rx_ep = &rx_priv->ep_fid; - rx_priv->op_flags = ep_priv->op_flags; - - if (attr) { - rx_priv->op_flags |= attr->op_flags; - memcpy(attr, sep_priv->info->rx_attr, - sizeof(struct fi_rx_attr)); - attr->op_flags = rx_priv->op_flags; - } -err: - ofi_spin_unlock(&sep_priv->sep_lock); - - return ret; -} - -DIRECT_FN STATIC int gnix_sep_bind(fid_t fid, struct fid *bfid, uint64_t flags) -{ - int i, ret, n_ids; - struct gnix_fid_ep *ep; - struct gnix_fid_av *av; - struct gnix_fid_sep *sep; - struct gnix_fid_domain *domain_priv; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - switch (fid->fclass) { - case FI_CLASS_SEP: - break; - case FI_CLASS_TX_CTX: - case FI_CLASS_RX_CTX: - return gnix_ep_bind(fid, bfid, flags); - default: - return -FI_ENOSYS; - } - - sep = container_of(fid, struct gnix_fid_sep, ep_fid); - domain_priv = container_of(sep->domain, struct gnix_fid_domain, - domain_fid); - - ret = ofi_ep_bind_valid(&gnix_prov, bfid, flags); - if (ret) - return ret; - - switch (bfid->fclass) { - case FI_CLASS_AV: - - n_ids = MAX(sep->info->ep_attr->tx_ctx_cnt, - sep->info->ep_attr->rx_ctx_cnt); - - av = container_of(bfid, struct 
gnix_fid_av, av_fid.fid); - if (domain_priv != av->domain) { - return -FI_EINVAL; - } - - /* - * can't bind more than one AV - */ - - if (sep->av != NULL) - return -FI_EINVAL; - - sep->av = av; - _gnix_ref_get(sep->av); - - for (i = 0; i < n_ids; i++) { - ep = container_of(sep->ep_table[i], - struct gnix_fid_ep, ep_fid); - if (ep != NULL && ep->av == NULL) { - ep->av = av; - _gnix_ep_init_vc(ep); - _gnix_ref_get(ep->av); - } - } - - break; - - default: - ret = -FI_ENOSYS; - break; - } - - return ret; -} - -/******************************************************************************* - * Base SEP API function implementations. - ******************************************************************************/ -static int gnix_sep_control(fid_t fid, int command, void *arg) -{ - int ret = FI_SUCCESS; - struct gnix_fid_ep *ep; - struct gnix_fid_sep *sep; - struct gnix_fid_trx *trx_priv; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - switch (fid->fclass) { - case FI_CLASS_SEP: - /* nothing to do for scalable endpoints */ - return FI_SUCCESS; - case FI_CLASS_TX_CTX: - case FI_CLASS_RX_CTX: - trx_priv = container_of(fid, struct gnix_fid_trx, ep_fid); - ep = trx_priv->ep; - sep = trx_priv->sep; - break; - default: - return -FI_EINVAL; - } - - if (!ep) { - return -FI_EINVAL; - } - - switch (command) { - case FI_ENABLE: - if (GNIX_EP_RDM_DGM(ep->type)) { - if (ep->cm_nic == NULL) { - ret = -FI_EOPBADSTATE; - goto err; - } - - if (sep->enabled[trx_priv->index] == false) { - ret = _gnix_vc_cm_init(ep->cm_nic); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_vc_cm_nic_init call returned %d\n", - ret); - goto err; - } - - ret = _gnix_cm_nic_enable(ep->cm_nic); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_cm_nic_enable call returned %d\n", - ret); - goto err; - } - - ret = _gnix_ep_int_tx_pool_init(ep); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_ep_int_tx_pool_init call returned %d\n", - ret); - goto err; - } - - 
sep->enabled[trx_priv->index] = true; - } - - /* - * enable the EP - */ - - if (fid->fclass == FI_CLASS_TX_CTX) { - ret = _gnix_ep_tx_enable(ep); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_ep_tx_enable call returned %d\n", - ret); - goto err; - } - } - - if (fid->fclass == FI_CLASS_RX_CTX) { - ret = _gnix_ep_rx_enable(ep); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_ep_rx_enable call returned %d\n", - ret); - goto err; - } - } - - } - - break; - case FI_GETFIDFLAG: - case FI_SETFIDFLAG: - case FI_ALIAS: - default: - return -FI_ENOSYS; - } -err: - return ret; -} - -static void __sep_destruct(void *obj) -{ - int i; - struct fid_domain *domain; - struct gnix_fid_ep *ep; - struct gnix_fid_domain *domain_priv; - struct gnix_fid_sep *sep = (struct gnix_fid_sep *) obj; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - domain = sep->domain; - assert(domain != NULL); - domain_priv = container_of(domain, struct gnix_fid_domain, domain_fid); - - _gnix_ref_put(domain_priv); - - /* - * For now GNI provider doesn't require that an AV have been bound - * to the SEP itself. - */ - if (sep->av != NULL) { - _gnix_ref_put(sep->av); - sep->av = NULL; - } - - if (sep->ep_table) { - for (i = 0; i < sep->info->ep_attr->tx_ctx_cnt; i++) { - ep = container_of(sep->ep_table[i], - struct gnix_fid_ep, ep_fid); - if (ep == NULL) { - continue; - } - - /* tx/rx contexts still open. This should - * warn in gnix_sep_close, but we will still - * try to clean up a bit. 
*/ - if (ep->av) { - _gnix_ref_put(ep->av); - ep->av = NULL; - } - } - - free(sep->ep_table); - } - - if (sep->tx_ep_table) - free(sep->tx_ep_table); - if (sep->rx_ep_table) - free(sep->rx_ep_table); - if (sep->enabled) - free(sep->enabled); - - fi_freeinfo(sep->info); - free(sep); -} - -static int gnix_sep_close(fid_t fid) -{ - int ret = FI_SUCCESS; - struct gnix_fid_sep *sep; - struct gnix_fid_trx *trx_priv; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - switch (fid->fclass) { - case FI_CLASS_SEP: - sep = container_of(fid, struct gnix_fid_sep, ep_fid.fid); - if (ofi_atomic_get32(&sep->ref_cnt.references) > 1) { - GNIX_WARN(FI_LOG_EP_CTRL, "Contexts associated with " - "this endpoint are still open\n"); - return -FI_EBUSY; - } - _gnix_ref_put(sep); - break; - case FI_CLASS_TX_CTX: - case FI_CLASS_RX_CTX: - trx_priv = container_of(fid, struct gnix_fid_trx, ep_fid); - _gnix_ref_put(trx_priv); - break; - default: - return -FI_EINVAL; - } - - return ret; -} - -int gnix_sep_open(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **sep, void *context) -{ - struct gnix_fid_sep *sep_priv = NULL; - struct gnix_fid_domain *domain_priv = NULL; - int ret = FI_SUCCESS; - int n_ids = GNIX_SEP_MAX_CNT; - uint32_t cdm_id, cdm_id_base; - struct gnix_ep_name *name; - struct gnix_auth_key *auth_key; - uint32_t name_type = GNIX_EPN_TYPE_UNBOUND; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if ((domain == NULL) || (info == NULL) || (sep == NULL) || - (info->ep_attr == NULL)) - return -FI_EINVAL; - - if (!GNIX_EP_RDM_DGM(info->ep_attr->type)) - return -FI_ENOSYS; - - /* - * check limits for rx and tx ctx's - */ - - if ((info->ep_attr->tx_ctx_cnt > n_ids) || - (info->ep_attr->rx_ctx_cnt > n_ids)) - return -FI_EINVAL; - - n_ids = MAX(info->ep_attr->tx_ctx_cnt, info->ep_attr->rx_ctx_cnt); - - domain_priv = container_of(domain, struct gnix_fid_domain, domain_fid); - - if (info->ep_attr->auth_key_size) { - auth_key = GNIX_GET_AUTH_KEY(info->ep_attr->auth_key, - 
info->ep_attr->auth_key_size, domain_priv->using_vmdh); - if (!auth_key) - return -FI_EINVAL; - } else { - auth_key = domain_priv->auth_key; - assert(auth_key); - } - - sep_priv = calloc(1, sizeof(*sep_priv)); - if (!sep_priv) - return -FI_ENOMEM; - - sep_priv->auth_key = auth_key; - sep_priv->type = info->ep_attr->type; - sep_priv->ep_fid.fid.fclass = FI_CLASS_SEP; - sep_priv->ep_fid.fid.context = context; - - sep_priv->ep_fid.fid.ops = &gnix_sep_fi_ops; - sep_priv->ep_fid.ops = &gnix_sep_ops; - sep_priv->ep_fid.cm = &gnix_ep_ops_cm; - sep_priv->domain = domain; - - sep_priv->info = fi_dupinfo(info); - sep_priv->info->addr_format = info->addr_format; - if (!sep_priv->info) { - GNIX_WARN(FI_LOG_EP_CTRL, - "fi_dupinfo NULL\n"); - ret = -FI_ENOMEM; - goto err; - } - - _gnix_ref_init(&sep_priv->ref_cnt, 1, __sep_destruct); - - sep_priv->caps = info->caps & GNIX_EP_PRIMARY_CAPS; - - sep_priv->ep_table = calloc(n_ids, sizeof(struct gnix_fid_ep *)); - if (sep_priv->ep_table == NULL) { - GNIX_WARN(FI_LOG_EP_CTRL, - "call returned NULL\n"); - ret = -FI_ENOMEM; - goto err; - } - - sep_priv->tx_ep_table = calloc(n_ids, sizeof(struct gnix_fid_ep *)); - if (sep_priv->tx_ep_table == NULL) { - GNIX_WARN(FI_LOG_EP_CTRL, - "call returned NULL\n"); - ret = -FI_ENOMEM; - goto err; - } - - sep_priv->rx_ep_table = calloc(n_ids, sizeof(struct gnix_fid_ep *)); - if (sep_priv->rx_ep_table == NULL) { - GNIX_WARN(FI_LOG_EP_CTRL, - "call returned NULL\n"); - ret = -FI_ENOMEM; - goto err; - } - - sep_priv->enabled = calloc(n_ids, sizeof(bool)); - if (sep_priv->enabled == NULL) { - GNIX_WARN(FI_LOG_EP_CTRL, - "call returned NULL\n"); - ret = -FI_ENOMEM; - goto err; - } - - /* - * allocate a block of cm nic ids for both tx/rx ctx - first - * checking to see if the application has specified a base - * via a node/service option to fi_getinfo - */ - - if (info->src_addr != NULL) { - name = (struct gnix_ep_name *) info->src_addr; - - if (name->name_type & GNIX_EPN_TYPE_BOUND) { - cdm_id_base = 
name->gnix_addr.cdm_id; - name_type = name->name_type; - } - } - - name_type |= GNIX_EPN_TYPE_SEP; - - cdm_id = (name_type & GNIX_EPN_TYPE_UNBOUND) ? -1 : cdm_id_base; - - ret = _gnix_get_new_cdm_id_set(domain_priv, n_ids, &cdm_id); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_get_new_cdm_id_set call returned %s\n", - fi_strerror(-ret)); - goto err; - } - - sep_priv->cdm_id_base = cdm_id; - - /* - * allocate cm_nic for this SEP - */ - ret = _gnix_cm_nic_alloc(domain_priv, - sep_priv->info, - cdm_id, - sep_priv->auth_key, - &sep_priv->cm_nic); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "gnix_cm_nic_alloc call returned %s\n", - fi_strerror(-ret)); - goto err; - } - - /* - * ep name of SEP is the same as the cm_nic - * since there's a one-to-one relationship - * between a given SEP and its cm_nic. - */ - sep_priv->my_name = sep_priv->cm_nic->my_name; - sep_priv->my_name.cm_nic_cdm_id = - sep_priv->cm_nic->my_name.gnix_addr.cdm_id; - sep_priv->my_name.rx_ctx_cnt = info->ep_attr->rx_ctx_cnt; - sep_priv->my_name.name_type = name_type; - - ofi_spin_init(&sep_priv->sep_lock); - _gnix_ref_get(domain_priv); - - *sep = &sep_priv->ep_fid; - return ret; - -err: - if (sep_priv->ep_table) - free(sep_priv->ep_table); - if (sep_priv->tx_ep_table) - free(sep_priv->tx_ep_table); - if (sep_priv->rx_ep_table) - free(sep_priv->rx_ep_table); - if (sep_priv) - free(sep_priv); - return ret; - -} - -/******************************************************************************* -ssize_t (*recv)(struct fid_ep *ep, void *buf, size_t len, void *desc, - fi_addr_t src_addr, void *context); -ssize_t (*send)(struct fid_ep *ep, const void *buf, size_t len, void *desc, - fi_addr_t dest_addr, void *context); - ******************************************************************************/ - -/* - * TODO: need to define the other msg/rma/amo methods for tx/rx contexts - */ - -DIRECT_FN STATIC ssize_t gnix_sep_recv(struct fid_ep *ep, void *buf, - size_t len, void 
*desc, - fi_addr_t src_addr, void *context) -{ - struct gnix_fid_trx *rx_ep = container_of(ep, struct gnix_fid_trx, - ep_fid); - - return _ep_recv(&rx_ep->ep->ep_fid, buf, len, desc, src_addr, - context, 0, 0, 0); -} - -DIRECT_FN STATIC ssize_t gnix_sep_recvv(struct fid_ep *ep, - const struct iovec *iov, - void **desc, size_t count, - fi_addr_t src_addr, - void *context) -{ - struct gnix_fid_trx *rx_ep = container_of(ep, struct gnix_fid_trx, - ep_fid); - - return _ep_recvv(&rx_ep->ep->ep_fid, iov, desc, count, src_addr, - context, 0, 0, 0); -} - -DIRECT_FN STATIC ssize_t gnix_sep_recvmsg(struct fid_ep *ep, - const struct fi_msg *msg, - uint64_t flags) -{ - struct gnix_fid_trx *rx_ep = container_of(ep, struct gnix_fid_trx, - ep_fid); - - return _ep_recvmsg(&rx_ep->ep->ep_fid, msg, flags & GNIX_RECVMSG_FLAGS, - 0, 0); -} - -DIRECT_FN STATIC ssize_t gnix_sep_send(struct fid_ep *ep, const void *buf, - size_t len, void *desc, - fi_addr_t dest_addr, void *context) -{ - struct gnix_fid_trx *tx_ep = container_of(ep, struct gnix_fid_trx, - ep_fid); - - return _ep_send(&tx_ep->ep->ep_fid, buf, len, desc, dest_addr, - context, 0, 0); -} - -DIRECT_FN ssize_t gnix_sep_sendv(struct fid_ep *ep, - const struct iovec *iov, - void **desc, size_t count, - fi_addr_t dest_addr, - void *context) -{ - struct gnix_fid_trx *tx_ep = container_of(ep, struct gnix_fid_trx, - ep_fid); - - return _ep_sendv(&tx_ep->ep->ep_fid, iov, desc, count, dest_addr, - context, 0, 0); -} - -DIRECT_FN ssize_t gnix_sep_sendmsg(struct fid_ep *ep, - const struct fi_msg *msg, - uint64_t flags) -{ - struct gnix_fid_trx *tx_ep = container_of(ep, struct gnix_fid_trx, - ep_fid); - - return _ep_sendmsg(&tx_ep->ep->ep_fid, msg, - flags & GNIX_SENDMSG_FLAGS, 0); -} - -DIRECT_FN ssize_t gnix_sep_msg_inject(struct fid_ep *ep, const void *buf, - size_t len, fi_addr_t dest_addr) -{ - struct gnix_fid_trx *tx_ep = container_of(ep, struct gnix_fid_trx, - ep_fid); - - return _ep_inject(&tx_ep->ep->ep_fid, buf, len, 0, 
dest_addr, 0, 0); -} - -DIRECT_FN ssize_t gnix_sep_senddata(struct fid_ep *ep, const void *buf, - size_t len, void *desc, uint64_t data, - fi_addr_t dest_addr, void *context) -{ - struct gnix_fid_trx *tx_ep = container_of(ep, struct gnix_fid_trx, - ep_fid); - - return _ep_senddata(&tx_ep->ep->ep_fid, buf, len, desc, data, - dest_addr, context, 0, 0); -} - -DIRECT_FN ssize_t -gnix_sep_msg_injectdata(struct fid_ep *ep, const void *buf, size_t len, - uint64_t data, fi_addr_t dest_addr) -{ - uint64_t flags; - struct gnix_fid_trx *tx_ep; - - if (!ep) { - return -FI_EINVAL; - } - - tx_ep = container_of(ep, struct gnix_fid_trx, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(tx_ep->ep->type)); - - flags = tx_ep->op_flags | FI_INJECT | FI_REMOTE_CQ_DATA | - GNIX_SUPPRESS_COMPLETION; - - return _gnix_send(tx_ep->ep, (uint64_t)buf, len, NULL, dest_addr, - NULL, flags, data, 0); -} - -DIRECT_FN STATIC ssize_t gnix_sep_trecv(struct fid_ep *ep, void *buf, - size_t len, - void *desc, fi_addr_t src_addr, - uint64_t tag, uint64_t ignore, - void *context) -{ - struct gnix_fid_trx *rx_ep = container_of(ep, struct gnix_fid_trx, - ep_fid); - - return _ep_recv(&rx_ep->ep->ep_fid, buf, len, desc, src_addr, context, - FI_TAGGED, tag, ignore); -} - -DIRECT_FN STATIC ssize_t gnix_sep_trecvv(struct fid_ep *ep, - const struct iovec *iov, - void **desc, size_t count, - fi_addr_t src_addr, - uint64_t tag, uint64_t ignore, - void *context) -{ - struct gnix_fid_trx *rx_ep = container_of(ep, struct gnix_fid_trx, - ep_fid); - - return _ep_recvv(&rx_ep->ep->ep_fid, iov, desc, count, src_addr, - context, FI_TAGGED, tag, ignore); -} - -DIRECT_FN STATIC ssize_t gnix_sep_trecvmsg(struct fid_ep *ep, - const struct fi_msg_tagged *msg, - uint64_t flags) -{ - const struct fi_msg _msg = { - .msg_iov = msg->msg_iov, - .desc = msg->desc, - .iov_count = msg->iov_count, - .addr = msg->addr, - .context = msg->context, - .data = msg->data - }; - - if (flags & ~GNIX_TRECVMSG_FLAGS) - return -FI_EINVAL; - - if ((flags & 
FI_CLAIM) && _msg.context == NULL) - return -FI_EINVAL; - - if ((flags & FI_DISCARD) && !(flags & (FI_PEEK | FI_CLAIM))) - return -FI_EINVAL; - - struct gnix_fid_trx *rx_ep = container_of(ep, struct gnix_fid_trx, - ep_fid); - - return _ep_recvmsg(&rx_ep->ep->ep_fid, &_msg, flags | FI_TAGGED, - msg->tag, msg->ignore); -} - -DIRECT_FN STATIC ssize_t gnix_sep_tsend(struct fid_ep *ep, const void *buf, - size_t len, void *desc, - fi_addr_t dest_addr, uint64_t tag, - void *context) -{ - struct gnix_fid_trx *tx_ep = container_of(ep, struct gnix_fid_trx, - ep_fid); - - return _ep_send(&tx_ep->ep->ep_fid, buf, len, desc, dest_addr, - context, FI_TAGGED, tag); -} - -DIRECT_FN STATIC ssize_t gnix_sep_tsendv(struct fid_ep *ep, - const struct iovec *iov, - void **desc, size_t count, - fi_addr_t dest_addr, - uint64_t tag, void *context) -{ - struct gnix_fid_trx *tx_ep = container_of(ep, struct gnix_fid_trx, - ep_fid); - - return _ep_sendv(&tx_ep->ep->ep_fid, iov, desc, count, dest_addr, - context, FI_TAGGED, tag); -} - -DIRECT_FN STATIC ssize_t gnix_sep_tsendmsg(struct fid_ep *ep, - const struct fi_msg_tagged *msg, - uint64_t flags) -{ - const struct fi_msg _msg = { - .msg_iov = msg->msg_iov, - .desc = msg->desc, - .iov_count = msg->iov_count, - .addr = msg->addr, - .context = msg->context, - .data = msg->data - }; - - if (flags & ~GNIX_SENDMSG_FLAGS) - return -FI_EINVAL; - - struct gnix_fid_trx *tx_ep = container_of(ep, struct gnix_fid_trx, - ep_fid); - - return _ep_sendmsg(&tx_ep->ep->ep_fid, &_msg, flags | FI_TAGGED, - msg->tag); -} - -DIRECT_FN STATIC ssize_t gnix_sep_tinject(struct fid_ep *ep, const void *buf, - size_t len, fi_addr_t dest_addr, - uint64_t tag) -{ - struct gnix_fid_trx *tx_ep = container_of(ep, struct gnix_fid_trx, - ep_fid); - - return _ep_inject(&tx_ep->ep->ep_fid, buf, len, 0, dest_addr, FI_TAGGED, - tag); -} - -DIRECT_FN STATIC ssize_t gnix_sep_tsenddata(struct fid_ep *ep, const void *buf, - size_t len, void *desc, - uint64_t data, fi_addr_t dest_addr, - 
uint64_t tag, void *context) -{ - struct gnix_fid_trx *tx_ep = container_of(ep, struct gnix_fid_trx, - ep_fid); - - return _ep_senddata(&tx_ep->ep->ep_fid, buf, len, desc, data, - dest_addr, context, FI_TAGGED, tag); -} - -DIRECT_FN STATIC ssize_t gnix_sep_tinjectdata(struct fid_ep *ep, - const void *buf, - size_t len, uint64_t data, - fi_addr_t dest_addr, uint64_t tag) -{ - struct gnix_fid_trx *tx_ep = container_of(ep, struct gnix_fid_trx, - ep_fid); - - return _ep_inject(&tx_ep->ep->ep_fid, buf, len, data, dest_addr, - FI_TAGGED | FI_REMOTE_CQ_DATA, tag); -} - -DIRECT_FN STATIC ssize_t -gnix_sep_read(struct fid_ep *ep, void *buf, size_t len, - void *desc, fi_addr_t src_addr, uint64_t addr, - uint64_t key, void *context) -{ - uint64_t flags; - struct gnix_fid_trx *rx_ep; - - if (!ep) { - return -FI_EINVAL; - } - - rx_ep = container_of(ep, struct gnix_fid_trx, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(rx_ep->ep->type)); - flags = rx_ep->op_flags | GNIX_RMA_READ_FLAGS_DEF; - - return _gnix_rma(rx_ep->ep, GNIX_FAB_RQ_RDMA_READ, - (uint64_t)buf, len, desc, - src_addr, addr, key, - context, flags, 0); -} - -DIRECT_FN STATIC ssize_t -gnix_sep_readv(struct fid_ep *ep, const struct iovec *iov, void **desc, - size_t count, fi_addr_t src_addr, uint64_t addr, uint64_t key, - void *context) -{ - uint64_t flags; - struct gnix_fid_trx *rx_ep; - - if (!ep || !iov || !desc || count > GNIX_MAX_RMA_IOV_LIMIT) { - return -FI_EINVAL; - } - - rx_ep = container_of(ep, struct gnix_fid_trx, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(rx_ep->ep->type)); - flags = rx_ep->op_flags | GNIX_RMA_READ_FLAGS_DEF; - - return _gnix_rma(rx_ep->ep, GNIX_FAB_RQ_RDMA_READ, - (uint64_t)iov[0].iov_base, iov[0].iov_len, desc[0], - src_addr, addr, key, - context, flags, 0); -} - -DIRECT_FN STATIC ssize_t -gnix_sep_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg, - uint64_t flags) -{ - struct gnix_fid_trx *rx_ep; - - if (!ep || !msg || !msg->msg_iov || !msg->rma_iov || !msg->desc || - msg->iov_count != 1 || 
msg->rma_iov_count != 1 || - msg->rma_iov[0].len > msg->msg_iov[0].iov_len) { - return -FI_EINVAL; - } - - rx_ep = container_of(ep, struct gnix_fid_trx, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(rx_ep->ep->type)); - - flags = (flags & GNIX_READMSG_FLAGS) | GNIX_RMA_READ_FLAGS_DEF; - - return _gnix_rma(rx_ep->ep, GNIX_FAB_RQ_RDMA_READ, - (uint64_t)msg->msg_iov[0].iov_base, - msg->msg_iov[0].iov_len, msg->desc[0], - msg->addr, msg->rma_iov[0].addr, msg->rma_iov[0].key, - msg->context, flags, msg->data); -} - -DIRECT_FN STATIC ssize_t -gnix_sep_write(struct fid_ep *ep, const void *buf, size_t len, void *desc, - fi_addr_t dest_addr, uint64_t addr, uint64_t key, void *context) -{ - uint64_t flags; - struct gnix_fid_trx *tx_ep; - - if (!ep) { - return -FI_EINVAL; - } - - tx_ep = container_of(ep, struct gnix_fid_trx, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(tx_ep->ep->type)); - flags = tx_ep->op_flags | GNIX_RMA_WRITE_FLAGS_DEF; - - return _gnix_rma(tx_ep->ep, GNIX_FAB_RQ_RDMA_WRITE, - (uint64_t)buf, len, desc, dest_addr, addr, key, - context, flags, 0); -} - -DIRECT_FN STATIC ssize_t -gnix_sep_writev(struct fid_ep *ep, const struct iovec *iov, void **desc, - size_t count, fi_addr_t dest_addr, uint64_t addr, uint64_t key, - void *context) -{ - uint64_t flags; - struct gnix_fid_trx *tx_ep; - - if (!ep || !iov || !desc || count > GNIX_MAX_RMA_IOV_LIMIT) { - return -FI_EINVAL; - } - - tx_ep = container_of(ep, struct gnix_fid_trx, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(tx_ep->ep->type)); - flags = tx_ep->op_flags | GNIX_RMA_WRITE_FLAGS_DEF; - - return _gnix_rma(tx_ep->ep, GNIX_FAB_RQ_RDMA_WRITE, - (uint64_t)iov[0].iov_base, iov[0].iov_len, desc[0], - dest_addr, addr, key, context, flags, 0); -} - -DIRECT_FN STATIC ssize_t -gnix_sep_writemsg(struct fid_ep *ep, const struct fi_msg_rma *msg, - uint64_t flags) -{ - struct gnix_fid_trx *trx_ep; - - if (!ep || !msg || !msg->msg_iov || !msg->rma_iov || - msg->iov_count != 1 || - msg->rma_iov_count > GNIX_MAX_RMA_IOV_LIMIT || - 
msg->rma_iov[0].len > msg->msg_iov[0].iov_len) { - return -FI_EINVAL; - } - - trx_ep = container_of(ep, struct gnix_fid_trx, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(trx_ep->ep->type)); - - flags = (flags & GNIX_WRITEMSG_FLAGS) | GNIX_RMA_WRITE_FLAGS_DEF; - - return _gnix_rma(trx_ep->ep, GNIX_FAB_RQ_RDMA_WRITE, - (uint64_t)msg->msg_iov[0].iov_base, - msg->msg_iov[0].iov_len, - msg->desc ? msg->desc[0] : NULL, - msg->addr, msg->rma_iov[0].addr, msg->rma_iov[0].key, - msg->context, flags, msg->data); -} - -DIRECT_FN STATIC ssize_t -gnix_sep_rma_inject(struct fid_ep *ep, const void *buf, - size_t len, fi_addr_t dest_addr, - uint64_t addr, uint64_t key) -{ - uint64_t flags; - struct gnix_fid_trx *trx_ep; - - if (!ep) { - return -FI_EINVAL; - } - - trx_ep = container_of(ep, struct gnix_fid_trx, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(trx_ep->ep->type)); - - flags = trx_ep->op_flags | FI_INJECT | GNIX_SUPPRESS_COMPLETION | - GNIX_RMA_WRITE_FLAGS_DEF; - - return _gnix_rma(trx_ep->ep, GNIX_FAB_RQ_RDMA_WRITE, - (uint64_t)buf, len, NULL, - dest_addr, addr, key, - NULL, flags, 0); -} - -DIRECT_FN STATIC ssize_t -gnix_sep_writedata(struct fid_ep *ep, const void *buf, size_t len, void *desc, - uint64_t data, fi_addr_t dest_addr, uint64_t addr, - uint64_t key, void *context) -{ - uint64_t flags; - struct gnix_fid_trx *trx_ep; - - if (!ep) { - return -FI_EINVAL; - } - - trx_ep = container_of(ep, struct gnix_fid_trx, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(trx_ep->ep->type)); - - flags = trx_ep->op_flags | FI_REMOTE_CQ_DATA | GNIX_RMA_WRITE_FLAGS_DEF; - - return _gnix_rma(trx_ep->ep, GNIX_FAB_RQ_RDMA_WRITE, - (uint64_t)buf, len, desc, - dest_addr, addr, key, - context, flags, data); -} - -DIRECT_FN STATIC ssize_t -gnix_sep_rma_injectdata(struct fid_ep *ep, const void *buf, size_t len, - uint64_t data, fi_addr_t dest_addr, uint64_t addr, - uint64_t key) -{ - uint64_t flags; - struct gnix_fid_trx *trx_ep; - - if (!ep) { - return -FI_EINVAL; - } - - trx_ep = container_of(ep, struct 
gnix_fid_trx, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(trx_ep->ep->type)); - - flags = trx_ep->op_flags | FI_INJECT | FI_REMOTE_CQ_DATA | - GNIX_SUPPRESS_COMPLETION | GNIX_RMA_WRITE_FLAGS_DEF; - - return _gnix_rma(trx_ep->ep, GNIX_FAB_RQ_RDMA_WRITE, - (uint64_t)buf, len, NULL, - dest_addr, addr, key, - NULL, flags, data); -} - -DIRECT_FN STATIC ssize_t -gnix_sep_atomic_write(struct fid_ep *ep, const void *buf, size_t count, - void *desc, fi_addr_t dest_addr, uint64_t addr, - uint64_t key, enum fi_datatype datatype, enum fi_op op, - void *context) -{ - struct gnix_fid_trx *trx_ep; - - struct fi_msg_atomic msg; - struct fi_ioc msg_iov; - struct fi_rma_ioc rma_iov; - uint64_t flags; - - if (!ep) - return -FI_EINVAL; - - if (_gnix_atomic_cmd(datatype, op, GNIX_FAB_RQ_AMO) < 0) - return -FI_EOPNOTSUPP; - - trx_ep = container_of(ep, struct gnix_fid_trx, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(trx_ep->ep->type)); - - msg_iov.addr = (void *)buf; - msg_iov.count = count; - msg.msg_iov = &msg_iov; - msg.desc = &desc; - msg.iov_count = 1; - msg.addr = dest_addr; - rma_iov.addr = addr; - rma_iov.count = 1; - rma_iov.key = key; - msg.rma_iov = &rma_iov; - msg.datatype = datatype; - msg.op = op; - msg.context = context; - - flags = trx_ep->op_flags | GNIX_ATOMIC_WRITE_FLAGS_DEF; - - return _gnix_atomic(trx_ep->ep, GNIX_FAB_RQ_AMO, &msg, - NULL, NULL, 0, NULL, NULL, 0, flags); -} - -DIRECT_FN STATIC ssize_t -gnix_sep_atomic_writev(struct fid_ep *ep, const struct fi_ioc *iov, void **desc, - size_t count, fi_addr_t dest_addr, uint64_t addr, - uint64_t key, enum fi_datatype datatype, enum fi_op op, - void *context) -{ - if (!ep || !iov || count > 1) - return -FI_EINVAL; - - return gnix_sep_atomic_write(ep, iov[0].addr, - iov[0].count, desc ? 
desc[0] : NULL, - dest_addr, addr, key, datatype, op, - context); -} - -DIRECT_FN STATIC ssize_t -gnix_sep_atomic_writemsg(struct fid_ep *ep, const struct fi_msg_atomic *msg, - uint64_t flags) -{ - struct gnix_fid_trx *trx_ep; - - if (!ep) - return -FI_EINVAL; - - if (_gnix_atomic_cmd(msg->datatype, msg->op, GNIX_FAB_RQ_AMO) < 0) - return -FI_EOPNOTSUPP; - - trx_ep = container_of(ep, struct gnix_fid_trx, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(trx_ep->ep->type)); - - flags = (flags & GNIX_ATOMICMSG_FLAGS) | GNIX_ATOMIC_WRITE_FLAGS_DEF; - - return _gnix_atomic(trx_ep->ep, GNIX_FAB_RQ_AMO, msg, - NULL, NULL, 0, NULL, NULL, 0, flags); -} - -DIRECT_FN STATIC ssize_t -gnix_sep_atomic_inject(struct fid_ep *ep, const void *buf, size_t count, - fi_addr_t dest_addr, uint64_t addr, uint64_t key, - enum fi_datatype datatype, enum fi_op op) -{ - struct gnix_fid_trx *trx_ep; - struct fi_msg_atomic msg; - struct fi_ioc msg_iov; - struct fi_rma_ioc rma_iov; - uint64_t flags; - - if (!ep) - return -FI_EINVAL; - - if (_gnix_atomic_cmd(datatype, op, GNIX_FAB_RQ_AMO) < 0) - return -FI_EOPNOTSUPP; - - trx_ep = container_of(ep, struct gnix_fid_trx, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(trx_ep->ep->type)); - - msg_iov.addr = (void *)buf; - msg_iov.count = count; - msg.msg_iov = &msg_iov; - msg.desc = NULL; - msg.iov_count = 1; - msg.addr = dest_addr; - rma_iov.addr = addr; - rma_iov.count = 1; - rma_iov.key = key; - msg.rma_iov = &rma_iov; - msg.datatype = datatype; - msg.op = op; - - flags = trx_ep->op_flags | FI_INJECT | GNIX_SUPPRESS_COMPLETION | - GNIX_ATOMIC_WRITE_FLAGS_DEF; - - return _gnix_atomic(trx_ep->ep, GNIX_FAB_RQ_AMO, &msg, - NULL, NULL, 0, NULL, NULL, 0, flags); -} - -DIRECT_FN STATIC ssize_t -gnix_sep_atomic_readwrite(struct fid_ep *ep, const void *buf, size_t count, - void *desc, void *result, void *result_desc, - fi_addr_t dest_addr, uint64_t addr, uint64_t key, - enum fi_datatype datatype, enum fi_op op, - void *context) -{ - struct gnix_fid_trx *trx_ep; - struct 
fi_msg_atomic msg; - struct fi_ioc msg_iov; - struct fi_rma_ioc rma_iov; - struct fi_ioc result_iov; - uint64_t flags; - - if (_gnix_atomic_cmd(datatype, op, GNIX_FAB_RQ_FAMO) < 0) - return -FI_EOPNOTSUPP; - - if (!ep) - return -FI_EINVAL; - - trx_ep = container_of(ep, struct gnix_fid_trx, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(trx_ep->ep->type)); - - msg_iov.addr = (void *)buf; - msg_iov.count = count; - msg.msg_iov = &msg_iov; - msg.desc = &desc; - msg.iov_count = 1; - msg.addr = dest_addr; - rma_iov.addr = addr; - rma_iov.count = 1; - rma_iov.key = key; - msg.rma_iov = &rma_iov; - msg.datatype = datatype; - msg.op = op; - msg.context = context; - result_iov.addr = result; - result_iov.count = 1; - - flags = trx_ep->op_flags | GNIX_ATOMIC_READ_FLAGS_DEF; - - return _gnix_atomic(trx_ep->ep, GNIX_FAB_RQ_FAMO, &msg, - NULL, NULL, 0, - &result_iov, &result_desc, 1, - flags); -} - -DIRECT_FN STATIC ssize_t -gnix_sep_atomic_readwritev(struct fid_ep *ep, const struct fi_ioc *iov, - void **desc, size_t count, struct fi_ioc *resultv, - void **result_desc, size_t result_count, - fi_addr_t dest_addr, uint64_t addr, uint64_t key, - enum fi_datatype datatype, enum fi_op op, - void *context) -{ - if (!iov || count > 1 || !resultv) - return -FI_EINVAL; - - return gnix_sep_atomic_readwrite(ep, iov[0].addr, iov[0].count, - desc ? desc[0] : NULL, - resultv[0].addr, - result_desc ? 
result_desc[0] : NULL, - dest_addr, addr, key, datatype, op, - context); -} - -DIRECT_FN STATIC ssize_t -gnix_sep_atomic_readwritemsg(struct fid_ep *ep, const struct fi_msg_atomic *msg, - struct fi_ioc *resultv, void **result_desc, - size_t result_count, uint64_t flags) -{ - struct gnix_fid_trx *trx_ep; - - if (!ep) - return -FI_EINVAL; - - if (_gnix_atomic_cmd(msg->datatype, msg->op, GNIX_FAB_RQ_FAMO) < 0) - return -FI_EOPNOTSUPP; - - trx_ep = container_of(ep, struct gnix_fid_trx, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(trx_ep->ep->type)); - - flags = (flags & GNIX_FATOMICMSG_FLAGS) | GNIX_ATOMIC_READ_FLAGS_DEF; - - return _gnix_atomic(trx_ep->ep, GNIX_FAB_RQ_FAMO, msg, NULL, NULL, 0, - resultv, result_desc, result_count, flags); -} - -DIRECT_FN STATIC ssize_t -gnix_sep_atomic_compwrite(struct fid_ep *ep, const void *buf, size_t count, - void *desc, const void *compare, void *compare_desc, - void *result, void *result_desc, fi_addr_t dest_addr, - uint64_t addr, uint64_t key, - enum fi_datatype datatype, enum fi_op op, - void *context) -{ - struct gnix_fid_trx *trx_ep; - struct fi_msg_atomic msg; - struct fi_ioc msg_iov; - struct fi_rma_ioc rma_iov; - struct fi_ioc result_iov; - struct fi_ioc compare_iov; - uint64_t flags; - - if (!ep) - return -FI_EINVAL; - - if (_gnix_atomic_cmd(datatype, op, GNIX_FAB_RQ_CAMO) < 0) - return -FI_EOPNOTSUPP; - - trx_ep = container_of(ep, struct gnix_fid_trx, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(trx_ep->ep->type)); - - msg_iov.addr = (void *)buf; - msg_iov.count = count; - msg.msg_iov = &msg_iov; - msg.desc = &desc; - msg.iov_count = 1; - msg.addr = dest_addr; - rma_iov.addr = addr; - rma_iov.count = 1; - rma_iov.key = key; - msg.rma_iov = &rma_iov; - msg.datatype = datatype; - msg.op = op; - msg.context = context; - result_iov.addr = result; - result_iov.count = 1; - compare_iov.addr = (void *)compare; - compare_iov.count = 1; - - flags = trx_ep->op_flags | GNIX_ATOMIC_READ_FLAGS_DEF; - - return _gnix_atomic(trx_ep->ep, 
GNIX_FAB_RQ_CAMO, &msg, - &compare_iov, &compare_desc, 1, - &result_iov, &result_desc, 1, - flags); -} - -DIRECT_FN STATIC ssize_t -gnix_sep_atomic_compwritev(struct fid_ep *ep, const struct fi_ioc *iov, - void **desc, size_t count, - const struct fi_ioc *comparev, - void **compare_desc, size_t compare_count, - struct fi_ioc *resultv, void **result_desc, - size_t result_count, fi_addr_t dest_addr, - uint64_t addr, uint64_t key, - enum fi_datatype datatype, enum fi_op op, - void *context) -{ - if (!iov || count > 1 || !resultv || !comparev) - return -FI_EINVAL; - - return gnix_sep_atomic_compwrite(ep, iov[0].addr, iov[0].count, - desc ? desc[0] : NULL, - comparev[0].addr, - compare_desc ? compare_desc[0] : NULL, - resultv[0].addr, - result_desc ? result_desc[0] : NULL, - dest_addr, addr, key, datatype, op, - context); -} - -DIRECT_FN STATIC ssize_t -gnix_sep_atomic_compwritemsg(struct fid_ep *ep, const struct fi_msg_atomic *msg, - const struct fi_ioc *comparev, void **compare_desc, - size_t compare_count, struct fi_ioc *resultv, - void **result_desc, size_t result_count, - uint64_t flags) -{ - struct gnix_fid_trx *trx_ep; - - if (!ep) - return -FI_EINVAL; - - if (_gnix_atomic_cmd(msg->datatype, msg->op, GNIX_FAB_RQ_CAMO) < 0) - return -FI_EOPNOTSUPP; - - trx_ep = container_of(ep, struct gnix_fid_trx, ep_fid); - assert(GNIX_EP_RDM_DGM_MSG(trx_ep->ep->type)); - - flags = (flags & GNIX_CATOMICMSG_FLAGS) | GNIX_ATOMIC_READ_FLAGS_DEF; - - return _gnix_atomic(trx_ep->ep, GNIX_FAB_RQ_CAMO, msg, - comparev, compare_desc, compare_count, - resultv, result_desc, result_count, - flags); -} - -/******************************************************************************* - * FI_OPS_* data structures. 
- ******************************************************************************/ - -static struct fi_ops gnix_sep_fi_ops = { - .size = sizeof(struct fi_ops), - .close = gnix_sep_close, - .bind = gnix_sep_bind, - .control = gnix_sep_control, - .ops_open = fi_no_ops_open -}; - -static struct fi_ops_ep gnix_sep_ops = { - .size = sizeof(struct fi_ops_ep), - .cancel = gnix_cancel, - .getopt = gnix_getopt, - .setopt = gnix_setopt, - .tx_ctx = gnix_sep_tx_ctx, - .rx_ctx = gnix_sep_rx_ctx, - .rx_size_left = fi_no_rx_size_left, - .tx_size_left = fi_no_tx_size_left, -}; - -static struct fi_ops_msg gnix_sep_msg_ops = { - .size = sizeof(struct fi_ops_msg), - .recv = gnix_sep_recv, - .recvv = gnix_sep_recvv, - .recvmsg = gnix_sep_recvmsg, - .send = gnix_sep_send, - .sendv = gnix_sep_sendv, - .sendmsg = gnix_sep_sendmsg, - .inject = gnix_sep_msg_inject, - .senddata = gnix_sep_senddata, - .injectdata = gnix_sep_msg_injectdata, -}; - -static struct fi_ops_rma gnix_sep_rma_ops = { - .size = sizeof(struct fi_ops_rma), - .read = gnix_sep_read, - .readv = gnix_sep_readv, - .readmsg = gnix_sep_readmsg, - .write = gnix_sep_write, - .writev = gnix_sep_writev, - .writemsg = gnix_sep_writemsg, - .inject = gnix_sep_rma_inject, - .writedata = gnix_sep_writedata, - .injectdata = gnix_sep_rma_injectdata, -}; - -static struct fi_ops_tagged gnix_sep_tagged_ops = { - .size = sizeof(struct fi_ops_tagged), - .recv = gnix_sep_trecv, - .recvv = gnix_sep_trecvv, - .recvmsg = gnix_sep_trecvmsg, - .send = gnix_sep_tsend, - .sendv = gnix_sep_tsendv, - .sendmsg = gnix_sep_tsendmsg, - .inject = gnix_sep_tinject, - .senddata = gnix_sep_tsenddata, - .injectdata = gnix_sep_tinjectdata, -}; - -static struct fi_ops_atomic gnix_sep_atomic_ops = { - .size = sizeof(struct fi_ops_atomic), - .write = gnix_sep_atomic_write, - .writev = gnix_sep_atomic_writev, - .writemsg = gnix_sep_atomic_writemsg, - .inject = gnix_sep_atomic_inject, - .readwrite = gnix_sep_atomic_readwrite, - .readwritev = 
gnix_sep_atomic_readwritev, - .readwritemsg = gnix_sep_atomic_readwritemsg, - .compwrite = gnix_sep_atomic_compwrite, - .compwritev = gnix_sep_atomic_compwritev, - .compwritemsg = gnix_sep_atomic_compwritemsg, - .writevalid = gnix_ep_atomic_valid, - .readwritevalid = gnix_ep_fetch_atomic_valid, - .compwritevalid = gnix_ep_cmp_atomic_valid, -}; - -/* - * rx/tx contexts don't do any connection management, - * nor does the underlying gnix_fid_ep struct - */ -static struct fi_ops_cm gnix_sep_rxtx_cm_ops = { - .size = sizeof(struct fi_ops_cm), - .setname = fi_no_setname, - .getname = fi_no_getname, - .getpeer = fi_no_getpeer, - .connect = fi_no_connect, - .listen = fi_no_listen, - .accept = fi_no_accept, - .reject = fi_no_reject, - .shutdown = fi_no_shutdown, - .join = fi_no_join, -}; diff --git a/prov/gni/src/gnix_shmem.c b/prov/gni/src/gnix_shmem.c deleted file mode 100644 index 990299a0dba..00000000000 --- a/prov/gni/src/gnix_shmem.c +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Copyright (c) 2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - - -#include "gnix_shmem.h" - -#include -#include -#include -#include -#include -#include - -#include "rdma/fi_errno.h" -#include "gnix_util.h" - -#define GNIX_DEFAULT_ACCESS_PERMS (0755) - -extern uint32_t gnix_wait_shared_memory_timeout; - -int _gnix_shmem_create( - const char *path, - uint32_t size, - int (*init_func)(const char *path, uint32_t size, void *region), - struct gnix_shared_memory *region) -{ - int rc, fd, saved_errno, count; - void *buffer; - - if (!region || !path) { - GNIX_WARN(FI_LOG_FABRIC, "bad params, ret=-FINVAL"); - return -FI_EINVAL; - } - - fd = open(path, O_CREAT | O_EXCL | O_RDWR, - GNIX_DEFAULT_ACCESS_PERMS); - if (fd >= 0) { - buffer = malloc(size); - if (!buffer) { - GNIX_INFO(FI_LOG_FABRIC, - "failed to allocate memory for shared memory segment"); - return -FI_ENOMEM; - } - - if (init_func) { - rc = init_func(path, size, buffer); - if (rc != 0) { - free(buffer); - return rc; - } - } else - memset(buffer, 0, size); - - GNIX_INFO(FI_LOG_FABRIC, - "initializing shared memory segment, path=%s\n", - path); - - count = write(fd, buffer, size); - if (count != size) { - GNIX_WARN(FI_LOG_FABRIC, - "failed to write to shared memory segment, " - "expected=%d actual=%d\n", - size, count); - saved_errno = errno; - close(fd); - unlink(path); - free(buffer); - errno = saved_errno; - return -saved_errno; - } - free(buffer); - fchmod(fd, - S_IRUSR | S_IWUSR | - S_IRGRP | S_IWGRP | - S_IROTH | S_IWOTH); - } else { - - /* Unexpected error? 
*/ - if (errno != EEXIST) { - saved_errno = errno; - GNIX_WARN(FI_LOG_FABRIC, - "open() of shared memory segment failed, errno=%d\n", - saved_errno); - - return -saved_errno; - } - - /* - * The file was created by another process, but it might not yet - * be initialized. Wait until it is. - */ - struct stat stat_info; - int wait_count = 0; - - do { - GNIX_INFO(FI_LOG_FABRIC, - "stat() of shared memory segment."); - rc = stat(path, &stat_info); - if (rc != 0) { - GNIX_DEBUG(FI_LOG_FABRIC, - "stat() of shared memory segment " - "failed, errno %d\n", errno); - return -errno; - } - - if (stat_info.st_size < size) { - GNIX_INFO(FI_LOG_FABRIC, - "Shared memory segment is %d bytes, " - "waiting for it to be %d\n", - stat_info.st_size, size); - sleep(1); - wait_count++; - } - } while ((stat_info.st_size < size) && - (wait_count < gnix_wait_shared_memory_timeout)); - - /* If still not found, give up. */ - if (stat_info.st_size < size) { - GNIX_WARN(FI_LOG_FABRIC, - "can't open() shared memory segment."); - return -ENODEV; - } - - GNIX_INFO(FI_LOG_FABRIC, "open() of shared memory segment."); - fd = open(path, O_RDWR, GNIX_DEFAULT_ACCESS_PERMS); - if (fd < 0) { - GNIX_WARN(FI_LOG_FABRIC, - "open() of shared memory segment " - "failed, errno %d\n", errno); - return -errno; - } - } - unlink(path); - - /* Get a pointer to the shared data structure. 
*/ - region->addr = mmap(NULL, size, - PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - close(fd); - - if (region->addr == MAP_FAILED) { - GNIX_WARN(FI_LOG_FABRIC, "failed to map fd\n"); - return -ENOTBLK; - } - - region->size = size; - return 0; -} - -int _gnix_shmem_destroy(struct gnix_shared_memory *region) -{ - if (!region) - return -FI_EINVAL; - - if (region->addr == MAP_FAILED) - return -FI_EINVAL; - - return munmap(region->addr, region->size); -} diff --git a/prov/gni/src/gnix_smrn.c b/prov/gni/src/gnix_smrn.c deleted file mode 100644 index 8938fc00d0c..00000000000 --- a/prov/gni/src/gnix_smrn.c +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright (c) 2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "gnix_util.h" -#include "gnix_smrn.h" - -static struct gnix_smrn global_smrn; - -int _gnix_smrn_init(void) -{ - int ret; - - ofi_spin_init(&global_smrn.lock); - global_smrn.references = 0; - dlist_init(&global_smrn.rq_head); - - ret = _gnix_notifier_init(); - - return ret; -} - -int _gnix_smrn_open(struct gnix_smrn **smrn) -{ - struct gnix_smrn *tmp = &global_smrn; - int ret = FI_SUCCESS; - - ofi_spin_lock(&tmp->lock); - if (tmp->references == 0) - ret = _gnix_notifier_open(&tmp->notifier); - - if (!ret) - tmp->references += 1; - ofi_spin_unlock(&tmp->lock); - - if (!ret) - *smrn = tmp; - - return ret; -} - -int _gnix_smrn_close(struct gnix_smrn *smrn) -{ - int ret = FI_SUCCESS; - - ofi_spin_lock(&smrn->lock); - if (smrn->references == 0) - ret = -FI_EINVAL; - - if (smrn->references == 1) - ret = _gnix_notifier_close(smrn->notifier); - - if (!ret) - smrn->references -= 1; - ofi_spin_unlock(&smrn->lock); - - return ret; -} - -int _gnix_smrn_monitor(struct gnix_smrn *smrn, - struct gnix_smrn_rq *rq, - void *addr, - uint64_t len, - uint64_t cookie, - struct gnix_smrn_context *context) -{ - int ret; - - if (!context || !rq || !smrn) - return -FI_EINVAL; - - context->rq = rq; - context->cookie = cookie; - - ret = _gnix_notifier_monitor(smrn->notifier, addr, - len, (uint64_t) context); - if (ret == FI_SUCCESS) - GNIX_DEBUG(FI_LOG_FABRIC, - "monitoring addr=%p len=%d cookie=%p " - "context=%p rq=%p notifier=%p\n", - addr, len, context->cookie, - context, rq, smrn->notifier); - return ret; -} - -int _gnix_smrn_unmonitor(struct gnix_smrn *smrn, - uint64_t cookie, - struct gnix_smrn_context *context) -{ - if (!smrn) - return -FI_EINVAL; - - if (cookie != context->cookie) - return 
-FI_EINVAL; - - return _gnix_notifier_unmonitor(smrn->notifier, (uint64_t) context); -} - -static void __gnix_smrn_read_events(struct gnix_smrn *smrn) -{ - int ret; - struct gnix_smrn_context *context; - struct gnix_smrn_rq *rq; - int len = sizeof(uint64_t); - - do { - ret = _gnix_notifier_get_event(smrn->notifier, - (void *) &context, len); - if (ret != len) { - GNIX_DEBUG(FI_LOG_FABRIC, - "no more events to be read\n"); - break; - } - - GNIX_DEBUG(FI_LOG_FABRIC, - "found event, context=%p rq=%p cookie=%lx\n", - context, context->rq, context->cookie); - - rq = context->rq; - ofi_spin_lock(&rq->lock); - dlist_insert_tail(&context->entry, &rq->list); - ofi_spin_unlock(&rq->lock); - } while (ret == len); -} - -int _gnix_smrn_get_event(struct gnix_smrn *smrn, - struct gnix_smrn_rq *rq, - struct gnix_smrn_context **context) -{ - int ret; - - if (!smrn || !context) - return -FI_EINVAL; - - __gnix_smrn_read_events(smrn); - - ofi_spin_lock(&rq->lock); - if (!dlist_empty(&rq->list)) { - dlist_pop_front(&rq->list, struct gnix_smrn_context, - *context, entry); - ret = FI_SUCCESS; - } else - ret = -FI_EAGAIN; - ofi_spin_unlock(&rq->lock); - - return ret; -} - diff --git a/prov/gni/src/gnix_tags.c b/prov/gni/src/gnix_tags.c deleted file mode 100644 index 95cb882105c..00000000000 --- a/prov/gni/src/gnix_tags.c +++ /dev/null @@ -1,964 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "rdma/fabric.h" -#include "rdma/fi_tagged.h" - -#include "gnix_tags.h" - -#include "gnix.h" -#include "gnix_util.h" - -#include - -#define NOT_FOUND(seq, gen) ((seq) == 0 && (gen) == 0) - -struct gnix_tag_storage_ops list_ops; -struct gnix_tag_storage_ops hlist_ops; -struct gnix_tag_storage_ops kdtree_ops; - -struct gnix_tag_search_element { - uint64_t tag; - uint64_t ignore; - void *context; - uint64_t flags; - int use_src_addr_matching; - struct gnix_address *addr; -}; - -static inline int get_bucket(struct gnix_tag_storage *ts, uint64_t tag) { - return fasthash64(&tag, sizeof(uint64_t), - 0xDEADBEEF) % ts->hlist.elements; -} - -static inline int __is_tag_older( - uint64_t oldest_seq, - uint64_t oldest_gen, - uint64_t current_seq, - uint64_t current_gen) -{ - return !((oldest_gen > current_gen) || - (oldest_gen == current_gen && oldest_seq > current_seq)); -} - -static inline int is_tag_older(uint64_t oldest_gen, - uint64_t oldest_seq, - struct gnix_tag_list_element *current) -{ - return __is_tag_older(oldest_seq, oldest_gen, - current->seq, current->gen); -} - -static inline void __update_hlist_head(struct gnix_hlist_head *h) -{ - struct gnix_tag_list_element *first; - - if (!dlist_empty(&h->head)) { - first = dlist_first_entry(&h->head, - struct gnix_tag_list_element, free); - - h->oldest_gen = first->gen; - h->oldest_tag_id = first->seq; - } -} - -/** - * @brief converts gnix_tag_list_element to gnix_fab_req - * - * @param elem dlist element embedded in a gnix_fab_req - * @return pointer to gnix_fab_req - */ -static inline struct gnix_fab_req *__to_gnix_fab_req( - struct gnix_tag_list_element *elem) -{ - struct gnix_fab_req *req; - - req = container_of(elem, struct gnix_fab_req, msg.tle); - - return req; -} - -/** - * @brief determines if a req matches the address parameters - * - * @param addr_to_find address to find in tag storage - * @param addr stored address - * @return 0 if the request does not match the parameters, 1 otherwise - */ 
-static inline int __req_matches_addr_params( - struct gnix_address *addr_to_find, - struct gnix_address *addr) -{ - return (GNIX_ADDR_UNSPEC(*addr) || - GNIX_ADDR_EQUAL(*addr_to_find, *addr)); -} - -int _gnix_req_matches_params( - struct gnix_fab_req *req, - uint64_t tag, - uint64_t ignore, - uint64_t flags, - void *context, - int use_src_addr_matching, - struct gnix_address *addr, - int matching_posted) -{ - int valid_request; - - /* adding some error checking to the first condition - * - * if the context is null, then FI_PEEK | FI_CLAIM should fail - */ - if ((flags & FI_CLAIM) && (flags & FI_PEEK)) - valid_request = (context != NULL && - context != req->msg.tle.context); - else if ((flags & FI_CLAIM) && !(flags & FI_PEEK)) - valid_request = (req->msg.tle.context != NULL && - context == req->msg.tle.context); - else - valid_request = req->msg.tle.context == NULL; - - /* shortcut */ - if (!valid_request) - return valid_request; - - if (use_src_addr_matching && matching_posted) { - /* if matching posted, flip the arguments so that the unspec check - * is done on the request in the tag store and not the address - * that was passed into the function - */ - valid_request &= __req_matches_addr_params(addr, &req->addr); - } else if (use_src_addr_matching && !matching_posted) { - valid_request &= __req_matches_addr_params(&req->addr, addr); - } - - return valid_request && ((req->msg.tag & ~ignore) == (tag & ~ignore)); -} - -static int __req_matches_context(struct dlist_entry *entry, const void *arg) -{ - struct gnix_tag_list_element *tle; - struct gnix_fab_req *req; - - tle = container_of(entry, struct gnix_tag_list_element, free); - req = __to_gnix_fab_req(tle); - - return req->user_context == arg; -} - -/* used to match elements in the posted lists */ -int _gnix_match_posted_tag(struct dlist_entry *entry, const void *arg) -{ - const struct gnix_tag_search_element *s_elem = arg; - struct gnix_tag_list_element *tle; - struct gnix_fab_req *req; - - tle = 
container_of(entry, struct gnix_tag_list_element, free); - req = __to_gnix_fab_req(tle); - - return _gnix_req_matches_params(req, s_elem->tag, req->msg.ignore, - s_elem->flags, s_elem->context, - s_elem->use_src_addr_matching, - s_elem->addr, 1); -} - -/* used to match elements in the unexpected lists */ -int _gnix_match_unexpected_tag(struct dlist_entry *entry, const void *arg) -{ - const struct gnix_tag_search_element *s_elem = arg; - struct gnix_tag_list_element *tle; - struct gnix_fab_req *req; - - tle = container_of(entry, struct gnix_tag_list_element, free); - req = __to_gnix_fab_req(tle); - - return _gnix_req_matches_params(req, s_elem->tag, s_elem->ignore, - s_elem->flags, s_elem->context, - s_elem->use_src_addr_matching, - s_elem->addr, 0); -} - -/* default attributes for tag storage objects */ -static struct gnix_tag_storage_attr default_attr = { - .type = GNIX_TAG_AUTOSELECT, - .use_src_addr_matching = 0, -}; - -/** - * @brief peeks into a tag list to find the first match using given parameters - * - * @param ts pointer to gnix_tag_storage_object - * @param tag tag to find - * @param ignore bits to ignore in tags - * @param list dlist to search - * @param flags fi_tagged flags - * @param context fi_context associated with tag - * @param addr gnix_address to find - * @param addr_ignore bits to ignore in address - * @return NULL, if no match is found, - * a non-NULL value, if a match is found - */ -static inline struct gnix_tag_list_element *__tag_list_peek_first_match( - struct gnix_tag_storage *ts, - uint64_t tag, - uint64_t ignore, - struct dlist_entry *list, - uint64_t flags, - void *context, - struct gnix_address *addr) -{ - struct dlist_entry *current; - struct gnix_tag_search_element s_elem = { - .tag = tag, - .ignore = ignore, - .flags = flags, - .context = context, - .use_src_addr_matching = ts->attr.use_src_addr_matching, - .addr = addr, - }; - - /* search the list for a matching element. 
stop at the first match */ - dlist_foreach(list, current) { - if (ts->match_func(current, &s_elem)) - return (struct gnix_tag_list_element *) current; - } - - return NULL; -} - -/** - * @brief finds and removes the first match in a tag list - * - * @param ts pointer to gnix_tag_storage_object - * @param tag tag to find - * @param ignore bits to ignore in tags - * @param list dlist to search - * @param flags fi_tagged flags - * @param context fi_context associated with tag - * @param addr gnix_address to find - * @param addr_ignore bits to ignore in address - * @return NULL, if no match is found, - * a non-NULL value, if a match is found - */ -static inline struct gnix_tag_list_element *__tag_list_find_element( - struct gnix_tag_storage *ts, - uint64_t tag, - uint64_t ignore, - struct dlist_entry *list, - uint64_t flags, - void *context, - struct gnix_address *addr) -{ - struct gnix_tag_search_element s_elem = { - .tag = tag, - .ignore = ignore, - .flags = flags, - .context = context, - .use_src_addr_matching = ts->attr.use_src_addr_matching, - .addr = addr, - }; - - /* search the list for a matching element. 
stop at the first match */ - return (struct gnix_tag_list_element *) - dlist_remove_first_match(list, - ts->match_func, &s_elem); -} - -/** - * @brief peeks into a tag list to find the first match using given parameters - * - * @param ts pointer to gnix_tag_storage_object - * @param tag tag to find - * @param ignore bits to ignore in tags - * @param list dlist to search - * @param flags fi_tagged flags - * @param context fi_context associated with tag - * @param addr gnix_address to find - * @param addr_ignore bits to ignore in address - * @return NULL, if no match is found, - * a non-NULL value, if a match is found - */ -static inline struct gnix_tag_list_element *__tag_hlist_find_first_match( - struct gnix_tag_storage *ts, - uint64_t tag, - uint64_t ignore, - struct dlist_entry *list, - uint64_t flags, - void *context, - struct gnix_address *addr, - uint64_t oldest_seq, - uint64_t oldest_gen, - int (*match_func)(struct dlist_entry *entry, const void *arg)) -{ - struct dlist_entry *current; - struct gnix_tag_list_element *tle; - struct gnix_tag_search_element s_elem = { - .tag = tag, - .ignore = ignore, - .flags = flags, - .context = context, - .use_src_addr_matching = ts->attr.use_src_addr_matching, - .addr = addr, - }; - - /* search the list for a matching element. 
stop at the first match */ - dlist_foreach(list, current) { - tle = container_of(current, struct gnix_tag_list_element, free); - if (!NOT_FOUND(oldest_seq, oldest_gen) && - is_tag_older(oldest_seq, oldest_gen, tle)) - break; - - if (match_func(current, &s_elem)) - return (struct gnix_tag_list_element *) current; - } - - return NULL; -} - -static inline struct gnix_fab_req *__gnix_tag_hlist_search_tag( - struct gnix_tag_storage *ts, - uint64_t tag, - uint64_t ignore, - uint64_t flags, - void *context, - struct gnix_address *addr, - int (*match_func)(struct dlist_entry *entry, const void *arg)) -{ - struct gnix_tag_list_element *tmp; - struct gnix_tag_list_element *oldest = NULL; - struct gnix_hlist_head *h; - uint64_t oldest_seq = 0; - uint64_t oldest_gen = 0; - int i, start, end; - - GNIX_INFO(FI_LOG_EP_CTRL, "searching hlist, " - "tag=%.16llx ignore=%.16llx flags=%.4llx context=%p\n", - tag, ignore, flags, context); - - if (ignore != 0) { - start = 0; - end = ts->hlist.elements; - } else { - start = get_bucket(ts, tag); - end = start + 1; - } - - for (i = start; i < end; i++) { - h = &ts->hlist.array[i]; - if (dlist_empty(&h->head)){ - GNIX_INFO(FI_LOG_EP_CTRL, "skipping list head %d\n", i); - continue; - } - - if (!NOT_FOUND(oldest_seq, oldest_gen) && - __is_tag_older(oldest_seq, oldest_gen, - h->oldest_tag_id, h->oldest_gen)) { - GNIX_INFO(FI_LOG_EP_CTRL, - "skipping list head %d due to older found," - "oldest=%llx:%llx head=%llx:%llx\n", - i, oldest_seq, oldest_gen, - h->oldest_tag_id, h->oldest_gen); - continue; - } - - GNIX_INFO(FI_LOG_EP_CTRL, "searching hlist bucket %d\n", i); - - tmp = __tag_hlist_find_first_match(ts, tag, ignore, - &h->head, flags, context, addr, - oldest_seq, oldest_gen, match_func); - - if (tmp) { - GNIX_INFO(FI_LOG_EP_CTRL, - "found a match, seq-gen=%llx:%llx\n", - tmp->seq, tmp->gen); - assert(tmp->seq != 0); - oldest = tmp; - oldest_seq = tmp->seq; - oldest_gen = tmp->gen; - } - } - - if (!oldest) - return NULL; - - return 
__to_gnix_fab_req(oldest); -} - -static inline void __remove_hlist_entry(struct gnix_tag_storage *ts, - struct gnix_fab_req *req) -{ - struct gnix_tag_list_element *tle; - struct dlist_entry *entry; - struct gnix_hlist_head *h; - int bucket = get_bucket(ts, req->msg.tag); - - tle = &req->msg.tle; - entry = &tle->free; - - // remove entry - dlist_remove(entry); - - // update bucket - h = &ts->hlist.array[bucket]; - __update_hlist_head(h); -} - -/** - * @brief checks attributes for invalid values - * - * @param attr attributes to be checked - * @return -FI_EINVAL, if attributes contain invalid values - * FI_SUCCESS, otherwise - */ -static inline int __check_for_invalid_attributes( - struct gnix_tag_storage_attr *attr) -{ - if (attr->type < 0 || attr->type >= GNIX_TAG_MAXTYPES) - return -FI_EINVAL; - - return FI_SUCCESS; -} - -int _gnix_tag_storage_init( - struct gnix_tag_storage *ts, - struct gnix_tag_storage_attr *attr, - int (*match_func)(struct dlist_entry *, const void *)) -{ - int ret; - struct gnix_tag_storage_attr *attributes = &default_attr; - - if (ts->state == GNIX_TS_STATE_INITIALIZED) { - GNIX_WARN(FI_LOG_EP_CTRL, - "attempted to initialize already active tag storage\n"); - return -FI_EINVAL; - } - - if (attr) { - if (__check_for_invalid_attributes(attr)) { - GNIX_WARN(FI_LOG_EP_CTRL, - "invalid attributes passed in to init\n"); - return -FI_EINVAL; - } - - attributes = attr; - } - - - - /* copy attributes */ - memcpy(&ts->attr, attributes, sizeof(struct gnix_tag_storage_attr)); - - switch (ts->attr.type) { - case GNIX_TAG_AUTOSELECT: - case GNIX_TAG_LIST: - ts->ops = &list_ops; - break; - case GNIX_TAG_HLIST: - ts->ops = &hlist_ops; - break; - case GNIX_TAG_KDTREE: - ts->ops = &kdtree_ops; - break; - default: - GNIX_FATAL(FI_LOG_EP_CTRL, "Invalid tag type: %d\n", - ts->attr.type); - } - - ret = ts->ops->init(ts); - if (ret) { - GNIX_WARN(FI_LOG_EP_CTRL, - "failed to initialize at ops->init\n"); - return ret; - } - - /* a different type of matching 
behavior is required for unexpected - * messages - */ - ts->match_func = match_func; - - ofi_atomic_initialize32(&ts->seq, 1); - ts->gen = 0; - ts->state = GNIX_TS_STATE_INITIALIZED; - - return FI_SUCCESS; -} - -int _gnix_tag_storage_destroy(struct gnix_tag_storage *ts) -{ - int ret; - - if (ts->state != GNIX_TS_STATE_INITIALIZED) - return -FI_EINVAL; - - ret = ts->ops->fini(ts); - if (ret) - return ret; - - ts->state = GNIX_TS_STATE_DESTROYED; - - return FI_SUCCESS; -} - -/* not implemented operations */ -static int __gnix_tag_no_init(struct gnix_tag_storage *ts) -{ - return -FI_ENOSYS; -} - -static int __gnix_tag_no_fini(struct gnix_tag_storage *ts) -{ - return -FI_ENOSYS; -} - -static int __gnix_tag_no_insert_tag( - struct gnix_tag_storage *ts, - uint64_t tag, - struct gnix_fab_req *req) -{ - return -FI_ENOSYS; -} - - -static struct gnix_fab_req *__gnix_tag_no_peek_tag( - struct gnix_tag_storage *ts, - uint64_t tag, - uint64_t ignore, - uint64_t flags, - void *context, - struct gnix_address *addr) -{ - return NULL; -} - -static struct gnix_fab_req *__gnix_tag_no_remove_tag( - struct gnix_tag_storage *ts, - uint64_t tag, - uint64_t ignore, - uint64_t flags, - void *context, - struct gnix_address *addr) -{ - return NULL; -} - -static struct gnix_fab_req *__gnix_tag_no_remove_req_by_context( - struct gnix_tag_storage *ts, - void *context) -{ - return NULL; -} - -static void __gnix_tag_no_remove_tag_by_req( - struct gnix_tag_storage *ts, - struct gnix_fab_req *req) -{ -} - -/* list operations */ - -static int __gnix_tag_list_init(struct gnix_tag_storage *ts) -{ - dlist_init(&ts->list.list); - - return FI_SUCCESS; -} - -static int __gnix_tag_list_fini(struct gnix_tag_storage *ts) -{ - if (!dlist_empty(&ts->list.list)) - return -FI_EAGAIN; - - return FI_SUCCESS; -} - -static int __gnix_tag_list_insert_tag( - struct gnix_tag_storage *ts, - uint64_t tag, - struct gnix_fab_req *req) -{ - struct gnix_tag_list_element *element; - - element = &req->msg.tle; - if 
(!dlist_empty(&element->free)) - return -FI_EALREADY; - - element->context = NULL; - dlist_insert_tail(&element->free, &ts->list.list); - - return FI_SUCCESS; -} - -static struct gnix_fab_req *__gnix_tag_list_peek_tag( - struct gnix_tag_storage *ts, - uint64_t tag, - uint64_t ignore, - uint64_t flags, - void *context, - struct gnix_address *addr) -{ - struct gnix_tag_list_element *element; - - element = __tag_list_peek_first_match(ts, tag, ignore, - &ts->list.list, flags, context, addr); - - if (!element) - return NULL; - - return __to_gnix_fab_req(element); -} - -static struct gnix_fab_req *__gnix_tag_list_remove_tag( - struct gnix_tag_storage *ts, - uint64_t tag, - uint64_t ignore, - uint64_t flags, - void *context, - struct gnix_address *addr) -{ - struct gnix_tag_list_element *element; - struct gnix_fab_req *req; - - element = __tag_list_find_element(ts, tag, ignore, &ts->list.list, - flags, context, addr); - if (!element) - return NULL; - - req = __to_gnix_fab_req(element); - - return req; -} - -static void __gnix_tag_list_remove_tag_by_req( - struct gnix_tag_storage *ts, - struct gnix_fab_req *req) -{ - struct gnix_tag_list_element *element; - struct dlist_entry *item; - - element = &req->msg.tle; - item = (struct dlist_entry *) &element->free; - dlist_remove(item); -} - -static struct gnix_fab_req *__gnix_tag_list_remove_req_by_context( - struct gnix_tag_storage *ts, - void *context) -{ - struct gnix_tag_list_element *element; - struct gnix_fab_req *req; - - element = (struct gnix_tag_list_element *) - dlist_remove_first_match(&ts->list.list, - __req_matches_context, context); - - if (!element) - return NULL; - - req = __to_gnix_fab_req(element); - - return req; -} - -/* hlist operations */ - -static int __gnix_tag_hlist_init(struct gnix_tag_storage *ts) -{ - struct gnix_hlist_head *h; - int i; - - ts->hlist.elements = 128; - ts->hlist.last_inserted_id = 0; - ts->hlist.oldest_tag_id = 0; - ts->hlist.current_gen = 0; - ts->hlist.array = 
calloc(ts->hlist.elements, - sizeof(struct gnix_hlist_head)); - if (!ts->hlist.array) - return -FI_ENOMEM; - - for (i = 0; i < ts->hlist.elements; i++) { - h = &ts->hlist.array[i]; - - dlist_init(&h->head); - h->oldest_gen = 0; - h->oldest_tag_id = 0; - } - - return FI_SUCCESS; -} - -static int __gnix_tag_hlist_fini(struct gnix_tag_storage *ts) -{ - int i; - struct gnix_hlist_head *h; - - for (i = 0; i < ts->hlist.elements; i++) { - h = &ts->hlist.array[i]; - - if (!dlist_empty(&h->head)) - return -FI_EAGAIN; - } - - free(ts->hlist.array); - - ts->hlist.elements = 0; - ts->hlist.array = NULL; - - return FI_SUCCESS; -} - -static int __gnix_tag_hlist_insert_tag( - struct gnix_tag_storage *ts, - uint64_t tag, - struct gnix_fab_req *req) -{ - struct gnix_tag_list_element *element; - struct gnix_hlist_head *h; - int bucket = get_bucket(ts, tag); - - element = &req->msg.tle; - if (!dlist_empty(&element->free)) - return -FI_EALREADY; - - dlist_init(&element->free); - element->context = NULL; - element->seq = ++ts->hlist.last_inserted_id; - if (!element->seq) { - element->seq = ts->hlist.last_inserted_id = 1; - ++ts->hlist.current_gen; - } - element->gen = ts->hlist.current_gen; - - h = &ts->hlist.array[bucket]; - - if (dlist_empty(&h->head)) { - h->oldest_gen = element->gen; - h->oldest_tag_id = element->seq; - } - dlist_insert_tail(&element->free, &h->head); - - GNIX_INFO(FI_LOG_EP_CTRL, "inserting new tag in hlist, " - "tag=%.16llx seq=%d gen=%d bucket=%d\n", - tag, element->seq, element->gen, bucket); - - return FI_SUCCESS; -} - -static struct gnix_fab_req *__gnix_tag_hlist_peek_tag( - struct gnix_tag_storage *ts, - uint64_t tag, - uint64_t ignore, - uint64_t flags, - void *context, - struct gnix_address *addr) -{ - return __gnix_tag_hlist_search_tag(ts, tag, ignore, - flags, context, addr, ts->match_func); -} - -static struct gnix_fab_req *__gnix_tag_hlist_remove_tag( - struct gnix_tag_storage *ts, - uint64_t tag, - uint64_t ignore, - uint64_t flags, - void *context, 
- struct gnix_address *addr) -{ - struct gnix_fab_req *req; - - req = __gnix_tag_hlist_search_tag(ts, tag, ignore, - flags, context, addr, ts->match_func); - - if (req) - __remove_hlist_entry(ts, req); - - return req; -} - -static void __gnix_tag_hlist_remove_tag_by_req( - struct gnix_tag_storage *ts, - struct gnix_fab_req *req) -{ - __remove_hlist_entry(ts, req); -} - -static struct gnix_fab_req *__gnix_tag_hlist_remove_req_by_context( - struct gnix_tag_storage *ts, - void *context) -{ - struct gnix_fab_req *req; - - req = __gnix_tag_hlist_search_tag(ts, 0, 0, - 0, context, NULL, __req_matches_context); - - if (req) - __remove_hlist_entry(ts, req); - - return req; -} - - -/* ignore is only used on inserting into posted tag storages - * addr_ignore is only used on inserting into post tag storages with - * use_src_addr_matching enabled - */ -int _gnix_insert_tag( - struct gnix_tag_storage *ts, - uint64_t tag, - struct gnix_fab_req *req, - uint64_t ignore) -{ - int ret; - - GNIX_DEBUG(FI_LOG_EP_CTRL, "inserting a message by tag, " - "ts=%p tag=%llx req=%p\n", ts, tag, req); - req->msg.tag = tag; - if (ts->match_func == _gnix_match_posted_tag) { - req->msg.ignore = ignore; - } - - ret = ts->ops->insert_tag(ts, tag, req); - - GNIX_DEBUG(FI_LOG_EP_CTRL, "ret=%i\n", ret); - - return ret; -} - -/* - * ignore parameter is not used for posted tag storages - */ -static struct gnix_fab_req *__remove_by_tag_and_addr( - struct gnix_tag_storage *ts, - uint64_t tag, - uint64_t ignore, - uint64_t flags, - void *context, - struct gnix_address *addr) -{ - struct gnix_fab_req *ret; - - /* assuming that flags and context are correct */ - GNIX_DEBUG(FI_LOG_EP_CTRL, "removing a message by tag, " - "ts=%p tag=%llx ignore=%llx flags=%llx context=%p " - "addr=%p\n", - ts, tag, ignore, flags, context, addr); - ret = ts->ops->remove_tag(ts, tag, ignore, flags, context, addr); - GNIX_DEBUG(FI_LOG_EP_CTRL, "ret=%p\n", ret); - - return ret; -} - -static struct gnix_fab_req 
*__peek_by_tag_and_addr( - struct gnix_tag_storage *ts, - uint64_t tag, - uint64_t ignore, - uint64_t flags, - void *context, - struct gnix_address *addr) -{ - struct gnix_fab_req *ret; - - /* assuming that flags and context are correct */ - GNIX_DEBUG(FI_LOG_EP_CTRL, "peeking a message by tag, " - "ts=%p tag=%llx ignore=%llx flags=%llx context=%p " - "addr=%p\n", - ts, tag, ignore, flags, context, addr); - - ret = ts->ops->peek_tag(ts, tag, ignore, flags, context, addr); - - if (ret != NULL && (flags & FI_CLAIM)) { - ret->msg.tle.context = context; - } - - GNIX_DEBUG(FI_LOG_EP_CTRL, "ret=%p\n", ret); - - return ret; -} - -struct gnix_fab_req *_gnix_match_tag( - struct gnix_tag_storage *ts, - uint64_t tag, - uint64_t ignore, - uint64_t flags, - void *context, - struct gnix_address *addr) -{ - if ((flags & FI_PEEK) && !(flags & FI_DISCARD)) - return __peek_by_tag_and_addr(ts, tag, ignore, flags, - context, addr); - else - return __remove_by_tag_and_addr(ts, tag, ignore, flags, - context, addr); -} - -struct gnix_fab_req *_gnix_remove_req_by_context( - struct gnix_tag_storage *ts, - void *context) -{ - return ts->ops->remove_req_by_context(ts, context); -} - -void _gnix_remove_tag( - struct gnix_tag_storage *ts, - struct gnix_fab_req *req) -{ - ts->ops->remove_tag_by_req(ts, req); -} - -struct gnix_tag_storage_ops list_ops = { - .init = __gnix_tag_list_init, - .fini = __gnix_tag_list_fini, - .insert_tag = __gnix_tag_list_insert_tag, - .peek_tag = __gnix_tag_list_peek_tag, - .remove_tag = __gnix_tag_list_remove_tag, - .remove_tag_by_req = __gnix_tag_list_remove_tag_by_req, - .remove_req_by_context = __gnix_tag_list_remove_req_by_context, -}; - -struct gnix_tag_storage_ops hlist_ops = { - .init = __gnix_tag_hlist_init, - .fini = __gnix_tag_hlist_fini, - .insert_tag = __gnix_tag_hlist_insert_tag, - .peek_tag = __gnix_tag_hlist_peek_tag, - .remove_tag = __gnix_tag_hlist_remove_tag, - .remove_tag_by_req = __gnix_tag_hlist_remove_tag_by_req, - .remove_req_by_context = 
__gnix_tag_hlist_remove_req_by_context, -}; - -struct gnix_tag_storage_ops kdtree_ops = { - .init = __gnix_tag_no_init, - .fini = __gnix_tag_no_fini, - .insert_tag = __gnix_tag_no_insert_tag, - .peek_tag = __gnix_tag_no_peek_tag, - .remove_tag = __gnix_tag_no_remove_tag, - .remove_tag_by_req = __gnix_tag_no_remove_tag_by_req, - .remove_req_by_context = __gnix_tag_no_remove_req_by_context, -}; diff --git a/prov/gni/src/gnix_trigger.c b/prov/gni/src/gnix_trigger.c deleted file mode 100644 index 66f7b299965..00000000000 --- a/prov/gni/src/gnix_trigger.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2016 Cray Inc. All rights reserved. - * Copyright (c) 2016 Los Alamos National Security, LLC. All rights reserved. - * - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* - * Triggered operations handling. - */ - -#include "gnix_trigger.h" -#include "gnix_vc.h" -#include "gnix.h" - -int _gnix_trigger_queue_req(struct gnix_fab_req *req) -{ - struct fi_triggered_context *trigger_context; - struct fi_trigger_threshold *threshold; - struct gnix_fid_cntr *cntr; - struct gnix_fab_req *r; - size_t req_thresh; - - trigger_context = (struct fi_triggered_context *) - req->user_context; - threshold = &trigger_context->trigger.threshold; - cntr = container_of(threshold->cntr, struct gnix_fid_cntr, cntr_fid); - - if (ofi_atomic_get32(&cntr->cnt) >= threshold->threshold) { - GNIX_INFO(FI_LOG_EP_DATA, - "Trigger condition met: %p\n", - req); - - /* Trigger condition has already been met. */ - return 1; - } - - GNIX_INFO(FI_LOG_EP_DATA, - "Queueing triggered op: %p\n", - req); - - ofi_spin_lock(&cntr->trigger_lock); - if (dlist_empty(&cntr->trigger_list)) { - dlist_init(&req->dlist); - dlist_insert_head(&req->dlist, &cntr->trigger_list); - } else { - req_thresh = threshold->threshold; - - dlist_for_each(&cntr->trigger_list, r, dlist) { - trigger_context = (struct fi_triggered_context *) - r->user_context; - threshold = &trigger_context->trigger.threshold; - - /* Insert new req. after those with equal threshold and - * before those with greater threshold. 
*/ - if (req_thresh < threshold->threshold) { - break; - } - } - - dlist_init(&req->dlist); - dlist_insert_before(&req->dlist, &r->dlist); - } - ofi_spin_unlock(&cntr->trigger_lock); - - return FI_SUCCESS; -} - -void _gnix_trigger_check_cntr(struct gnix_fid_cntr *cntr) -{ - struct fi_triggered_context *trigger_context; - struct fi_trigger_threshold *threshold; - struct gnix_fab_req *req, *req2; - size_t count; - - if (OFI_LIKELY(dlist_empty(&cntr->trigger_list))) { - return; - } - - count = ofi_atomic_get32(&cntr->cnt); - - ofi_spin_lock(&cntr->trigger_lock); - dlist_for_each_safe(&cntr->trigger_list, req, req2, dlist) { - trigger_context = (struct fi_triggered_context *) - req->user_context; - threshold = &trigger_context->trigger.threshold; - - if (count >= threshold->threshold) { - GNIX_INFO(FI_LOG_EP_DATA, - "Trigger condition met: %p\n", - req); - - dlist_remove_init(&req->dlist); - req->flags &= ~FI_TRIGGER; - _gnix_vc_queue_tx_req(req); - } else { - break; - } - } - ofi_spin_unlock(&cntr->trigger_lock); -} diff --git a/prov/gni/src/gnix_util.c b/prov/gni/src/gnix_util.c deleted file mode 100644 index 1de42890194..00000000000 --- a/prov/gni/src/gnix_util.c +++ /dev/null @@ -1,799 +0,0 @@ -/* - * Copyright (c) 2014 Intel Corporation, Inc. All rights reserved. - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#if HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "alps/alps.h" -#include "alps/alps_toolAssist.h" -#include "alps/libalpsutil.h" -#include "alps/libalpslli.h" - -#include "gnix.h" -#include "gnix_util.h" - -static bool app_init; -/* Filled in by __gnix_app_init */ -static uint8_t gnix_app_ptag; -static uint32_t gnix_app_cookie; -static uint32_t gnix_pes_on_node; -static int gnix_pe_node_rank = -1; -#if HAVE_CRITERION -int gnix_first_pe_on_node; /* globally visible for criterion */ -#else -static int gnix_first_pe_on_node; -#endif -/* CCM/ccmlogin specific stuff */ -static bool ccm_init; -/* This file provides ccm_alps_info */ -#define CCM_ALPS_INFO_FILE "/tmp/ccm_alps_info" -typedef struct ccm_alps_info { - uint32_t version; - uint8_t ptag; - uint32_t cookie; -} ccm_alps_info_t; -/* Format for the nodelist filename: $HOME/.crayccm/ccmnodlist. */ -#define CCM_NODELIST_FN ".crayccm/ccm_nodelist." 
-/* alps specific stuff */ -static uint64_t gnix_apid; -static alpsAppLayout_t gnix_appLayout; -static uint32_t gnix_device_id; -static int gnix_cq_limit; -/* These are not used currently and could be static to gnix_alps_init */ -static int alps_init; -static int *gnix_app_placementList; -static int *gnix_app_targetNids; -static int *gnix_app_targetPes; -static int *gnix_app_targetLen; -static struct in_addr *gnix_app_targetIps; -static int *gnix_app_startPe; -static int *gnix_app_totalPes; -static int *gnix_app_nodePes; -static int *gnix_app_peCpus; - -ofi_spin_t __gnix_alps_lock; - -int _gnix_get_cq_limit(void) -{ - return gnix_cq_limit; -} - -static inline void __gnix_ccm_cleanup(void) -{ - ccm_init = false; -} - -static inline void __gnix_alps_cleanup(void) -{ - alps_app_lli_lock(); - - if (gnix_app_placementList) - free(gnix_app_placementList); - if (gnix_app_targetNids) - free(gnix_app_targetNids); - if (gnix_app_targetPes) - free(gnix_app_targetPes); - if (gnix_app_targetLen) - free(gnix_app_targetLen); - if (gnix_app_targetIps) - free(gnix_app_targetIps); - if (gnix_app_startPe) - free(gnix_app_startPe); - if (gnix_app_totalPes) - free(gnix_app_totalPes); - if (gnix_app_nodePes) - free(gnix_app_nodePes); - if (gnix_app_peCpus) - free(gnix_app_peCpus); - - alps_init = false; - - alps_app_lli_unlock(); -} - -void _gnix_app_cleanup(void) -{ - if (alps_init) { - __gnix_alps_cleanup(); - } else if (ccm_init) { - __gnix_ccm_cleanup(); - } -} - -/* There are two types of errors that can happen in this function: - * - CCM ALPS info file not found - * - Failure while trying to get ptag, cookie and PEs/node - * Currently we don't distinguish between the two. 
- */ -static int __gnix_ccm_init(void) -{ - int rc, fd; - FILE *f; - char *nodefile; - char nodelist[PATH_MAX]; - const char *home; - ccm_alps_info_t info; - uint32_t num_nids = 0; - - GNIX_DEBUG(FI_LOG_FABRIC, "Reading job info file %s\n", - CCM_ALPS_INFO_FILE); - - fd = open(CCM_ALPS_INFO_FILE, O_RDONLY); - if (fd < 0) { - return -FI_EIO; - } - - rc = read(fd, &info, sizeof(ccm_alps_info_t)); - if (rc != sizeof(ccm_alps_info_t)) - return -FI_EIO; - - gnix_app_ptag = info.ptag; - gnix_app_cookie = info.cookie; - - close(fd); - GNIX_DEBUG(FI_LOG_FABRIC, "Ptag=0x%x, cookie=0x%x\n", - gnix_app_ptag, gnix_app_cookie); - - home = getenv("HOME"); - /* use the WLM node file if using PBS */ - nodefile = getenv("PBS_NODEFILE"); - if (!nodefile) { - const char *jobid = getenv("SLURM_JOB_ID"); - if (!jobid) { - jobid = getenv("SLURM_JOBID"); - } - snprintf(nodelist, PATH_MAX, "%s/%s%s", home ? home : ".", - CCM_NODELIST_FN, jobid ? jobid : "sdb"); - nodefile = nodelist; - } - f = fopen(nodefile, "r"); - if (f) { - char mynid[PATH_MAX]; - char next_nid[PATH_MAX]; - - rc = gethostname(mynid, PATH_MAX); - if (rc) { - /* use the first address */ - rc = fscanf(f, "%s\n", mynid); - /* assume this one worked, error case is same */ - num_nids++; - } - while (true) { - rc = fscanf(f, "%s\n", next_nid); - if (rc == 1) { - if (strcmp(mynid, next_nid) == 0) { - num_nids++; - } - } else { - break; - } - } - gnix_pes_on_node = num_nids; - fclose(f); - } else { - /* what would be a better default? */ - GNIX_WARN(FI_LOG_FABRIC, - "CCM nodelist not found. 
Assuming 1 PE per node\n"); - gnix_pes_on_node = 1; - } - GNIX_DEBUG(FI_LOG_FABRIC, "pes per node=%u\n", gnix_pes_on_node); - - /* Don't really need to do this here, but wanted to be clear */ - gnix_app_placementList = NULL; - gnix_app_targetNids = NULL; - gnix_app_targetPes = NULL; - gnix_app_targetLen = NULL; - gnix_app_targetIps = NULL; - gnix_app_startPe = NULL; - gnix_app_totalPes = NULL; - gnix_app_nodePes = NULL; - gnix_app_peCpus = NULL; - - ccm_init = true; - return FI_SUCCESS; -} - -static int __gnix_alps_init(void) -{ - char *cptr = NULL; - int ret = FI_SUCCESS; - int my_pe = -1; - int alps_status = 0; - size_t alps_count; - alpsAppLLIGni_t *rdmacred_rsp = NULL; - alpsAppGni_t *rdmacred_buf = NULL; - - ofi_spin_lock(&__gnix_alps_lock); - /* lli_lock doesn't return anything useful */ - ret = alps_app_lli_lock(); - - if (alps_init) { - /* alps lli lock protects alps_init for now */ - alps_app_lli_unlock(); - ofi_spin_unlock(&__gnix_alps_lock); - return ret; - } - - /* - * First get our apid - */ - ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_APID, NULL, 0); - if (ret != ALPS_APP_LLI_ALPS_STAT_OK) { - GNIX_WARN(FI_LOG_FABRIC, "lli put failed, ret=%d(%s)\n", ret, - strerror(errno)); - ret = -FI_EIO; - goto err; - } - - ret = alps_app_lli_get_response(&alps_status, &alps_count); - if (alps_status != ALPS_APP_LLI_ALPS_STAT_OK) { - GNIX_WARN(FI_LOG_FABRIC, "lli get response failed, " - "alps_status=%d(%s)\n", alps_status, - strerror(errno)); - ret = -FI_EIO; - goto err; - } - - ret = alps_app_lli_get_response_bytes(&gnix_apid, sizeof(gnix_apid)); - if (ret != ALPS_APP_LLI_ALPS_STAT_OK) { - GNIX_WARN(FI_LOG_FABRIC, - "lli get response failed, ret=%d(%s)\n", - ret, strerror(errno)); - ret = -FI_EIO; - goto err; - } - - /* - * now get the GNI rdma credentials info - */ - ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_GNI, NULL, 0); - if (ret != ALPS_APP_LLI_ALPS_STAT_OK) { - GNIX_WARN(FI_LOG_FABRIC, "lli put failed, ret=%d(%s)\n", - ret, 
strerror(errno)); - ret = -FI_EIO; - goto err; - } - - ret = alps_app_lli_get_response(&alps_status, &alps_count); - if (alps_status != ALPS_APP_LLI_ALPS_STAT_OK) { - GNIX_WARN(FI_LOG_FABRIC, - "lli get response failed, alps_status=%d(%s)\n", - alps_status, strerror(errno)); - ret = -FI_EIO; - goto err; - } - - rdmacred_rsp = malloc(alps_count); - if (rdmacred_rsp == NULL) { - ret = -FI_ENOMEM; - goto err; - } - - memset(rdmacred_rsp, 0, alps_count); - - ret = alps_app_lli_get_response_bytes(rdmacred_rsp, alps_count); - if (ret != ALPS_APP_LLI_ALPS_STAT_OK) { - GNIX_WARN(FI_LOG_FABRIC, - "lli get response failed, ret=%d(%s)\n", - ret, strerror(errno)); - ret = -FI_EIO; - goto err; - } - - rdmacred_buf = (alpsAppGni_t *) rdmacred_rsp->u.buf; - - /* - * just use the first ptag/cookie for now - */ - - gnix_app_ptag = rdmacred_buf[0].ptag; - gnix_app_cookie = rdmacred_buf[0].cookie; - - /* - * alps_get_placement_info(uint64_t apid, alpsAppLayout_t *appLayout, - * int **placementList, int **targetNids, int **targetPes, - * int **targetLen, struct in_addr **targetIps, int **startPe, - * int **totalPes, int **nodePes, int **peCpus); - */ - ret = alps_get_placement_info(gnix_apid, &gnix_appLayout, - &gnix_app_placementList, - &gnix_app_targetNids, - &gnix_app_targetPes, - &gnix_app_targetLen, - &gnix_app_targetIps, - &gnix_app_startPe, - &gnix_app_totalPes, - &gnix_app_nodePes, - &gnix_app_peCpus); - if (ret != 1) { - GNIX_WARN(FI_LOG_FABRIC, - "alps_get_placement_info failed, ret=%d(%s)\n", - ret, strerror(errno)); - ret = -FI_EIO; - goto err; - } - - gnix_pes_on_node = gnix_appLayout.numPesHere; - gnix_first_pe_on_node = gnix_appLayout.firstPe; - - if ((cptr = getenv("PMI_FORK_RANK")) != NULL) { - my_pe = atoi(cptr); - } else { - if ((cptr = getenv("ALPS_APP_PE")) != NULL) { - my_pe = atoi(cptr); - } - } - - /* - * compute local pe rank, assuming we got our global PE rank - * via either an ALPS (or ALPS SLURM plugin) or Cray PMI, - * otherwise set to -1. 
- */ - if (my_pe != -1) - gnix_pe_node_rank = my_pe - gnix_first_pe_on_node; - - alps_init = true; - - ret = 0; -err: - alps_app_lli_unlock(); - ofi_spin_unlock(&__gnix_alps_lock); - if (rdmacred_rsp != NULL) { - free(rdmacred_rsp); - } - - return ret; -} - -static int __gnix_app_init(void) -{ - int ret; - - if (app_init) { - return FI_SUCCESS; - } - - /* Try CCM first */ - ret = __gnix_ccm_init(); - if (ret) { - ret = __gnix_alps_init(); - } - - if (ret == FI_SUCCESS) { - app_init = true; - } - - gnix_device_id = 0; - return ret; - -} - -int gnixu_get_rdma_credentials(void *addr, uint8_t *ptag, uint32_t *cookie) -{ - int ret = FI_SUCCESS; - - /*TODO: If addr is used, ensure that ep->info->addr_format is checked*/ - - if ((ptag == NULL) || (cookie == NULL)) { - return -FI_EINVAL; - } - - ret = __gnix_app_init(); - if (ret) { - GNIX_WARN(FI_LOG_FABRIC, - "__gnix_app_init() failed, ret=%d(%s)\n", - ret, strerror(errno)); - return ret; - } - - /* - * TODO: need to handle non null addr differently at some point, - * a non-NULL addr can be used to acquire RDMA credentials other than - * those assigned by ALPS/nativized slurm. 
- */ - *ptag = gnix_app_ptag; - *cookie = gnix_app_cookie; - - return ret; -} - - -#define NUM_GNI_RC (GNI_RC_ERROR_NOMEM+1) -static int gnix_rc_table[NUM_GNI_RC] = { - [GNI_RC_SUCCESS] = FI_SUCCESS, - [GNI_RC_NOT_DONE] = -FI_EAGAIN, - [GNI_RC_INVALID_PARAM] = -FI_EINVAL, - [GNI_RC_ERROR_RESOURCE] = -FI_EBUSY, - [GNI_RC_TIMEOUT] = -FI_ETIMEDOUT, - [GNI_RC_PERMISSION_ERROR] = -FI_EACCES, - [GNI_RC_DESCRIPTOR_ERROR] = -FI_EOTHER, - [GNI_RC_ALIGNMENT_ERROR] = -FI_EINVAL, - [GNI_RC_INVALID_STATE] = -FI_EOPBADSTATE, - [GNI_RC_NO_MATCH] = -FI_EINVAL, - [GNI_RC_SIZE_ERROR] = -FI_ETOOSMALL, - [GNI_RC_TRANSACTION_ERROR] = -FI_ECANCELED, - [GNI_RC_ILLEGAL_OP] = -FI_EOPNOTSUPP, - [GNI_RC_ERROR_NOMEM] = -FI_ENOMEM -}; - -int gnixu_to_fi_errno(int err) -{ - if (err >= 0 && err < NUM_GNI_RC) - return gnix_rc_table[err]; - else - return -FI_EOTHER; -} - -/* Indicate that the next task spawned will be restricted to cores assigned to - * corespec. */ -int _gnix_task_is_not_app(void) -{ - size_t count; - int fd; - char filename[PATH_MAX]; - int rc = 0; - char val_str[] = "0"; - int val_str_len = strlen(val_str); - - snprintf(filename, PATH_MAX, "/proc/self/task/%ld/task_is_app", - syscall(SYS_gettid)); - fd = open(filename, O_WRONLY); - if (fd < 0) { - GNIX_WARN(FI_LOG_FABRIC, "open(%s) failed, errno=%s\n", - filename, strerror(errno)); - return -errno; - } - - count = write(fd, val_str, val_str_len); - if (count != val_str_len) { - GNIX_WARN(FI_LOG_FABRIC, "write(%s, %s) failed, errno=%s\n", - filename, val_str, strerror(errno)); - rc = -errno; - } - close(fd); - - return rc; -} - -static int gnix_write_proc_job(char *val_str) -{ - size_t count; - int fd; - int rc = 0; - char *filename = "/proc/job"; - int val_str_len = strlen(val_str); - - fd = open(filename, O_WRONLY); - if (fd < 0) { - GNIX_WARN(FI_LOG_FABRIC, "open(%s) failed, errno=%s\n", - filename, strerror(errno)); - return -errno; - } - - count = write(fd, val_str, val_str_len); - if (count != val_str_len) { - 
GNIX_WARN(FI_LOG_FABRIC, "write(%s) failed, errno=%s\n", - val_str, strerror(errno)); - rc = -errno; - } - close(fd); - - return rc; -} - -/* Indicate that the next task spawned will be restricted to CPUs that are not - * assigned to the app and not assigned to corespec. */ -int _gnix_job_enable_unassigned_cpus(void) -{ - return gnix_write_proc_job("enable_affinity_unassigned_cpus"); -} - -/* Indicate that the next task spawned will be restricted to CPUs that are - * assigned to the app. */ -int _gnix_job_disable_unassigned_cpus(void) -{ - return gnix_write_proc_job("disable_affinity_unassigned_cpus"); -} - -/* Indicate that the next task spawned should adhere to the affinity rules. */ -int _gnix_job_enable_affinity_apply(void) -{ - return gnix_write_proc_job("enable_affinity_apply"); -} - -/* Indicate that the next task spawned should avoid the affinity rules and be - * allowed to run anywhere in the app cpuset. */ -int _gnix_job_disable_affinity_apply(void) -{ - return gnix_write_proc_job("disable_affinity_apply"); -} - - -int _gnix_job_fma_limit(uint32_t dev_id, uint8_t ptag, uint32_t *limit) -{ - gni_return_t status; - gni_job_res_desc_t job_res_desc; - - if (!limit) { - return -FI_EINVAL; - } - - status = GNI_GetJobResInfo(dev_id, ptag, GNI_JOB_RES_FMA, &job_res_desc); - if (status) { - GNIX_WARN(FI_LOG_FABRIC, - "GNI_GetJobResInfo(%d, %d) failed, status=%s\n", - dev_id, ptag, gni_err_str[status]); - return -FI_EINVAL; - } - - *limit = job_res_desc.limit; - GNIX_INFO(FI_LOG_FABRIC, "fma_limit: %u\n", job_res_desc.limit); - - return FI_SUCCESS; -} - -int _gnix_job_cq_limit(uint32_t dev_id, uint8_t ptag, uint32_t *limit) -{ - gni_return_t status; - gni_job_res_desc_t job_res_desc; - - if (!limit) { - return -FI_EINVAL; - } - - status = GNI_GetJobResInfo(dev_id, ptag, GNI_JOB_RES_CQ, &job_res_desc); - if (status) { - GNIX_WARN(FI_LOG_FABRIC, - "GNI_GetJobResInfo(%d, %d) failed, status=%s\n", - dev_id, ptag, gni_err_str[status]); - return -FI_EINVAL; - } - - 
*limit = job_res_desc.limit; - GNIX_INFO(FI_LOG_FABRIC, "cq_limit: %u\n", job_res_desc.limit); - - return FI_SUCCESS; -} - -int _gnix_pes_on_node(uint32_t *num_pes) -{ - int rc; - - if (!num_pes) { - return -FI_EINVAL; - } - - rc = __gnix_app_init(); - if (rc) { - GNIX_WARN(FI_LOG_FABRIC, - "__gnix_app_init() failed, ret=%d(%s)\n", - rc, strerror(errno)); - return rc; - } - - *num_pes = gnix_pes_on_node; - GNIX_INFO(FI_LOG_FABRIC, "num_pes: %u\n", gnix_appLayout.numPesHere); - - return FI_SUCCESS; -} - -int _gnix_pe_node_rank(int *pe_node_rank) -{ - int rc; - - if (!pe_node_rank) { - return -FI_EINVAL; - } - - rc = __gnix_app_init(); - if (rc) { - GNIX_WARN(FI_LOG_FABRIC, - "__gnix_app_init() failed, ret=%d(%s)\n", - rc, strerror(errno)); - return rc; - } - - if (gnix_pe_node_rank != -1) { - *pe_node_rank = gnix_pe_node_rank; - rc = FI_SUCCESS; - } else - rc = -FI_EADDRNOTAVAIL; - - GNIX_INFO(FI_LOG_FABRIC, "pe_node_rank: %u\n", gnix_pe_node_rank); - - return rc; -} - -int _gnix_nics_per_rank(uint32_t *nics_per_rank) -{ - int rc; - uint32_t npes, fmas, cqs, limiting_resource; - - if (!nics_per_rank) { - return -FI_EINVAL; - } - - rc = __gnix_app_init(); - if (rc) { - GNIX_WARN(FI_LOG_FABRIC, - "__gnix_app_init() failed, ret=%d(%s)\n", - rc, strerror(errno)); - return rc; - } - - rc = _gnix_job_fma_limit(gnix_device_id, gnix_app_ptag, &fmas); - if (rc) { - return rc; - } - - rc = _gnix_job_cq_limit(gnix_device_id, gnix_app_ptag, &cqs); - if (rc) { - return rc; - } - - gnix_cq_limit = cqs; - cqs /= GNIX_CQS_PER_EP; - - rc = _gnix_pes_on_node(&npes); - if (rc) { - return rc; - } - - limiting_resource = fmas > cqs ? 
cqs : fmas; - - *nics_per_rank = limiting_resource / npes; - - return FI_SUCCESS; -} - -void _gnix_dump_gni_res(uint8_t ptag) -{ - int i; - gni_return_t status; - gni_dev_res_desc_t dev_res_desc; - gni_job_res_desc_t job_res_desc; -#define BUF_SZ 4096 - char buf[BUF_SZ]; - int size = BUF_SZ, written = 0; - - if (!fi_log_enabled(&gnix_prov, FI_LOG_WARN, FI_LOG_FABRIC)) - return; - - written += snprintf(buf + written, size - written, - "Device Resources:\n"); - for (i = GNI_DEV_RES_FIRST+1; i < GNI_DEV_RES_LAST; i++) { - status = GNI_GetDevResInfo(0, i, &dev_res_desc); - if (status == GNI_RC_SUCCESS) { - written += snprintf(buf + written, size - written, - "dev res: %9s, avail: %lu res: %lu held: %lu total: %lu\n", - gni_dev_res_to_str(i), - dev_res_desc.available, - dev_res_desc.reserved, - dev_res_desc.held, - dev_res_desc.total); - } - } - - GNIX_WARN(FI_LOG_FABRIC, "%s", buf); - - written = 0; - written += snprintf(buf + written, size - written, - "Job Resources:\n"); - for (i = GNI_JOB_RES_FIRST+1; i < GNI_JOB_RES_LAST; i++) { - status = GNI_GetJobResInfo(0, ptag, i, &job_res_desc); - if (status == GNI_RC_SUCCESS) { - written += snprintf(buf + written, size - written, - "ptag[%d] job res: %9s used: %lu limit: %lu\n", - ptag, gni_job_res_to_str(i), - job_res_desc.used, - job_res_desc.limit); - } - } - - GNIX_WARN(FI_LOG_FABRIC, "%s", buf); -} - -int _gnix_get_num_corespec_cpus(uint32_t *num_core_spec_cpus) -{ - int ret = -FI_ENODATA; - int ncpus = 0; - FILE *fd = NULL; - char buffer[4096], *line, *field; - static bool already_called; - static uint32_t cached_num_corespec_cpus; - - if (num_core_spec_cpus == NULL) - return -FI_EINVAL; - - if (already_called == true) { - *num_core_spec_cpus = cached_num_corespec_cpus; - return FI_SUCCESS; - } - - fd = fopen("/proc/job", "r"); - if (!fd) { - GNIX_WARN(FI_LOG_FABRIC, - "open of /proc/job returned %s", strerror(errno)); - return -errno; - } - - while (1) { - line = fgets(buffer, sizeof(buffer), fd); - if (!line) - 
break; - - line = strstr(line, "corespec"); - if (line != NULL) { - field = strtok(line, " "); - field = strtok(NULL, " "); - if (!strcmp(field, "num_sys_cpus")) { - field = strtok(NULL, " "); - ncpus = atoi(field); - } - ret = FI_SUCCESS; - break; - } - } - - *num_core_spec_cpus = ncpus; - cached_num_corespec_cpus = ncpus; - - already_called = true; - - fclose(fd); - - return ret; -} - diff --git a/prov/gni/src/gnix_vc.c b/prov/gni/src/gnix_vc.c deleted file mode 100644 index 05938aa42ab..00000000000 --- a/prov/gni/src/gnix_vc.c +++ /dev/null @@ -1,2236 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2015-2018 Los Alamos National Security, LLC. - * All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* - * code for managing VC's - */ - -#include -#include -#include - -#include "gnix.h" -#include "gnix_vc.h" -#include "gnix_util.h" -#include "gnix_datagram.h" -#include "gnix_cm_nic.h" -#include "gnix_nic.h" -#include "gnix_ep.h" -#include "gnix_mbox_allocator.h" -#include "gnix_hashtable.h" -#include "gnix_av.h" -#include "gnix_trigger.h" -#include "gnix_vector.h" -#include "gnix_xpmem.h" -#include "gnix_cq.h" - -/* - * forward declarations and local struct defs. - */ - -struct wq_hndl_conn_req { - gni_smsg_attr_t src_smsg_attr; - int src_vc_id; - struct gnix_vc *vc; - uint64_t src_vc_ptr; - gni_mem_handle_t irq_mem_hndl; - xpmem_segid_t peer_segid; -}; - -static int __gnix_vc_conn_ack_prog_fn(void *data, int *complete_ptr); -static int __gnix_vc_conn_ack_comp_fn(void *data); -static int __gnix_vc_push_tx_reqs(struct gnix_vc *vc); - -static int __gnix_vc_work_schedule(struct gnix_vc *vc); -static int _gnix_vc_sched_new_conn(struct gnix_vc *vc); - -/******************************************************************************* - * Helper functions - ******************************************************************************/ - -/** - * Set key to the given gnix_addr. - * - * NOTE: If struct gnix_address is ever bit packed or packed by - * the compiler this assignment may not set key to the correct - * bytes. 
- */ -static inline void __gnix_vc_set_ht_key(void *gnix_addr, - gnix_ht_key_t *key) -{ - *key = *((gnix_ht_key_t *)gnix_addr); -} - -static struct gnix_vc *_gnix_ep_vc_lookup(struct gnix_fid_ep *ep, uint64_t key) -{ - struct gnix_vc *vc = NULL; - int ret; - int i; - - assert(ep->av); - - - for (i = 0; i < GNIX_ADDR_CACHE_SIZE; i++) - { - if (ep->addr_cache[i].addr == key && ep->addr_cache[i].vc != NULL) - return ep->addr_cache[i].vc; - } - - if (ep->av->type == FI_AV_TABLE) { - ret = _gnix_vec_at(ep->vc_table, (void **)&vc, key); - if (ret != FI_SUCCESS) { - vc = NULL; - } - } else { - vc = (struct gnix_vc *)_gnix_ht_lookup(ep->vc_ht, key); - } - - if (vc) { - ep->addr_cache[ep->last_cached].addr = key; - ep->addr_cache[ep->last_cached].vc = vc; - ep->last_cached = (ep->last_cached + 1) % 5; - } - - return vc; -} - -static int _gnix_ep_vc_store(struct gnix_fid_ep *ep, struct gnix_vc *vc, - uint64_t key) -{ - int ret; - - assert(ep->av); - - if (ep->av->type == FI_AV_TABLE) { - ret = _gnix_vec_insert_at(ep->vc_table, (void *)vc, key); - } else { - ret = _gnix_ht_insert(ep->vc_ht, key, vc); - } - - return ret; -} - -static int __gnix_vc_gnix_addr_equal(struct dlist_entry *item, const void *arg) -{ - struct gnix_vc *vc = dlist_entry(item, struct gnix_vc, list); - - return GNIX_ADDR_EQUAL(vc->peer_addr, *(struct gnix_address *)arg); -} - -/* Find an unmapped VC that matches 'dest_addr' and map it into the EP's VC - * look up table. - * - * Note: EP must be locked. */ -static struct gnix_vc *__gnix_vc_lookup_unmapped(struct gnix_fid_ep *ep, - fi_addr_t dest_addr) -{ - struct gnix_av_addr_entry av_entry; - struct dlist_entry *entry; - struct gnix_vc *vc; - int ret; - - /* Determine if the fi_addr now exists in the AV. 
*/ - ret = _gnix_av_lookup(ep->av, dest_addr, &av_entry); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_av_lookup for addr 0x%lx returned %s\n", - dest_addr, fi_strerror(-ret)); - return NULL; - } - - /* Find a pre-existing, unmapped VC that matches the gnix_address - * mapped by dest_addr. */ - entry = dlist_remove_first_match(&ep->unmapped_vcs, - __gnix_vc_gnix_addr_equal, - (void *)&av_entry.gnix_addr); - if (entry) { - /* Found a matching, unmapped VC. Map dest_addr to the VC in - * the EP's VC look up table. */ - vc = dlist_entry(entry, struct gnix_vc, list); - GNIX_INFO(FI_LOG_EP_CTRL, - "Found unmapped VC: %p gnix_addr: 0x%lx fi_addr: 0x%lx\n", - vc, vc->peer_addr, vc->peer_fi_addr); - - ret = _gnix_ep_vc_store(ep, vc, dest_addr); - if (OFI_UNLIKELY(ret != FI_SUCCESS)) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_ep_vc_store returned %s\n", - fi_strerror(-ret)); - dlist_insert_tail(&vc->list, &ep->unmapped_vcs); - return NULL; - } - - return vc; - } - - return NULL; -} - -/** - * Look up the vc by fi_addr_t, if it's found just return it, - * otherwise allocate a new vc, insert it into the hashtable, - * and vector for FI_AV_TABLE AV type, and start connection setup. - * - * assumptions: ep is non-null; - * dest_addr is valid; - * vc_ptr is non-null. - * - * Note: EP must be locked. - */ -static int __gnix_vc_get_vc_by_fi_addr(struct gnix_fid_ep *ep, fi_addr_t dest_addr, - struct gnix_vc **vc_ptr) -{ - struct gnix_fid_av *av; - int ret = FI_SUCCESS; - struct gnix_av_addr_entry av_entry; - struct gnix_vc *vc; - - GNIX_DBG_TRACE(FI_LOG_EP_CTRL, "\n"); - - GNIX_DEBUG(FI_LOG_EP_CTRL, - "ep->vc_table = %p, ep->vc_table->vector = %p\n", - ep->vc_table, ep->vc_table->vector); - - av = ep->av; - if (OFI_UNLIKELY(av == NULL)) { - GNIX_WARN(FI_LOG_EP_CTRL, "av field NULL for ep %p\n", ep); - return -FI_EINVAL; - } - - /* Use FI address to lookup in EP VC table. 
*/ - vc = _gnix_ep_vc_lookup(ep, dest_addr); - if (vc) { - *vc_ptr = vc; - return FI_SUCCESS; - } - - /* VC is not mapped yet. We can receive a connection request from a - * remote peer before the target EP has bound to an AV or before the - * remote peer has had it's address inserted into the target EP's AV. - * Those requests will result in a connection as usual, but the VC will - * not be mapped into an EP's AV until the EP attempts to send to the - * remote peer. Check the 'unmapped VC' list to see if such a VC - * exists and map it into the AV here. */ - vc = __gnix_vc_lookup_unmapped(ep, dest_addr); - if (vc) { - *vc_ptr = vc; - return FI_SUCCESS; - } - - /* No VC exists for the peer yet. Look up full AV entry for the - * destination address. */ - ret = _gnix_av_lookup(av, dest_addr, &av_entry); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_av_lookup for addr 0x%llx returned %s \n", - dest_addr, fi_strerror(-ret)); - goto err_w_lock; - } - - /* Allocate new VC with AV entry. */ - ret = _gnix_vc_alloc(ep, &av_entry, &vc); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_vc_alloc returned %s\n", - fi_strerror(-ret)); - goto err_w_lock; - } - - /* Map new VC through the EP connection table. */ - ret = _gnix_ep_vc_store(ep, vc, dest_addr); - if (OFI_UNLIKELY(ret != FI_SUCCESS)) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_ep_vc_store returned %s\n", - fi_strerror(-ret)); - goto err_w_lock; - } - - /* Initiate new VC connection. 
*/ - ret = _gnix_vc_connect(vc); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_vc_connect returned %s\n", - fi_strerror(-ret)); - goto err_w_lock; - } - - *vc_ptr = vc; - return ret; - -err_w_lock: - if (vc != NULL) - _gnix_vc_destroy(vc); - return ret; -} - -/******************************************************************************* - * connection request /response message pack/unpack functions - ******************************************************************************/ - -/* - * pack a connection request. Contents: - * - target_addr (the addr of the targeted EP for the conn req) - * - src_addr (the address of the EP originating the conn req) - * - src_vc_id (the vc id the mbox the originating EP allocated to - * build this connection) - * - src_vc_vaddr (virt. address of the vc struct allocated at the originating - * EP to build this connection) - * - src_smsg_attr (smsg attributes of the mbox allocated at the - * originating EP for this connection) - * - src_irq_cq_mhdl (GNI memory handle for irq cq for originating EP) - */ -static void __gnix_vc_pack_conn_req(char *sbuf, - struct gnix_address *target_addr, - struct gnix_address *src_addr, - int src_vc_id, - uint64_t src_vc_vaddr, - gni_smsg_attr_t *src_smsg_attr, - gni_mem_handle_t *src_irq_cq_mhdl, - uint64_t caps, - xpmem_segid_t my_segid, - uint8_t name_type, - uint8_t rx_ctx_cnt, - uint32_t key_offset) -{ - size_t __attribute__((unused)) len; - char *cptr = sbuf; - uint8_t rtype = GNIX_VC_CONN_REQ; - - /* - * sanity checks - */ - - assert(sbuf != NULL); - - len = sizeof(rtype) + - sizeof(struct gnix_address) * 2 + - sizeof(int) + - sizeof(uint64_t) * 2 + - sizeof(gni_smsg_attr_t) + - sizeof(gni_mem_handle_t) + - sizeof(xpmem_segid_t) + - sizeof(name_type) + - sizeof(rx_ctx_cnt) + - sizeof(key_offset); - - assert(len <= GNIX_CM_NIC_MAX_MSG_SIZE); - - memcpy(cptr, &rtype, sizeof(rtype)); - cptr += sizeof(rtype); - memcpy(cptr, target_addr, sizeof(struct gnix_address)); - cptr += 
sizeof(struct gnix_address); - memcpy(cptr, src_addr, sizeof(struct gnix_address)); - cptr += sizeof(struct gnix_address); - memcpy(cptr, &src_vc_id, sizeof(int)); - cptr += sizeof(int); - memcpy(cptr, &src_vc_vaddr, sizeof(uint64_t)); - cptr += sizeof(uint64_t); - memcpy(cptr, src_smsg_attr, sizeof(gni_smsg_attr_t)); - cptr += sizeof(gni_smsg_attr_t); - memcpy(cptr, src_irq_cq_mhdl, sizeof(gni_mem_handle_t)); - cptr += sizeof(gni_mem_handle_t); - memcpy(cptr, &caps, sizeof(uint64_t)); - cptr += sizeof(uint64_t); - memcpy(cptr, &my_segid, sizeof(xpmem_segid_t)); - cptr += sizeof(xpmem_segid_t); - memcpy(cptr, &name_type, sizeof(name_type)); - cptr += sizeof(name_type); - memcpy(cptr, &rx_ctx_cnt, sizeof(rx_ctx_cnt)); - cptr += sizeof(rx_ctx_cnt); - memcpy(cptr, &key_offset, sizeof(key_offset)); -} - -/* - * unpack a connection request message - */ -static void __gnix_vc_unpack_conn_req(char *rbuf, - struct gnix_address *target_addr, - struct gnix_address *src_addr, - int *src_vc_id, - uint64_t *src_vc_vaddr, - gni_smsg_attr_t *src_smsg_attr, - gni_mem_handle_t *src_irq_cq_mhndl, - uint64_t *caps, - xpmem_segid_t *peer_segid, - uint8_t *name_type, - uint8_t *rx_ctx_cnt, - uint32_t *key_offset) -{ - size_t __attribute__((unused)) len; - char *cptr = rbuf; - - /* - * sanity checks - */ - - assert(rbuf); - - cptr += sizeof(uint8_t); - memcpy(target_addr, cptr, sizeof(struct gnix_address)); - cptr += sizeof(struct gnix_address); - memcpy(src_addr, cptr, sizeof(struct gnix_address)); - cptr += sizeof(struct gnix_address); - memcpy(src_vc_id, cptr, sizeof(int)); - cptr += sizeof(int); - memcpy(src_vc_vaddr, cptr, sizeof(uint64_t)); - cptr += sizeof(uint64_t); - memcpy(src_smsg_attr, cptr, sizeof(gni_smsg_attr_t)); - cptr += sizeof(gni_smsg_attr_t); - memcpy(src_irq_cq_mhndl, cptr, sizeof(gni_mem_handle_t)); - cptr += sizeof(gni_mem_handle_t); - memcpy(caps, cptr, sizeof(uint64_t)); - cptr += sizeof(uint64_t); - memcpy(peer_segid, cptr, sizeof(xpmem_segid_t)); - cptr += 
sizeof(xpmem_segid_t); - memcpy(name_type, cptr, sizeof(*name_type)); - cptr += sizeof(*name_type); - memcpy(rx_ctx_cnt, cptr, sizeof(*rx_ctx_cnt)); - cptr += sizeof(*rx_ctx_cnt); - memcpy(key_offset, cptr, sizeof(*key_offset)); -} - -/* - * pack a connection response. Contents: - * - src_vc_vaddr (vaddr of the vc struct allocated at the originating - * EP to build this connection) - * - resp_vc_id (the vc id of the mbox the responding EP allocated to - * build this connection) - * - resp_smsg_attr (smsg attributes of the mbox allocated at the - * responding EP for this connection) - * - resp_irq_cq_mhndl (GNI memhndl for irq cq of responding EP) - */ - -static void __gnix_vc_pack_conn_resp(char *sbuf, - uint64_t src_vc_vaddr, - uint64_t resp_vc_vaddr, - int resp_vc_id, - gni_smsg_attr_t *resp_smsg_attr, - gni_mem_handle_t *resp_irq_cq_mhndl, - uint64_t caps, - xpmem_segid_t my_segid, - uint32_t key_offset) -{ - size_t __attribute__((unused)) len; - char *cptr = sbuf; - uint8_t rtype = GNIX_VC_CONN_RESP; - - /* - * sanity checks - */ - - assert(sbuf != NULL); - - len = sizeof(rtype) + - sizeof(uint64_t) * 3 + - sizeof(int) + - sizeof(gni_smsg_attr_t) + - sizeof(gni_mem_handle_t) + - sizeof(xpmem_segid_t) + - sizeof(uint32_t); - assert(len <= GNIX_CM_NIC_MAX_MSG_SIZE); - - memcpy(cptr, &rtype, sizeof(rtype)); - cptr += sizeof(rtype); - memcpy(cptr, &src_vc_vaddr, sizeof(uint64_t)); - cptr += sizeof(uint64_t); - memcpy(cptr, &resp_vc_vaddr, sizeof(uint64_t)); - cptr += sizeof(uint64_t); - memcpy(cptr, &resp_vc_id, sizeof(int)); - cptr += sizeof(int); - memcpy(cptr, resp_smsg_attr, sizeof(gni_smsg_attr_t)); - cptr += sizeof(gni_smsg_attr_t); - memcpy(cptr, resp_irq_cq_mhndl, sizeof(gni_mem_handle_t)); - cptr += sizeof(gni_mem_handle_t); - memcpy(cptr, &caps, sizeof(uint64_t)); - cptr += sizeof(uint64_t); - memcpy(cptr, &my_segid, sizeof(xpmem_segid_t)); - cptr += sizeof(xpmem_segid_t); - memcpy(cptr, &key_offset, sizeof(uint32_t)); -} - -/* - * unpack a connection 
request response - */ -static void __gnix_vc_unpack_resp(char *rbuf, - uint64_t *src_vc_vaddr, - uint64_t *resp_vc_vaddr, - int *resp_vc_id, - gni_smsg_attr_t *resp_smsg_attr, - gni_mem_handle_t *resp_irq_cq_mhndl, - uint64_t *caps, - xpmem_segid_t *peer_segid, - uint32_t *key_offset) -{ - char *cptr = rbuf; - - cptr += sizeof(uint8_t); - - memcpy(src_vc_vaddr, cptr, sizeof(uint64_t)); - cptr += sizeof(uint64_t); - memcpy(resp_vc_vaddr, cptr, sizeof(uint64_t)); - cptr += sizeof(uint64_t); - memcpy(resp_vc_id, cptr, sizeof(int)); - cptr += sizeof(int); - memcpy(resp_smsg_attr, cptr, sizeof(gni_smsg_attr_t)); - cptr += sizeof(gni_smsg_attr_t); - memcpy(resp_irq_cq_mhndl, cptr, sizeof(gni_mem_handle_t)); - cptr += sizeof(gni_mem_handle_t); - memcpy(caps, cptr, sizeof(uint64_t)); - cptr += sizeof(uint64_t); - memcpy(peer_segid, cptr, sizeof(xpmem_segid_t)); - cptr += sizeof(xpmem_segid_t); - memcpy(key_offset, cptr, sizeof(uint32_t)); -} - -static void __gnix_vc_get_msg_type(char *rbuf, - uint8_t *rtype) -{ - assert(rtype); - memcpy(rtype, rbuf, sizeof(uint8_t)); -} - -/* - * helper function to initialize an SMSG connection, plus - * a mem handle to use for delivering IRQs to peer when needed - */ -int _gnix_vc_smsg_init(struct gnix_vc *vc, int peer_id, - gni_smsg_attr_t *peer_smsg_attr, - gni_mem_handle_t *peer_irq_mem_hndl) -{ - int ret = FI_SUCCESS; - struct gnix_fid_ep *ep; - struct gnix_fid_domain *dom; - struct gnix_mbox *mbox = NULL; - gni_smsg_attr_t local_smsg_attr; - gni_return_t __attribute__((unused)) status; - ssize_t __attribute__((unused)) len; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - assert(vc); - - ep = vc->ep; - assert(ep); - - dom = ep->domain; - if (dom == NULL) - return -FI_EINVAL; - - mbox = vc->smsg_mbox; - assert (mbox); - - local_smsg_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT; - local_smsg_attr.msg_buffer = mbox->base; - local_smsg_attr.buff_size = vc->ep->nic->mem_per_mbox; - local_smsg_attr.mem_hndl = *mbox->memory_handle; - 
local_smsg_attr.mbox_offset = (uint64_t)mbox->offset; - local_smsg_attr.mbox_maxcredit = dom->params.mbox_maxcredit; - local_smsg_attr.msg_maxsize = dom->params.mbox_msg_maxsize; - - /* - * now build the SMSG connection - */ - - COND_ACQUIRE(ep->nic->requires_lock, &ep->nic->lock); - - status = GNI_EpCreate(ep->nic->gni_nic_hndl, - ep->nic->tx_cq, - &vc->gni_ep); - if (status != GNI_RC_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_EpCreate returned %s\n", gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - goto err; - } - - status = GNI_EpBind(vc->gni_ep, - vc->peer_addr.device_addr, - vc->peer_addr.cdm_id); - if (status != GNI_RC_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_EpBind returned %s\n", gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - goto err1; - } - - status = GNI_SmsgInit(vc->gni_ep, - &local_smsg_attr, - peer_smsg_attr); - if (status != GNI_RC_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_SmsgInit returned %s\n", gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - goto err1; - } - - status = GNI_EpSetEventData(vc->gni_ep, - vc->vc_id, - peer_id); - if (status != GNI_RC_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_EpSetEventData returned %s\n", - gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - goto err1; - } - - if (peer_irq_mem_hndl != NULL) - vc->peer_irq_mem_hndl = *peer_irq_mem_hndl; - - COND_RELEASE(ep->nic->requires_lock, &ep->nic->lock); - return ret; -err1: - GNI_EpDestroy(vc->gni_ep); -err: - COND_RELEASE(ep->nic->requires_lock, &ep->nic->lock); - return ret; -} - -static int __gnix_vc_connect_to_self(struct gnix_vc *vc) -{ - int ret = FI_SUCCESS; - struct gnix_fid_domain *dom = NULL; - struct gnix_fid_ep *ep = NULL; - struct gnix_cm_nic *cm_nic = NULL; - struct gnix_mbox *mbox = NULL; - gni_smsg_attr_t smsg_mbox_attr; - xpmem_apid_t peer_apid; - xpmem_segid_t my_segid; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - ep = vc->ep; - if (ep == NULL) - return -FI_EINVAL; - - cm_nic = ep->cm_nic; - if (cm_nic == 
NULL) - return -FI_EINVAL; - - dom = ep->domain; - if (dom == NULL) - return -FI_EINVAL; - - assert(vc->conn_state == GNIX_VC_CONN_NONE); - vc->conn_state = GNIX_VC_CONNECTING; - - assert(vc->smsg_mbox == NULL); - - ret = _gnix_mbox_alloc(vc->ep->nic->mbox_hndl, &mbox); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_mbox_alloc returned %s\n", - fi_strerror(-ret)); - return -FI_ENOSPC; - } - vc->smsg_mbox = mbox; - - smsg_mbox_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT; - smsg_mbox_attr.msg_buffer = mbox->base; - smsg_mbox_attr.buff_size = vc->ep->nic->mem_per_mbox; - smsg_mbox_attr.mem_hndl = *mbox->memory_handle; - smsg_mbox_attr.mbox_offset = (uint64_t)mbox->offset; - smsg_mbox_attr.mbox_maxcredit = dom->params.mbox_maxcredit; - smsg_mbox_attr.msg_maxsize = dom->params.mbox_msg_maxsize; - - ret = _gnix_vc_smsg_init(vc, vc->vc_id, &smsg_mbox_attr, NULL); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_vc_smsg_init returned %s\n", - fi_strerror(-ret)); - goto err_mbox_init; - } - - /* TODO: use special send-to-self mechanism to avoid overhead of XPMEM - * when just sending a message to oneself. 
*/ - ret = _gnix_xpmem_get_my_segid(ep->xpmem_hndl, &my_segid); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gni_xpmem_get_my_segid returned %s\n", - fi_strerror(-ret)); - } - - ret = _gnix_xpmem_get_apid(ep->xpmem_hndl, my_segid, &peer_apid); - if (ret == FI_SUCCESS) { - vc->modes |= GNIX_VC_MODE_XPMEM; - vc->peer_apid = peer_apid; - } else { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gni_xpmem_get_apiid returned %s\n", - fi_strerror(-ret)); - } - - vc->peer_id = vc->vc_id; - vc->peer_irq_mem_hndl = ep->nic->irq_mem_hndl; - vc->peer_caps = ep->caps; - vc->peer_key_offset = ep->auth_key->key_offset; - vc->conn_state = GNIX_VC_CONNECTED; - - ret = _gnix_vc_sched_new_conn(vc); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_vc_sched_new_conn returned %s\n", - fi_strerror(-ret)); - - GNIX_DEBUG(FI_LOG_EP_CTRL, "moving vc %p state to connected\n", vc); - return ret; - -err_mbox_init: - _gnix_mbox_free(vc->smsg_mbox); - vc->smsg_mbox = NULL; - - return ret; -} - -/******************************************************************************* - * functions for handling incoming connection request/response messages - ******************************************************************************/ - -static int __gnix_vc_hndl_conn_resp(struct gnix_cm_nic *cm_nic, - char *msg_buffer, - struct gnix_address src_cm_nic_addr) -{ - int ret = FI_SUCCESS; - int peer_id; - struct gnix_vc *vc = NULL; - uint64_t peer_vc_addr; - struct gnix_fid_ep *ep; - gni_smsg_attr_t peer_smsg_attr; - gni_mem_handle_t tmp_mem_hndl; - uint64_t peer_caps; - xpmem_segid_t peer_segid; - xpmem_apid_t peer_apid; - uint32_t peer_key_offset; - bool accessible; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - /* - * unpack the message - */ - - __gnix_vc_unpack_resp(msg_buffer, - (uint64_t *)&vc, - &peer_vc_addr, - &peer_id, - &peer_smsg_attr, - &tmp_mem_hndl, - &peer_caps, - &peer_segid, - &peer_key_offset); - - GNIX_DEBUG(FI_LOG_EP_CTRL, - "resp rx: (From Aries 0x%x Id %d src vc %p peer vc addr 
0x%lx)\n", - src_cm_nic_addr.device_addr, - src_cm_nic_addr.cdm_id, - vc, - peer_vc_addr); - - ep = vc->ep; - assert(ep != NULL); - - COND_ACQUIRE(ep->requires_lock, &ep->vc_lock); - - /* - * at this point vc should be in connecting state - */ - if (vc->conn_state != GNIX_VC_CONNECTING) { - GNIX_WARN(FI_LOG_EP_CTRL, - "vc %p not in connecting state, rather %d\n", - vc, vc->conn_state); - ret = -FI_EINVAL; - goto err; - } - - /* - * build the SMSG connection - */ - - ret = _gnix_vc_smsg_init(vc, peer_id, &peer_smsg_attr, - &tmp_mem_hndl); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_vc_smsg_init returned %s\n", - fi_strerror(-ret)); - goto err; - } - - /* - * see if we can do xpmem with this EP - */ - - ret = _gnix_xpmem_accessible(ep, src_cm_nic_addr, &accessible); - if ((ret == FI_SUCCESS) && (accessible == true)) { - ret = _gnix_xpmem_get_apid(ep->xpmem_hndl, - peer_segid, - &peer_apid); - if (ret == FI_SUCCESS) { - vc->modes |= GNIX_VC_MODE_XPMEM; - vc->peer_apid = peer_apid; - } - } - - /* - * transition the VC to connected - * put in to the nic's work queue for - * further processing - */ - - vc->peer_caps = peer_caps; - vc->peer_key_offset = peer_key_offset; - vc->peer_id = peer_id; - vc->conn_state = GNIX_VC_CONNECTED; - GNIX_DEBUG(FI_LOG_EP_CTRL, - " moving vc %p to state connected\n",vc); - - ret = _gnix_vc_sched_new_conn(vc); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_vc_sched_new_conn returned %s\n", - fi_strerror(-ret)); - - COND_RELEASE(ep->requires_lock, &ep->vc_lock); - - return ret; -err: - vc->conn_state = GNIX_VC_CONN_ERROR; - - COND_RELEASE(ep->requires_lock, &ep->vc_lock); - return ret; -} - -static int __gnix_vc_hndl_conn_req(struct gnix_cm_nic *cm_nic, - char *msg_buffer, - struct gnix_address src_cm_nic_addr) -{ - int ret = FI_SUCCESS; - gni_return_t __attribute__((unused)) status; - struct gnix_fid_ep *ep = NULL; - gnix_ht_key_t key; - struct gnix_av_addr_entry entry; - struct gnix_address src_addr, 
target_addr; - struct gnix_vc *vc = NULL; - struct gnix_work_req *work_req; - int src_vc_id; - gni_smsg_attr_t src_smsg_attr; - uint64_t src_vc_ptr; - uint64_t peer_caps; - struct wq_hndl_conn_req *data = NULL; - gni_mem_handle_t tmp_mem_hndl; - int src_mapped = 0; - fi_addr_t fi_addr; - xpmem_segid_t peer_segid; - xpmem_apid_t peer_apid; - uint8_t name_type, rx_ctx_cnt; - bool accessible; - ssize_t __attribute__((unused)) len; - struct gnix_ep_name *error_data; - uint32_t key_offset; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - /* - * unpack the message - */ - - __gnix_vc_unpack_conn_req(msg_buffer, - &target_addr, - &src_addr, - &src_vc_id, - &src_vc_ptr, - &src_smsg_attr, - &tmp_mem_hndl, - &peer_caps, - &peer_segid, - &name_type, - &rx_ctx_cnt, - &key_offset); - - GNIX_DEBUG(FI_LOG_EP_CTRL, - "conn req rx: (From Aries addr 0x%x Id %d to Aries 0x%x Id %d src vc 0x%lx )\n", - src_addr.device_addr, - src_addr.cdm_id, - target_addr.device_addr, - target_addr.cdm_id, - src_vc_ptr); - - /* - * lookup the ep from the addr_to_ep_ht using the target_addr - * in the datagram - */ - - __gnix_vc_set_ht_key(&target_addr, &key); - - ep = (struct gnix_fid_ep *)_gnix_ht_lookup(cm_nic->addr_to_ep_ht, - key); - if (ep == NULL) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_ht_lookup addr_to_ep failed\n"); - return -FI_ENOENT; - } - - /* - * look to see if there is a VC already for the - * address of the connecting EP. - */ - - COND_ACQUIRE(ep->requires_lock, &ep->vc_lock); - - /* If we already have an AV bound, see if sender's address is already - * mapped. */ - if (ep->av) { - ret = _gnix_av_reverse_lookup(ep->av, src_addr, &fi_addr); - if (ret == FI_SUCCESS) { - src_mapped = 1; - vc = _gnix_ep_vc_lookup(ep, fi_addr); - } - } - - /* - * if there is no corresponding vc in the hash, - * or there is an entry and it's not in connecting state - * go down the conn req ack route. 
- */ - if ((vc == NULL) || - (vc->conn_state == GNIX_VC_CONN_NONE)) { - if (vc == NULL) { - entry.gnix_addr = src_addr; - entry.cm_nic_cdm_id = src_cm_nic_addr.cdm_id; - ret = _gnix_vc_alloc(ep, - &entry, - &vc); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_vc_alloc returned %s\n", - fi_strerror(-ret)); - goto err; - } - - vc->conn_state = GNIX_VC_CONNECTING; - vc->peer_key_offset = key_offset; - - if (src_mapped) { - /* We have an AV which maps the incoming - * address. Store the new VC in our VC lookup - * table. */ - ret = _gnix_ep_vc_store(ep, vc, fi_addr); - if (OFI_UNLIKELY(ret != FI_SUCCESS)) { - _gnix_vc_destroy(vc); - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_ep_vc_store returned %s\n", - fi_strerror(-ret)); - goto err; - } - } else { - /* We lack an AV and/or the entry to map the - * incoming address. Keep VC in special table - * until it is mapped for a TX operation. */ - GNIX_INFO(FI_LOG_EP_CTRL, - "Received conn. request from unmapped peer EP, vc: %p addr: 0x%lx\n", - vc, src_addr); - - dlist_insert_tail(&vc->list, &ep->unmapped_vcs); - - /* - * see issue 4521 for the error_data size allocated - */ - if (vc->ep->caps & FI_SOURCE) { - error_data = - calloc(1, GNIX_CQ_MAX_ERR_DATA_SIZE); - if (error_data == NULL) { - ret = -FI_ENOMEM; - goto err; - } - vc->gnix_ep_name = (void *) error_data; - - error_data->gnix_addr = src_addr; - error_data->name_type = name_type; - - error_data->cm_nic_cdm_id = - cm_nic->my_name.cm_nic_cdm_id; - error_data->cookie = - cm_nic->my_name.cookie; - - error_data->rx_ctx_cnt = rx_ctx_cnt; - } - } - } else { - vc->conn_state = GNIX_VC_CONNECTING; - } - - vc->peer_caps = peer_caps; - vc->peer_key_offset = key_offset; - /* - * prepare a work request to - * initiate an request response - */ - - work_req = calloc(1, sizeof(*work_req)); - if (work_req == NULL) { - ret = -FI_ENOMEM; - goto err; - } - - data = calloc(1, sizeof(struct wq_hndl_conn_req)); - if (data == NULL) { - ret = -FI_ENOMEM; - goto err; - } - 
memcpy(&data->src_smsg_attr, - &src_smsg_attr, - sizeof(src_smsg_attr)); - data->vc = vc; - data->src_vc_id = src_vc_id; - data->src_vc_ptr = src_vc_ptr; - data->irq_mem_hndl = tmp_mem_hndl; - data->peer_segid = peer_segid; - - work_req->progress_fn = __gnix_vc_conn_ack_prog_fn; - work_req->data = data; - work_req->completer_fn = __gnix_vc_conn_ack_comp_fn; - work_req->completer_data = data; - - /* - * add the work request to the tail of the - * cm_nic's work queue, progress the cm_nic. - */ - - ofi_spin_lock(&cm_nic->wq_lock); - dlist_insert_before(&work_req->list, &cm_nic->cm_nic_wq); - ofi_spin_unlock(&cm_nic->wq_lock); - } else { - - /* - * we can only be in connecting state if we - * reach here. We have all the informatinon, - * and the other side will get the information - * at some point, so go ahead and build SMSG connection. - */ - if (vc->conn_state != GNIX_VC_CONNECTING) { - GNIX_WARN(FI_LOG_EP_CTRL, - "vc %p not in connecting state nor in cm wq\n", - vc, vc->conn_state); - ret = -FI_EINVAL; - goto err; - } - - ret = _gnix_vc_smsg_init(vc, src_vc_id, - &src_smsg_attr, - &tmp_mem_hndl); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_vc_smsg_init returned %s\n", - fi_strerror(-ret)); - goto err; - } - - ret = _gnix_xpmem_accessible(ep, src_cm_nic_addr, &accessible); - if ((ret == FI_SUCCESS) && (accessible == true)) { - ret = _gnix_xpmem_get_apid(ep->xpmem_hndl, - peer_segid, - &peer_apid); - if (ret == FI_SUCCESS) { - vc->modes |= GNIX_VC_MODE_XPMEM; - vc->peer_apid = peer_apid; - } - } - - vc->peer_caps = peer_caps; - vc->peer_key_offset = key_offset; - vc->peer_id = src_vc_id; - vc->conn_state = GNIX_VC_CONNECTED; - GNIX_DEBUG(FI_LOG_EP_CTRL, "moving vc %p state to connected\n", - vc); - - ret = _gnix_vc_sched_new_conn(vc); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_vc_sched_new_conn returned %s\n", - fi_strerror(-ret)); - } - -err: - COND_RELEASE(ep->requires_lock, &ep->vc_lock); - - return ret; -} - -/* - * callback 
function to process incoming messages - */ -static int __gnix_vc_recv_fn(struct gnix_cm_nic *cm_nic, - char *msg_buffer, - struct gnix_address src_cm_nic_addr) -{ - int ret = FI_SUCCESS; - uint8_t mtype; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - __gnix_vc_get_msg_type(msg_buffer, &mtype); - - GNIX_DEBUG(FI_LOG_EP_CTRL, "got a message of type %d\n", mtype); - - switch (mtype) { - case GNIX_VC_CONN_REQ: - ret = __gnix_vc_hndl_conn_req(cm_nic, - msg_buffer, - src_cm_nic_addr); - break; - case GNIX_VC_CONN_RESP: - ret = __gnix_vc_hndl_conn_resp(cm_nic, - msg_buffer, - src_cm_nic_addr); - break; - default: - GNIX_FATAL(FI_LOG_EP_CTRL, "Invalid message type: %d\n", - mtype); - } - - return ret; -} - -/* - * progress function for progressing a connection - * ACK. - */ - -static int __gnix_vc_conn_ack_prog_fn(void *data, int *complete_ptr) -{ - int ret = FI_SUCCESS; - int complete = 0; - struct wq_hndl_conn_req *work_req_data; - struct gnix_vc *vc; - struct gnix_mbox *mbox = NULL; - gni_smsg_attr_t smsg_mbox_attr; - struct gnix_fid_ep *ep = NULL; - struct gnix_fid_domain *dom = NULL; - struct gnix_cm_nic *cm_nic = NULL; - xpmem_segid_t my_segid; - char sbuf[GNIX_CM_NIC_MAX_MSG_SIZE] = {0}; - xpmem_apid_t peer_apid; - bool accessible; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - - work_req_data = (struct wq_hndl_conn_req *)data; - - vc = work_req_data->vc; - if (vc == NULL) - return -FI_EINVAL; - - ep = vc->ep; - if (ep == NULL) - return -FI_EINVAL; - - dom = ep->domain; - if (dom == NULL) - return -FI_EINVAL; - - cm_nic = ep->cm_nic; - if (cm_nic == NULL) - return -FI_EINVAL; - - COND_ACQUIRE(ep->requires_lock, &ep->vc_lock); - - /* - * we may have already been moved to connected or - * the datagram from an earlier conn request for this - * vc was posted to GNI datagram state machine. The - * connection will be completed in the __gnix_vc_hndl_conn_resp - * datagram callback in the latter case. 
- */ - if ((vc->conn_state == GNIX_VC_CONNECTED) || - (vc->modes & GNIX_VC_MODE_DG_POSTED)) { - complete = 1; - goto exit; - } - - /* - * first see if we still need a mailbox - */ - - if (vc->smsg_mbox == NULL) { - ret = _gnix_mbox_alloc(ep->nic->mbox_hndl, - &mbox); - if (ret == FI_SUCCESS) - vc->smsg_mbox = mbox; - else - goto exit; - } - - mbox = vc->smsg_mbox; - - /* - * prep the smsg_mbox_attr - */ - - smsg_mbox_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT; - smsg_mbox_attr.msg_buffer = mbox->base; - smsg_mbox_attr.buff_size = ep->nic->mem_per_mbox; - smsg_mbox_attr.mem_hndl = *mbox->memory_handle; - smsg_mbox_attr.mbox_offset = (uint64_t)mbox->offset; - smsg_mbox_attr.mbox_maxcredit = dom->params.mbox_maxcredit; - smsg_mbox_attr.msg_maxsize = dom->params.mbox_msg_maxsize; - - /* - * serialize the resp message in the buffer - */ - - ret = _gnix_xpmem_get_my_segid(ep->xpmem_hndl, - &my_segid); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, "_gni_xpmem_get_my_segid returned %s\n", - fi_strerror(-ret)); - } - - __gnix_vc_pack_conn_resp(sbuf, - work_req_data->src_vc_ptr, - (uint64_t)vc, - vc->vc_id, - &smsg_mbox_attr, - &ep->nic->irq_mem_hndl, - ep->caps, - my_segid, - ep->auth_key->key_offset); - - /* - * try to send the message, if it succeeds, - * initialize mailbox and move vc to connected - * state. 
- */ - - ret = _gnix_cm_nic_send(cm_nic, - sbuf, - GNIX_CM_NIC_MAX_MSG_SIZE, - vc->peer_cm_nic_addr); - if (ret == FI_SUCCESS) { - ret = _gnix_vc_smsg_init(vc, - work_req_data->src_vc_id, - &work_req_data->src_smsg_attr, - &work_req_data->irq_mem_hndl); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_vc_smsg_init returned %s\n", - fi_strerror(-ret)); - goto exit; - } - - /* - * TODO: xpmem setup here - */ - - ret = _gnix_xpmem_accessible(ep, vc->peer_cm_nic_addr, - &accessible); - if ((ret == FI_SUCCESS) && (accessible == true)) { - ret = _gnix_xpmem_get_apid(ep->xpmem_hndl, - work_req_data->peer_segid, - &peer_apid); - if (ret == FI_SUCCESS) { - vc->modes |= GNIX_VC_MODE_XPMEM; - vc->peer_apid = peer_apid; - } - } - - complete = 1; - vc->conn_state = GNIX_VC_CONNECTED; - vc->peer_id = work_req_data->src_vc_id; - GNIX_DEBUG(FI_LOG_EP_CTRL, - "moving vc %p to connected\n",vc); - vc->modes |= GNIX_VC_MODE_DG_POSTED; - - ret = _gnix_vc_sched_new_conn(vc); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_vc_sched_new_conn returned %s\n", - fi_strerror(-ret)); - } else if (ret == -FI_EAGAIN) { - ret = FI_SUCCESS; - } else { - GNIX_FATAL(FI_LOG_EP_CTRL, "_gnix_cm_nic_send returned %s\n", - fi_strerror(-ret)); - } - -exit: - COND_RELEASE(ep->requires_lock, &ep->vc_lock); - - *complete_ptr = complete; - return ret; -} - -static int __gnix_vc_conn_req_prog_fn(void *data, int *complete_ptr) -{ - int ret = FI_SUCCESS; - int complete = 0; - struct gnix_vc *vc = (struct gnix_vc *)data; - struct gnix_mbox *mbox = NULL; - gni_smsg_attr_t smsg_mbox_attr; - struct gnix_fid_ep *ep = NULL; - struct gnix_fid_domain *dom = NULL; - struct gnix_cm_nic *cm_nic = NULL; - xpmem_segid_t my_segid; - char sbuf[GNIX_CM_NIC_MAX_MSG_SIZE] = {0}; - struct gnix_auth_key *auth_key; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - ep = vc->ep; - if (ep == NULL) - return -FI_EINVAL; - - dom = ep->domain; - if (dom == NULL) - return -FI_EINVAL; - - cm_nic = ep->cm_nic; - if 
(cm_nic == NULL) - return -FI_EINVAL; - - auth_key = ep->auth_key; - if (auth_key == NULL) - return -FI_EINVAL; - - assert(auth_key->enabled); - - COND_ACQUIRE(ep->requires_lock, &ep->vc_lock); - - if ((vc->conn_state == GNIX_VC_CONNECTING) || - (vc->conn_state == GNIX_VC_CONNECTED)) { - complete = 1; - goto err; - } - - /* - * first see if we still need a mailbox - */ - - if (vc->smsg_mbox == NULL) { - ret = _gnix_mbox_alloc(vc->ep->nic->mbox_hndl, - &mbox); - if (ret == FI_SUCCESS) - vc->smsg_mbox = mbox; - else - goto err; - } - - mbox = vc->smsg_mbox; - - /* - * prep the smsg_mbox_attr - */ - - smsg_mbox_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT; - smsg_mbox_attr.msg_buffer = mbox->base; - smsg_mbox_attr.buff_size = vc->ep->nic->mem_per_mbox; - smsg_mbox_attr.mem_hndl = *mbox->memory_handle; - smsg_mbox_attr.mbox_offset = (uint64_t)mbox->offset; - smsg_mbox_attr.mbox_maxcredit = dom->params.mbox_maxcredit; - smsg_mbox_attr.msg_maxsize = dom->params.mbox_msg_maxsize; - - /* - * serialize the message in the buffer - */ - - GNIX_DEBUG(FI_LOG_EP_CTRL, - "conn req tx: (From Aries addr 0x%x Id %d to Aries 0x%x Id %d CM NIC Id %d vc %p)\n", - ep->src_addr.gnix_addr.device_addr, - ep->src_addr.gnix_addr.cdm_id, - vc->peer_addr.device_addr, - vc->peer_addr.cdm_id, - vc->peer_cm_nic_addr.cdm_id, - vc); - - ret = _gnix_xpmem_get_my_segid(ep->xpmem_hndl, - &my_segid); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_xpmem_get_my_segid returned %s\n", - fi_strerror(-ret)); - } - - __gnix_vc_pack_conn_req(sbuf, - &vc->peer_addr, - &ep->src_addr.gnix_addr, - vc->vc_id, - (uint64_t)vc, - &smsg_mbox_attr, - &ep->nic->irq_mem_hndl, - ep->caps, - my_segid, - ep->src_addr.name_type, - ep->src_addr.rx_ctx_cnt, - auth_key->key_offset); - - /* - * try to send the message, if -FI_EAGAIN is returned, okay, - * just don't mark complete. 
- */ - - ret = _gnix_cm_nic_send(cm_nic, - sbuf, - GNIX_CM_NIC_MAX_MSG_SIZE, - vc->peer_cm_nic_addr); - if (ret == FI_SUCCESS) { - complete = 1; - vc->conn_state = GNIX_VC_CONNECTING; - GNIX_DEBUG(FI_LOG_EP_CTRL, "moving vc %p state to connecting\n", - vc); - vc->modes |= GNIX_VC_MODE_DG_POSTED; - } else if (ret == -FI_EAGAIN) { - ret = FI_SUCCESS; - } else { - GNIX_FATAL(FI_LOG_EP_CTRL, "_gnix_cm_nic_send returned %s\n", - fi_strerror(-ret)); - } - -err: - COND_RELEASE(ep->requires_lock, &ep->vc_lock); - *complete_ptr = complete; - return ret; -} - -/* - * conn ack completer function for work queue element, - * free the previously allocated wq_hndl_conn_req - * data struct - */ -static int __gnix_vc_conn_ack_comp_fn(void *data) -{ - free(data); - return FI_SUCCESS; -} - -/* - * connect completer function for work queue element, - * sort of a NO-OP for now. - */ -static int __gnix_vc_conn_req_comp_fn(void *data) -{ - return FI_SUCCESS; -} - -/******************************************************************************* - * Internal API functions - ******************************************************************************/ -int _gnix_vc_alloc(struct gnix_fid_ep *ep_priv, - struct gnix_av_addr_entry *entry, struct gnix_vc **vc) - -{ - int ret = FI_SUCCESS; - int remote_id; - struct gnix_vc *vc_ptr = NULL; - struct gnix_nic *nic = NULL; - struct dlist_entry *de = NULL; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - nic = ep_priv->nic; - if (nic == NULL) - return -FI_EINVAL; - - /* - * allocate VC from domain's vc_freelist - */ - - ret = _gnix_fl_alloc(&de, &nic->vc_freelist); - while (ret == -FI_EAGAIN) - ret = _gnix_fl_alloc(&de, &nic->vc_freelist); - if (ret == FI_SUCCESS) { - vc_ptr = container_of(de, struct gnix_vc, fr_list); - } else - return ret; - - vc_ptr->conn_state = GNIX_VC_CONN_NONE; - if (entry) { - memcpy(&vc_ptr->peer_addr, - &entry->gnix_addr, - sizeof(struct gnix_address)); - vc_ptr->peer_cm_nic_addr.device_addr = - entry->gnix_addr.device_addr; - 
vc_ptr->peer_cm_nic_addr.cdm_id = - entry->cm_nic_cdm_id; - } else { - vc_ptr->peer_addr.device_addr = -1; - vc_ptr->peer_addr.cdm_id = -1; - vc_ptr->peer_cm_nic_addr.device_addr = -1; - vc_ptr->peer_cm_nic_addr.cdm_id = -1; - } - vc_ptr->ep = ep_priv; - - dlist_init(&vc_ptr->prog_list); - dlist_init(&vc_ptr->work_queue); - dlist_init(&vc_ptr->tx_queue); - - vc_ptr->peer_fi_addr = FI_ADDR_NOTAVAIL; - - dlist_init(&vc_ptr->list); - - ofi_atomic_initialize32(&vc_ptr->outstanding_tx_reqs, 0); - ret = _gnix_alloc_bitmap(&vc_ptr->flags, 1, NULL); - assert(!ret); - - /* - * we need an id for the vc to allow for quick lookup - * based on GNI_CQ_GET_INST_ID - */ - - ret = _gnix_nic_get_rem_id(nic, &remote_id, vc_ptr); - if (ret != FI_SUCCESS) - goto err; - vc_ptr->vc_id = remote_id; - vc_ptr->gnix_ep_name = NULL; - - *vc = vc_ptr; - - return ret; - -err: - if (vc_ptr) - free(vc_ptr); - return ret; -} - -static void __gnix_vc_cancel(struct gnix_vc *vc) -{ - struct gnix_nic *nic = vc->ep->nic; - - COND_ACQUIRE(nic->requires_lock, &nic->prog_vcs_lock); - if (!dlist_empty(&vc->prog_list)) - dlist_remove_init(&vc->prog_list); - COND_RELEASE(nic->requires_lock, &nic->prog_vcs_lock); -} - -/* Destroy an unconnected VC. More Support is needed to shutdown and destroy - * an active VC. */ -int _gnix_vc_destroy(struct gnix_vc *vc) -{ - int ret = FI_SUCCESS; - struct gnix_nic *nic = NULL; - gni_return_t status = GNI_RC_NOT_DONE; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (vc->ep == NULL) { - GNIX_WARN(FI_LOG_EP_CTRL, "ep null\n"); - return -FI_EINVAL; - } - - nic = vc->ep->nic; - if (nic == NULL) { - GNIX_WARN(FI_LOG_EP_CTRL, "ep nic null for vc %p\n", vc); - return -FI_EINVAL; - } - - /* - * move vc state to terminating - */ - - vc->conn_state = GNIX_VC_CONN_TERMINATING; - - /* - * try to unbind the gni_ep if non-NULL. 
- * If there are SMSG or PostFMA/RDMA outstanding - * wait here for them to complete - */ - - if (vc->gni_ep != NULL) { - while (status == GNI_RC_NOT_DONE) { - - COND_ACQUIRE(nic->requires_lock, &nic->lock); - status = GNI_EpUnbind(vc->gni_ep); - COND_RELEASE(nic->requires_lock, &nic->lock); - - if ((status != GNI_RC_NOT_DONE) && - (status != GNI_RC_SUCCESS)) { - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_EpUnBind returned %s\n", - gni_err_str[status]); - break; - } - - if (status == GNI_RC_NOT_DONE) - _gnix_nic_progress(nic); - } - COND_ACQUIRE(nic->requires_lock, &nic->lock); - status = GNI_EpDestroy(vc->gni_ep); - COND_RELEASE(nic->requires_lock, &nic->lock); - if (status != GNI_RC_SUCCESS) - GNIX_WARN(FI_LOG_EP_CTRL, - "GNI_EpDestroy returned %s\n", - gni_err_str[status]); - } - - /* - * if the vc is in a nic's work queue, remove it - */ - __gnix_vc_cancel(vc); - - /* - * We may eventually want to check the state of the VC, if we - * implement true VC shutdown. - - if ((vc->conn_state != GNIX_VC_CONN_NONE) - && (vc->conn_state != GNIX_VC_CONN_TERMINATED)) { - GNIX_WARN(FI_LOG_EP_CTRL, - "vc conn state %d\n", - vc->conn_state); - GNIX_WARN(FI_LOG_EP_CTRL, "vc conn state error\n"); - return -FI_EBUSY; - } - */ - - /* - * if send_q not empty, return -FI_EBUSY - * Note for FI_EP_MSG type eps, this behavior - * may not be correct for handling fi_shutdown. 
- */ - - if (!dlist_empty(&vc->tx_queue)) - GNIX_FATAL(FI_LOG_EP_CTRL, "VC TX queue not empty\n"); - - if (ofi_atomic_get32(&vc->outstanding_tx_reqs)) - GNIX_FATAL(FI_LOG_EP_CTRL, - "VC outstanding_tx_reqs out of sync: %d\n", - ofi_atomic_get32(&vc->outstanding_tx_reqs)); - - if (vc->smsg_mbox != NULL) { - ret = _gnix_mbox_free(vc->smsg_mbox); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_mbox_free returned %s\n", - fi_strerror(-ret)); - vc->smsg_mbox = NULL; - } - - ret = _gnix_nic_free_rem_id(nic, vc->vc_id); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_CTRL, - "__gnix_vc_free_id returned %s\n", - fi_strerror(-ret)); - - _gnix_free_bitmap(&vc->flags); - - if (vc->gnix_ep_name != NULL) { - free(vc->gnix_ep_name); - vc->gnix_ep_name = NULL; - } - - /* - * put VC back on the freelist - */ - - vc->conn_state = GNIX_VC_CONN_NONE; - _gnix_fl_free(&vc->fr_list, &nic->vc_freelist); - - return ret; -} - -int _gnix_vc_connect(struct gnix_vc *vc) -{ - int ret = FI_SUCCESS; - struct gnix_fid_ep *ep = NULL; - struct gnix_cm_nic *cm_nic = NULL; - struct gnix_work_req *work_req; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - /* - * can happen that we are already connecting, or - * are connected - */ - - if ((vc->conn_state == GNIX_VC_CONNECTING) || - (vc->conn_state == GNIX_VC_CONNECTED)) { - return FI_SUCCESS; - } - - ep = vc->ep; - if (ep == NULL) - return -FI_EINVAL; - - cm_nic = ep->cm_nic; - if (cm_nic == NULL) - return -FI_EINVAL; - - /* - * only endpoints of type FI_EP_RDM use this - * connection method - */ - if (!GNIX_EP_RDM_DGM(ep->type)) - return -FI_EINVAL; - - /* - * check if this EP is connecting to itself - */ - - if (GNIX_ADDR_EQUAL(ep->src_addr.gnix_addr, vc->peer_addr)) { - return __gnix_vc_connect_to_self(vc); - } - - /* - * allocate a work request and put it - * on the cm_nic work queue. 
- */ - - work_req = calloc(1, sizeof(*work_req)); - if (work_req == NULL) - return -FI_ENOMEM; - - work_req->progress_fn = __gnix_vc_conn_req_prog_fn; - work_req->data = vc; - work_req->completer_fn = __gnix_vc_conn_req_comp_fn; - work_req->completer_data = vc; - - /* - * add the work request to the tail of the - * cm_nic's work queue, progress the cm_nic. - */ - - ofi_spin_lock(&cm_nic->wq_lock); - dlist_insert_before(&work_req->list, &cm_nic->cm_nic_wq); - ofi_spin_unlock(&cm_nic->wq_lock); - - return ret; -} - -/****************************************************************************** - * - * VC RX progress - * - *****************************************************************************/ - -/* Process a VC's SMSG mailbox. - * - * Note: EP must be locked. */ -int _gnix_vc_dequeue_smsg(struct gnix_vc *vc) -{ - int ret = FI_SUCCESS; - struct gnix_nic *nic; - gni_return_t status; - void *msg_ptr; - uint8_t tag; - - GNIX_TRACE(FI_LOG_EP_DATA, "\n"); - - nic = vc->ep->nic; - assert(nic != NULL); - - do { - tag = GNI_SMSG_ANY_TAG; - status = GNI_SmsgGetNextWTag(vc->gni_ep, - &msg_ptr, - &tag); - - if (status == GNI_RC_SUCCESS) { - GNIX_DEBUG(FI_LOG_EP_DATA, "Found RX (%p)\n", vc); - ret = nic->smsg_callbacks[tag](vc, msg_ptr); - if (ret != FI_SUCCESS) { - /* Stalled, reschedule */ - break; - } - } else if (status == GNI_RC_NOT_DONE) { - /* No more work. */ - ret = FI_SUCCESS; - break; - } else { - GNIX_WARN(FI_LOG_EP_DATA, - "GNI_SmsgGetNextWTag returned %s\n", - gni_err_str[status]); - ret = gnixu_to_fi_errno(status); - break; - } - } while (1); - - return ret; -} - -/* Progress VC RXs. Reschedule VC if more there is more work. - * - * Note: EP must be locked. 
*/ -static int __gnix_vc_rx_progress(struct gnix_vc *vc) -{ - int ret; - - /* Process pending RXs */ - COND_ACQUIRE(vc->ep->nic->requires_lock, &vc->ep->nic->lock); - ret = _gnix_vc_dequeue_smsg(vc); - COND_RELEASE(vc->ep->nic->requires_lock, &vc->ep->nic->lock); - - if (ret != FI_SUCCESS) { - /* We didn't finish processing RXs. Low memory likely. - * Try again later. Return error to abort processing - * other VCs. */ - _gnix_vc_rx_schedule(vc); - return -FI_EAGAIN; - } - - /* Return success to continue processing other VCs */ - return FI_SUCCESS; -} - -/****************************************************************************** - * - * VC work progress - * - *****************************************************************************/ - -/* Schedule deferred request processing. Usually used in RX completers. - * - * Note: EP must be locked. */ -int _gnix_vc_queue_work_req(struct gnix_fab_req *req) -{ - struct gnix_vc *vc = req->vc; - - dlist_insert_tail(&req->dlist, &vc->work_queue); - __gnix_vc_work_schedule(vc); - - return FI_SUCCESS; -} - -/* Schedule deferred request processing. Used in TX completers where VC lock is - * not yet held. */ -int _gnix_vc_requeue_work_req(struct gnix_fab_req *req) -{ - int ret; - - COND_ACQUIRE(req->gnix_ep->requires_lock, &req->gnix_ep->vc_lock); - ret = _gnix_vc_queue_work_req(req); - COND_RELEASE(req->gnix_ep->requires_lock, &req->gnix_ep->vc_lock); - - return ret; -} - -/* Process deferred request work on the VC. - * - * Note: EP must be locked. */ -static int __gnix_vc_push_work_reqs(struct gnix_vc *vc) -{ - int ret, fi_rc = FI_SUCCESS; - struct gnix_fab_req *req; - - while (1) { - req = dlist_first_entry(&vc->work_queue, - struct gnix_fab_req, - dlist); - if (!req) - break; - - dlist_remove_init(&req->dlist); - - ret = req->work_fn(req); - if (ret != FI_SUCCESS) { - /* Re-schedule failed work. 
*/ - _gnix_vc_queue_work_req(req); - - /* FI_ENOSPC is reserved to indicate a lack of - * TXDs, which are shared by all VCs on the - * NIC. The other likely error is FI_EAGAIN - * due to a lack of SMSG credits. */ - if ((ret != -FI_ENOSPC) && - (ret != -FI_EAGAIN)) { - /* - * TODO: Report error (via CQ err?) - * Note: This error can't be reported here. - */ - GNIX_FATAL(FI_LOG_EP_DATA, - "Failed to push request %p: %s\n", - req, fi_strerror(-ret)); - } - - fi_rc = -FI_EAGAIN; - break; - } else { - GNIX_INFO(FI_LOG_EP_DATA, - "Request processed: %p\n", req); - } - } - - return fi_rc; -} - -/****************************************************************************** - * - * VC TX progress - * - *****************************************************************************/ - -/* Attempt to initiate a TX request. If the TX queue is blocked (due to low - * resources or a FI_FENCE request), schedule the request to be sent later. - * - * Note: EP must be locked. */ -int _gnix_vc_queue_tx_req(struct gnix_fab_req *req) -{ - int rc = FI_SUCCESS, queue_tx = 0; - struct gnix_vc *vc = req->vc; - struct gnix_fid_ep *ep = req->gnix_ep; - struct gnix_fab_req *more_req; - int connected; - struct slist_entry *sle; - - /* Check if there is an outstanding fi_more chain to initiate */ - if ((!(req->flags & FI_MORE)) && (!(slist_empty(&ep->more_write)) || - !(slist_empty(&ep->more_read)))) { - if (!slist_empty(&ep->more_write)) { - sle = ep->more_write.head; - more_req = container_of(sle, struct gnix_fab_req, - rma.sle); - GNIX_DEBUG(FI_LOG_EP_DATA, "FI_MORE: got fab_request " - "from more_write. Queuing Request\n"); - _gnix_vc_queue_tx_req(more_req); - slist_init(&ep->more_write); - } - if (!slist_empty(&ep->more_read)) { - sle = ep->more_read.head; - more_req = container_of(sle, struct gnix_fab_req, - rma.sle); - GNIX_DEBUG(FI_LOG_EP_DATA, "FI_MORE: got fab_request " - "from more_read. 
Queuing Request\n"); - _gnix_vc_queue_tx_req(more_req); - slist_init(&ep->more_read); - } - } - - if (req->flags & FI_TRIGGER) { - rc = _gnix_trigger_queue_req(req); - - /* FI_SUCCESS means the request was queued to wait for the - * trigger condition. */ - if (rc == FI_SUCCESS) - return FI_SUCCESS; - } - - connected = (vc->conn_state == GNIX_VC_CONNECTED); - - if ((req->flags & FI_FENCE) && ofi_atomic_get32(&vc->outstanding_tx_reqs)) { - /* Fence request must be queued until all outstanding TX - * requests are completed. Subsequent requests will be queued - * due to non-empty tx_queue. */ - queue_tx = 1; - GNIX_DEBUG(FI_LOG_EP_DATA, - "Queued FI_FENCE request (%p) on VC\n", - req); - } else if (connected && dlist_empty(&vc->tx_queue)) { - ofi_atomic_inc32(&vc->outstanding_tx_reqs); - - /* try to initiate request */ - rc = req->work_fn(req); - if (rc == FI_SUCCESS) { - GNIX_DEBUG(FI_LOG_EP_DATA, - "TX request processed: %p (OTX: %d)\n", - req, ofi_atomic_get32(&vc->outstanding_tx_reqs)); - } else if (rc != -FI_ECANCELED) { - ofi_atomic_dec32(&vc->outstanding_tx_reqs); - queue_tx = 1; - GNIX_DEBUG(FI_LOG_EP_DATA, - "Queued request (%p) on full VC\n", - req); - } - } else { - queue_tx = 1; - GNIX_DEBUG(FI_LOG_EP_DATA, - "Queued request (%p) on busy VC\n", - req); - } - - if (OFI_UNLIKELY(queue_tx)) { - dlist_insert_tail(&req->dlist, &vc->tx_queue); - _gnix_vc_tx_schedule(vc); - } - - return FI_SUCCESS; -} - -/* Push TX requests queued on the VC. - * - * Note: EP must be locked. */ -static int __gnix_vc_push_tx_reqs(struct gnix_vc *vc) -{ - int ret, fi_rc = FI_SUCCESS; - struct gnix_fab_req *req; - - req = dlist_first_entry(&vc->tx_queue, struct gnix_fab_req, dlist); - while (req) { - if ((req->flags & FI_FENCE) && - ofi_atomic_get32(&vc->outstanding_tx_reqs)) { - GNIX_DEBUG(FI_LOG_EP_DATA, - "TX request queue stalled on FI_FENCE request: %p (%d)\n", - req, ofi_atomic_get32(&vc->outstanding_tx_reqs)); - /* Success is returned to allow processing of more VCs. 
- * This VC will be rescheduled when the fence request - * is completed. */ - break; - } - - ofi_atomic_inc32(&vc->outstanding_tx_reqs); - dlist_remove_init(&req->dlist); - - ret = req->work_fn(req); - if (ret == FI_SUCCESS) { - GNIX_DEBUG(FI_LOG_EP_DATA, - "TX request processed: %p (OTX: %d)\n", - req, ofi_atomic_get32(&vc->outstanding_tx_reqs)); - } else if (ret != -FI_ECANCELED) { - /* Work failed. Reschedule to put this VC - * back on the end of the list and return - * -FI_EAGAIN. */ - - GNIX_DEBUG(FI_LOG_EP_DATA, - "Failed to push TX request %p: %s\n", - req, fi_strerror(-ret)); - fi_rc = -FI_EAGAIN; - - /* FI_ENOSPC is reserved to indicate a lack of - * TXDs, which are shared by all VCs on the - * NIC. The other likely error is FI_EAGAIN - * due to a lack of SMSG credits. */ - - if ((ret != -FI_ENOSPC) && (ret != -FI_EAGAIN)) { - /* TODO report error? */ - GNIX_WARN(FI_LOG_EP_DATA, - "Failed to push TX request %p: %s\n", - req, fi_strerror(-ret)); - } - - dlist_insert_head(&req->dlist, &vc->tx_queue); - ofi_atomic_dec32(&vc->outstanding_tx_reqs); - - /* _gnix_vc_tx_schedule() must come after the request - * is inserted into the VC's tx_queue. */ - _gnix_vc_tx_schedule(vc); - break; - - } - - req = dlist_first_entry(&vc->tx_queue, - struct gnix_fab_req, - dlist); - } - - return fi_rc; -} - -/* Return next VC needing progress on the NIC. 
*/ -static struct gnix_vc *__gnix_nic_next_pending_vc(struct gnix_nic *nic) -{ - struct gnix_vc *vc = NULL; - - COND_ACQUIRE(nic->requires_lock, &nic->prog_vcs_lock); - vc = dlist_first_entry(&nic->prog_vcs, struct gnix_vc, prog_list); - if (vc) - dlist_remove_init(&vc->prog_list); - COND_RELEASE(nic->requires_lock, &nic->prog_vcs_lock); - - if (vc) { - GNIX_INFO(FI_LOG_EP_CTRL, "Dequeued progress VC (%p)\n", vc); - _gnix_clear_bit(&vc->flags, GNIX_VC_FLAG_SCHEDULED); - } - - return vc; -} - -int _gnix_vc_progress(struct gnix_vc *vc) -{ - int ret, ret_tx; - - ret = __gnix_vc_rx_progress(vc); - if (ret != FI_SUCCESS) - GNIX_DEBUG(FI_LOG_EP_CTRL, - "__gnix_vc_rx_progress failed: %d\n", ret); - - ret = __gnix_vc_push_work_reqs(vc); - if (ret != FI_SUCCESS) - GNIX_DEBUG(FI_LOG_EP_CTRL, - "__gnix_vc_push_work_reqs failed: %d\n", ret); - - ret_tx = __gnix_vc_push_tx_reqs(vc); - if (ret != FI_SUCCESS) - GNIX_DEBUG(FI_LOG_EP_CTRL, - "__gnix_vc_push_tx_reqs failed: %d\n", ret); - - return ret_tx; -} - -/* Progress all NIC VCs needing work. */ -int _gnix_vc_nic_progress(struct gnix_nic *nic) -{ - struct gnix_vc *vc; - int ret; - - /* - * we can't just spin and spin in this loop because - * none of the functions invoked below end up dequeuing - * GNI CQE's and subsequently freeing up TX descriptors. - * So, if the tx reqs routine returns -FI_EAGAIN, break out. - */ - while ((vc = __gnix_nic_next_pending_vc(nic))) { - COND_ACQUIRE(vc->ep->requires_lock, &vc->ep->vc_lock); - - if (vc->conn_state == GNIX_VC_CONNECTED) { - ret = _gnix_vc_progress(vc); - } - - COND_RELEASE(vc->ep->requires_lock, &vc->ep->vc_lock); - - if (ret != FI_SUCCESS) - break; - } - - return FI_SUCCESS; -} - -/* Schedule VC for progress. - * - * Note: EP must be locked. - * TODO: Better implementation for rx/work/tx VC scheduling. 
*/ -int _gnix_vc_schedule(struct gnix_vc *vc) -{ - struct gnix_nic *nic = vc->ep->nic; - - if (!_gnix_test_and_set_bit(&vc->flags, GNIX_VC_FLAG_SCHEDULED)) { - COND_ACQUIRE(nic->requires_lock, &nic->prog_vcs_lock); - dlist_insert_tail(&vc->prog_list, &nic->prog_vcs); - COND_RELEASE(nic->requires_lock, &nic->prog_vcs_lock); - GNIX_DEBUG(FI_LOG_EP_CTRL, "Scheduled VC (%p)\n", vc); - } - - return FI_SUCCESS; -} - -/* Schedule the VC for RX progress. */ -int _gnix_vc_rx_schedule(struct gnix_vc *vc) -{ - return _gnix_vc_schedule(vc); -} - -/* Schedule the VC for work progress. */ -static int __gnix_vc_work_schedule(struct gnix_vc *vc) -{ - return _gnix_vc_schedule(vc); -} - -/* Schedule the VC for TX progress. */ -int _gnix_vc_tx_schedule(struct gnix_vc *vc) -{ - return _gnix_vc_schedule(vc); -} - -/* For a newly scheduled VC. Do any queued work now that the connection is - * complete. - * - * Note: EP must be locked. */ -int _gnix_vc_sched_new_conn(struct gnix_vc *vc) -{ - _gnix_vc_schedule(vc); - return _gnix_vc_progress(vc); -} - -/* Look up an EP's VC using fi_addr_t. - * - * Note: EP must be locked. */ -int _gnix_vc_ep_get_vc(struct gnix_fid_ep *ep, fi_addr_t dest_addr, - struct gnix_vc **vc_ptr) -{ - int ret; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (GNIX_EP_RDM_DGM(ep->type)) { - ret = __gnix_vc_get_vc_by_fi_addr(ep, dest_addr, vc_ptr); - if (OFI_UNLIKELY(ret != FI_SUCCESS)) { - GNIX_WARN(FI_LOG_EP_DATA, - "__gnix_vc_get_vc_by_fi_addr returned %s\n", - fi_strerror(-ret)); - return ret; - } - } else if (ep->type == FI_EP_MSG) { - if (GNIX_EP_CONNECTED(ep)) { - *vc_ptr = ep->vc; - } else { - return -FI_EINVAL; - } - } else { - GNIX_WARN(FI_LOG_EP_DATA, "Invalid endpoint type: %d\n", - ep->type); - return -FI_EINVAL; - } - - return FI_SUCCESS; -} - -fi_addr_t _gnix_vc_peer_fi_addr(struct gnix_vc *vc) -{ - int rc; - - /* If FI_SOURCE capability was requested, do a reverse lookup of a VC's - * FI address once. Skip translation on connected EPs (no AV). 
*/ - if (vc->ep->av && vc->peer_fi_addr == FI_ADDR_NOTAVAIL) { - rc = _gnix_av_reverse_lookup(vc->ep->av, - vc->peer_addr, - &vc->peer_fi_addr); - if (rc != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_av_reverse_lookup() failed: %d\n", - rc); - } - - return vc->peer_fi_addr; -} - -int _gnix_vc_cm_init(struct gnix_cm_nic *cm_nic) -{ - int ret = FI_SUCCESS; - gnix_cm_nic_rcv_cb_func *ofunc = NULL; - struct gnix_nic *nic = NULL; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - nic = cm_nic->nic; - assert(nic != NULL); - - COND_ACQUIRE(nic->requires_lock, &nic->lock); - ret = _gnix_cm_nic_reg_recv_fn(cm_nic, - __gnix_vc_recv_fn, - &ofunc); - if ((ofunc != NULL) && - (ofunc != __gnix_vc_recv_fn)) { - GNIX_WARN(FI_LOG_EP_DATA, "callback reg failed: %s\n", - fi_strerror(-ret)); - } - - COND_RELEASE(nic->requires_lock, &nic->lock); - - return ret; -} - diff --git a/prov/gni/src/gnix_vector.c b/prov/gni/src/gnix_vector.c deleted file mode 100644 index b4e7c3f966f..00000000000 --- a/prov/gni/src/gnix_vector.c +++ /dev/null @@ -1,539 +0,0 @@ -/* - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "gnix_vector.h" - -static gnix_vector_ops_t __gnix_vec_lockless_ops; -static gnix_vector_ops_t __gnix_vec_locked_ops; - -/******************************************************************************* - * INTERNAL HELPER FNS - ******************************************************************************/ -static inline uint32_t __gnix_vec_get_new_size(gnix_vector_t *vec, uint32_t index) -{ - uint32_t new_size = vec->attr.cur_size; - - if (vec->attr.vec_increase_type == GNIX_VEC_INCREASE_ADD) { - do { - new_size += vec->attr.vec_increase_step; - } while (index >= new_size); - } else { - if (new_size) - new_size *= vec->attr.vec_increase_step; - else - new_size = (new_size + 1) * vec->attr.vec_increase_step; - - while (index >= new_size) - new_size *= vec->attr.vec_increase_step; - - } - - if (new_size > vec->attr.vec_maximum_size) { - GNIX_WARN(FI_LOG_EP_CTRL, "Maximum vector size of %lu " - "reached in __gnix_vec_new_size\n", vec->attr.vec_maximum_size); - new_size = vec->attr.vec_maximum_size; - } - - return new_size; -} - -static inline void __gnix_vec_close_entries(gnix_vector_t *vec) -{ - memset(vec->vector, 0, (sizeof(gnix_vec_entry_t) * vec->attr.cur_size)); -} - 
-/******************************************************************************* - * INTERNAL WORKER FNS - ******************************************************************************/ -static inline int __gnix_vec_resize(gnix_vector_t *vec, uint32_t new_size) -{ - void *tmp; - - if (new_size <= vec->attr.cur_size) { - GNIX_WARN(FI_LOG_EP_DATA, "In __gnix_vec_resize, the new vector" - "size is less than or equal to the current size.\n"); - } - - tmp = realloc(vec->vector, new_size * sizeof(gnix_vec_entry_t)); - - if (!tmp) { - GNIX_WARN(FI_LOG_EP_CTRL, "Insufficient memory in " - "__gnix_vec_resize\n"); - return -FI_ENOMEM; - } - - vec->vector = tmp; - - if (new_size > vec->attr.cur_size) { - memset(vec->vector + vec->attr.cur_size, 0, - (sizeof(gnix_vec_entry_t) * (new_size - vec->attr.cur_size))); - } - - vec->attr.cur_size = new_size; - - return FI_SUCCESS; -} - -static inline int __gnix_vec_create(gnix_vector_t *vec, gnix_vec_attr_t *attr) -{ - if (OFI_UNLIKELY(vec->state == GNIX_VEC_STATE_READY)) { - GNIX_DEBUG(FI_LOG_EP_DATA, "The vector (%p) is already ready.\n", - vec); - return -FI_EINVAL; - } - - vec->vector = calloc(attr->vec_initial_size, sizeof(gnix_vec_entry_t)); - - if (OFI_UNLIKELY(!vec->vector)) { - GNIX_WARN(FI_LOG_EP_CTRL, "Insufficient memory in " - "_gnix_vec_init.\n"); - return -FI_ENOMEM; - } else { - attr->cur_size = attr->vec_initial_size; - } - - memcpy(&vec->attr, attr, sizeof(gnix_vec_attr_t)); - - return FI_SUCCESS; -} - -static inline int __gnix_vec_close(gnix_vector_t *vec) -{ - if (OFI_UNLIKELY(vec->state == GNIX_VEC_STATE_DEAD)) { - GNIX_DEBUG(FI_LOG_EP_DATA, "The vector (%p) is already dead.\n", - vec); - return -FI_EINVAL; - } - - free(vec->vector); - vec->ops = NULL; - vec->attr.cur_size = 0; - vec->state = GNIX_VEC_STATE_DEAD; - - return FI_SUCCESS; -} - -static inline int __gnix_vec_insert_at(gnix_vector_t *vec, - gnix_vec_entry_t *entry, - gnix_vec_index_t index) -{ - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if 
(OFI_UNLIKELY(index >= vec->attr.vec_maximum_size)) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid parameter to " - "__gnix_vec_insert_at\n"); - return -FI_EINVAL; - } - - if (OFI_UNLIKELY(vec->state == GNIX_VEC_STATE_DEAD)) { - GNIX_FATAL(FI_LOG_EP_CTRL, "gnix_vector_t is in state " - "GNIX_VEC_STATE_DEAD in __gnix_vec_insert_at.\n"); - } - - if (index >= vec->attr.cur_size) { - uint32_t new_size = __gnix_vec_get_new_size(vec, index); - int ret = __gnix_vec_resize(vec, new_size); - - if (OFI_UNLIKELY(ret)) - return ret; - } - - if (vec->vector[index]) { - GNIX_WARN(FI_LOG_EP_CTRL, "Existing element found in " - "__gnix_vec_insert_at\n"); - return -FI_ECANCELED; - } else { - vec->vector[index] = entry; - return FI_SUCCESS; - } -} - -static inline int __gnix_vec_remove_at(gnix_vector_t *vec, - gnix_vec_index_t index) -{ - if (OFI_UNLIKELY(vec->state == GNIX_VEC_STATE_DEAD)) { - GNIX_FATAL(FI_LOG_EP_CTRL, "gnix_vector_t is in state " - "GNIX_VEC_STATE_DEAD in __gnix_vec_remove_at.\n"); - } else if (index >= vec->attr.cur_size) { - GNIX_WARN(FI_LOG_EP_CTRL, "Index (%lu) too large in " - "__gnix_vec_remove_at\n", index); - return -FI_EINVAL; - } else { - if (!vec->vector[index]) { - GNIX_WARN(FI_LOG_EP_CTRL, "No entry exists in " - "__gnix_vec_remove_at\n"); - return -FI_ECANCELED; - } else { - vec->vector[index] = NULL; - } - } - return FI_SUCCESS; -} - -static inline int __gnix_vec_at(gnix_vector_t *vec, void **element, - gnix_vec_index_t index) -{ - if (OFI_UNLIKELY(vec->state == GNIX_VEC_STATE_DEAD)) { - GNIX_FATAL(FI_LOG_EP_CTRL, "gnix_vector_t is in state " - "GNIX_VEC_STATE_DEAD in __gnix_vec_at.\n"); - } else if (index >= vec->attr.cur_size) { - GNIX_WARN(FI_LOG_EP_CTRL, "Index (%lu) too large in " - "__gnix_vec_at\n", index); - return -FI_EINVAL; - } else { - if (OFI_LIKELY((uint64_t) vec->vector[index])) { - *element = vec->vector[index]; - } else { - GNIX_DEBUG(FI_LOG_EP_CTRL, "There is no element at index " - "(%lu) in __gnix_vec_at\n", index); - return 
-FI_ECANCELED; - } - } - return FI_SUCCESS; -} - -/******************************************************************************* - * LOCKLESS FNS - ******************************************************************************/ -static int __gnix_vec_lf_init(gnix_vector_t *vec, gnix_vec_attr_t *attr) -{ - int ret; - - ret = __gnix_vec_create(vec, attr); - vec->ops = &__gnix_vec_lockless_ops; - vec->state = GNIX_VEC_STATE_READY; - - return ret; -} - -static int __gnix_vec_lf_close(gnix_vector_t *vec) -{ - int ret; - - __gnix_vec_close_entries(vec); - ret = __gnix_vec_close(vec); - - return ret; -} - -static int __gnix_vec_lf_resize(gnix_vector_t *vec, uint32_t size) -{ - return __gnix_vec_resize(vec, size); -} - -static int __gnix_vec_lf_insert_last(gnix_vector_t *vec, - gnix_vec_entry_t *entry) -{ - return __gnix_vec_insert_at(vec, entry, vec->attr.cur_size - 1); -} - -static int __gnix_vec_lf_insert_at(gnix_vector_t *vec, - gnix_vec_entry_t *entry, - gnix_vec_index_t index) -{ - return __gnix_vec_insert_at(vec, entry, index); -} - -static int __gnix_vec_lf_remove_last(gnix_vector_t *vec) -{ - return __gnix_vec_remove_at(vec, vec->attr.cur_size - 1); -} - -static int __gnix_vec_lf_remove_at(gnix_vector_t *vec, - gnix_vec_index_t index) -{ - return __gnix_vec_remove_at(vec, index); -} - -static int __gnix_vec_lf_last(gnix_vector_t *vec, void **element) -{ - return __gnix_vec_at(vec, element, vec->attr.cur_size - 1); -} - -static int __gnix_vec_lf_at(gnix_vector_t *vec, void **element, gnix_vec_index_t index) -{ - return __gnix_vec_at(vec, element, index); -} - -gnix_vec_entry_t *__gnix_vec_lf_iter_next(struct gnix_vector_iter *iter) -{ - uint32_t i; - - for (i = iter->cur_idx; i < iter->vec->attr.cur_size; i++) { - if (iter->vec->vector[i]) { - iter->cur_idx = i + 1; - return iter->vec->vector[i]; - } - } - - iter->cur_idx = iter->vec->attr.cur_size; - return NULL; -} - -/******************************************************************************* - * LOCKED 
FNS - ******************************************************************************/ -static int __gnix_vec_lk_init(gnix_vector_t *vec, gnix_vec_attr_t *attr) -{ - int ret; - - rwlock_init(&vec->lock); - ret = __gnix_vec_create(vec, attr); - vec->ops = &__gnix_vec_locked_ops; - vec->state = GNIX_VEC_STATE_READY; - - return ret; -} - -static int __gnix_vec_lk_close(gnix_vector_t *vec) -{ - int ret; - - rwlock_wrlock(&vec->lock); - - __gnix_vec_close_entries(vec); - ret = __gnix_vec_close(vec); - - rwlock_unlock(&vec->lock); - - rwlock_destroy(&vec->lock); - - return ret; -} - -static int __gnix_vec_lk_resize(gnix_vector_t *vec, uint32_t size) -{ - int ret; - - rwlock_wrlock(&vec->lock); - - ret = __gnix_vec_resize(vec, size); - - rwlock_unlock(&vec->lock); - - return ret; -} - -static int __gnix_vec_lk_insert_last(gnix_vector_t *vec, - gnix_vec_entry_t *entry) -{ - int ret; - - rwlock_wrlock(&vec->lock); - - ret = __gnix_vec_insert_at(vec, entry, vec->attr.cur_size - 1); - - rwlock_unlock(&vec->lock); - - return ret; -} - -static int __gnix_vec_lk_insert_at(gnix_vector_t *vec, - gnix_vec_entry_t *entry, - gnix_vec_index_t index) -{ - int ret; - - rwlock_wrlock(&vec->lock); - - ret = __gnix_vec_insert_at(vec, entry, index); - - rwlock_unlock(&vec->lock); - - return ret; -} - -static int __gnix_vec_lk_remove_last(gnix_vector_t *vec) -{ - int ret; - - rwlock_wrlock(&vec->lock); - - ret = __gnix_vec_remove_at(vec, vec->attr.cur_size - 1); - - rwlock_unlock(&vec->lock); - - return ret; -} - -static int __gnix_vec_lk_remove_at(gnix_vector_t *vec, - gnix_vec_index_t index) -{ - int ret; - - rwlock_wrlock(&vec->lock); - - ret = __gnix_vec_remove_at(vec, index); - - rwlock_unlock(&vec->lock); - - return ret; -} - -static int __gnix_vec_lk_last(gnix_vector_t *vec, void **element) -{ - int ret; - - rwlock_rdlock(&vec->lock); - - ret = __gnix_vec_at(vec, element, vec->attr.cur_size - 1); - - rwlock_unlock(&vec->lock); - - return ret; -} - -static int 
__gnix_vec_lk_at(gnix_vector_t *vec, void **element, gnix_vec_index_t index) -{ - int ret; - - rwlock_rdlock(&vec->lock); - - ret = __gnix_vec_at(vec, element, index); - - rwlock_unlock(&vec->lock); - - return ret; -} - -gnix_vec_entry_t *__gnix_vec_lk_iter_next(struct gnix_vector_iter *iter) -{ - uint32_t i; - gnix_vec_entry_t *entry; - - rwlock_rdlock(&iter->vec->lock); - - for (i = iter->cur_idx; i < iter->vec->attr.cur_size; i++) { - if (iter->vec->vector[i]) { - iter->cur_idx = i + 1; - entry = iter->vec->vector[i]; - rwlock_unlock(&iter->vec->lock); - - return entry; - } - } - - iter->cur_idx = iter->vec->attr.cur_size; - - rwlock_unlock(&iter->vec->lock); - - return NULL; -} - -/******************************************************************************* - * API FNS - ******************************************************************************/ -/** - * Create the initial vector. The user is responsible for initializing the - * "attr" parameter prior to calling this function. - * - * @param[in] vec the vector to initialize - * @param[in] attr the vector attributes - * - * @return FI_SUCCESS Upon successfully creating the vector - * @return -FI_EINVAL Upon receiving an invalid parameter - * @return -FI_ENOMEM Upon insufficient memory to create the vector - */ -int _gnix_vec_init(struct gnix_vector *vec, gnix_vec_attr_t *attr) -{ - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (OFI_UNLIKELY(!vec || !attr || - attr->vec_initial_size > attr->vec_maximum_size)) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid parameter to _gnix_vec_init." - "\n"); - return -FI_EINVAL; - } - - if (attr->vec_internal_locking == GNIX_VEC_LOCKED) { - return __gnix_vec_lk_init(vec, attr); - } else { - return __gnix_vec_lf_init(vec, attr); - } -} - -/** - * Close the vector elements and then the vector. 
- * - * @param[in] vec the vector to close - * - * @return FI_SUCCESS Upon successfully closing the vector - * @return -FI_EINVAL Upon a uninitialized or dead vector - */ -int _gnix_vec_close(gnix_vector_t *vec) -{ - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (OFI_UNLIKELY(!vec)) { - GNIX_WARN(FI_LOG_EP_CTRL, "Invalid parameter to _gnix_vec_close." - "\n"); - return -FI_EINVAL; - } else { - if (vec->attr.vec_internal_locking == GNIX_VEC_LOCKED) { - return __gnix_vec_lk_close(vec); - } else { - return __gnix_vec_lf_close(vec); - } - } -} - -static gnix_vector_ops_t __gnix_vec_lockless_ops = { - .resize = __gnix_vec_lf_resize, - - .insert_last = __gnix_vec_lf_insert_last, - .insert_at = __gnix_vec_lf_insert_at, - - .remove_last = __gnix_vec_lf_remove_last, - .remove_at = __gnix_vec_lf_remove_at, - - .last = __gnix_vec_lf_last, - .at = __gnix_vec_lf_at, - - .iter_next = __gnix_vec_lf_iter_next, -}; - -static gnix_vector_ops_t __gnix_vec_locked_ops = { - .resize = __gnix_vec_lk_resize, - - .insert_last = __gnix_vec_lk_insert_last, - .insert_at = __gnix_vec_lk_insert_at, - - .remove_last = __gnix_vec_lk_remove_last, - .remove_at = __gnix_vec_lk_remove_at, - - .last = __gnix_vec_lk_last, - .at = __gnix_vec_lk_at, - - .iter_next = __gnix_vec_lk_iter_next, -}; diff --git a/prov/gni/src/gnix_wait.c b/prov/gni/src/gnix_wait.c deleted file mode 100644 index 9b2547fb5ca..00000000000 --- a/prov/gni/src/gnix_wait.c +++ /dev/null @@ -1,590 +0,0 @@ -/* - * Copyright (c) 2015-2018 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include -#include -#include "gnix.h" -#include "gnix_wait.h" -#include "gnix_nic.h" -#include "gnix_cm_nic.h" -#include "gnix_eq.h" -/* - * Gnix wait progress thread declarations for making sure nic progress - * occurs when inside a gnix_wait call - */ - -static pthread_t gnix_wait_thread; -static pthread_mutex_t gnix_wait_mutex = PTHREAD_MUTEX_INITIALIZER; -static pthread_cond_t gnix_wait_cond; -/* This is protected by the wait mutex and is only operated on under the - * mutex, the mutex protects us from losing wake_ups, from the conditional. - * This could be changed to an atomic but the variable would still need to - * be protected under the mutex. 
- */ -static int gnix_wait_thread_enabled; -static ofi_atomic32_t gnix_wait_refcnt; - -uint32_t gnix_wait_thread_sleep_time = 20; - -/* - * It is necessary to have a separate thread making progress in order for the - * wait functions to succeed. This version of that thread is designed - * to always make progress so we don't hard stall while sitting on fi_wait. - */ -static void *__gnix_wait_nic_prog_thread_fn(void *the_arg) -{ - int ret = FI_SUCCESS, prev_state; - struct gnix_nic *nic1, *nic2; - struct gnix_fid_eq *eq1, *eq2; - struct gnix_cm_nic *cm_nic1, *cm_nic2; - sigset_t sigmask; - DLIST_HEAD(gnix_nic_prog_list); - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - /* - * temporarily disable cancelability while we set up - * some stuff - */ - - pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &prev_state); - - /* - * help out Cray core-spec, say we're not an app thread - * and can be run on core-spec cpus. - */ - ret = _gnix_task_is_not_app(); - if (ret) - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_task_is_not_app call returned %d\n", - ret); - - /* - * block all signals, don't want this thread to catch - * signals that may be for app threads - */ - - memset(&sigmask, 0, sizeof(sigset_t)); - ret = sigfillset(&sigmask); - if (ret) { - GNIX_WARN(FI_LOG_EP_CTRL, - "sigfillset call returned %d\n", ret); - } else { - - ret = pthread_sigmask(SIG_SETMASK, - &sigmask, NULL); - if (ret) - GNIX_WARN(FI_LOG_EP_CTRL, - "pthread_sigmask call returned %d\n", ret); - } - - /* - * okay now we're ready to be cancelable. - */ - - pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &prev_state); - - while (1) { - /* Check if we're tearing down. */ - pthread_testcancel(); - - /* Wait until we're signaled to poll. */ - pthread_mutex_lock(&gnix_wait_mutex); - pthread_cleanup_push((void (*)(void *))pthread_mutex_unlock, - (void *)&gnix_wait_mutex); - if (!gnix_wait_thread_enabled) { - pthread_cond_wait(&gnix_wait_cond, &gnix_wait_mutex); - } - - pthread_cleanup_pop(1); - - /* Progress all EQs. 
*/ - pthread_mutex_lock(&gnix_eq_list_lock); - - dlist_for_each_safe(&gnix_eq_list, eq1, eq2, gnix_fid_eq_list) { - _gnix_eq_progress(eq1); - } - - pthread_mutex_unlock(&gnix_eq_list_lock); - - /* Progress all NICs. */ - pthread_mutex_lock(&gnix_nic_list_lock); - - dlist_for_each_safe(&gnix_nic_list, nic1, nic2, gnix_nic_list) { - dlist_insert_tail(&nic1->gnix_nic_prog_list, &gnix_nic_prog_list); - _gnix_ref_get(nic1); - } - - pthread_mutex_unlock(&gnix_nic_list_lock); - - dlist_for_each_safe(&gnix_nic_prog_list, nic1, nic2, gnix_nic_prog_list) { - _gnix_nic_progress(nic1); - dlist_remove_init(&nic1->gnix_nic_prog_list); - _gnix_ref_put(nic1); - } - - /* Progress all CM NICs. */ - pthread_mutex_lock(&gnix_cm_nic_list_lock); - - dlist_for_each_safe(&gnix_cm_nic_list, cm_nic1, cm_nic2, - cm_nic_list) { - _gnix_cm_nic_progress((void *)cm_nic1); - } - - pthread_mutex_unlock(&gnix_cm_nic_list_lock); - - usleep(gnix_wait_thread_sleep_time); - } - - return NULL; -} - -static void __gnix_wait_start_progress(void) -{ - int ret; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - pthread_mutex_lock(&gnix_wait_mutex); - if (!gnix_wait_thread) { - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - pthread_cond_init(&gnix_wait_cond, NULL); - ofi_atomic_initialize32(&gnix_wait_refcnt, 0); - ret = _gnix_job_disable_affinity_apply(); - if (ret != 0) - GNIX_WARN(WAIT_SUB, - "_gnix_job_disable call returned %d\n", ret); - - ret = pthread_create(&gnix_wait_thread, NULL, - __gnix_wait_nic_prog_thread_fn, NULL); - if (ret) - GNIX_WARN(WAIT_SUB, - "pthread_create call returned %d\n", ret); - } - ofi_atomic_inc32(&gnix_wait_refcnt); - pthread_mutex_unlock(&gnix_wait_mutex); -} - -static void __gnix_wait_stop_progress(void) -{ - int ret; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - pthread_mutex_lock(&gnix_wait_mutex); - if (gnix_wait_thread) { - if (ofi_atomic_dec32(&gnix_wait_refcnt) == 0) { - ret = pthread_cancel(gnix_wait_thread); - if (ret) - GNIX_WARN(WAIT_SUB, - "pthread_cancel call returned %d\n", - ret); - 
- gnix_wait_thread_enabled++; - pthread_cond_signal(&gnix_wait_cond); - pthread_mutex_unlock(&gnix_wait_mutex); - ret = pthread_join(gnix_wait_thread, NULL); - if (ret) - GNIX_WARN(WAIT_SUB, - "pthread_join call returned %d\n", - ret); - - gnix_wait_thread = 0; - } else { - pthread_mutex_unlock(&gnix_wait_mutex); - } - } else - pthread_mutex_unlock(&gnix_wait_mutex); - - return; - -} - -/******************************************************************************* - * Forward declarations for FI_OPS_* structures. - ******************************************************************************/ -static struct fi_ops gnix_fi_ops; -static struct fi_ops_wait gnix_wait_ops; - -/******************************************************************************* - * List match functions. - ******************************************************************************/ -static int gnix_match_fid(struct slist_entry *item, const void *fid) -{ - struct gnix_wait_entry *entry; - - entry = container_of(item, struct gnix_wait_entry, entry); - - return (entry->wait_obj == (struct fid *) fid); -} - -/******************************************************************************* - * Exposed helper functions. 
- ******************************************************************************/ -int _gnix_wait_set_add(struct fid_wait *wait, struct fid *wait_obj) -{ - struct gnix_fid_wait *wait_priv; - struct gnix_wait_entry *wait_entry; - - GNIX_TRACE(WAIT_SUB, "\n"); - - wait_entry = calloc(1, sizeof(*wait_entry)); - if (!wait_entry) { - GNIX_WARN(WAIT_SUB, - "failed to allocate memory for wait entry.\n"); - return -FI_ENOMEM; - } - - wait_priv = container_of(wait, struct gnix_fid_wait, wait.fid); - - wait_entry->wait_obj = wait_obj; - - gnix_slist_insert_tail(&wait_entry->entry, &wait_priv->set); - - return FI_SUCCESS; -} - -int _gnix_wait_set_remove(struct fid_wait *wait, struct fid *wait_obj) -{ - struct gnix_fid_wait *wait_priv; - struct gnix_wait_entry *wait_entry; - struct slist_entry *found; - - GNIX_TRACE(WAIT_SUB, "\n"); - - wait_priv = container_of(wait, struct gnix_fid_wait, wait.fid); - - found = slist_remove_first_match(&wait_priv->set, gnix_match_fid, - wait_obj); - - if (found) { - wait_entry = container_of(found, struct gnix_wait_entry, - entry); - free(wait_entry); - - return FI_SUCCESS; - } - - return -FI_EINVAL; -} - -int _gnix_get_wait_obj(struct fid_wait *wait, void *arg) -{ - struct fi_mutex_cond mutex_cond; - struct gnix_fid_wait *wait_priv; - size_t copy_size; - const void *src; - - GNIX_TRACE(WAIT_SUB, "\n"); - - if (!wait || !arg) - return -FI_EINVAL; - - wait_priv = container_of(wait, struct gnix_fid_wait, wait); - - switch (wait_priv->type) { - case FI_WAIT_FD: - copy_size = sizeof(wait_priv->fd[WAIT_READ]); - src = &wait_priv->fd[WAIT_READ]; - break; - case FI_WAIT_MUTEX_COND: - mutex_cond.mutex = &wait_priv->mutex; - mutex_cond.cond = &wait_priv->cond; - - copy_size = sizeof(mutex_cond); - src = &mutex_cond; - break; - default: - GNIX_WARN(WAIT_SUB, "wait type: %d not supported.\n", - wait_priv->type); - return -FI_EINVAL; - } - - memcpy(arg, src, copy_size); - - return FI_SUCCESS; -} - -void _gnix_signal_wait_obj(struct fid_wait *wait) -{ - 
static char msg = 'g'; - size_t len = sizeof(msg); - struct gnix_fid_wait *wait_priv; - - wait_priv = container_of(wait, struct gnix_fid_wait, wait); - - switch (wait_priv->type) { - case FI_WAIT_UNSPEC: - GNIX_TRACE(WAIT_SUB, - "The Read FD is %d Write is %d\n", - wait_priv->fd[WAIT_READ], - wait_priv->fd[WAIT_WRITE]); - /* This is a non-blocking write as the fd could become full */ - write(wait_priv->fd[WAIT_WRITE], &msg, len); - break; - default: - GNIX_WARN(WAIT_SUB, - "error signaling wait object: type: %d not supported.\n", - wait_priv->type); - return; - } -} - -/******************************************************************************* - * Internal helper functions. - ******************************************************************************/ -static int gnix_verify_wait_attr(struct fi_wait_attr *attr) -{ - GNIX_TRACE(WAIT_SUB, "\n"); - - if (!attr || attr->flags) - return -FI_EINVAL; - - switch (attr->wait_obj) { - case FI_WAIT_UNSPEC: - attr->wait_obj = FI_WAIT_UNSPEC; - break; - default: - GNIX_WARN(WAIT_SUB, "wait type: %d not supported.\n", - attr->wait_obj); - return -FI_EINVAL; - } - - return FI_SUCCESS; -} - -static int gnix_init_wait_obj(struct gnix_fid_wait *wait, enum fi_wait_obj type) -{ - GNIX_TRACE(WAIT_SUB, "\n"); - - wait->type = type; - - switch (type) { - case FI_WAIT_UNSPEC: - if (socketpair(AF_LOCAL, SOCK_STREAM, 0, wait->fd)) - goto err; - - if (fi_fd_nonblock(wait->fd[WAIT_READ])) - goto cleanup; - - if (fi_fd_nonblock(wait->fd[WAIT_WRITE])) - goto cleanup; - - break; - default: - GNIX_WARN(WAIT_SUB, "Invalid wait type: %d\n", - type); - return -FI_EINVAL; - } - - return FI_SUCCESS; - -cleanup: - close(wait->fd[WAIT_READ]); - close(wait->fd[WAIT_WRITE]); -err: - GNIX_WARN(WAIT_SUB, "%s\n", strerror(errno)); - return -FI_EOTHER; -} - -/******************************************************************************* - * API Functionality. 
- ******************************************************************************/ -static int gnix_wait_control(struct fid *wait, int command, void *arg) -{ -/* - struct fid_wait *wait_fid_priv; - - GNIX_TRACE(WAIT_SUB, "\n"); - - wait_fid_priv = container_of(wait, struct fid_wait, fid); -*/ - - switch (command) { - case FI_GETWAIT: - return -FI_ENOSYS; - default: - return -FI_EINVAL; - } -} - -/** - * Waits on a wait set until one or more of it's underlying objects is signaled. - * - * @param[in] wait the wait object set - * @param[in] timeout time to wait for a signal, in milliseconds - * - * @return FI_SUCCESS upon successfully waiting - * @return -FI_ERRNO upon failure - * @return -FI_ENOSYS if this operation is not supported - */ -DIRECT_FN int gnix_wait_wait(struct fid_wait *wait, int timeout) -{ - int err = 0, ret; - char c; - struct gnix_fid_wait *wait_priv; - - GNIX_TRACE(WAIT_SUB, "\n"); - - wait_priv = container_of(wait, struct gnix_fid_wait, wait.fid); - switch (wait_priv->type) { - case FI_WAIT_UNSPEC: - pthread_mutex_lock(&gnix_wait_mutex); - gnix_wait_thread_enabled++; - pthread_cond_signal(&gnix_wait_cond); - pthread_mutex_unlock(&gnix_wait_mutex); - GNIX_DEBUG(WAIT_SUB, - "Calling fi_poll_fd %d timeout %d\n", - wait_priv->fd[WAIT_READ], - timeout); - err = fi_poll_fd(wait_priv->fd[WAIT_READ], timeout); - GNIX_DEBUG(WAIT_SUB, "Return code from poll was %d\n", err); - if (err == 0) { - err = -FI_ETIMEDOUT; - } else { - while (err > 0) { - ret = ofi_read_socket(wait_priv->fd[WAIT_READ], - &c, - 1); - GNIX_DEBUG(WAIT_SUB, "ret is %d C is %c\n", - ret, - c); - if (ret != 1) { - GNIX_ERR(WAIT_SUB, - "failed to read wait_fd\n"); - err = 0; - break; - } - err--; - } - } - break; - default: - GNIX_WARN(WAIT_SUB, "Invalid wait object type\n"); - return -FI_EINVAL; - } - pthread_mutex_lock(&gnix_wait_mutex); - gnix_wait_thread_enabled--; - pthread_mutex_unlock(&gnix_wait_mutex); - return err; -} - -int gnix_wait_close(struct fid *wait) -{ - struct 
gnix_fid_wait *wait_priv; - - GNIX_TRACE(WAIT_SUB, "\n"); - - wait_priv = container_of(wait, struct gnix_fid_wait, wait.fid); - - if (!slist_empty(&wait_priv->set)) { - GNIX_WARN(WAIT_SUB, - "resources still connected to wait set.\n"); - return -FI_EBUSY; - } - - if (wait_priv->type == FI_WAIT_FD) { - close(wait_priv->fd[WAIT_READ]); - close(wait_priv->fd[WAIT_WRITE]); - } - - _gnix_ref_put(wait_priv->fabric); - - free(wait_priv); - - __gnix_wait_stop_progress(); - return FI_SUCCESS; -} - -DIRECT_FN int gnix_wait_open(struct fid_fabric *fabric, - struct fi_wait_attr *attr, - struct fid_wait **waitset) -{ - struct gnix_fid_fabric *fab_priv; - struct gnix_fid_wait *wait_priv; - int ret = FI_SUCCESS; - - GNIX_TRACE(WAIT_SUB, "\n"); - - ret = gnix_verify_wait_attr(attr); - if (ret) - goto err; - - fab_priv = container_of(fabric, struct gnix_fid_fabric, fab_fid); - - wait_priv = calloc(1, sizeof(*wait_priv)); - if (!wait_priv) { - GNIX_WARN(WAIT_SUB, - "failed to allocate memory for wait set.\n"); - ret = -FI_ENOMEM; - goto err; - } - - ret = gnix_init_wait_obj(wait_priv, attr->wait_obj); - if (ret) - goto cleanup; - - slist_init(&wait_priv->set); - - wait_priv->wait.fid.fclass = FI_CLASS_WAIT; - wait_priv->wait.fid.ops = &gnix_fi_ops; - wait_priv->wait.ops = &gnix_wait_ops; - - wait_priv->fabric = fab_priv; - - _gnix_ref_get(fab_priv); - *waitset = &wait_priv->wait; - - __gnix_wait_start_progress(); - return ret; - -cleanup: - free(wait_priv); -err: - return ret; -} - -/******************************************************************************* - * FI_OPS_* data structures. 
- ******************************************************************************/ -static struct fi_ops gnix_fi_ops = { - .size = sizeof(struct fi_ops), - .close = gnix_wait_close, - .bind = fi_no_bind, - .control = gnix_wait_control, - .ops_open = fi_no_ops_open -}; - -static struct fi_ops_wait gnix_wait_ops = { - .size = sizeof(struct fi_ops_wait), - .wait = gnix_wait_wait -}; diff --git a/prov/gni/src/gnix_xpmem.c b/prov/gni/src/gnix_xpmem.c deleted file mode 100644 index 8381dbaed54..00000000000 --- a/prov/gni/src/gnix_xpmem.c +++ /dev/null @@ -1,616 +0,0 @@ -/* - * Copyright (c) 2016 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#if HAVE_CONFIG_H -#include -#endif /* HAVE_CONFIG_H */ - -#include "gnix.h" -#include "gnix_mr.h" -#include "gnix_hashtable.h" -#include "gnix_xpmem.h" - - -#if HAVE_XPMEM - -bool gnix_xpmem_disabled = false; - -#define XPMEM_PAGE_SIZE 4096 - -static pthread_mutex_t gnix_xpmem_lock = PTHREAD_MUTEX_INITIALIZER; -static xpmem_segid_t gnix_my_segid; -static int gnix_xpmem_ref_cnt; - -static void *__gnix_xpmem_attach_seg(void *handle, - void *address, - size_t length, - struct _gnix_fi_reg_context *, - void *context); - -static int __gnix_xpmem_detach_seg(void *handle, - void *context); - -static int __gnix_xpmem_destroy_mr_cache(void *context); - -struct gnix_xpmem_ht_entry { - struct gnix_mr_cache *mr_cache; - struct gnix_xpmem_handle *xp_hndl; - xpmem_apid_t apid; -}; - -/* - * TODO: should be adjustable from domain params - * Note notifier is set to NULL since xpmem device driver - * handles mmu notifiers internally so we don't need to use - * KDREG. 
- */ -static gnix_mr_cache_attr_t _gnix_xpmem_default_mr_cache_attr = { - .soft_reg_limit = 128, - .hard_reg_limit = 16384, - .hard_stale_limit = 128, -#if HAVE_KDREG - .lazy_deregistration = 1, -#else - .lazy_deregistration = 0, -#endif - .reg_callback = __gnix_xpmem_attach_seg, - .dereg_callback = __gnix_xpmem_detach_seg, - .destruct_callback = __gnix_xpmem_destroy_mr_cache, - .elem_size = sizeof(struct gnix_xpmem_access_handle), - .smrn = NULL, -}; - -/******************************************************************************* - * INTERNAL HELPER FNS - ******************************************************************************/ - -static void __xpmem_hndl_destruct(void *obj) -{ - int __attribute__((unused)) ret; - struct gnix_xpmem_handle *hndl = (struct gnix_xpmem_handle *) obj; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - ret = _gnix_ht_destroy(hndl->apid_ht); - if (ret == FI_SUCCESS) { - free(hndl->apid_ht); - hndl->apid_ht = NULL; - } else { - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_ht_destroy returned %s\n", - fi_strerror(-ret)); - } - - pthread_mutex_lock(&gnix_xpmem_lock); - - gnix_xpmem_ref_cnt--; - /* - * if refcnt drops to zero for entire xpmem use, remove - * this process' segment from xpmem. 
- */ - if (gnix_xpmem_ref_cnt == 0) { - ret = xpmem_remove(gnix_my_segid); - if (ret) - GNIX_WARN(FI_LOG_EP_CTRL, - "xpmem_remove returned error %s\n", - strerror(errno)); - } - - pthread_mutex_unlock(&gnix_xpmem_lock); - - free(hndl); -} - -static void __gnix_xpmem_destroy_ht_entry(void *val) -{ - int __attribute__((unused)) ret; - struct gnix_xpmem_ht_entry *entry = (struct gnix_xpmem_ht_entry *)val; - - GNIX_TRACE(FI_LOG_EP_DATA, "\n"); - - ret = _gnix_mr_cache_destroy(entry->mr_cache); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_CTRL, - "_gnix_mr_cache_destroy returned error %s\n", - fi_strerror(-ret)); - - xpmem_release(entry->apid); - free(entry); -} - -static void *__gnix_xpmem_attach_seg(void *handle, - void *address, - size_t length, - struct _gnix_fi_reg_context *reg_context, - void *context) -{ - struct gnix_xpmem_access_handle *access_hndl = - (struct gnix_xpmem_access_handle *)handle; - struct gnix_xpmem_ht_entry *entry = context; - struct xpmem_addr xpmem_addr; - size_t top, attach_len; - - GNIX_TRACE(FI_LOG_EP_DATA, "\n"); - - xpmem_addr.apid = entry->apid; - - /* - * xpmem requires page aligned addresses for attach operation - */ - xpmem_addr.offset = (off_t) FLOOR((uint64_t)address, XPMEM_PAGE_SIZE); - top = CEILING(((uint64_t)address + length), XPMEM_PAGE_SIZE); - attach_len = top - FLOOR((uint64_t)address, XPMEM_PAGE_SIZE); - - access_hndl->attach_addr = xpmem_attach(xpmem_addr, - attach_len, - NULL); - if (access_hndl->attach_addr != (void *)-1L) { - access_hndl->xp_hndl = entry->xp_hndl; - _gnix_ref_get(entry->xp_hndl); - access_hndl->remote_base_addr = (void *)xpmem_addr.offset; - access_hndl->access_len = attach_len; - access_hndl->entry = entry; - return handle; - } else { - GNIX_WARN(FI_LOG_EP_DATA, - "xpmem_attach returned %s xpmem_addr %ld:0x%016lx len %d\n", - strerror(errno), xpmem_addr.apid, xpmem_addr.offset, - attach_len); - /* TODO: dump /proc/self/maps ? 
*/ - exit(-1); - return NULL; - } -} - -static int __gnix_xpmem_detach_seg(void *handle, void *context) -{ - int ret; - struct gnix_xpmem_access_handle *access_hndl; - - GNIX_TRACE(FI_LOG_EP_DATA, "\n"); - - access_hndl = (struct gnix_xpmem_access_handle *)handle; - assert(access_hndl); - - ret = xpmem_detach(access_hndl->attach_addr); - if (ret) - GNIX_WARN(FI_LOG_EP_DATA, "xpmem_detach returned %s\n", - strerror(errno)); - _gnix_ref_put(access_hndl->xp_hndl); - return ret; -} - -/* - * TODO: do we need a destructor callback for mr cache? - */ -static int __gnix_xpmem_destroy_mr_cache(void *context) -{ - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - /* - * use iterator over mr cache entries and invoke - * xpmem_detach on each - */ - return FI_SUCCESS; -} - -/******************************************************************************* - * the stuff - ******************************************************************************/ - -int _gnix_xpmem_handle_create(struct gnix_fid_domain *dom, - struct gnix_xpmem_handle **handle) -{ - int ret = FI_SUCCESS; - struct gnix_xpmem_handle *hndl = NULL; - struct gnix_hashtable_attr ht_attr = {0}; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - hndl = calloc(1, sizeof *hndl); - if (!hndl) - return -FI_ENOMEM; - - pthread_mutex_lock(&gnix_xpmem_lock); - - if (gnix_xpmem_ref_cnt == 0) { - gnix_my_segid = xpmem_make(0, XPMEM_MAXADDR_SIZE, - XPMEM_PERMIT_MODE, - (void *)0666); - if (gnix_my_segid == -1L) { - GNIX_WARN(FI_LOG_DOMAIN, "xpmem make failed - %s\n", - strerror(errno)); - ret = -errno; - pthread_mutex_unlock(&gnix_xpmem_lock); - goto exit; - } - - gnix_xpmem_ref_cnt++; - } - - pthread_mutex_unlock(&gnix_xpmem_lock); - - _gnix_ref_init(&hndl->ref_cnt, 1, - __xpmem_hndl_destruct); - ofi_spin_init(&hndl->lock); - - /* - * initialize xpmem_apid_t key'd hash table for - * retrieving r/b tree for that apid - */ - - hndl->apid_ht = calloc(1, sizeof(struct gnix_hashtable)); - if (hndl->apid_ht == NULL) - goto exit; - - /* - * TODO: use 
domain parameters to adjust these - */ - - ht_attr.ht_initial_size = 1024; /* will we ever have more than - this many local processes? */ - ht_attr.ht_maximum_size = 1024 * 1024; - ht_attr.ht_increase_step = 1024; - ht_attr.ht_increase_type = GNIX_HT_INCREASE_MULT; - ht_attr.ht_collision_thresh = 500; - ht_attr.ht_hash_seed = 0xdeadbeefbeefdead; - ht_attr.ht_internal_locking = 0; - ht_attr.destructor = __gnix_xpmem_destroy_ht_entry; - - ret = _gnix_ht_init(hndl->apid_ht, - &ht_attr); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_CTRL, "_gnix_ht_init returned %s\n", - fi_strerror(-ret)); - goto exit; - } - - *handle = hndl; - return ret; - -exit: - if (hndl != NULL) { - if (hndl->apid_ht != NULL) - free(hndl->apid_ht); - free(hndl); - } - - return ret; -} - -int _gnix_xpmem_handle_destroy(struct gnix_xpmem_handle *hndl) -{ - int ret = FI_SUCCESS; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - _gnix_ref_put(hndl); - - return ret; -} - - -int _gnix_xpmem_access_hndl_get(struct gnix_xpmem_handle *xp_hndl, - xpmem_apid_t peer_apid, - uint64_t remote_vaddr, - size_t len, - struct gnix_xpmem_access_handle **access_hndl) -{ - int ret = FI_SUCCESS; - struct gnix_xpmem_ht_entry *entry; - gnix_mr_cache_attr_t mr_cache_attr = {0}; - - GNIX_TRACE(FI_LOG_EP_DATA, "\n"); - - /* - * use peer_apid to look up the reg cache - * - if not in the hash, create and insert - */ - - ofi_spin_lock(&xp_hndl->lock); - - entry = _gnix_ht_lookup(xp_hndl->apid_ht, - (gnix_ht_key_t)peer_apid); - - /* - * okay need to create an mr_cache for this apid - */ - if (OFI_UNLIKELY(entry == NULL)) { - - entry = calloc(1, sizeof *entry); - if (entry == NULL) { - ret = -FI_ENOMEM; - goto exit_w_lock; - } - - entry->apid = peer_apid; - entry->xp_hndl = xp_hndl; - - memcpy(&mr_cache_attr, &_gnix_xpmem_default_mr_cache_attr, - sizeof(gnix_mr_cache_attr_t)); - mr_cache_attr.reg_context = entry; - mr_cache_attr.dereg_context = entry; - mr_cache_attr.destruct_context = entry; - ret = 
_gnix_mr_cache_init(&entry->mr_cache, - &mr_cache_attr); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_mr_cache_init returned %s\n", - fi_strerror(-ret)); - goto exit_w_lock; - } - ret = _gnix_ht_insert(xp_hndl->apid_ht, - (gnix_ht_key_t)peer_apid, - entry); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_ht_insert returned %s\n", - fi_strerror(-ret)); - goto exit_w_lock; - } - } - - ret = _gnix_mr_cache_register(entry->mr_cache, - remote_vaddr, - len, - NULL, - (void **)access_hndl); - if (ret != FI_SUCCESS) { - GNIX_WARN(FI_LOG_EP_DATA, - "_gnix_mr_cache_register returned %s\n", - fi_strerror(-ret)); - goto exit_w_lock; - } - -exit_w_lock: - ofi_spin_unlock(&xp_hndl->lock); - return ret; - -} - -int _gnix_xpmem_access_hndl_put(struct gnix_xpmem_access_handle *access_hndl) -{ - int ret = FI_SUCCESS; - struct gnix_xpmem_ht_entry *entry; - struct gnix_xpmem_handle *xp_hndl; - - GNIX_TRACE(FI_LOG_EP_DATA, "\n"); - - entry = access_hndl->entry; - if (!entry) { - GNIX_WARN(FI_LOG_EP_DATA, "entry is null\n"); - return -FI_EINVAL; - } - - xp_hndl = entry->xp_hndl; - if (!xp_hndl) { - GNIX_WARN(FI_LOG_EP_DATA, "entry->xp_hndl is null\n"); - return -FI_EINVAL; - } - - ofi_spin_lock(&xp_hndl->lock); - - ret = _gnix_mr_cache_deregister(entry->mr_cache, - access_hndl); - if (ret != FI_SUCCESS) - GNIX_WARN(FI_LOG_EP_DATA, "_gnix_mr_cache_deregister returned %s\n", - fi_strerror(-ret)); - - ofi_spin_unlock(&xp_hndl->lock); - - return ret; -} - -int _gnix_xpmem_accessible(struct gnix_fid_ep *ep, - struct gnix_address addr, - bool *accessible) -{ - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (!ep || !accessible) - return -FI_EINVAL; - - if (gnix_xpmem_disabled == true) { - *accessible = false; - return FI_SUCCESS; - } - - if (ep->domain->params.xpmem_enabled == false) { - *accessible = false; - return FI_SUCCESS; - } - - /* - * if the endpoint's device_addr is the same as tht - * of the supplied address, return true, else false - */ - - *accessible 
= (ep->src_addr.gnix_addr.device_addr == - addr.device_addr) ? true : false; - - return FI_SUCCESS; -} - -int _gnix_xpmem_copy(struct gnix_xpmem_access_handle *access_hndl, - void *dst_addr, - void *remote_start_addr, - size_t len) -{ - void *local_start_addr, *remote_base_addr; - uint64_t attach_addr, reg_len; - - GNIX_TRACE(FI_LOG_EP_DATA, "\n"); - - if (!access_hndl) - return -FI_EINVAL; - - attach_addr = (uint64_t)access_hndl->attach_addr; - remote_base_addr = access_hndl->remote_base_addr; - reg_len = access_hndl->access_len; - - /* - * check that the access handle limits and the - * copy request are consistent - */ - - if (((uint64_t)remote_start_addr < (uint64_t)remote_base_addr) || - ((uint64_t)remote_start_addr >= - ((uint64_t)remote_base_addr + reg_len))) - return -FI_EINVAL; - - if (((uint64_t)remote_start_addr + len) > - ((uint64_t)remote_base_addr + reg_len)) - return -FI_EINVAL; - - local_start_addr = (void *)((uint8_t *)attach_addr + - ((uint8_t *)remote_start_addr - - (uint8_t *)remote_base_addr)); - GNIX_DEBUG(FI_LOG_EP_DATA, - "xpmem copy dst addr 0x%016lx start addr 0x%016lx, len %ld\n", - (uint64_t)dst_addr, (uint64_t)local_start_addr, len); - memcpy(dst_addr, local_start_addr, len); - - return FI_SUCCESS; -} - -int _gnix_xpmem_get_my_segid(struct gnix_xpmem_handle *xp_hndl, - xpmem_segid_t *seg_id) -{ - int ret = FI_SUCCESS; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - *seg_id = gnix_my_segid; - return ret; - -} - -int _gnix_xpmem_get_apid(struct gnix_xpmem_handle *xp_hndl, - xpmem_segid_t segid, - xpmem_apid_t *peer_apid) -{ - int ret = FI_SUCCESS; - xpmem_apid_t apid; - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - apid = xpmem_get(segid, XPMEM_RDWR, XPMEM_PERMIT_MODE, - (void *)0666); - if (apid == -1L) { - GNIX_WARN(FI_LOG_DOMAIN, "xpmem_get returned %s\n", - strerror(errno)); - ret = -errno; - } else { - *peer_apid = apid; - } - - return ret; -} - -#else - -bool gnix_xpmem_disabled = true; - 
-/******************************************************************************* - * almost stub functions when xpmem configuration is disabled - ******************************************************************************/ - -int _gnix_xpmem_handle_create(struct gnix_fid_domain *dom, - struct gnix_xpmem_handle **handle) -{ - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - return FI_SUCCESS; -} - -int _gnix_xpmem_handle_destroy(struct gnix_xpmem_handle *hndl) -{ - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - return FI_SUCCESS; -} - - -int _gnix_xpmem_access_hndl_get(struct gnix_xpmem_handle *xp_hndl, - xpmem_apid_t peer_apid, - uint64_t remote_vaddr, - size_t len, - struct gnix_xpmem_access_handle **access_hndl) -{ - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - return FI_SUCCESS; -} - -int _gnix_xpmem_access_hndl_put(struct gnix_xpmem_access_handle *access_hndl) -{ - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - return FI_SUCCESS; -} - -int _gnix_xpmem_accessible(struct gnix_fid_ep *ep, - struct gnix_address addr, - bool *accessible) -{ - - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - - if (accessible == NULL) - return -FI_EINVAL; - - *accessible = false; - - return FI_SUCCESS; -} - -int _gnix_xpmem_copy(struct gnix_xpmem_access_handle *access_hndl, - void *dst_addr, - void *remote_start_addr, - size_t len) -{ - return -FI_ENOSYS; -} - -int _gnix_xpmem_get_my_segid(struct gnix_xpmem_handle *xp_hndl, - xpmem_segid_t *seg_id) -{ - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - return FI_SUCCESS; -} - -int _gnix_xpmem_get_apid(struct gnix_xpmem_handle *xp_hndl, - xpmem_segid_t segid, - xpmem_apid_t *peer_apid) -{ - GNIX_TRACE(FI_LOG_EP_CTRL, "\n"); - return FI_SUCCESS; -} - -#endif /* HAVE_XPMEM */ diff --git a/prov/gni/test/allocator.c b/prov/gni/test/allocator.c deleted file mode 100644 index 7776bb068ec..00000000000 --- a/prov/gni/test/allocator.c +++ /dev/null @@ -1,683 +0,0 @@ -/* - * Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. 
All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include -#include - -#include "gnix.h" -#include "gnix_mbox_allocator.h" - -#include -#include "gnix_rdma_headers.h" -#include "common.h" - -#define ALLOCD_WITH_NIC 0 - -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fid_fabric *fab; -static struct fid_domain *dom; -static struct fid_ep *ep; -static struct fi_info *hints; -static struct fi_info *fi; -static struct gnix_fid_ep *ep_priv; -static struct gnix_mbox_alloc_handle *allocator; - -static void __allocator_setup(uint32_t version, int mr_mode) -{ - int ret = 0; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->cq_data_size = 4; - hints->mode = mode_bits; - hints->domain_attr->mr_mode = mr_mode; - - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(version, NULL, 0, 0, hints, &fi); - cr_assert_eq(ret, FI_SUCCESS, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert_eq(ret, FI_SUCCESS, "fi_fabric"); - - ret = fi_domain(fab, fi, &dom, NULL); - cr_assert_eq(ret, FI_SUCCESS, "fi_domain"); - - ret = fi_endpoint(dom, fi, &ep, NULL); - cr_assert_eq(ret, FI_SUCCESS, "fi_endpoint"); - - ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid); -} - -static void allocator_setup_basic(void) -{ - __allocator_setup(fi_version(), GNIX_MR_BASIC); -} - -static void allocator_setup_scalable(void) -{ - __allocator_setup(fi_version(), GNIX_MR_SCALABLE); -} - -void allocator_teardown(void) -{ - int ret = 0; - - ret = fi_close(&ep->fid); - cr_assert_eq(ret, FI_SUCCESS, "failure in closing ep."); - ret = fi_close(&dom->fid); - cr_assert_eq(ret, FI_SUCCESS, "failure in closing domain."); - ret = fi_close(&fab->fid); - cr_assert_eq(ret, FI_SUCCESS, "failure in closing fabric."); - fi_freeinfo(fi); - fi_freeinfo(hints); -} - -/* - * Count how many slabs are present in an allocator. 
- */ -static size_t count_slabs(struct gnix_mbox_alloc_handle *handle) -{ - size_t count = 0; - - for (struct slist_entry *entry = handle->slab_list.head; entry; - entry = entry->next) { - count++; - } - - return count; -} - -/* - * Absolute value function that returns a ptrdiff_t. - */ -static ptrdiff_t abs_value(ptrdiff_t x) -{ - return x * ((x > 0) - (x < 0)); -} - -#ifndef __aarch64__ -/* - * Open /proc/self/maps and count the number of times the hugetlbfs - * string is present. Return value is the count; - * - * TODO: this approach doesn't work on Cray ARM systems. Large - * page regions don't show being backed by files in - * /var/lib/hugetlbfs. Need to fix with something better. - */ -static int verify_hugepages(void) -{ - int ret = 0; - FILE *fd; - char *line; - size_t size = 1024; - - fd = fopen("/proc/self/maps", "r"); - if (!fd) { - fprintf(stderr, "error opening /proc/self/maps.\n"); - return ret; - } - - line = malloc(size); - if (!line) { - fprintf(stderr, "error mallocing space for line.\n"); - return ret; - } - - while (getline(&line, &size, fd) != -1) { - if (strstr(line, "hugetlbfs")) { - ret++; - } - } - - free(line); - fclose(fd); - - return ret; -} -#endif - -/* - * Open an allocator with the given parameters and immediately close it. Verify - * that everything returned a successful error code. Note that for large - * page sizes over ~64 MB, it can be iffy whether or not large pages can - * be synthesized if the linux page cache has become highly fragmented, so - * we have a may fail parameter that checks to see if the error return is - * -FI_ENOMEM, in which case don't treat as fatal. 
- */ -static void open_close_allocator(enum gnix_page_size page_size, - size_t mbox_size, - size_t mpmmap, bool may_fail) -{ - int ret; - - ret = _gnix_mbox_allocator_create(ep_priv->nic, NULL, page_size, - mbox_size, mpmmap, &allocator); - if ((ret == -FI_ENOMEM) && (may_fail == true)) { - fprintf(stderr, "Allocation of page size %d MB failed -" - "skipping\n", page_size); - return; - } - cr_assert_eq(ret, FI_SUCCESS, "_gnix_mbox_allocator_create failed5."); -#ifndef __aarch64__ - cr_expect_eq(verify_hugepages(), 2 + ALLOCD_WITH_NIC, - "memory not found in /proc/self/maps."); -#endif - - ret = _gnix_mbox_allocator_destroy(allocator); - cr_assert_eq(ret, FI_SUCCESS, "_gnix_mbox_allocator_destroy failed."); -#ifndef __aarch64__ - cr_expect_eq(verify_hugepages(), 1 + ALLOCD_WITH_NIC, - "memory not released in /proc/self/maps."); -#endif -} - - -TestSuite(mbox_creation_basic, - .init = allocator_setup_basic, - .fini = allocator_teardown); - -TestSuite(mbox_creation_scalable, - .init = allocator_setup_scalable, - .fini = allocator_teardown); - -static inline void __alloc_single_page(void) -{ - /* - * Test creation of all predefined page sizes. - */ - open_close_allocator(GNIX_PAGE_2MB, 100, 100, false); - open_close_allocator(GNIX_PAGE_4MB, 100, 100, false); - open_close_allocator(GNIX_PAGE_8MB, 100, 100, false); - open_close_allocator(GNIX_PAGE_16MB, 100, 100, false); - open_close_allocator(GNIX_PAGE_32MB, 100, 100, false); - open_close_allocator(GNIX_PAGE_64MB, 100, 100, true); - open_close_allocator(GNIX_PAGE_128MB, 100, 100, true); - open_close_allocator(GNIX_PAGE_256MB, 100, 100, true); - open_close_allocator(GNIX_PAGE_512MB, 100, 100, true); -} - -Test(mbox_creation_basic, alloc_single_page) -{ - __alloc_single_page(); -} - -Test(mbox_creation_scalable, alloc_single_page) -{ - __alloc_single_page(); -} - - -Test(mbox_creation_basic, alloc_three_pages) -{ - /* - * This should allocate a single slab that's 3 pages in size. 
- */ - open_close_allocator(GNIX_PAGE_4MB, 1000, 12000, false); -} - -Test(mbox_creation_scalable, alloc_three_pages) -{ - /* - * This should allocate a single slab that's 3 pages in size. - */ - open_close_allocator(GNIX_PAGE_4MB, 1000, 12000, false); -} - -static inline void __alloc_mbox(void) -{ - int ret; - - struct gnix_mbox *mail_box; - struct slist_entry *entry; - struct gnix_slab *slab; - - char test_string[] = "hello allocator."; - - ret = _gnix_mbox_allocator_create(ep_priv->nic, NULL, GNIX_PAGE_4MB, - 1000, 12000, &allocator); - cr_assert_eq(ret, FI_SUCCESS, "_gnix_mbox_allocator_create failed1."); - - /* - *value is 4 because the provider has internally already opened - * an mbox allocator and 2 rdma slabs at this point. - */ -#ifndef __aarch64__ - cr_expect_eq(verify_hugepages(), 2 + ALLOCD_WITH_NIC, - "memory not found in /proc/self/maps."); -#endif - - ret = _gnix_mbox_alloc(allocator, &mail_box); - cr_expect_eq(ret, FI_SUCCESS, "_gnix_mbox_alloc failed."); - - cr_expect(mail_box); - - entry = allocator->slab_list.head; - cr_assert(entry); - - slab = container_of(entry, struct gnix_slab, list_entry); - - cr_expect_eq(mail_box->slab, slab, - "slab list head and mail box slab pointer are not equal."); - cr_expect_eq(mail_box->memory_handle, &mail_box->slab->memory_handle, - "mail_box memory handle not equal to slab memory handle."); - cr_expect_eq(mail_box->offset, 0, "offset is not 0."); - cr_expect_eq(mail_box->base, mail_box->slab->base, - "mail_box base not equal to slab base."); - - /* - * Write our test strings and make sure they're equal. - */ - memcpy(mail_box->base, test_string, sizeof(test_string)); - cr_expect_str_eq((char *) mail_box->base, test_string); - - /* - * Mailboxes haven't been returned so destroy will return -FI_EBUSY. - */ - ret = _gnix_mbox_allocator_destroy(allocator); - cr_assert_eq(ret, -FI_EBUSY, - "_gnix_mbox_allocator_destroy should have returned -FI_EBUSY."); - - /* - * Free allocated mailboxes so we can destroy cleanly. 
- */ - ret = _gnix_mbox_free(mail_box); - cr_expect_eq(ret, FI_SUCCESS, "_gnix_mbox_free failed."); - - ret = _gnix_mbox_allocator_destroy(allocator); - cr_assert_eq(ret, FI_SUCCESS, "_gnix_mbox_allocator_destroy failed."); - -#ifndef __aarch64__ - cr_expect_eq(verify_hugepages(), 1 + ALLOCD_WITH_NIC, - "memory not released in /proc/self/maps."); -#endif - -} - -Test(mbox_creation_basic, alloc_mbox) -{ - __alloc_mbox(); -} - -Test(mbox_creation_scalable, alloc_mbox) -{ - __alloc_mbox(); -} - -/* - * Page size needs to be one of the predefined enums. 2200 is not a valid page - * size. This actually gets expanded to 2200 * 1024 * 1024. - */ -static inline void __page_size_fail(void) -{ - int ret; - - ret = _gnix_mbox_allocator_create(ep_priv->nic, NULL, 2200, - 1000, 12000, &allocator); - cr_assert_eq(ret, -FI_EINVAL, - "Creating allocator with bogus page size succeeded."); - cr_assert_eq(allocator, NULL); - /* - *value is 3 because the provider has internally already opened - * an mbox allocator and two other slabs at this point. - */ -#ifndef __aarch64__ - cr_expect_eq(verify_hugepages(), 1 + ALLOCD_WITH_NIC, - "Huge page open, but shouldn't be"); -#endif - - ret = _gnix_mbox_allocator_destroy(allocator); - cr_assert_eq(ret, -FI_EINVAL, - "_gnix_mbox_allocator_destroy succeeded on NULL handle."); -} - -Test(mbox_creation_basic, page_size_fail) -{ - __page_size_fail(); -} - -Test(mbox_creation_scalable, page_size_fail) -{ - __page_size_fail(); -} - -static inline void __mbox_size_fail(void) -{ - int ret; - - /* - * mbox_size can't be zero. 
- */ - ret = _gnix_mbox_allocator_create(ep_priv->nic, NULL, GNIX_PAGE_4MB, - 0, 12000, &allocator); - cr_assert_eq(ret, -FI_EINVAL, - "Creating allocator with zero mbox size succeeded."); - - cr_assert_eq(allocator, NULL); -#ifndef __aarch64__ - cr_expect_eq(verify_hugepages(), 1 + ALLOCD_WITH_NIC, - "Huge page open, but shouldn't be"); -#endif - - ret = _gnix_mbox_allocator_destroy(allocator); - cr_assert_eq(ret, -FI_EINVAL, - "_gnix_mbox_allocator_destroy succeeded on NULL handle."); -} - -Test(mbox_creation_basic, mbox_size_fail) -{ - __mbox_size_fail(); -} - -Test(mbox_creation_scalable, mbox_size_fail) -{ - __mbox_size_fail(); -} - -static inline void __mpmmap_size_fail(void) -{ - int ret; - - /* - * Can't have zero mailboxes per mmap. - */ - ret = _gnix_mbox_allocator_create(ep_priv->nic, NULL, GNIX_PAGE_4MB, - 1000, 0, &allocator); - cr_assert_eq(ret, -FI_EINVAL, - "Creating allocator with zero mailboxes per mmap succeeded."); - cr_assert_eq(allocator, NULL); -#ifndef __aarch64__ - cr_expect_eq(verify_hugepages(), 1 + ALLOCD_WITH_NIC, - "Huge page open, but shouldn't be"); -#endif - - ret = _gnix_mbox_allocator_destroy(allocator); - cr_assert_eq(ret, -FI_EINVAL, - "_gnix_mbox_allocator_destroy succeeded on NULL handle."); -} - -Test(mbox_creation_basic, mpmmap_size_fail) -{ - __mpmmap_size_fail(); -} - -Test(mbox_creation_scalable, mpmmap_size_fail) -{ - __mpmmap_size_fail(); -} - -static inline void __null_allocator_fail(void) -{ - int ret; - - /* - * Can't have a NULL allocator. 
- */ - ret = _gnix_mbox_allocator_create(ep_priv->nic, NULL, GNIX_PAGE_4MB, - 1000, 100, NULL); - cr_assert_eq(ret, -FI_EINVAL, - "Creating allocator with null allocator succeeded."); -#ifndef __aarch64__ - cr_expect_eq(verify_hugepages(), 1 + ALLOCD_WITH_NIC, - "Huge page open, but shouldn't be"); -#endif - - ret = _gnix_mbox_allocator_destroy(allocator); - cr_assert_eq(ret, -FI_EINVAL, - "_gnix_mbox_allocator_destroy succeeded on NULL handle."); -} - -Test(mbox_creation_basic, null_allocator_fail) -{ - __null_allocator_fail(); -} - -Test(mbox_creation_scalable, null_allocator_fail) -{ - __null_allocator_fail(); -} - -static inline void __multi_allocation(void) -{ - int ret; - - size_t array_size = 5; - size_t mbox_size = 1000; - - ptrdiff_t expected; - ptrdiff_t actual; - - struct gnix_mbox *mbox_arr[array_size]; - - ret = _gnix_mbox_allocator_create(ep_priv->nic, NULL, GNIX_PAGE_4MB, - mbox_size, array_size, &allocator); - cr_assert_eq(ret, FI_SUCCESS, "_gnix_mbox_allocator_create failed2."); -#ifndef __aarch64__ - cr_expect_eq(verify_hugepages(), 2 + ALLOCD_WITH_NIC, - "memory not found in /proc/self/maps."); -#endif - - /* - * Create an array of mailboxes of size array_size. - */ - for (int i = 0; i < array_size; i++) { - ret = _gnix_mbox_alloc(allocator, &(mbox_arr[i])); - cr_expect_eq(ret, FI_SUCCESS, "_gnix_mbox_alloc failed."); - cr_expect(mbox_arr[i]); - } - - /* - * Compare each mailbox to each other mailbox excluding the diagonal. - * The expected base should be a function of the mbox_size and the - * difference between their positions in the array. We can verify this - * against the offset inside the mailbox object. 
- */ - for (int i = 0; i < array_size; i++) { - for (int j = 0; j < array_size; j++) { - if (i == j) - continue; - - actual = abs_value(mbox_arr[i]->offset - - mbox_arr[j]->offset); - expected = abs_value(i - j) * mbox_size; - - cr_expect_eq(actual, expected, - "Expected offsets and actual base offsets are not equal."); - } - } - - for (int i = 0; i < array_size; i++) { - ret = _gnix_mbox_free(mbox_arr[i]); - cr_expect_eq(ret, FI_SUCCESS, "_gnix_mbox_free failed."); - } - - ret = _gnix_mbox_allocator_destroy(allocator); - cr_assert_eq(ret, FI_SUCCESS, "_gnix_mbox_allocator_destroy failed."); - -#ifndef __aarch64__ - cr_expect_eq(verify_hugepages(), 1 + ALLOCD_WITH_NIC, - "memory not released in /proc/self/maps."); -#endif -} - -Test(mbox_creation_basic, multi_allocation) -{ - __multi_allocation(); -} - -Test(mbox_creation_scalable, multi_allocation) -{ - __multi_allocation(); -} - -static inline void __check_errors(void) -{ - int ret; - - struct gnix_mbox_alloc_handle *allocator; - struct gnix_slab *slab; - struct gnix_mbox *mail_box; - size_t position; - - ret = _gnix_mbox_allocator_create(ep_priv->nic, NULL, GNIX_PAGE_4MB, - 1000, 12000, &allocator); - cr_assert_eq(ret, FI_SUCCESS, "_gnix_mbox_allocator_create failed3"); -#ifndef __aarch64__ - cr_expect_eq(verify_hugepages(), 2 + ALLOCD_WITH_NIC, - "memory not found in /proc/self/maps."); -#endif - - ret = _gnix_mbox_alloc(allocator, &mail_box); - cr_expect_eq(ret, FI_SUCCESS, "_gnix_mbox_alloc failed."); - - cr_expect(mail_box); - - /* Force various error paths */ - slab = mail_box->slab; - mail_box->slab = NULL; - ret = _gnix_mbox_free(mail_box); - cr_expect_eq(ret, -FI_EINVAL, "_gnix_mbox_free did not fail."); - mail_box->slab = slab; - - allocator = mail_box->slab->allocator; - mail_box->slab->allocator = NULL; - ret = _gnix_mbox_free(mail_box); - cr_expect_eq(ret, -FI_EINVAL, "_gnix_mbox_free did not fail."); - mail_box->slab->allocator = allocator; - - position = mail_box->offset / 
mail_box->slab->allocator->mbox_size; - ret = _gnix_test_and_clear_bit(mail_box->slab->used, position); - cr_expect_eq(ret, 1, "bitmap clear failed."); - ret = _gnix_mbox_free(mail_box); - cr_expect_eq(ret, -FI_EINVAL, "_gnix_mbox_free did not fail."); - ret = _gnix_test_and_set_bit(mail_box->slab->used, position); - cr_expect_eq(ret, 0, "bitmap set failed."); - - /* - * Free allocated mailboxes so we can destroy cleanly. - */ - ret = _gnix_mbox_free(mail_box); - cr_expect_eq(ret, FI_SUCCESS, "_gnix_mbox_free failed."); - - ret = _gnix_mbox_allocator_destroy(allocator); - cr_assert_eq(ret, FI_SUCCESS, "_gnix_mbox_allocator_destroy failed."); - -#ifndef __aarch64__ - cr_expect_eq(verify_hugepages(), 1 + ALLOCD_WITH_NIC, - "memory not released in /proc/self/maps."); -#endif -} - -Test(mbox_creation_basic, check_errors) -{ - __check_errors(); -} - -Test(mbox_creation_scalable, check_errors) -{ - __check_errors(); -} - -/* - * Force the creation of two slabs by setting mpmmap to 1 and making a mailbox - * the size of the entire page. - */ -static inline void __two_slabs(void) -{ - int ret; - - /* - * Only have one mail box per slab. - */ - size_t mbox_size = GNIX_PAGE_4MB * 1024 * 1024; - size_t mpmmap = 1; - - struct gnix_mbox *mbox_arr[2]; - - ret = _gnix_mbox_allocator_create(ep_priv->nic, NULL, GNIX_PAGE_4MB, - mbox_size, mpmmap, &allocator); - cr_assert_eq(ret, FI_SUCCESS, "_gnix_mbox_allocator_create failed4."); -#ifndef __aarch64__ - cr_expect_eq(verify_hugepages(), 2 + ALLOCD_WITH_NIC, - "memory not found in /proc/self/maps."); -#endif - - /* - * Should use previously allocated slab - */ - ret = _gnix_mbox_alloc(allocator, &(mbox_arr[0])); - cr_expect_eq(ret, FI_SUCCESS, "_gnix_mbox_alloc failed."); - - /* - * Will need another slab. Allocation will occur. - */ - ret = _gnix_mbox_alloc(allocator, &(mbox_arr[1])); - cr_expect_eq(ret, FI_SUCCESS, "_gnix_mbox_alloc failed."); - - /* - * The bases should be different. The base is a per slab concept. 
- */ - cr_expect_neq(mbox_arr[0]->base, mbox_arr[1]->base, - "Bases are the same."); - - /* - * The linked list should contain two slabs. - */ - cr_expect_eq(2, count_slabs(allocator)); - - ret = _gnix_mbox_free(mbox_arr[0]); - cr_expect_eq(ret, FI_SUCCESS, "_gnix_mbox_free failed."); - - ret = _gnix_mbox_free(mbox_arr[1]); - cr_expect_eq(ret, FI_SUCCESS, "_gnix_mbox_free failed."); - - ret = _gnix_mbox_allocator_destroy(allocator); - cr_assert_eq(ret, FI_SUCCESS, "_gnix_mbox_allocator_destroy failed."); - -#ifndef __aarch64__ - cr_expect_eq(verify_hugepages(), 1 + ALLOCD_WITH_NIC, - "memory not released in /proc/self/maps."); -#endif -} - -Test(mbox_creation_basic, two_slabs) -{ - __two_slabs(); -} - -Test(mbox_creation_scalable, two_slabs) -{ - __two_slabs(); -} diff --git a/prov/gni/test/api.c b/prov/gni/test/api.c deleted file mode 100644 index 5808e069383..00000000000 --- a/prov/gni/test/api.c +++ /dev/null @@ -1,1173 +0,0 @@ -/* - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2020 Triad National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include "gnix_rdma_headers.h" -#include "fi_ext_gni.h" -#include "gnix_util.h" -#include "common.h" - -#if 1 -#define dbg_printf(...) -#else -#define dbg_printf(...) \ - do { \ - printf(__VA_ARGS__); \ - fflush(stdout); \ - } while (0) -#endif - -#define NUMEPS 2 - -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fid_fabric *fab; -static struct fid_domain *dom[NUMEPS]; -static struct fi_gni_ops_domain *gni_domain_ops[NUMEPS]; -static struct fid_ep *ep[NUMEPS]; -static struct fid_av *av[NUMEPS]; -static void *ep_name[NUMEPS]; -static fi_addr_t gni_addr[NUMEPS]; -static struct fid_cq *msg_cq[NUMEPS]; -static struct fi_info *fi[NUMEPS]; -static struct fi_cq_attr cq_attr; -static struct fi_info *hints[NUMEPS]; - -#define BUF_SZ (1<<20) -static char *target, *target_base; -static char *source, *source_base; -static char *uc_target; -static char *uc_source; -static struct fid_mr *rem_mr[NUMEPS], *loc_mr[NUMEPS]; -static uint64_t mr_key[NUMEPS]; - -static struct fid_cntr *send_cntr[NUMEPS], *recv_cntr[NUMEPS]; -static struct fi_cntr_attr cntr_attr = { - .events = FI_CNTR_EVENTS_COMP, - .flags = 0 -}; -static uint64_t sends[NUMEPS] = {0}, recvs[NUMEPS] = {0}, - send_errs[NUMEPS] = {0}, recv_errs[NUMEPS] = {0}; - -static void 
rdm_api_setup_ep(uint32_t version, int mr_mode) -{ - int ret, i, j; - struct fi_av_attr attr; - size_t addrlen = 0; - - /* Get info about fabric services with the provided hints */ - for (i = 0; i < NUMEPS; i++) { - hints[i]->domain_attr->mr_mode = mr_mode; - - ret = fi_getinfo(version, NULL, 0, 0, hints[i], - &fi[i]); - cr_assert(!ret, "fi_getinfo"); - } - - memset(&attr, 0, sizeof(attr)); - attr.type = FI_AV_MAP; - attr.count = NUMEPS; - - cq_attr.format = FI_CQ_FORMAT_TAGGED; - cq_attr.size = 1024; - cq_attr.wait_obj = 0; - - /* 3x BUF_SZ for multi recv testing */ - target_base = malloc(GNIT_ALIGN_LEN(BUF_SZ * 3)); - assert(target_base); - target = GNIT_ALIGN_BUFFER(char *, target_base); - - source_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(source_base); - source = GNIT_ALIGN_BUFFER(char *, source_base); - - uc_target = malloc(BUF_SZ); - assert(uc_target); - - uc_source = malloc(BUF_SZ); - assert(uc_source); - - ret = fi_fabric(fi[0]->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - for (i = 0; i < NUMEPS; i++) { - ret = fi_domain(fab, fi[i], dom + i, NULL); - cr_assert(!ret, "fi_domain"); - - ret = fi_open_ops(&dom[i]->fid, FI_GNI_DOMAIN_OPS_1, - 0, (void **) (gni_domain_ops + i), NULL); - - ret = fi_av_open(dom[i], &attr, av + i, NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_endpoint(dom[i], fi[i], ep + i, NULL); - cr_assert(!ret, "fi_endpoint"); - - ret = fi_cq_open(dom[i], &cq_attr, msg_cq + i, 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_ep_bind(ep[i], &msg_cq[i]->fid, FI_SEND | FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_getname(&ep[i]->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - ep_name[i] = malloc(addrlen); - cr_assert(ep_name[i] != NULL); - - ret = fi_getname(&ep[i]->fid, ep_name[i], &addrlen); - cr_assert(ret == FI_SUCCESS); - } - - for (i = 0; i < NUMEPS; i++) { - /* Insert all gni addresses into each av */ - for (j = 0; j < NUMEPS; j++) { - ret = fi_av_insert(av[i], ep_name[j], 1, &gni_addr[j], - 0, 
NULL); - cr_assert(ret == 1); - } - - ret = fi_ep_bind(ep[i], &av[i]->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[i], &cntr_attr, send_cntr + i, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[i], &send_cntr[i]->fid, FI_SEND); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[i], &cntr_attr, recv_cntr + i, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[i], &recv_cntr[i]->fid, FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_enable(ep[i]); - cr_assert(!ret, "fi_ep_enable"); - - ret = fi_enable(ep[i]); - cr_assert_eq(ret, -FI_EOPBADSTATE); - - - } - - for (i = 0; i < NUMEPS; i++) { - int target_requested_key = - USING_SCALABLE(fi[i]) ? (i * 2) : 0; - int source_requested_key = - USING_SCALABLE(fi[i]) ? (i * 2) + 1 : 0; - - ret = fi_mr_reg(dom[i], - target, - 3 * BUF_SZ, - FI_REMOTE_WRITE, - 0, - target_requested_key, - 0, - rem_mr + i, - &target); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom[i], - source, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - source_requested_key, - 0, - loc_mr + i, - &source); - cr_assert_eq(ret, 0); - - if (USING_SCALABLE(fi[i])) { - MR_ENABLE(rem_mr[i], target, 3 * BUF_SZ); - MR_ENABLE(loc_mr[i], source, BUF_SZ); - } - - mr_key[i] = fi_mr_key(rem_mr[i]); - } -} - -void __rdm_api_setup(uint32_t version, int mr_mode) -{ - int i; - - SKIP_IF_SCALABLE_LT_1_5(version, mr_mode); - - for (i = 0; i < NUMEPS; i++) { - hints[i] = fi_allocinfo(); - cr_assert(hints[i], "fi_allocinfo"); - - hints[i]->domain_attr->cq_data_size = NUMEPS * 2; - hints[i]->domain_attr->data_progress = FI_PROGRESS_AUTO; - hints[i]->mode = mode_bits; - hints[i]->fabric_attr->prov_name = strdup("gni"); - } -} - -static void rdm_api_setup_basic(void) -{ - __rdm_api_setup(fi_version(), GNIX_MR_BASIC); -} - -static void rdm_api_setup_scalable(void) -{ - __rdm_api_setup(fi_version(), GNIX_MR_SCALABLE); -} - -void api_setup(void) -{ -} - -void api_teardown(void) -{ -} - -static void rdm_api_teardown_common(bool 
unreg) -{ - int ret = 0, i = 0; - - for (; i < NUMEPS; i++) { - fi_close(&recv_cntr[i]->fid); - fi_close(&send_cntr[i]->fid); - - if (unreg) { - fi_close(&loc_mr[i]->fid); - fi_close(&rem_mr[i]->fid); - } - - ret = fi_close(&ep[i]->fid); - cr_assert(!ret, "failure in closing ep."); - - ret = fi_close(&msg_cq[i]->fid); - cr_assert(!ret, "failure in send cq."); - - ret = fi_close(&av[i]->fid); - cr_assert(!ret, "failure in closing av."); - - ret = fi_close(&dom[i]->fid); - cr_assert(!ret, "failure in closing domain."); - - fi_freeinfo(fi[i]); - free(ep_name[i]); - fi_freeinfo(hints[i]); - } - - free(uc_source); - free(uc_target); - free(target_base); - free(source_base); - - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); -} - -static void rdm_api_teardown(void) -{ - rdm_api_teardown_common(true); -} - -void rdm_api_init_data(char *buf, int len, char seed) -{ - int i; - - for (i = 0; i < len; i++) - buf[i] = seed++; -} - -int rdm_api_check_data(char *buf1, char *buf2, int len) -{ - int i; - - for (i = 0; i < len; i++) { - if (buf1[i] != buf2[i]) { - printf("data mismatch, elem: %d, exp: %hhx, act: %hhx\n" - , i, buf1[i], buf2[i]); - return 0; - } - } - - return 1; -} - -void rdm_api_check_cqe(struct fi_cq_tagged_entry *cqe, void *ctx, - uint64_t flags, void *addr, size_t len, - uint64_t data, struct fid_ep *fid_ep) -{ - struct gnix_fid_ep *gnix_ep = get_gnix_ep(fid_ep); - - cr_assert(cqe->op_context == ctx, "CQE Context mismatch"); - cr_assert(cqe->flags == flags, "CQE flags mismatch"); - - if (flags & FI_RECV) { - cr_assert(cqe->len == len, "CQE length mismatch"); - cr_assert(cqe->buf == addr, "CQE address mismatch"); - - /* TODO: Remove GNIX_ALLOW_FI_REMOTE_CQ_DATA and only check - * flags for FI_RMA_EVENT */ - if (GNIX_ALLOW_FI_REMOTE_CQ_DATA(flags, gnix_ep->caps)) - cr_assert(cqe->data == data, "CQE data mismatch"); - } else { - cr_assert(cqe->len == 0, "Invalid CQE length"); - cr_assert(cqe->buf == 0, "Invalid CQE address"); - 
cr_assert(cqe->data == 0, "Invalid CQE data"); - } - - cr_assert(cqe->tag == 0, "Invalid CQE tag"); -} - -void rdm_api_check_cntrs(uint64_t s[], uint64_t r[], uint64_t s_e[], - uint64_t r_e[]) -{ - int i = 0; - - for (; i < NUMEPS; i++) { - sends[i] += s[i]; - recvs[i] += r[i]; - send_errs[i] += s_e[i]; - recv_errs[i] += r_e[i]; - - cr_assert(fi_cntr_read(send_cntr[i]) == sends[i], - "Bad send count"); - cr_assert(fi_cntr_read(recv_cntr[i]) == recvs[i], - "Bad recv count"); - cr_assert(fi_cntr_readerr(send_cntr[i]) == send_errs[i], - "Bad send err count"); - cr_assert(fi_cntr_readerr(recv_cntr[i]) == recv_errs[i], - "Bad recv err count"); - } -} - -/******************************************************************************* - * Test MSG functions - ******************************************************************************/ - -#define MSG_SEND_ALLOWED(caps) \ - ((caps & FI_MSG) && ((caps & FI_SEND) || !(caps & FI_RECV))) -#define MSG_RECV_ALLOWED(caps) \ - ((caps & FI_MSG) && ((caps & FI_RECV) || !(caps & FI_SEND))) -#define TAG_SEND_ALLOWED(caps) \ - ((caps & FI_TAGGED) && ((caps & FI_SEND) || !(caps & FI_RECV))) -#define TAG_RECV_ALLOWED(caps) \ - ((caps & FI_TAGGED) && ((caps & FI_RECV) || !(caps & FI_SEND))) -#define WRITE_ALLOWED(caps, rcaps) \ - ((caps & FI_RMA) && \ - ((caps & FI_WRITE) || !(caps & FI_READ)) && \ - ((rcaps & FI_RMA) || (rcaps & FI_REMOTE_WRITE)) \ - ) -#define READ_ALLOWED(caps, rcaps) \ - ((caps & FI_RMA) && \ - ((caps & FI_READ) || !(caps & FI_WRITE)) && \ - (((rcaps & FI_RMA) && \ - !(rcaps & (FI_READ | FI_WRITE | FI_REMOTE_WRITE))) || \ - (rcaps & FI_REMOTE_READ) \ - ) \ - ) -static int write_allowed(uint64_t rma_amo, uint64_t caps, uint64_t rcaps) -{ - dbg_printf("write %s caps:%s, rcaps:%s\n", - fi_tostr(&rma_amo, FI_TYPE_CAPS), - fi_tostr(&caps, FI_TYPE_CAPS), - fi_tostr(&rcaps, FI_TYPE_CAPS)); - if ((caps & rma_amo) && - ((caps & FI_WRITE) || !(caps & FI_READ))) { - if ((rcaps & rma_amo) && - ((rcaps & FI_REMOTE_WRITE) || - 
(!(rcaps & (FI_READ | FI_WRITE | FI_REMOTE_READ))) - ) - ) { - return 1; - } - } - return 0; -} - -static int read_allowed(uint64_t rma_amo, uint64_t caps, uint64_t rcaps) -{ - dbg_printf("read %s caps:%s, rcaps:%s\n", - fi_tostr(&rma_amo, FI_TYPE_CAPS), - fi_tostr(&caps, FI_TYPE_CAPS), - fi_tostr(&rcaps, FI_TYPE_CAPS)); - if ((caps & rma_amo) && - ((caps & FI_READ) || !(caps & FI_WRITE))) { - if ((rcaps & rma_amo) && - ((rcaps & FI_REMOTE_READ) || - (!(rcaps & (FI_READ | FI_WRITE | FI_REMOTE_WRITE))) - ) - ) { - return 1; - } - } - return 0; -} - -TestSuite(rdm_api_basic, - .init = rdm_api_setup_basic, - .fini = rdm_api_teardown); - -TestSuite(rdm_api_scalable, - .init = rdm_api_setup_scalable, - .fini = rdm_api_teardown); - -/* - * ssize_t fi_send(struct fid_ep *ep, void *buf, size_t len, - * void *desc, fi_addr_t dest_addr, void *context); - * - * ssize_t fi_recv(struct fid_ep *ep, void * buf, size_t len, - * void *desc, fi_addr_t src_addr, void *context); - */ -void api_send_recv(int len) -{ - ssize_t sz; - uint64_t caps = fi[0]->caps; - - rdm_api_init_data(source, len, 0xab); - rdm_api_init_data(target, len, 0); - - sz = fi_send(ep[0], source, len, loc_mr[0], gni_addr[1], target); - if (MSG_SEND_ALLOWED(caps)) { - cr_assert(sz == 0, "fi_send failed caps:0x%lx err:%ld", - caps, sz); - } else { - cr_assert(sz < 0, "fi_send should fail caps:0x%lx err:%ld", - caps, sz); - } - - sz = fi_recv(ep[1], target, len, rem_mr[1], gni_addr[0], source); - if (MSG_RECV_ALLOWED(caps)) { - cr_assert(sz == 0, "fi_recv failed caps:0x%lx err:%ld", - caps, sz); - } else { - cr_assert(sz < 0, "fi_recv should fail caps:0x%lx err:%ld", - caps, sz); - } -} - -Test(api, dom_caps) -{ - int ret; - - hints[0] = fi_allocinfo(); - cr_assert(hints[0], "fi_allocinfo"); - - hints[0]->mode = mode_bits; - hints[0]->fabric_attr->prov_name = strdup("gni"); - hints[0]->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - - /* we only support REMOTE_COMM */ - hints[0]->domain_attr->caps = FI_LOCAL_COMM; - 
ret = fi_getinfo(fi_version(), NULL, 0, 0, hints[0], &fi[0]); - cr_assert_eq(ret, -FI_ENODATA, "fi_getinfo"); - - hints[0]->domain_attr->caps = FI_REMOTE_COMM; - ret = fi_getinfo(fi_version(), NULL, 0, 0, hints[0], &fi[0]); - cr_assert_eq(ret, 0, "fi_getinfo"); - - fi_freeinfo(fi[0]); - - hints[0]->domain_attr->mr_mode = FI_MR_UNSPEC; - ret = fi_getinfo(FI_VERSION(1, 0), NULL, 0, 0, hints[0], &fi[0]); - cr_assert_eq(ret, 0, "fi_getinfo"); - - fi_freeinfo(hints[0]); - fi_freeinfo(fi[0]); -} - -static inline void __msg_no_caps(uint32_t version, int mr_mode) -{ - hints[0]->caps = 0; - hints[1]->caps = 0; - rdm_api_setup_ep(version, mr_mode); - api_send_recv(BUF_SZ); -} - -Test(rdm_api_basic, msg_no_caps) -{ - __msg_no_caps(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, msg_no_caps) -{ - __msg_no_caps(fi_version(), GNIX_MR_SCALABLE); -} - -static inline void __msg_send_rcv(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_MSG; - hints[1]->caps = FI_MSG; - rdm_api_setup_ep(version, mr_mode); - api_send_recv(BUF_SZ); -} - -Test(rdm_api_basic, msg_send_rcv) -{ - __msg_send_rcv(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, msg_send_rcv) -{ - __msg_send_rcv(fi_version(), GNIX_MR_SCALABLE); -} - -static inline void __msg_send_only(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_MSG | FI_SEND; - hints[1]->caps = FI_MSG | FI_SEND; - rdm_api_setup_ep(version, mr_mode); - api_send_recv(BUF_SZ); -} - -Test(rdm_api_basic, msg_send_only) -{ - __msg_send_only(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, msg_send_only) -{ - __msg_send_only(fi_version(), GNIX_MR_SCALABLE); -} - -static inline void __msg_recv_only(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_MSG | FI_RECV; - hints[1]->caps = FI_MSG | FI_RECV; - rdm_api_setup_ep(version, mr_mode); - api_send_recv(BUF_SZ); -} - -Test(rdm_api_basic, msg_recv_only) -{ - __msg_recv_only(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, msg_recv_only) -{ - 
__msg_recv_only(fi_version(), GNIX_MR_SCALABLE); -} - -static inline void __msg_send_rcv_w_tagged(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_TAGGED; - hints[1]->caps = FI_TAGGED; - rdm_api_setup_ep(version, mr_mode); - api_send_recv(BUF_SZ); -} - -Test(rdm_api_basic, msg_send_rcv_w_tagged) -{ - __msg_send_rcv_w_tagged(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, msg_send_rcv_w_tagged) -{ - __msg_send_rcv_w_tagged(fi_version(), GNIX_MR_SCALABLE); -} - -void api_tagged_send_recv(int len) -{ - ssize_t sz; - uint64_t caps = fi[0]->caps; - - rdm_api_init_data(source, len, 0xab); - rdm_api_init_data(target, len, 0); - - sz = fi_tsend(ep[0], source, len, loc_mr, gni_addr[1], len, target); - if (TAG_SEND_ALLOWED(caps)) { - cr_assert(sz == 0, "fi_tsend failed caps:0x%lx err:%ld", - caps, sz); - } else { - cr_assert(sz < 0, "fi_tsend should fail caps:0x%lx err:%ld", - caps, sz); - } - - sz = fi_trecv(ep[1], target, len, rem_mr, gni_addr[0], len, 0, source); - if (TAG_RECV_ALLOWED(caps)) { - cr_assert(sz == 0, "fi_trecv failed caps:0x%lx err:%ld", - caps, sz); - } else { - cr_assert(sz < 0, "fi_trecv should fail caps:0x%lx err:%ld", - caps, sz); - } -} - -static inline void __tsend(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_TAGGED; - hints[1]->caps = FI_TAGGED; - rdm_api_setup_ep(version, mr_mode); - api_tagged_send_recv(BUF_SZ); -} - -Test(rdm_api_basic, tsend) -{ - __tsend(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, tsend) -{ - __tsend(fi_version(), GNIX_MR_SCALABLE); -} - -static inline void __tsend_only(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_TAGGED | FI_SEND; - hints[1]->caps = FI_TAGGED | FI_SEND; - rdm_api_setup_ep(version, mr_mode); - api_tagged_send_recv(BUF_SZ); -} - -Test(rdm_api_basic, tsend_only) -{ - __tsend_only(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, tsend_only) -{ - __tsend_only(fi_version(), GNIX_MR_SCALABLE); -} - -static inline void __trecv_only(uint32_t version, int 
mr_mode) -{ - hints[0]->caps = FI_TAGGED | FI_RECV; - hints[1]->caps = FI_TAGGED | FI_RECV; - rdm_api_setup_ep(version, mr_mode); - api_tagged_send_recv(BUF_SZ); -} - -Test(rdm_api_basic, trecv_only) -{ - __trecv_only(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, trecv_only) -{ - __trecv_only(fi_version(), GNIX_MR_SCALABLE); -} - -static inline void __tsend_rcv_w_msg(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_MSG; - hints[1]->caps = FI_MSG; - rdm_api_setup_ep(version, mr_mode); - api_tagged_send_recv(BUF_SZ); -} - -Test(rdm_api_basic, tsend_rcv_w_msg) -{ - __tsend_rcv_w_msg(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, tsend_rcv_w_msg) -{ - __tsend_rcv_w_msg(fi_version(), GNIX_MR_SCALABLE); -} - -#define READ_CTX 0x4e3dda1aULL -void api_write_read(int len) -{ - int ret; - struct fi_cq_tagged_entry cqe; - struct fi_cq_err_entry err_cqe = {0}; - - rdm_api_init_data(source, len, 0xab); - rdm_api_init_data(target, len, 0); - - fi_write(ep[0], source, len, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi[0], target, target), mr_key[1], - target); - - while ((ret = fi_cq_read(msg_cq[0], &cqe, 1)) == -FI_EAGAIN) - pthread_yield(); - - if (ret == -FI_EAVAIL) { - fi_cq_readerr(msg_cq[0], &err_cqe, 0); - dbg_printf("fi_cq_readerr err:%d\n", err_cqe.err); - } - - if (write_allowed(FI_RMA, fi[0]->caps, fi[1]->caps)) { - cr_assert(ret == 1, - "fi_write failed caps:0x%lx ret:%d", - fi[0]->caps, ret); - } else { - cr_assert(err_cqe.err == FI_EOPNOTSUPP, - "fi_write should fail caps:0x%lx err:%d", - fi[0]->caps, err_cqe.err); - } - - fi_read(ep[0], source, len, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi[0], target, target), mr_key[1], - (void *)READ_CTX); - - while ((ret = fi_cq_read(msg_cq[0], &cqe, 1)) == -FI_EAGAIN) - pthread_yield(); - - if (ret == -FI_EAVAIL) { - fi_cq_readerr(msg_cq[0], &err_cqe, 0); - dbg_printf("fi_cq_readerr err:%d\n", err_cqe.err); - } - - if (read_allowed(FI_RMA, fi[0]->caps, fi[1]->caps)) { - cr_assert(ret == 1, - "fi_read 
failed caps:0x%lx rcaps:0x%lx", - fi[0]->caps, fi[1]->caps); - } else { - cr_assert(err_cqe.err == FI_EOPNOTSUPP, - "fi_read should fail caps:0x%lx rcaps:0x%lx", - fi[0]->caps, fi[1]->caps); - } -} - -static inline void __rma_only(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_RMA; - hints[1]->caps = FI_RMA; - rdm_api_setup_ep(version, mr_mode); - api_write_read(BUF_SZ); -} - -Test(rdm_api_basic, rma_only) -{ - __rma_only(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, rma_only) -{ - __rma_only(fi_version(), GNIX_MR_SCALABLE); -} - -static inline void __rma_write_only(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_RMA | FI_WRITE; - hints[1]->caps = FI_RMA | FI_REMOTE_WRITE; - rdm_api_setup_ep(version, mr_mode); - api_write_read(BUF_SZ); -} - -Test(rdm_api_basic, rma_write_only, .disabled = true) -{ - __rma_write_only(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, rma_write_only, .disabled = true) -{ - __rma_write_only(fi_version(), GNIX_MR_SCALABLE); -} - -static inline void __rma_write_no_remote(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_RMA | FI_WRITE; - hints[1]->caps = FI_RMA | FI_WRITE; - rdm_api_setup_ep(version, mr_mode); - api_write_read(BUF_SZ); -} - -Test(rdm_api_basic, rma_write_no_remote) -{ - __rma_write_no_remote(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, rma_write_no_remote) -{ - __rma_write_no_remote(fi_version(), GNIX_MR_SCALABLE); -} - -static inline void __rma_read_only(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_RMA | FI_READ; - hints[1]->caps = FI_RMA | FI_REMOTE_READ; - rdm_api_setup_ep(version, mr_mode); - api_write_read(BUF_SZ); -} - -Test(rdm_api_basic, rma_read_only, .disabled = true) -{ - __rma_read_only(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, rma_read_only) -{ - __rma_read_only(fi_version(), GNIX_MR_SCALABLE); -} - -static inline void __rma_read_no_remote(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_RMA | FI_READ; - 
hints[1]->caps = FI_RMA | FI_READ; - rdm_api_setup_ep(version, mr_mode); - api_write_read(BUF_SZ); -} - -Test(rdm_api_basic, rma_read_no_remote) -{ - __rma_read_no_remote(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, rma_read_no_remote) -{ - __rma_read_no_remote(fi_version(), GNIX_MR_SCALABLE); -} - -static inline void __rma_write_read_w_msg(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_MSG; - hints[1]->caps = FI_MSG; - rdm_api_setup_ep(version, mr_mode); - api_write_read(BUF_SZ); -} - -Test(rdm_api_basic, rma_write_read_w_msg) -{ - __rma_write_read_w_msg(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, rma_write_read_w_msg) -{ - __rma_write_read_w_msg(fi_version(), GNIX_MR_SCALABLE); -} - -void api_do_read_buf(void) -{ - int ret; - int len = 8*1024; - ssize_t sz; - struct fi_cq_tagged_entry cqe; - struct fi_cq_err_entry err_cqe; - - rdm_api_init_data(source, BUF_SZ, 0); - rdm_api_init_data(target, BUF_SZ, 0xad); - - /* cause a chained transaction */ - sz = fi_read(ep[0], source+6, len, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi[0], target, target+6), mr_key[1], - (void *)READ_CTX); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(msg_cq[0], &cqe, 1)) == -FI_EAGAIN) - pthread_yield(); - - if (ret == -FI_EAVAIL) { - fi_cq_readerr(msg_cq[0], &err_cqe, 0); - dbg_printf("fi_cq_readerr err:%d\n", err_cqe.err); - } - - if (read_allowed(FI_RMA, fi[0]->caps, fi[1]->caps)) { - cr_assert(ret == 1, - "fi_read failed caps:0x%lx rcaps:0x%lx", - fi[0]->caps, fi[1]->caps); - } else { - cr_assert(err_cqe.err == FI_EOPNOTSUPP, - "fi_read should fail caps:0x%lx rcaps:0x%lx", - fi[0]->caps, fi[1]->caps); - } -} - -static inline void __read_chained(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_RMA; - hints[1]->caps = FI_RMA; - rdm_api_setup_ep(version, mr_mode); - api_do_read_buf(); -} - -Test(rdm_api_basic, read_chained) -{ - __read_chained(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, read_chained) -{ - 
__read_chained(fi_version(), GNIX_MR_SCALABLE); -} - -static inline void __read_chained_remote(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_RMA | FI_READ; - hints[1]->caps = FI_RMA | FI_REMOTE_READ; - rdm_api_setup_ep(version, mr_mode); - api_do_read_buf(); -} - -Test(rdm_api_basic, read_chained_remote, .disabled = true) -{ - __read_chained_remote(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, read_chained_remote, .disabled = true) -{ - __read_chained_remote(fi_version(), GNIX_MR_SCALABLE); -} - -static inline void __read_chained_w_write(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_RMA | FI_WRITE; - hints[1]->caps = FI_RMA | FI_REMOTE_READ; - rdm_api_setup_ep(version, mr_mode); - api_do_read_buf(); -} - -Test(rdm_api_basic, read_chained_w_write, .disabled = true) -{ - __read_chained_w_write(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, read_chained_w_write) -{ - __read_chained_w_write(fi_version(), GNIX_MR_SCALABLE); -} - -static inline void __read_chained_no_remote(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_RMA | FI_READ; - hints[1]->caps = FI_RMA | FI_READ; - rdm_api_setup_ep(version, mr_mode); - api_do_read_buf(); -} - -Test(rdm_api_basic, read_chained_no_remote) -{ - __read_chained_no_remote(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, read_chained_no_remote) -{ - __read_chained_no_remote(fi_version(), GNIX_MR_SCALABLE); -} - -#define SOURCE_DATA 0xBBBB0000CCCCULL -#define TARGET_DATA 0xAAAA0000DDDDULL -#define FETCH_SOURCE_DATA 0xACEDACEDULL - -void do_atomic_write_fetch(void) -{ - int ret; - ssize_t sz; - uint64_t operand; - struct fi_cq_tagged_entry cqe; - struct fi_cq_err_entry err_cqe; - - /* u64 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi[0], target, target), mr_key[1], - FI_UINT64, FI_ATOMIC_WRITE, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(msg_cq[0], 
&cqe, 1)) == -FI_EAGAIN) - pthread_yield(); - - if (ret == -FI_EAVAIL) { - fi_cq_readerr(msg_cq[0], &err_cqe, 0); - dbg_printf("fi_cq_readerr err:%d\n", err_cqe.err); - } - - if (write_allowed(FI_ATOMIC, fi[0]->caps, fi[1]->caps)) { - cr_assert(ret == 1, - "fi_atomic failed caps:0x%lx rcaps:0x%lx", - fi[0]->caps, fi[1]->caps); - } else { - cr_assert(err_cqe.err == FI_EOPNOTSUPP, - "fi_atomic should fail caps:0x%lx rcaps:0x%lx", - fi[0]->caps, fi[1]->caps); - } - - /* u64 */ - operand = SOURCE_DATA; - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi[0], target, target), - mr_key[1], FI_UINT64, FI_ATOMIC_READ, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(msg_cq[0], &cqe, 1)) == -FI_EAGAIN) - pthread_yield(); - - if (ret == -FI_EAVAIL) { - fi_cq_readerr(msg_cq[0], &err_cqe, 0); - dbg_printf("fi_cq_readerr err:%d\n", err_cqe.err); - } - - if (read_allowed(FI_ATOMIC, fi[0]->caps, fi[1]->caps)) { - cr_assert(ret == 1, - "fi_fetch_atomic failed caps:0x%lx rcaps:0x%lx", - fi[0]->caps, fi[1]->caps); - } else { - cr_assert(err_cqe.err == FI_EOPNOTSUPP, - "fi_fetch_atomic should fail caps:0x%lx rcaps:0x%lx", - fi[0]->caps, fi[1]->caps); - } -} - -static inline void __amo_write_read(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_ATOMIC; - hints[1]->caps = FI_ATOMIC; - rdm_api_setup_ep(version, mr_mode); - do_atomic_write_fetch(); -} - -Test(rdm_api_basic, amo_write_read) -{ - __amo_write_read(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, amo_write_read) -{ - __amo_write_read(fi_version(), GNIX_MR_SCALABLE); -} - -static inline void __amo_write_only(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_ATOMIC | FI_WRITE; - hints[1]->caps = FI_ATOMIC | FI_REMOTE_WRITE; - rdm_api_setup_ep(version, mr_mode); - do_atomic_write_fetch(); -} - -Test(rdm_api_basic, amo_write_only, .disabled = true) -{ - 
__amo_write_only(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, amo_write_only) -{ - __amo_write_only(fi_version(), GNIX_MR_SCALABLE); -} - -static inline void __amo_write_no_remote(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_ATOMIC | FI_WRITE; - hints[1]->caps = FI_ATOMIC | FI_WRITE; - rdm_api_setup_ep(version, mr_mode); - do_atomic_write_fetch(); -} - -Test(rdm_api_basic, amo_write_no_remote) -{ - __amo_write_no_remote(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, amo_write_no_remote) -{ - __amo_write_no_remote(fi_version(), GNIX_MR_SCALABLE); -} - -static inline void __amo_read_only(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_ATOMIC | FI_READ; - hints[1]->caps = FI_ATOMIC | FI_REMOTE_READ; - rdm_api_setup_ep(version, mr_mode); - do_atomic_write_fetch(); -} - -Test(rdm_api_basic, amo_read_only, .disabled = true) -{ - __amo_read_only(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, amo_read_only) -{ - __amo_read_only(fi_version(), GNIX_MR_SCALABLE); -} - -static inline void __amo_read_no_remote(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_ATOMIC | FI_READ; - hints[1]->caps = FI_ATOMIC | FI_READ; - rdm_api_setup_ep(version, mr_mode); - do_atomic_write_fetch(); -} - -Test(rdm_api_basic, amo_read_no_remote) -{ - __amo_read_no_remote(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, amo_read_no_remote) -{ - __amo_read_no_remote(fi_version(), GNIX_MR_SCALABLE); -} - -static inline void __amo_write_read_w_msg(uint32_t version, int mr_mode) -{ - hints[0]->caps = FI_MSG; - hints[1]->caps = FI_MSG; - rdm_api_setup_ep(version, mr_mode); - api_write_read(BUF_SZ); -} - -Test(rdm_api_basic, amo_write_read_w_msg) -{ - __amo_write_read_w_msg(fi_version(), GNIX_MR_BASIC); -} - -Test(rdm_api_scalable, amo_write_read_w_msg) -{ - __amo_write_read_w_msg(fi_version(), GNIX_MR_SCALABLE); -} - -TestSuite(api, .init = api_setup, .fini = api_teardown, .disabled = false); - -Test(api, getinfo_w_null_hints) -{ - 
int ret; - - ret = fi_getinfo(fi_version(), NULL, 0, 0, NULL, &fi[0]); - cr_assert(ret == FI_SUCCESS, "fi_getinfo returned: %s", - fi_strerror(-ret)); -} diff --git a/prov/gni/test/api_cntr.c b/prov/gni/test/api_cntr.c deleted file mode 100644 index cf3a878cc49..00000000000 --- a/prov/gni/test/api_cntr.c +++ /dev/null @@ -1,686 +0,0 @@ -/* - * Copyright (c) 2016-2017 Cray Inc. All rights reserved. - * Copyright (c) 2020 Triad National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include "gnix_rdma_headers.h" -#include "fi_ext_gni.h" -#include "common.h" - -#if 1 -#define dbg_printf(...) -#else -#define dbg_printf(...) \ - do { \ - printf(__VA_ARGS__); \ - fflush(stdout); \ - } while (0) -#endif - -#define NUMEPS 2 - -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fid_fabric *fab; -static struct fid_domain *dom[NUMEPS]; -static struct fi_gni_ops_domain *gni_domain_ops[NUMEPS]; -static struct fid_ep *ep[NUMEPS]; -static struct fid_av *av[NUMEPS]; -static void *ep_name[NUMEPS]; -static fi_addr_t gni_addr[NUMEPS]; -static struct fi_info *fi[NUMEPS]; -static struct fi_info *hints[NUMEPS]; - -#define BUF_SZ (1<<20) -static char *target, *target_base; -static char *source, *source_base; -static char *uc_target; -static char *uc_source; -static struct fid_mr *rem_mr[NUMEPS], *loc_mr[NUMEPS]; -static uint64_t mr_key[NUMEPS]; -static uint64_t cntr_bind_flags; - -static struct fid_cntr *send_cntr[NUMEPS], *recv_cntr[NUMEPS]; -static struct fid_cntr *write_cntr[NUMEPS], *read_cntr[NUMEPS]; -static struct fi_cntr_attr cntr_attr = { - .events = FI_CNTR_EVENTS_COMP, - .flags = 0 -}; - -#define RMA_WRITE_ALLOWED(flags) \ - (flags & FI_WRITE) -#define RMA_READ_ALLOWED(flags) \ - (flags & FI_READ) -#define MSG_SEND_ALLOWED(flags) \ - (flags & FI_SEND) -#define MSG_RECV_ALLOWED(flags) \ - (flags & FI_RECV) - -void api_cntr_bind(uint64_t flags) -{ - int ret, i; - - for (i = 0; i < NUMEPS; i++) { - if (RMA_WRITE_ALLOWED(flags)) { - ret = fi_ep_bind(ep[i], &write_cntr[i]->fid, FI_WRITE); - cr_assert(!ret, "fi_ep_bind"); - } - - if (RMA_READ_ALLOWED(flags)) { - ret = fi_ep_bind(ep[i], &read_cntr[i]->fid, FI_READ); - cr_assert(!ret, "fi_ep_bind"); - } - - if (MSG_SEND_ALLOWED(flags)) { - ret = 
fi_ep_bind(ep[i], &send_cntr[i]->fid, FI_SEND); - cr_assert(!ret, "fi_ep_bind"); - } - - if (MSG_RECV_ALLOWED(flags)) { - ret = fi_ep_bind(ep[i], &recv_cntr[i]->fid, FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - } - - ret = fi_enable(ep[i]); - cr_assert(!ret, "fi_enable"); - } -} - -static inline void __api_cntr_setup(uint32_t version, int mr_mode) -{ - int ret, i, j; - struct fi_av_attr attr = {0}; - size_t addrlen = 0; - - for (i = 0; i < NUMEPS; i++) { - hints[i] = fi_allocinfo(); - cr_assert(hints[i], "fi_allocinfo"); - - hints[i]->domain_attr->data_progress = FI_PROGRESS_AUTO; - hints[i]->mode = mode_bits; - hints[i]->fabric_attr->prov_name = strdup("gni"); - hints[i]->domain_attr->mr_mode = mr_mode; - } - - /* Get info about fabric services with the provided hints */ - for (i = 0; i < NUMEPS; i++) { - ret = fi_getinfo(version, NULL, 0, 0, hints[i], - &fi[i]); - cr_assert(!ret, "fi_getinfo"); - } - - attr.type = FI_AV_MAP; - attr.count = NUMEPS; - - /* 3x BUF_SZ for multi recv testing */ - target_base = malloc(GNIT_ALIGN_LEN(BUF_SZ * 3)); - assert(target_base); - target = GNIT_ALIGN_BUFFER(char *, target_base); - - source_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(source_base); - source = GNIT_ALIGN_BUFFER(char *, source_base); - - uc_target = malloc(BUF_SZ); - assert(uc_target); - - uc_source = malloc(BUF_SZ); - assert(uc_source); - - ret = fi_fabric(fi[0]->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - for (i = 0; i < NUMEPS; i++) { - ret = fi_domain(fab, fi[i], dom + i, NULL); - cr_assert(!ret, "fi_domain"); - - ret = fi_open_ops(&dom[i]->fid, FI_GNI_DOMAIN_OPS_1, - 0, (void **) (gni_domain_ops + i), NULL); - - ret = fi_av_open(dom[i], &attr, av + i, NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_endpoint(dom[i], fi[i], ep + i, NULL); - cr_assert(!ret, "fi_endpoint"); - - ret = fi_cntr_open(dom[i], &cntr_attr, write_cntr + i, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_cntr_open(dom[i], &cntr_attr, read_cntr + i, 0); - 
cr_assert(!ret, "fi_cntr_open"); - - ret = fi_cntr_open(dom[i], &cntr_attr, send_cntr + i, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_cntr_open(dom[i], &cntr_attr, recv_cntr + i, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_getname(&ep[i]->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - ep_name[i] = malloc(addrlen); - cr_assert(ep_name[i] != NULL); - - ret = fi_getname(&ep[i]->fid, ep_name[i], &addrlen); - cr_assert(ret == FI_SUCCESS); - } - - for (i = 0; i < NUMEPS; i++) { - /* Insert all gni addresses into each av */ - for (j = 0; j < NUMEPS; j++) { - ret = fi_av_insert(av[i], ep_name[j], 1, &gni_addr[j], - 0, NULL); - cr_assert(ret == 1); - } - - ret = fi_ep_bind(ep[i], &av[i]->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - } - - for (i = 0; i < NUMEPS; i++) { - int target_requested_key = - USING_SCALABLE(fi[i]) ? (i * 2) : 0; - int source_requested_key = - USING_SCALABLE(fi[i]) ? (i * 2) + 1 : 0; - - ret = fi_mr_reg(dom[i], - target, - 3 * BUF_SZ, - FI_REMOTE_WRITE, - 0, - target_requested_key, - 0, - rem_mr + i, - &target); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom[i], - source, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - source_requested_key, - 0, - loc_mr + i, - &source); - cr_assert_eq(ret, 0); - - if (USING_SCALABLE(fi[i])) { - MR_ENABLE(rem_mr[i], target, 3 * BUF_SZ); - MR_ENABLE(loc_mr[i], source, BUF_SZ); - } - mr_key[i] = fi_mr_key(rem_mr[i]); - } -} - -static void api_cntr_setup_basic(void) -{ - __api_cntr_setup(fi_version(), GNIX_MR_BASIC); -} - -static void api_cntr_setup_scalable(void) -{ - __api_cntr_setup(fi_version(), GNIX_MR_SCALABLE); -} - -static void api_cntr_teardown_common(bool unreg) -{ - int ret = 0, i = 0; - - for (; i < NUMEPS; i++) { - fi_close(&write_cntr[i]->fid); - fi_close(&read_cntr[i]->fid); - fi_close(&send_cntr[i]->fid); - fi_close(&recv_cntr[i]->fid); - - if (unreg) { - fi_close(&loc_mr[i]->fid); - fi_close(&rem_mr[i]->fid); - } - - ret = fi_close(&ep[i]->fid); - cr_assert(!ret, "failure in closing ep."); - - 
ret = fi_close(&av[i]->fid); - cr_assert(!ret, "failure in closing av."); - - ret = fi_close(&dom[i]->fid); - cr_assert(!ret, "failure in closing domain."); - - fi_freeinfo(fi[i]); - free(ep_name[i]); - fi_freeinfo(hints[i]); - } - - free(uc_source); - free(uc_target); - free(target_base); - free(source_base); - - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); -} - -static void api_cntr_teardown(void) -{ - api_cntr_teardown_common(true); -} - -void api_cntr_init_data(char *buf, int len, char seed) -{ - int i; - - for (i = 0; i < len; i++) - buf[i] = seed++; -} - -int api_cntr_check_data(char *buf1, char *buf2, int len) -{ - int i; - - for (i = 0; i < len; i++) { - if (buf1[i] != buf2[i]) { - printf("data mismatch, elem: %d, exp: %hhx, act: %hhx\n" - , i, buf1[i], buf2[i]); - return 0; - } - } - - return 1; -} - -void api_cntr_write_allowed(ssize_t sz, uint64_t flags, char *fn) -{ - if (RMA_WRITE_ALLOWED(flags)) { - cr_assert(sz == 0, "%s failed flags:0x%lx sz:%ld", - fn, flags, sz); - } else { - cr_assert(sz < 0, "%s should fail flags:0x%lx sz:%ld", - fn, flags, sz); - } -} - -void api_cntr_read_allowed(ssize_t sz, uint64_t flags, char *fn) -{ - if (RMA_READ_ALLOWED(cntr_bind_flags)) { - cr_assert(sz == 0, "%s failed flags:0x%lx sz:%ld", - fn, flags, sz); - } else { - cr_assert(sz < 0, "%s should fail flags:0x%lx sz:%ld", - fn, flags, sz); - } -} - -void api_cntr_send_allowed(ssize_t sz, uint64_t flags, char *fn) -{ - if (MSG_SEND_ALLOWED(flags)) { - cr_assert(sz == 0, "%s failed flags:0x%lx sz:%ld", - fn, flags, sz); - } else { - cr_assert(sz < 0, "%s should fail flags:0x%lx sz:%ld", - fn, flags, sz); - } -} - -void api_cntr_recv_allowed(ssize_t sz, uint64_t flags, char *fn) -{ - if (MSG_RECV_ALLOWED(cntr_bind_flags)) { - cr_assert(sz == 0, "%s failed flags:0x%lx sz:%ld", - fn, flags, sz); - } else { - cr_assert(sz < 0, "%s should fail flags:0x%lx sz:%ld", - fn, flags, sz); - } -} - -TestSuite(api_cntr_basic, - .init = 
api_cntr_setup_basic, - .fini = api_cntr_teardown, - .disabled = false); - -TestSuite(api_cntr_scalable, - .init = api_cntr_setup_scalable, - .fini = api_cntr_teardown, - .disabled = false); - -void api_cntr_send_recv(int len) -{ - ssize_t sz; - - api_cntr_init_data(source, len, 0xab); - api_cntr_init_data(target, len, 0); - - sz = fi_send(ep[0], source, len, loc_mr[0], gni_addr[1], target); - api_cntr_send_allowed(sz, cntr_bind_flags, "fi_send"); - - sz = fi_recv(ep[1], target, len, rem_mr[1], gni_addr[0], source); - api_cntr_recv_allowed(sz, cntr_bind_flags, "fi_recv"); -} - -void api_cntr_write_read(int len) -{ - ssize_t sz; - struct iovec iov; - struct fi_msg_rma rma_msg; - struct fi_rma_iov rma_iov; - - api_cntr_init_data(source, len, 0xab); - api_cntr_init_data(target, len, 0); - - iov.iov_base = NULL; - iov.iov_len = 0; - - sz = fi_write(ep[0], source, len, - loc_mr[0], gni_addr[1], (uint64_t)target, mr_key[1], - target); - api_cntr_write_allowed(sz, cntr_bind_flags, "fi_write"); - - sz = fi_writev(ep[0], &iov, (void **)loc_mr, 1, - gni_addr[1], (uint64_t)target, mr_key[1], - target); - api_cntr_write_allowed(sz, cntr_bind_flags, "fi_writev"); - - iov.iov_len = len; - iov.iov_base = source; - - rma_iov.addr = (uint64_t)target; - rma_iov.len = len; - rma_iov.key = mr_key[1]; - rma_msg.msg_iov = &iov; - rma_msg.desc = (void **)loc_mr; - rma_msg.iov_count = 1; - rma_msg.addr = gni_addr[1]; - rma_msg.rma_iov = &rma_iov; - rma_msg.rma_iov_count = 1; - rma_msg.context = target; - rma_msg.data = (uint64_t)target; - - sz = fi_writemsg(ep[0], &rma_msg, 0); - api_cntr_write_allowed(sz, cntr_bind_flags, "fi_writemsg"); - -#define WRITE_DATA 0x5123da1a145 - sz = fi_writedata(ep[0], source, len, loc_mr[0], WRITE_DATA, - gni_addr[1], (uint64_t)target, mr_key[1], - target); - api_cntr_write_allowed(sz, cntr_bind_flags, "fi_writedata"); - -#define READ_CTX 0x4e3dda1aULL - sz = fi_read(ep[0], source, len, - loc_mr[0], gni_addr[1], (uint64_t)target, mr_key[1], - (void 
*)READ_CTX); - api_cntr_read_allowed(sz, cntr_bind_flags, "fi_read"); - - sz = fi_readv(ep[0], &iov, (void **)loc_mr, 1, - gni_addr[1], (uint64_t)target, mr_key[1], - target); - api_cntr_read_allowed(sz, cntr_bind_flags, "fi_readv"); - - sz = fi_readmsg(ep[0], &rma_msg, 0); - api_cntr_read_allowed(sz, cntr_bind_flags, "fi_readmsg"); - - sz = fi_inject_write(ep[0], source, 64, - gni_addr[1], (uint64_t)target, mr_key[1]); - cr_assert_eq(sz, 0); -} - -static inline void __msg(void) -{ - cntr_bind_flags = FI_SEND | FI_RECV; - api_cntr_bind(cntr_bind_flags); - api_cntr_send_recv(BUF_SZ); -} - -Test(api_cntr_basic, msg) -{ - __msg(); -} - -static inline void __msg_send_only(void) -{ - cntr_bind_flags = FI_SEND; - api_cntr_bind(cntr_bind_flags); - api_cntr_send_recv(BUF_SZ); -} - -Test(api_cntr_basic, msg_send_only) -{ - __msg_send_only(); -} - -static inline void __msg_recv_only(void) -{ - cntr_bind_flags = FI_RECV; - api_cntr_bind(cntr_bind_flags); - api_cntr_send_recv(BUF_SZ); -} - -Test(api_cntr_basic, msg_recv_only) -{ - __msg_recv_only(); -} - -static inline void __msg_no_cntr(void) -{ - cntr_bind_flags = 0; - api_cntr_bind(cntr_bind_flags); - api_cntr_send_recv(BUF_SZ); -} - -Test(api_cntr_basic, msg_no_cntr) -{ - __msg_no_cntr(); -} - - -static inline void __rma(void) -{ - cntr_bind_flags = FI_WRITE | FI_READ; - api_cntr_bind(cntr_bind_flags); - api_cntr_write_read(BUF_SZ); -} - -Test(api_cntr_basic, rma) -{ - __rma(); -} - -static inline void __rma_write_only(void) -{ - cntr_bind_flags = FI_WRITE; - api_cntr_bind(cntr_bind_flags); - api_cntr_write_read(BUF_SZ); -} - -Test(api_cntr_basic, rma_write_only) -{ - __rma_write_only(); -} - -static inline void __rma_read_only(void) -{ - cntr_bind_flags = FI_READ; - api_cntr_bind(cntr_bind_flags); - api_cntr_write_read(BUF_SZ); -} - -Test(api_cntr_basic, rma_read_only) -{ - __rma_read_only(); -} - -static inline void __rma_no_cntr(void) -{ - cntr_bind_flags = 0; - api_cntr_bind(cntr_bind_flags); - 
api_cntr_send_recv(BUF_SZ); -} - -Test(api_cntr_basic, rma_no_cntr) -{ - __rma_no_cntr(); -} - -#define SOURCE_DATA 0xBBBB0000CCCCULL -#define TARGET_DATA 0xAAAA0000DDDDULL -#define FETCH_SOURCE_DATA 0xACEDACEDULL - -void api_cntr_atomic(void) -{ - ssize_t sz; - - /* u64 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, loc_mr[0], - gni_addr[1], (uint64_t)target, mr_key[1], - FI_UINT64, FI_ATOMIC_WRITE, target); - api_cntr_write_allowed(sz, cntr_bind_flags, "fi_atomic"); - - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], source, 1, loc_mr[0], - source, loc_mr[0], - gni_addr[1], (uint64_t)target, mr_key[1], - FI_UINT64, FI_ATOMIC_WRITE, target); - api_cntr_read_allowed(sz, cntr_bind_flags, "fi_atomic"); - - sz = fi_inject_atomic(ep[0], source, 1, - gni_addr[1], (uint64_t)target, mr_key[1], - FI_INT64, FI_MIN); - cr_assert_eq(sz, 0); -} - -Test(api_cntr_basic, atomic) -{ - cntr_bind_flags = FI_WRITE | FI_READ; - api_cntr_bind(cntr_bind_flags); - api_cntr_atomic(); -} - -Test(api_cntr_basic, atomic_send_only) -{ - cntr_bind_flags = FI_WRITE; - api_cntr_bind(cntr_bind_flags); - api_cntr_atomic(); -} - -Test(api_cntr_basic, atomic_recv_only) -{ - cntr_bind_flags = FI_READ; - api_cntr_bind(cntr_bind_flags); - api_cntr_atomic(); -} - -Test(api_cntr_scalable, msg) -{ - cntr_bind_flags = FI_SEND | FI_RECV; - api_cntr_bind(cntr_bind_flags); - api_cntr_send_recv(BUF_SZ); -} - -Test(api_cntr_scalable, msg_send_only) -{ - cntr_bind_flags = FI_SEND; - api_cntr_bind(cntr_bind_flags); - api_cntr_send_recv(BUF_SZ); -} - -Test(api_cntr_scalable, msg_recv_only) -{ - cntr_bind_flags = FI_RECV; - api_cntr_bind(cntr_bind_flags); - api_cntr_send_recv(BUF_SZ); -} - -Test(api_cntr_scalable, msg_no_cntr) -{ - __msg_no_cntr(); -} - -Test(api_cntr_scalable, atomic) -{ - cntr_bind_flags = FI_WRITE | FI_READ; - api_cntr_bind(cntr_bind_flags); - api_cntr_atomic(); -} - 
-Test(api_cntr_scalable, atomic_send_only) -{ - cntr_bind_flags = FI_WRITE; - api_cntr_bind(cntr_bind_flags); - api_cntr_atomic(); -} - -Test(api_cntr_scalable, atomic_recv_only) -{ - cntr_bind_flags = FI_READ; - api_cntr_bind(cntr_bind_flags); - api_cntr_atomic(); -} - -Test(api_cntr_scalable, rma) -{ - __rma(); -} - -Test(api_cntr_scalable, rma_write_only) -{ - __rma_write_only(); -} - -Test(api_cntr_scalable, rma_read_only) -{ - __rma_read_only(); -} - -Test(api_cntr_scalable, rma_no_cntr) -{ - __rma_no_cntr(); -} - diff --git a/prov/gni/test/api_cq.c b/prov/gni/test/api_cq.c deleted file mode 100644 index 26296f6ad72..00000000000 --- a/prov/gni/test/api_cq.c +++ /dev/null @@ -1,614 +0,0 @@ -/* - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2020 Triad National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include "gnix_rdma_headers.h" -#include "fi_ext_gni.h" -#include "common.h" - -#if 1 -#define dbg_printf(...) -#else -#define dbg_printf(...) \ - do { \ - printf(__VA_ARGS__); \ - fflush(stdout); \ - } while (0) -#endif - -#define NUMEPS 2 - -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fid_fabric *fab; -static struct fid_domain *dom[NUMEPS]; -static struct fi_gni_ops_domain *gni_domain_ops[NUMEPS]; -static struct fid_ep *ep[NUMEPS]; -static struct fid_av *av[NUMEPS]; -static void *ep_name[NUMEPS]; -static fi_addr_t gni_addr[NUMEPS]; -static struct fid_cq *msg_cq[NUMEPS]; -static struct fi_info *fi[NUMEPS]; -static struct fi_cq_attr cq_attr; -static struct fi_info *hints[NUMEPS]; - -#define BUF_SZ (1<<20) -static char *target, *target_base; -static char *source, *source_base; -static char *uc_target; -static char *uc_source; -static struct fid_mr *rem_mr[NUMEPS], *loc_mr[NUMEPS]; -static uint64_t mr_key[NUMEPS]; -static uint64_t cq_bind_flags; - -void api_cq_bind(uint64_t flags) -{ - int ret, i; - - for (i = 0; i < NUMEPS; i++) { - ret = fi_ep_bind(ep[i], &msg_cq[i]->fid, flags); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_enable(ep[i]); - cr_assert(!ret, "fi_enable"); - } -} - -static inline void __api_cq_setup(uint32_t version, int mr_mode) -{ - int ret, i, j; - struct fi_av_attr attr; - size_t addrlen = 0; - - for (i = 0; i < NUMEPS; i++) { - hints[i] = fi_allocinfo(); - cr_assert(hints[i], "fi_allocinfo"); - - 
hints[i]->domain_attr->cq_data_size = NUMEPS * 2; - hints[i]->domain_attr->data_progress = FI_PROGRESS_AUTO; - hints[i]->domain_attr->mr_mode = mr_mode; - hints[i]->mode = mode_bits; - hints[i]->fabric_attr->prov_name = strdup("gni"); - } - - /* Get info about fabric services with the provided hints */ - for (i = 0; i < NUMEPS; i++) { - ret = fi_getinfo(version, NULL, 0, 0, hints[i], - &fi[i]); - cr_assert(!ret, "fi_getinfo"); - } - - memset(&attr, 0, sizeof(attr)); - attr.type = FI_AV_MAP; - attr.count = NUMEPS; - - cq_attr.format = FI_CQ_FORMAT_TAGGED; - cq_attr.size = 1024; - cq_attr.wait_obj = 0; - - /* 3x BUF_SZ for multi recv testing */ - target_base = malloc(GNIT_ALIGN_LEN(BUF_SZ * 3)); - assert(target_base); - target = GNIT_ALIGN_BUFFER(char *, target_base); - - source_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(source_base); - source = GNIT_ALIGN_BUFFER(char *, source_base); - - uc_target = malloc(BUF_SZ); - assert(uc_target); - - uc_source = malloc(BUF_SZ); - assert(uc_source); - - ret = fi_fabric(fi[0]->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - for (i = 0; i < NUMEPS; i++) { - ret = fi_domain(fab, fi[i], dom + i, NULL); - cr_assert(!ret, "fi_domain"); - - ret = fi_open_ops(&dom[i]->fid, FI_GNI_DOMAIN_OPS_1, - 0, (void **) (gni_domain_ops + i), NULL); - - ret = fi_av_open(dom[i], &attr, av + i, NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_endpoint(dom[i], fi[i], ep + i, NULL); - cr_assert(!ret, "fi_endpoint"); - - ret = fi_cq_open(dom[i], &cq_attr, msg_cq + i, 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_getname(&ep[i]->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - ep_name[i] = malloc(addrlen); - cr_assert(ep_name[i] != NULL); - - ret = fi_getname(&ep[i]->fid, ep_name[i], &addrlen); - cr_assert(ret == FI_SUCCESS); - } - - for (i = 0; i < NUMEPS; i++) { - /* Insert all gni addresses into each av */ - for (j = 0; j < NUMEPS; j++) { - ret = fi_av_insert(av[i], ep_name[j], 1, &gni_addr[j], - 0, NULL); - cr_assert(ret 
== 1); - } - - ret = fi_ep_bind(ep[i], &av[i]->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - } - - for (i = 0; i < NUMEPS; i++) { - int target_requested_key = - USING_SCALABLE(fi[i]) ? (i * 2) : 0; - int source_requested_key = - USING_SCALABLE(fi[i]) ? (i * 2) + 1 : 0; - - ret = fi_mr_reg(dom[i], - target, - 3 * BUF_SZ, - FI_REMOTE_WRITE, - 0, - target_requested_key, - 0, - rem_mr + i, - &target); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom[i], - source, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - source_requested_key, - 0, - loc_mr + i, - &source); - cr_assert_eq(ret, 0); - - if (USING_SCALABLE(fi[i])) { - MR_ENABLE(rem_mr[i], target, 3 * BUF_SZ); - MR_ENABLE(loc_mr[i], source, BUF_SZ); - } - - mr_key[i] = fi_mr_key(rem_mr[i]); - } -} - -static void api_cq_setup_basic(void) -{ - __api_cq_setup(fi_version(), GNIX_MR_BASIC); -} - -static void api_cq_setup_scalable(void) -{ - __api_cq_setup(fi_version(), GNIX_MR_SCALABLE); -} - -static void api_cq_teardown_common(bool unreg) -{ - int ret = 0, i = 0; - - for (; i < NUMEPS; i++) { - if (unreg) { - fi_close(&loc_mr[i]->fid); - fi_close(&rem_mr[i]->fid); - } - - ret = fi_close(&ep[i]->fid); - cr_assert(!ret, "failure in closing ep."); - - ret = fi_close(&msg_cq[i]->fid); - cr_assert(!ret, "failure in send cq."); - - ret = fi_close(&av[i]->fid); - cr_assert(!ret, "failure in closing av."); - - ret = fi_close(&dom[i]->fid); - cr_assert(!ret, "failure in closing domain."); - - fi_freeinfo(fi[i]); - free(ep_name[i]); - fi_freeinfo(hints[i]); - } - - free(uc_source); - free(uc_target); - free(target_base); - free(source_base); - - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); -} - -static void api_cq_teardown(void) -{ - api_cq_teardown_common(true); -} - -void api_cq_init_data(char *buf, int len, char seed) -{ - int i; - - for (i = 0; i < len; i++) - buf[i] = seed++; -} - -int api_cq_check_data(char *buf1, char *buf2, int len) -{ - int i; - - for (i = 0; i < len; i++) { - if (buf1[i] != buf2[i]) { - 
printf("data mismatch, elem: %d, exp: %hhx, act: %hhx\n" - , i, buf1[i], buf2[i]); - return 0; - } - } - - return 1; -} - -#define MSG_SEND_ALLOWED(flags) \ - (flags & FI_SEND) -#define MSG_RECV_ALLOWED(flags) \ - (flags & FI_RECV) - -void api_cq_send_allowed(ssize_t sz, uint64_t flags, char *fn) -{ - if (MSG_SEND_ALLOWED(flags)) { - cr_assert(sz == 0, "%s failed flags:0x%lx sz:%ld", - fn, flags, sz); - } else { - cr_assert(sz < 0, "%s should fail flags:0x%lx sz:%ld", - fn, flags, sz); - } -} - -void api_cq_recv_allowed(ssize_t sz, uint64_t flags, char *fn) -{ - if (MSG_RECV_ALLOWED(cq_bind_flags)) { - cr_assert(sz == 0, "%s failed flags:0x%lx sz:%ld", - fn, flags, sz); - } else { - cr_assert(sz < 0, "%s should fail flags:0x%lx sz:%ld", - fn, flags, sz); - } -} - -TestSuite(api_cq_basic, - .init = api_cq_setup_basic, - .fini = api_cq_teardown, - .disabled = false); - -TestSuite(api_cq_scalable, - .init = api_cq_setup_scalable, - .fini = api_cq_teardown, - .disabled = false); - - -void api_cq_wait1(struct fid_cq *cq0, uint64_t cq_bind_flags) -{ - int ret; - struct fi_cq_tagged_entry cqe; - - if (!cq_bind_flags) - return; - - while ((ret = fi_cq_read(msg_cq[0], &cqe, 1)) == -FI_EAGAIN); - cr_assert(ret > 0, "ret=%d", ret); -} - -void api_cq_wait2(struct fid_cq *cq0, struct fid_cq *cq1, - uint64_t check_send, uint64_t check_rcv) -{ - int ret; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry s_cqe, d_cqe; - - if (!check_send) - source_done = 1; - - if (!check_rcv) - dest_done = 1; - - do { - ret = fi_cq_read(cq0, &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - - ret = fi_cq_read(cq1, &d_cqe, 1); - if (ret == 1) { - dest_done = 1; - } - } while (!(source_done && dest_done)); -} - -void api_cq_send_recv(int len) -{ - ssize_t sz; - struct iovec iov; - struct fi_msg_rma rma_msg; - struct fi_rma_iov rma_iov; - - iov.iov_base = NULL; - iov.iov_len = 0; - - api_cq_init_data(source, len, 0xab); - api_cq_init_data(target, len, 0); - - sz = fi_send(ep[0], 
source, len, loc_mr[0], gni_addr[1], target); - api_cq_send_allowed(sz, cq_bind_flags, "fi_send"); - - sz = fi_recv(ep[1], target, len, rem_mr[1], gni_addr[0], source); - api_cq_recv_allowed(sz, cq_bind_flags, "fi_recv"); - - /* don't expect a recv cq if we can't send and vice versa */ - api_cq_wait2(msg_cq[0], msg_cq[1], - (cq_bind_flags & FI_SEND) && (cq_bind_flags & FI_RECV), - (cq_bind_flags & FI_SEND) && (cq_bind_flags & FI_RECV)); - - sz = fi_write(ep[0], source, len, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi[0], target, target), mr_key[1], - target); - api_cq_send_allowed(sz, cq_bind_flags, "fi_write"); - - api_cq_wait1(msg_cq[0], cq_bind_flags & FI_SEND); - - sz = fi_writev(ep[0], &iov, (void **)loc_mr, 1, - gni_addr[1], - _REM_ADDR(fi[0], target, target), mr_key[1], - target); - api_cq_send_allowed(sz, cq_bind_flags, "fi_writev"); - - api_cq_wait1(msg_cq[0], cq_bind_flags & FI_SEND); - - iov.iov_len = len; - iov.iov_base = source; - - rma_iov.addr = _REM_ADDR(fi[0], target, target); - rma_iov.len = len; - rma_iov.key = mr_key[1]; - rma_msg.msg_iov = &iov; - rma_msg.desc = (void **)loc_mr; - rma_msg.iov_count = 1; - rma_msg.addr = gni_addr[1]; - rma_msg.rma_iov = &rma_iov; - rma_msg.rma_iov_count = 1; - rma_msg.context = target; - rma_msg.data = (uint64_t)target; - - sz = fi_writemsg(ep[0], &rma_msg, 0); - api_cq_send_allowed(sz, cq_bind_flags, "fi_writemsg"); - - api_cq_wait1(msg_cq[0], cq_bind_flags & FI_SEND); - -#define WRITE_DATA 0x5123da1a145 - sz = fi_writedata(ep[0], source, len, loc_mr[0], WRITE_DATA, - gni_addr[1], - _REM_ADDR(fi[0], target, target), mr_key[1], - target); - api_cq_send_allowed(sz, cq_bind_flags, "fi_writedata"); - -#define READ_CTX 0x4e3dda1aULL - sz = fi_read(ep[0], source, len, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi[0], target, target), mr_key[1], - (void *)READ_CTX); - api_cq_send_allowed(sz, cq_bind_flags, "fi_read"); - - sz = fi_readv(ep[0], &iov, (void **)loc_mr, 1, - gni_addr[1], - _REM_ADDR(fi[0], target, target), 
mr_key[1], - target); - api_cq_send_allowed(sz, cq_bind_flags, "fi_readv"); - - sz = fi_readmsg(ep[0], &rma_msg, 0); - api_cq_send_allowed(sz, cq_bind_flags, "fi_readmsg"); - - sz = fi_inject_write(ep[0], source, 64, - gni_addr[1], - _REM_ADDR(fi[0], target, target), mr_key[1]); - cr_assert_eq(sz, 0); - - api_cq_wait1(msg_cq[0], cq_bind_flags & FI_SEND); -} - -static inline void __msg(void) -{ - cq_bind_flags = FI_SEND | FI_RECV; - api_cq_bind(cq_bind_flags); - api_cq_send_recv(BUF_SZ); -} - -Test(api_cq_basic, msg) -{ - __msg(); -} - -Test(api_cq_scalable, msg) -{ - __msg(); -} - -static inline void __msg_send_only(void) -{ - cq_bind_flags = FI_SEND; - api_cq_bind(cq_bind_flags); - api_cq_send_recv(BUF_SZ); -} - -Test(api_cq_basic, msg_send_only) -{ - __msg_send_only(); -} - -Test(api_cq_scalable, msg_send_only) -{ - __msg_send_only(); -} - -static inline void __msg_recv_only(void) -{ - cq_bind_flags = FI_RECV; - api_cq_bind(cq_bind_flags); - api_cq_send_recv(BUF_SZ); -} - -Test(api_cq_basic, msg_recv_only) -{ - __msg_recv_only(); -} - -Test(api_cq_scalable, msg_recv_only) -{ - __msg_recv_only(); -} - -static inline void __msg_no_cq(void) -{ - cq_bind_flags = 0; - api_cq_bind(cq_bind_flags); - api_cq_send_recv(BUF_SZ); -} - -Test(api_cq_basic, msg_no_cq) -{ - __msg_no_cq(); -} - -Test(api_cq_scalable, msg_no_cq) -{ - __msg_no_cq(); -} - -#define SOURCE_DATA 0xBBBB0000CCCCULL -#define TARGET_DATA 0xAAAA0000DDDDULL -#define FETCH_SOURCE_DATA 0xACEDACEDULL - -void api_cq_atomic(void) -{ - ssize_t sz; - - /* u64 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi[0], target, target), mr_key[1], - FI_UINT64, FI_ATOMIC_WRITE, target); - api_cq_send_allowed(sz, cq_bind_flags, "fi_atomic"); - - sz = fi_inject_atomic(ep[0], source, 1, - gni_addr[1], - _REM_ADDR(fi[0], target, target), mr_key[1], - FI_INT64, FI_MIN); - cr_assert_eq(sz, 0); - - api_cq_wait1(msg_cq[0], 
cq_bind_flags & FI_SEND); -} - -static inline void __atomic(void) -{ - cq_bind_flags = FI_SEND | FI_RECV; - api_cq_bind(cq_bind_flags); - api_cq_atomic(); -} - -Test(api_cq_basic, atomic) -{ - __atomic(); -} - -Test(api_cq_scalable, atomic) -{ - __atomic(); -} - -static inline void __atomic_send_only(void) -{ - cq_bind_flags = FI_SEND; - api_cq_bind(cq_bind_flags); - api_cq_atomic(); -} - -Test(api_cq_basic, atomic_send_only) -{ - __atomic_send_only(); -} - -Test(api_cq_scalable, atomic_send_only) -{ - __atomic_send_only(); -} - -static inline void __atomic_recv_only(void) -{ - cq_bind_flags = FI_RECV; - api_cq_bind(cq_bind_flags); - api_cq_atomic(); -} - -Test(api_cq_basic, atomic_recv_only) -{ - __atomic_recv_only(); -} - -Test(api_cq_scalable, atomic_recv_only) -{ - __atomic_recv_only(); -} diff --git a/prov/gni/test/auth_key.c b/prov/gni/test/auth_key.c deleted file mode 100644 index 566b8158e57..00000000000 --- a/prov/gni/test/auth_key.c +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include - - -#include "gnix.h" - -#include -#include "gnix_rdma_headers.h" -#include "fi_ext_gni.h" - -#include "gnix_auth_key.h" - -static void setup(void) -{ - struct fi_info *info = fi_allocinfo(); - int ret; - - cr_assert(info); - - ret = fi_getinfo(fi_version(), NULL, 0, 0, NULL, &info); - cr_assert(ret == FI_SUCCESS); - - fi_freeinfo(info); -} - -static void teardown(void) -{ -} - -TestSuite(auth_key, .init = setup, .fini = teardown); - - -Test(auth_key, create) -{ - struct gnix_auth_key *ret; - - ret = _gnix_auth_key_create(NULL, 0); - cr_assert(ret != NULL, "failed to create auth key"); -} - -Test(auth_key, failed_insert) -{ - struct gnix_auth_key *ret; - - ret = _gnix_auth_key_create(NULL, 0); - cr_assert(ret != NULL, "failed to create auth_key"); - - ret = _gnix_auth_key_create(NULL, 0); - cr_assert(ret == NULL, "unexpectedly created auth_key"); -} - -void *race_create_func(void *context) -{ - pthread_barrier_t *barrier = (pthread_barrier_t *) context; - struct gnix_auth_key *auth_key; - int ret; - - /* -1 is the single thread return value for the - thread allowed to make modifications to the barrier. - For the version of the pthread header present on our - systems, the value does not have a define. 
*/ - ret = pthread_barrier_wait(barrier); - cr_assert(ret == 0 || ret == -1, "pthread_barrier, " - "ret=%d errno=%d strerror=%s", ret, errno, strerror(errno)); - - auth_key = GNIX_GET_AUTH_KEY(NULL, 0, 0); - cr_assert_neq(auth_key, NULL, "failed to get authorization key"); - - return NULL; -} - -Test(auth_key, race_create) -{ -#define __AUTH_KEY_THREAD_COUNT 47 - int i; - int thread_count = __AUTH_KEY_THREAD_COUNT; - int ret; - pthread_t threads[__AUTH_KEY_THREAD_COUNT]; - pthread_barrier_t barrier; -#undef __AUTH_KEY_THREAD_COUNT - - ret = pthread_barrier_init(&barrier, NULL, thread_count); - cr_assert_eq(ret, 0, "failed to initialize barrier"); - - for (i = 0; i < thread_count; i++) { - ret = pthread_create(&threads[i], NULL, - race_create_func, &barrier); - cr_assert_eq(ret, 0, "failed to create pthread"); - } - - for (i = 0; i < thread_count; i++) { - ret = pthread_join(threads[i], NULL); - cr_assert_eq(ret, 0); - } - - ret = pthread_barrier_destroy(&barrier); - cr_assert_eq(ret, 0); -} - -Test(auth_key, limit_four_vmdh_entries) -{ - int i, ret; - struct gnix_auth_key *auth_key; - struct fi_gni_auth_key _ak; - - for (i = 0; i < 4; i++) { - auth_key = _gnix_auth_key_alloc(); - cr_assert(auth_key); - - auth_key->attr.prov_key_limit = 128; - auth_key->attr.user_key_limit = 128; - auth_key->ptag = i * 16; - auth_key->cookie = i * 32; - auth_key->using_vmdh = 1; - - _ak.type = GNIX_AKT_RAW; - _ak.raw.protection_key = auth_key->cookie; - - ret = _gnix_auth_key_insert((uint8_t *) &_ak, - sizeof(struct fi_gni_auth_key), auth_key); - cr_assert(ret == FI_SUCCESS); - - ret = _gnix_auth_key_enable(auth_key); - cr_assert(ret == FI_SUCCESS); - } - - auth_key = _gnix_auth_key_alloc(); - cr_assert(auth_key); - - auth_key->attr.prov_key_limit = 128; - auth_key->attr.user_key_limit = 128; - auth_key->ptag = i * 16; - auth_key->cookie = i * 32; - auth_key->using_vmdh = 1; - - _ak.type = GNIX_AKT_RAW; - _ak.raw.protection_key = auth_key->cookie; - - ret = 
_gnix_auth_key_insert((uint8_t *) &_ak, - sizeof(struct fi_gni_auth_key), auth_key); - cr_assert(ret == FI_SUCCESS); - - ret = _gnix_auth_key_enable(auth_key); - cr_assert(ret == -FI_ENOSPC, - "ret is not correct, expected=%d actual=%d\n", - -FI_ENOSPC, ret); -} diff --git a/prov/gni/test/av.c b/prov/gni/test/av.c deleted file mode 100644 index 2e0856b1d7a..00000000000 --- a/prov/gni/test/av.c +++ /dev/null @@ -1,984 +0,0 @@ -/* - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2019-2020 Triad National Security, LLC. - * All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include - -#include "ofi.h" -#include "rdma/fi_domain.h" - -#include "gnix.h" - -#include -#include "gnix_rdma_headers.h" -#include "common.h" - -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fid_fabric *fab; -static struct fid_domain *dom; -static struct fi_info *hints; -static struct fi_info *fi; -static struct gnix_ep_name *fake_names; -static struct fid_av *av; -static struct gnix_fid_av *gnix_av; - - -#define SIMPLE_EP_ENTRY(id) \ -{ \ - .gnix_addr = { \ - .device_addr = id, \ - .cdm_id = id+1, \ - }, \ - .name_type = id+2, \ - .cm_nic_cdm_id = id+3, \ - .cookie = id+4, \ -} - -#define MT_ADDR_COUNT 59 -#define SIMPLE_ADDR_COUNT 16 -static struct gnix_ep_name simple_ep_names[MT_ADDR_COUNT] = { - SIMPLE_EP_ENTRY(1), - SIMPLE_EP_ENTRY(2), - SIMPLE_EP_ENTRY(3), - SIMPLE_EP_ENTRY(4), - SIMPLE_EP_ENTRY(5), - SIMPLE_EP_ENTRY(6), - SIMPLE_EP_ENTRY(7), - SIMPLE_EP_ENTRY(8), - SIMPLE_EP_ENTRY(9), - SIMPLE_EP_ENTRY(10), - SIMPLE_EP_ENTRY(11), - SIMPLE_EP_ENTRY(12), - SIMPLE_EP_ENTRY(13), - SIMPLE_EP_ENTRY(14), - SIMPLE_EP_ENTRY(15), - SIMPLE_EP_ENTRY(16), - SIMPLE_EP_ENTRY(17), - SIMPLE_EP_ENTRY(18), - SIMPLE_EP_ENTRY(19), - SIMPLE_EP_ENTRY(20), - SIMPLE_EP_ENTRY(21), - SIMPLE_EP_ENTRY(22), - SIMPLE_EP_ENTRY(23), - SIMPLE_EP_ENTRY(24), - SIMPLE_EP_ENTRY(25), - SIMPLE_EP_ENTRY(26), - SIMPLE_EP_ENTRY(27), - SIMPLE_EP_ENTRY(28), - SIMPLE_EP_ENTRY(29), - SIMPLE_EP_ENTRY(30), - SIMPLE_EP_ENTRY(31), - SIMPLE_EP_ENTRY(32), - SIMPLE_EP_ENTRY(33), - SIMPLE_EP_ENTRY(34), - SIMPLE_EP_ENTRY(35), - SIMPLE_EP_ENTRY(36), - SIMPLE_EP_ENTRY(37), - SIMPLE_EP_ENTRY(38), - SIMPLE_EP_ENTRY(39), 
- SIMPLE_EP_ENTRY(40), - SIMPLE_EP_ENTRY(41), - SIMPLE_EP_ENTRY(42), - SIMPLE_EP_ENTRY(43), - SIMPLE_EP_ENTRY(44), - SIMPLE_EP_ENTRY(45), - SIMPLE_EP_ENTRY(46), - SIMPLE_EP_ENTRY(47), - SIMPLE_EP_ENTRY(48), - SIMPLE_EP_ENTRY(49), - SIMPLE_EP_ENTRY(50), - SIMPLE_EP_ENTRY(51), - SIMPLE_EP_ENTRY(52), - SIMPLE_EP_ENTRY(53), - SIMPLE_EP_ENTRY(54), - SIMPLE_EP_ENTRY(55), - SIMPLE_EP_ENTRY(56), - SIMPLE_EP_ENTRY(57), - SIMPLE_EP_ENTRY(58), - SIMPLE_EP_ENTRY(59), -}; - -static void av_setup(void) -{ - int ret = 0; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->domain_attr->cq_data_size = 4; - hints->mode = mode_bits; - - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(fi_version(), NULL, 0, 0, hints, &fi); - cr_assert_eq(ret, FI_SUCCESS, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert_eq(ret, FI_SUCCESS, "fi_fabric"); - - ret = fi_domain(fab, fi, &dom, NULL); - cr_assert_eq(ret, FI_SUCCESS, "fi_domain"); - -} - -static void av_teardown(void) -{ - int ret = 0; - - ret = fi_close(&dom->fid); - cr_assert_eq(ret, FI_SUCCESS, "failure in closing domain."); - ret = fi_close(&fab->fid); - cr_assert_eq(ret, FI_SUCCESS, "failure in closing fabric."); - fi_freeinfo(fi); - fi_freeinfo(hints); -} - - -static void av_full_map_setup(void) -{ - struct fi_av_attr av_table_attr = { - .type = FI_AV_MAP, - .count = 16, - }; - int ret; - - av_setup(); - - ret = fi_av_open(dom, &av_table_attr, &av, NULL); - cr_assert_eq(ret, FI_SUCCESS, "failed to open av"); - - gnix_av = container_of(av, struct gnix_fid_av, av_fid); -} - -static void av_full_map_teardown(void) -{ - int ret; - - ret = fi_close(&av->fid); - cr_assert_eq(ret, FI_SUCCESS, "failed to close av"); - - av_teardown(); -} - -static void av_full_table_setup(void) -{ - struct fi_av_attr av_table_attr = { - .type = FI_AV_TABLE, - .count = 16, - }; - int ret; - - av_setup(); - - ret = fi_av_open(dom, 
&av_table_attr, &av, NULL); - cr_assert_eq(ret, FI_SUCCESS, "failed to open av"); - - gnix_av = container_of(av, struct gnix_fid_av, av_fid); -} - -static void av_full_table_teardown(void) -{ - int ret; - - ret = fi_close(&av->fid); - cr_assert_eq(ret, FI_SUCCESS, "failed to close av"); - - av_teardown(); -} - -TestSuite(av_bare, .init = av_setup, .fini = av_teardown, .disabled = false); - -TestSuite(av_full_map, .init = av_full_map_setup, - .fini = av_full_map_teardown, .disabled = false); - -TestSuite(av_full_table, .init = av_full_table_setup, - .fini = av_full_table_teardown, .disabled = false); - -static void invalid_addrlen_pointer_test(void) -{ - int ret; - fi_addr_t address = 0xdeadbeef; - void *addr = (void *) 0xb00fbabe; - - /* while the pointers to address and addr aren't valid, they are - * acceptable as stated by the manpage. This will only test for a - * proper return code from fi_av_lookup() - */ - ret = fi_av_lookup(av, address, addr, NULL); - cr_assert_eq(ret, -FI_EINVAL); -} - -Test(av_full_map, invalid_addrlen_pointer) -{ - invalid_addrlen_pointer_test(); -} - -Test(av_full_table, invalid_addrlen_pointer) -{ - invalid_addrlen_pointer_test(); -} - -static void remove_addr_test(void) -{ - int ret; - int i; - fi_addr_t addresses[SIMPLE_ADDR_COUNT]; - fi_addr_t *compare; - - /* insert addresses */ - ret = fi_av_insert(av, (void *) simple_ep_names, SIMPLE_ADDR_COUNT, - addresses, 0, NULL); - cr_assert_eq(ret, SIMPLE_ADDR_COUNT); - - /* check address contents */ - for (i = 0; i < SIMPLE_ADDR_COUNT; i++) { - if (gnix_av->type == FI_AV_MAP) { - compare = (fi_addr_t *) &simple_ep_names[i].gnix_addr; - cr_assert_eq(*compare, addresses[i]); - } else { - cr_assert_eq(i, addresses[i]); - } - } - - /* remove addresses */ - ret = fi_av_remove(av, addresses, SIMPLE_ADDR_COUNT, 0); - cr_assert_eq(ret, FI_SUCCESS); -} - -Test(av_full_map, remove_addr) -{ - remove_addr_test(); -} - -Test(av_full_table, remove_addr) -{ - remove_addr_test(); -} - -static void 
addr_insert_test(void) -{ - int i, ret; - fi_addr_t addresses[SIMPLE_ADDR_COUNT]; - - int err[SIMPLE_ADDR_COUNT] = {0}; - - cr_log_info("check for sync err flag but no context\n"); - ret = fi_av_insert(av, (void *) simple_ep_names, SIMPLE_ADDR_COUNT, - addresses, FI_SYNC_ERR, NULL); - cr_assert_eq(ret, -FI_EINVAL, "%d", ret); - - ret = fi_av_insert(av, (void *) simple_ep_names, SIMPLE_ADDR_COUNT, - addresses, FI_SYNC_ERR, err); - cr_assert_eq(ret, SIMPLE_ADDR_COUNT); - - cr_log_info("check for errors\n"); - for (i = 0; i < SIMPLE_ADDR_COUNT; i++) { - cr_assert_eq(err[i], 0); - } - - ret = fi_av_remove(av, addresses, SIMPLE_ADDR_COUNT, 0); - cr_assert_eq(ret, FI_SUCCESS); -} - -static void addr_insert_null_fi_addr_test(void) -{ - int ret; - fi_addr_t *addresses = NULL; - - ret = fi_av_insert(av, (void *) simple_ep_names, SIMPLE_ADDR_COUNT, - addresses, 0, NULL); - cr_assert_eq(ret, -FI_EINVAL, "%d", ret); - -} - -Test(av_full_map, insert_addr) -{ - addr_insert_test(); - addr_insert_null_fi_addr_test(); -} - -Test(av_full_table, insert_addr) -{ - addr_insert_test(); -} - -static void lookup_invalid_test(void) -{ - int ret; - struct gnix_ep_name addr; - size_t addrlen = sizeof(struct gnix_ep_name); - - /* test null addrlen */ - ret = fi_av_lookup(av, 0xdeadbeef, (void *) 0xdeadbeef, NULL); - cr_assert_eq(ret, -FI_EINVAL); - - /* test null addr */ - ret = fi_av_lookup(av, 0xdeadbeef, NULL, &addrlen); - cr_assert_eq(ret, -FI_EINVAL); - - /* test invalid lookup */ - if (gnix_av->type == FI_AV_TABLE) { - ret = fi_av_lookup(av, 2000, &addr, &addrlen); - cr_assert_eq(ret, -FI_EINVAL); - - /* test within range, but not inserted case */ - ret = fi_av_lookup(av, 1, &addr, &addrlen); - cr_assert_eq(ret, -FI_EINVAL); - } else { - ret = fi_av_lookup(av, 0xdeadbeef, &addr, &addrlen); - cr_assert_eq(ret, -FI_ENOENT); - } -} - -Test(av_full_map, lookup_invalid) -{ - lookup_invalid_test(); -} - -Test(av_full_table, lookup_invalid) -{ - lookup_invalid_test(); -} - -static void 
lookup_test(void) -{ - int ret; - int i; - fi_addr_t addresses[SIMPLE_ADDR_COUNT]; - fi_addr_t *compare; - struct gnix_ep_name found; - size_t addrlen = sizeof(struct gnix_ep_name); - - /* insert addresses */ - ret = fi_av_insert(av, (void *) simple_ep_names, SIMPLE_ADDR_COUNT, - addresses, 0, NULL); - cr_assert_eq(ret, SIMPLE_ADDR_COUNT); - - /* check address contents */ - for (i = 0; i < SIMPLE_ADDR_COUNT; i++) { - if (gnix_av->type == FI_AV_MAP) { - compare = (fi_addr_t *) &simple_ep_names[i].gnix_addr; - cr_assert_eq(*compare, addresses[i]); - } else { - cr_assert_eq(i, addresses[i]); - } - } - - for (i = 0; i < SIMPLE_ADDR_COUNT; i++) { - ret = fi_av_lookup(av, addresses[i], &found, &addrlen); - cr_assert_eq(ret, FI_SUCCESS); - } -} - -Test(av_full_map, lookup) -{ - lookup_test(); -} - -Test(av_full_table, lookup) -{ - lookup_test(); -} - -/* Stuff for mulithreaded tests */ -static pthread_barrier_t mtbar; - -/* Currently the AV operations are not thread safe, so use this big - * fat lock when calling them */ -#define USE_LOCK -#ifdef USE_LOCK -static ofi_spin_t my_big_lock; -#define init_av_lock() ofi_spin_init(&my_big_lock) -#define av_lock() ofi_spin_lock(&my_big_lock) -#define av_unlock() ofi_spin_unlock(&my_big_lock) -#else -#define init_av_lock() -#define av_lock() -#define av_unlock() -#endif - -static void *insert_single(void *data) -{ - int ret; - int n = (int) ((int *) data)[0]; - fi_addr_t *addr = ((fi_addr_t **) data)[1]; - - ret = pthread_barrier_wait(&mtbar); - if ((ret != PTHREAD_BARRIER_SERIAL_THREAD) && (ret != 0)) { - pthread_exit((void *) 1UL); - } - - av_lock(); - ret = fi_av_insert(av, (void *) &simple_ep_names[n], 1, addr, 0, NULL); - av_unlock(); - - if (ret != 1) { - pthread_exit((void *) 2UL); - } - - pthread_exit((void *) 0UL); -} - -static void *remove_single(void *data) -{ - int ret; - fi_addr_t *addr = (fi_addr_t *) data; - - ret = pthread_barrier_wait(&mtbar); - if ((ret != PTHREAD_BARRIER_SERIAL_THREAD) && (ret != 0)) { - 
pthread_exit((void *) 1UL); - } - - av_lock(); - ret = fi_av_remove(av, addr, 1, 0); - av_unlock(); - - if (ret != FI_SUCCESS) { - pthread_exit((void *) 2UL); - } - - pthread_exit((void *) 0UL); -} - -static void *lookup_single(void *data) -{ - int ret; - struct gnix_ep_name found; - size_t addrlen = sizeof(struct gnix_ep_name); - fi_addr_t *addr = (fi_addr_t *) data; - - ret = pthread_barrier_wait(&mtbar); - if ((ret != PTHREAD_BARRIER_SERIAL_THREAD) && (ret != 0)) { - pthread_exit((void *) 1UL); - } - - av_lock(); - ret = fi_av_lookup(av, *addr, &found, &addrlen); - av_unlock(); - - if (ret != FI_SUCCESS) { - pthread_exit((void *) 2UL); - } - - pthread_exit((void *) 0UL); -} - -static void simple_mt_test(void) -{ - int ret; - unsigned long pret; - int i, j; - fi_addr_t *compare; - pthread_t threads[MT_ADDR_COUNT]; - fi_addr_t addresses[MT_ADDR_COUNT]; - bool found_addresses[MT_ADDR_COUNT]; - void *info[MT_ADDR_COUNT][2]; - - ret = pthread_barrier_init(&mtbar, NULL, MT_ADDR_COUNT); - cr_assert_eq(ret, 0); - - init_av_lock(); - - /* insert addresses */ - for (i = 0; i < MT_ADDR_COUNT; i++) { - info[i][0] = (void *) (uint64_t) i; - info[i][1] = (void *) &addresses[i]; - ret = pthread_create(&threads[i], NULL, - insert_single, &info[i]); - cr_assert_eq(ret, 0); - } - for (i = 0; i < MT_ADDR_COUNT; i++) { - ret = pthread_join(threads[i], (void **) &pret); - cr_assert_eq(ret, 0); - cr_assert_eq(pret, 0UL); - } - - for (i = 0; i < MT_ADDR_COUNT; i++) { - found_addresses[i] = false; - } - - /* check address contents */ - for (i = 0; i < MT_ADDR_COUNT; i++) { - if (gnix_av->type == FI_AV_MAP) { - for (j = 0; j < MT_ADDR_COUNT; j++) { - compare = (fi_addr_t *) - &simple_ep_names[j].gnix_addr; - if (addresses[i] == *compare) { - found_addresses[j] = true; - } - } - } else { - found_addresses[addresses[i]] = true; - } - } - for (i = 0; i < MT_ADDR_COUNT; i++) { - cr_assert_eq(found_addresses[i], true); - } - - /* look up addresses */ - for (i = 0; i < MT_ADDR_COUNT; i++) { - 
ret = pthread_create(&threads[i], NULL, lookup_single, - (void *) &addresses[i]); - cr_assert_eq(ret, 0); - } - - for (i = 0; i < MT_ADDR_COUNT; i++) { - ret = pthread_join(threads[i], (void **) &pret); - cr_assert_eq(ret, 0); - cr_assert_eq(pret, 0UL); - } - - /* remove addresses */ - for (i = 0; i < MT_ADDR_COUNT; i++) { - ret = pthread_create(&threads[i], NULL, remove_single, - (void *) &addresses[i]); - cr_assert_eq(ret, 0); - } - for (i = 0; i < MT_ADDR_COUNT; i++) { - ret = pthread_join(threads[i], (void **) &pret); - cr_assert_eq(ret, 0); - cr_assert_eq(pret, 0UL); - } - - ret = pthread_barrier_destroy(&mtbar); - cr_assert_eq(ret, 0); -} - -Test(av_full_map, mt_simple) -{ - simple_mt_test(); -} - -Test(av_full_table, mt_simple) -{ - simple_mt_test(); -} - -#include "ofi_atom.h" -/* add a compare-and-swap */ -static inline int atomic_cas_weak(ofi_atomic32_t *atomic, int *expected, int desired) -{ - ATOMIC_IS_INITIALIZED(atomic); - return atomic_compare_exchange_weak_explicit(&atomic->val, - expected, desired, - memory_order_seq_cst, - memory_order_seq_cst); -} - -static void *lookup_random(void *data) -{ - int n, ret; - fi_addr_t *addresses = ((fi_addr_t **) data)[0]; - ofi_atomic32_t *done = ((ofi_atomic32_t **) data)[1]; - struct gnix_ep_name found; - size_t addrlen = sizeof(struct gnix_ep_name); - - srand(0); - - ret = pthread_barrier_wait(&mtbar); - if ((ret != PTHREAD_BARRIER_SERIAL_THREAD) && (ret != 0)) { - pthread_exit((void *) 1UL); - } - - while (!ofi_atomic_get32(done)) { - n = rand()%MT_ADDR_COUNT; - (void) fi_av_lookup(av, addresses[n], &found, &addrlen); - } - - pthread_exit(NULL); -} - -static void continuous_lookup(void) -{ - int i, ret; - pthread_t thread; - fi_addr_t addresses[MT_ADDR_COUNT]; - ofi_atomic32_t done; - void *info[2]; - const int iters = 17; - - ret = pthread_barrier_init(&mtbar, NULL, 2); - cr_assert_eq(ret, 0); - - init_av_lock(); - - ofi_atomic_initialize32(&done, 0); - - memset(addresses, 0, 
MT_ADDR_COUNT*sizeof(fi_addr_t)); - - info[0] = (void *) addresses; - info[1] = (void *) &done; - - ret = pthread_create(&thread, NULL, lookup_random, info); - cr_assert_eq(ret, 0); - - ret = pthread_barrier_wait(&mtbar); - cr_assert((ret == PTHREAD_BARRIER_SERIAL_THREAD) || (ret == 0)); - - for (i = 0; i < iters; i++) { - for (i = 0; i < MT_ADDR_COUNT; i++) { - ret = fi_av_insert(av, (void *) &simple_ep_names[i], 1, - &addresses[i], 0, NULL); - cr_assert_eq(ret, 1); - } - for (i = 0; i < MT_ADDR_COUNT; i++) { - ret = fi_av_remove(av, &addresses[i], 1, 0); - cr_assert_eq(ret, FI_SUCCESS); - } - } - - ofi_atomic_set32(&done, 1); - - ret = pthread_join(thread, NULL); - cr_assert_eq(ret, 0); - - ret = pthread_barrier_destroy(&mtbar); - cr_assert_eq(ret, 0); -} - -Test(av_full_map, mt_lookup) -{ - continuous_lookup(); -} - -Test(av_full_table, mt_lookup) -{ - continuous_lookup(); -} - -static const int state_empty = 1; -static const int state_full = 2; -static const int state_locked = 3; - -static void *continuous_insert(void *data) -{ - int i, pos, n, ret; - int expected_state; - struct gnix_ep_name *ep_names = ((struct gnix_ep_name **) data)[0]; - fi_addr_t *addresses = ((fi_addr_t **) data)[1]; - ofi_atomic32_t *fe = ((ofi_atomic32_t **) data)[2]; - int num_insertions = (int) ((uint64_t *) data)[3]; - int num_addrs = (int) ((uint64_t *) data)[4]; - ofi_atomic32_t *done = ((ofi_atomic32_t **) data)[5]; - - ret = pthread_barrier_wait(&mtbar); - if ((ret != PTHREAD_BARRIER_SERIAL_THREAD) && (ret != 0)) { - pthread_exit((void *) 1UL); - } - - i = 0; - pos = 0; - while ((i < num_insertions) && !ofi_atomic_get32(done)) { - n = (pos++)%num_addrs; - expected_state = state_empty; - if (atomic_cas_weak(&fe[n], &expected_state, state_locked)) { - av_lock(); - ret = fi_av_insert(av, (void *) &ep_names[n], 1, - &addresses[n], 0, NULL); - av_unlock(); - if (ret != 1) { - /* flag shutdown to avoid deadlock */ - ofi_atomic_set32(done, 1); - pthread_exit((void *) 1UL); - } - 
ofi_atomic_set32(&fe[n], state_full); - i++; - } - } - - pthread_exit((void *) NULL); -} - -static void *continuous_remove(void *data) -{ - int pos, n, ret; - int expected_state; - fi_addr_t *addresses = ((fi_addr_t **) data)[0]; - ofi_atomic32_t *fe = ((ofi_atomic32_t **) data)[1]; - int num_addrs = (int) ((uint64_t *) data)[2]; - ofi_atomic32_t *done = ((ofi_atomic32_t **) data)[3]; - - ret = pthread_barrier_wait(&mtbar); - if ((ret != PTHREAD_BARRIER_SERIAL_THREAD) && (ret != 0)) { - pthread_exit((void *) 1UL); - } - - pos = 0; - while (!ofi_atomic_get32(done)) { - n = (pos++)%num_addrs; - expected_state = state_full; - if (atomic_cas_weak(&fe[n], &expected_state, state_locked)) { - av_lock(); - ret = fi_av_remove(av, &addresses[n], 1, 0); - av_unlock(); - if (ret != FI_SUCCESS) { - /* flag shutdown to avoid deadlock */ - ofi_atomic_set32(done, 1); - pthread_exit((void *) 1UL); - } - ofi_atomic_set32(&fe[n], state_empty); - } - } - - pthread_exit((void *) NULL); -} - -static void continuous_insert_remove(int num_inserters, int num_removers, - int num_insertions) -{ - int i, ret; - unsigned long pret; - ofi_atomic32_t done; - fi_addr_t addresses[MT_ADDR_COUNT]; - ofi_atomic32_t fe[MT_ADDR_COUNT]; - const int addrs_per_thread = MT_ADDR_COUNT/num_inserters; - const int num_threads = num_inserters + num_removers; - pthread_t threads[num_threads]; - void *info[num_threads][6]; - - ret = pthread_barrier_init(&mtbar, NULL, num_threads); - cr_assert_eq(ret, 0); - - init_av_lock(); - - ofi_atomic_initialize32(&done, 0); - for (i = 0; i < MT_ADDR_COUNT; i++) { - ofi_atomic_initialize32(&fe[i], state_empty); - } - - for (i = 0; i < num_inserters; i++) { - info[i][0] = (void *) &simple_ep_names[i*addrs_per_thread]; - info[i][1] = (void *) &addresses[i*addrs_per_thread]; - info[i][2] = (void *) &fe[i*addrs_per_thread]; - info[i][3] = (void *) (uint64_t) num_insertions; - info[i][4] = (void *) (uint64_t) addrs_per_thread; - info[i][5] = (void *) &done; - ret = 
pthread_create(&threads[i], NULL, - continuous_insert, &info[i]); - cr_assert_eq(ret, 0); - } - - for (i = num_inserters; i < num_threads; i++) { - info[i][0] = (void *) addresses; - info[i][1] = (void *) fe; - info[i][2] = (void *) (uint64_t) - (num_inserters*addrs_per_thread); - info[i][3] = (void *) &done; - ret = pthread_create(&threads[i], NULL, - continuous_remove, &info[i]); - cr_assert_eq(ret, 0); - } - - for (i = 0; i < num_threads; i++) { - if (i == num_inserters) { - ofi_atomic_set32(&done, 1); - } - ret = pthread_join(threads[i], (void **) &pret); - cr_assert_eq(ret, 0); - cr_assert_eq(pret, 0UL, "thread %d failed\n", i); - } - - ret = pthread_barrier_destroy(&mtbar); - cr_assert_eq(ret, 0); -} - -Test(av_full_map, mt_insert_remove) -{ - continuous_insert_remove(8, 1, 113); - continuous_insert_remove(4, 3, 113); - continuous_insert_remove(29, 13, 113); -} - -Test(av_full_table, mt_insert_remove) -{ - continuous_insert_remove(8, 1, 113); - continuous_insert_remove(4, 3, 113); - continuous_insert_remove(29, 13, 113); -} - -static void straddr_test(void) -{ - const char *buf; -#define ADDRSTR_LEN 128 - char addrstr[ADDRSTR_LEN]; - size_t addrstr_len; - char *pend; - long int value; - - addrstr_len = 10; /* too short */ - buf = fi_av_straddr(av, &simple_ep_names[0], addrstr, &addrstr_len); - cr_assert_eq(buf, addrstr); - cr_assert_eq(addrstr_len, 10); - - addrstr_len = ADDRSTR_LEN; - buf = fi_av_straddr(av, &simple_ep_names[0], addrstr, &addrstr_len); - cr_assert_eq(buf, addrstr); - cr_assert_eq(addrstr_len, GNIX_AV_MAX_STR_ADDR_LEN); - - /* extract the first component */ - buf = strtok(addrstr, ":"); - cr_assert_not_null(buf, "version not found"); - - value = strtol(buf, &pend, 16); - - /* verify the version has been returned. 
*/ - cr_assert_eq(GNIX_AV_STR_ADDR_VERSION, value, "Invalid version"); - - /* extract the second component */ - buf = strtok(NULL, ":"); - cr_assert_not_null(buf, "device_addr not found"); - - value = strtol(buf, &pend, 16); - - /* verify the device addrstr has been returned. */ - cr_assert_eq(simple_ep_names[0].gnix_addr.device_addr, value, - "Invalid device_addr"); - - /* extract the third component */ - buf = strtok(NULL, ":"); - cr_assert_not_null(buf, "cdm_id not found"); - - value = strtol(buf, &pend, 16); - - /* verify the cdm_id has been returned. */ - cr_assert_eq(simple_ep_names[0].gnix_addr.cdm_id, value, - "Invalid cdm_id"); - - /* extract the fourth component */ - buf = strtok(NULL, ":"); - cr_assert_not_null(buf, "name_type not found"); - - value = strtol(buf, &pend, 10); - - /* verify the name_type has been returned. */ - cr_assert_eq(simple_ep_names[0].name_type, value, "Invalid name_type"); - - /* extract the fifth component */ - buf = strtok(NULL, ":"); - cr_assert_not_null(buf, "cm_nic_cdm_id not found"); - - value = strtol(buf, &pend, 16); - - /* verify the cm_nic_cdm_id has been returned. */ - cr_assert_eq(simple_ep_names[0].cm_nic_cdm_id, value, - "Invalid cm_nic_cdm_id"); - - /* extract the sixth component */ - buf = strtok(NULL, ":"); - cr_assert_not_null(buf, "cookie not found"); - - value = strtol(buf, &pend, 16); - - /* verify the cookie has been returned. */ - cr_assert_eq(simple_ep_names[0].cookie, value, "Invalid cookie"); - - /* extract the seventh component */ - buf = strtok(NULL, ":"); - cr_assert_not_null(buf, "number of contexts not found"); - - value = strtol(buf, &pend, 10); - - /* verify the rx_ctx_cnt has been returned. 
*/ - cr_assert_eq(simple_ep_names[0].rx_ctx_cnt, value, - "Invalid number of contexts"); - - /* check to see if additional component are specified */ - buf = strtok(NULL, ":"); - cr_assert_null(buf, "extra values specified"); -} - -Test(av_full_map, straddr) -{ - straddr_test(); -} - -Test(av_full_table, straddr) -{ - straddr_test(); -} - -#define TABLE_SIZE_INIT 16 -#define TABLE_SIZE_FINAL 1024 - -Test(av_bare, test_capacity) -{ - int ret, i; - fi_addr_t addresses[TABLE_SIZE_FINAL]; - struct fi_av_attr av_table_attr = { - .type = FI_AV_TABLE, - .count = TABLE_SIZE_INIT, - }; - - ret = fi_av_open(dom, &av_table_attr, &av, NULL); - cr_assert_eq(ret, FI_SUCCESS, "failed to open av"); - - fake_names = (struct gnix_ep_name *)calloc(TABLE_SIZE_FINAL, - sizeof(*fake_names)); - cr_assert_neq(fake_names, NULL); - - for (i = 0; i < TABLE_SIZE_INIT; i++) { - fake_names[i].gnix_addr.device_addr = i + 100; - fake_names[i].gnix_addr.cdm_id = i; - fake_names[i].cm_nic_cdm_id = 0xbeef; - fake_names[i].cookie = 0xdeadbeef; - } - - ret = fi_av_insert(av, fake_names, TABLE_SIZE_INIT, - addresses, 0, NULL); - cr_assert_eq(ret, TABLE_SIZE_INIT, "av insert failed"); - - /* - * now add some more - */ - - for (i = TABLE_SIZE_INIT; i < TABLE_SIZE_FINAL; i++) { - fake_names[i].gnix_addr.device_addr = i + 100; - fake_names[i].gnix_addr.cdm_id = i; - fake_names[i].cm_nic_cdm_id = 0xbeef; - fake_names[i].cookie = 0xdeadbeef; - } - - ret = fi_av_insert(av, &fake_names[TABLE_SIZE_INIT], - TABLE_SIZE_FINAL - TABLE_SIZE_INIT, - &addresses[TABLE_SIZE_INIT], 0, NULL); - cr_assert_eq(ret, TABLE_SIZE_FINAL - TABLE_SIZE_INIT, - "av insert failed"); - -} diff --git a/prov/gni/test/bitmap.c b/prov/gni/test/bitmap.c deleted file mode 100644 index b1f172a537b..00000000000 --- a/prov/gni/test/bitmap.c +++ /dev/null @@ -1,615 +0,0 @@ -/* - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -/* - * Created on: Apr 23, 2015 - * Author: jswaro - */ -#include -#include -#include -#include -#include - -#include -#include "common.h" - -#include -#include "gnix_rdma_headers.h" - -gnix_bitmap_t *test_bitmap = NULL; -int call_free_bitmap = 0; - -#if HAVE_ATOMICS - -#define __gnix_set_block(bitmap, index, value) \ - atomic_store(&(bitmap)->arr[(index)], (value)) -#define __gnix_load_block(bitmap, index) atomic_load(&(bitmap->arr[(index)])) -#else -static inline void __gnix_set_block(gnix_bitmap_t *bitmap, int index, - uint64_t value) -{ - gnix_bitmap_block_t *block = &bitmap->arr[index]; - - ofi_spin_lock(&block->lock); - block->val = value; - ofi_spin_unlock(&block->lock); -} - -static inline uint64_t __gnix_load_block(gnix_bitmap_t *bitmap, int index) -{ - gnix_bitmap_block_t *block = &bitmap->arr[index]; - uint64_t ret; - - ofi_spin_lock(&block->lock); - ret = block->val; - ofi_spin_unlock(&block->lock); - - return ret; -} -#endif - -void __gnix_bitmap_test_setup(void) -{ - cr_assert(test_bitmap == NULL); - test_bitmap = calloc(1, sizeof(*test_bitmap)); - cr_assert(test_bitmap != NULL); - - call_free_bitmap = 1; -} - -void __gnix_bitmap_test_teardown(void) -{ - if (call_free_bitmap) { - _gnix_free_bitmap(test_bitmap); - } else if (test_bitmap && test_bitmap->arr) { - free(test_bitmap->arr); - } - - cr_assert(test_bitmap != NULL); - free(test_bitmap); - test_bitmap = NULL; -} - - -static void __test_clean_bitmap_state(gnix_bitmap_t *bitmap, - int _length, gnix_bitmap_state_e _state) -{ - cr_assert(bitmap->arr != NULL); - cr_assert(bitmap->length == _length); - cr_assert(bitmap->state == _state); -} - -static void __test_initialize_bitmap(gnix_bitmap_t *bitmap, int bits) -{ - int ret = _gnix_alloc_bitmap(bitmap, bits, NULL); - - cr_assert(ret == 0); - __test_clean_bitmap_state(bitmap, bits, GNIX_BITMAP_STATE_READY); -} - -static void __test_initialize_bitmap_clean(gnix_bitmap_t *bitmap, int bits) -{ - __test_initialize_bitmap(bitmap, bits); - 
cr_assert(_gnix_bitmap_empty(bitmap)); -} - -static void __test_realloc_bitmap(gnix_bitmap_t *bitmap, int bits) -{ - int ret = _gnix_realloc_bitmap(bitmap, bits); - - cr_assert(ret == 0); - __test_clean_bitmap_state(bitmap, bits, GNIX_BITMAP_STATE_READY); -} - -static void __test_realloc_bitmap_clean(gnix_bitmap_t *bitmap, int initial, - int next) -{ - __test_initialize_bitmap(bitmap, initial); - __test_realloc_bitmap(bitmap, next); - cr_assert(_gnix_bitmap_empty(bitmap)); -} - -static void __test_free_bitmap_clean(gnix_bitmap_t *bitmap) -{ - int ret = _gnix_free_bitmap(bitmap); - - cr_assert(ret == 0); - cr_assert(bitmap->arr == NULL); - cr_assert(bitmap->length == 0); - cr_assert(bitmap->state == GNIX_BITMAP_STATE_FREE); -} - -/* - * Basic functionality tests for the gnix_bitmap_t object - */ - -TestSuite(gnix_bitmap, - .init = __gnix_bitmap_test_setup, - .fini = __gnix_bitmap_test_teardown); - -Test(gnix_bitmap, uninitialized) -{ - cr_assert(test_bitmap->arr == NULL); - cr_assert(test_bitmap->length == 0); - cr_assert(test_bitmap->state == GNIX_BITMAP_STATE_UNINITIALIZED); - - call_free_bitmap = 0; -} - -Test(gnix_bitmap, initialize_128) -{ - __test_initialize_bitmap(test_bitmap, 128); - - call_free_bitmap = 0; -} - -Test(gnix_bitmap, initialize_1) -{ - __test_initialize_bitmap(test_bitmap, 1); - - call_free_bitmap = 0; -} - -Test(gnix_bitmap, initialize_0) -{ - int ret; - - ret = _gnix_alloc_bitmap(test_bitmap, 0, NULL); - cr_assert(ret == -FI_EINVAL); - - call_free_bitmap = 0; -} - -Test(gnix_bitmap, already_initialized) -{ - int ret; - - __test_initialize_bitmap(test_bitmap, 128); - - ret = _gnix_alloc_bitmap(test_bitmap, 128, NULL); - cr_assert(ret == -FI_EINVAL); - - call_free_bitmap = 0; -} - -Test(gnix_bitmap, destroy_bitmap) -{ - __test_initialize_bitmap(test_bitmap, 128); - - __test_free_bitmap_clean(test_bitmap); -} - -Test(gnix_bitmap, destroy_bitmap_uninitialized) -{ - int ret; - - ret = _gnix_free_bitmap(test_bitmap); - cr_assert(ret == -FI_EINVAL); 
- cr_expect(test_bitmap->arr == NULL); - cr_expect(test_bitmap->length == 0); - cr_expect(test_bitmap->state == GNIX_BITMAP_STATE_UNINITIALIZED); -} - -Test(gnix_bitmap, destroy_bitmap_already_freed) -{ - int ret; - - __test_initialize_bitmap(test_bitmap, 128); - - __test_free_bitmap_clean(test_bitmap); - - ret = _gnix_free_bitmap(test_bitmap); - cr_assert(ret == -FI_EINVAL); - cr_expect(test_bitmap->arr == NULL); - cr_expect(test_bitmap->length == 0); - cr_expect(test_bitmap->state == GNIX_BITMAP_STATE_FREE); -} - -Test(gnix_bitmap, provided_buffer) -{ - int ret; - void *buffer; - uint32_t size, elements = 128; - - size = _gnix_bitmap_get_buffer_size(elements); - cr_assert(size > 0, "bad size returned, size=%d\n", size); - - buffer = calloc(1, size); - cr_assert(buffer, "failed to allocate buffer, size=%d", size); - - ret = _gnix_alloc_bitmap(test_bitmap, elements, buffer); - cr_assert(ret == FI_SUCCESS, "ret=%d\n", ret); - - _gnix_fill_bitmap(test_bitmap, 1); - - ret = _gnix_bitmap_full(test_bitmap); - cr_assert(ret == 1, - "bitmap was not full, ret=%d\n", - ret); - - _gnix_fill_bitmap(test_bitmap, 0); - - ret = _gnix_bitmap_empty(test_bitmap); - cr_assert(ret == 1, - "bitmap was not empty, ret=%d\n", - ret); - - ret = _gnix_realloc_bitmap(test_bitmap, elements * 2); - cr_assert(ret == -FI_EINVAL, "succeeded unexpectedly"); - - ret = _gnix_free_bitmap(test_bitmap); - cr_assert(ret == FI_SUCCESS); - - free(buffer); -} - - -Test(gnix_bitmap, realloc_63) -{ - __test_realloc_bitmap_clean(test_bitmap, 128, 63); -} - -Test(gnix_bitmap, realloc_64) -{ - __test_realloc_bitmap_clean(test_bitmap, 128, 64); -} - -Test(gnix_bitmap, realloc_65) -{ - __test_realloc_bitmap_clean(test_bitmap, 128, 65); -} - -Test(gnix_bitmap, realloc_255) -{ - __test_realloc_bitmap_clean(test_bitmap, 128, 255); -} - -Test(gnix_bitmap, realloc_256) -{ - __test_realloc_bitmap_clean(test_bitmap, 128, 256); -} - -Test(gnix_bitmap, realloc_257) -{ - __test_realloc_bitmap_clean(test_bitmap, 128, 257); 
-} - -Test(gnix_bitmap, realloc_63_check_bits) -{ - __test_realloc_bitmap_clean(test_bitmap, 128, 63); -} - -Test(gnix_bitmap, realloc_64_check_bits) -{ - __test_realloc_bitmap_clean(test_bitmap, 128, 64); -} - -Test(gnix_bitmap, realloc_65_check_bits) -{ - __test_realloc_bitmap_clean(test_bitmap, 128, 65); -} - -Test(gnix_bitmap, realloc_255_check_bits) -{ - __test_realloc_bitmap_clean(test_bitmap, 128, 255); -} - -Test(gnix_bitmap, realloc_256_check_bits) -{ - __test_realloc_bitmap_clean(test_bitmap, 128, 256); -} - -Test(gnix_bitmap, realloc_257_check_bits) -{ - __test_realloc_bitmap_clean(test_bitmap, 128, 257); -} - -Test(gnix_bitmap, bit_set_test_pass) -{ - __test_initialize_bitmap_clean(test_bitmap, 64); - - _gnix_set_bit(test_bitmap, 1); - - cr_assert(_gnix_test_bit(test_bitmap, 1)); -} - -Test(gnix_bitmap, bit_set_test_fail) -{ - __test_initialize_bitmap_clean(test_bitmap, 64); - - _gnix_set_bit(test_bitmap, 1); - - cr_assert(!_gnix_test_bit(test_bitmap, 0)); -} - -Test(gnix_bitmap, bit_set_clear) -{ - __test_initialize_bitmap_clean(test_bitmap, 64); - - _gnix_set_bit(test_bitmap, 1); - - cr_assert(_gnix_test_bit(test_bitmap, 1)); - - _gnix_clear_bit(test_bitmap, 1); - - cr_assert(!_gnix_test_bit(test_bitmap, 1)); -} - -Test(gnix_bitmap, bit_clear) -{ - __test_initialize_bitmap_clean(test_bitmap, 64); - - _gnix_clear_bit(test_bitmap, 1); - - cr_assert(!_gnix_test_bit(test_bitmap, 1)); -} - -Test(gnix_bitmap, bit_set) -{ - __test_initialize_bitmap_clean(test_bitmap, 64); - - _gnix_set_bit(test_bitmap, 1); -} - -Test(gnix_bitmap, bit_test_and_set_unset) -{ - __test_initialize_bitmap_clean(test_bitmap, 64); - - cr_assert(!_gnix_test_and_set_bit(test_bitmap, 1)); -} - -Test(gnix_bitmap, bit_test_and_set_already_set) -{ - __test_initialize_bitmap_clean(test_bitmap, 64); - - _gnix_set_bit(test_bitmap, 1); - cr_assert(_gnix_test_bit(test_bitmap, 1)); - - cr_assert(_gnix_test_and_set_bit(test_bitmap, 1)); -} - -Test(gnix_bitmap, bit_test_and_clear_unset) -{ - 
__test_initialize_bitmap_clean(test_bitmap, 64); - - cr_assert(!_gnix_test_and_clear_bit(test_bitmap, 1)); -} - -Test(gnix_bitmap, bit_test_and_clear_already_set) -{ - __test_initialize_bitmap_clean(test_bitmap, 64); - - _gnix_set_bit(test_bitmap, 1); - cr_assert(_gnix_test_bit(test_bitmap, 1)); - - cr_assert(_gnix_test_and_clear_bit(test_bitmap, 1)); -} - -Test(gnix_bitmap, ffs_clean_bitmap) -{ - __test_initialize_bitmap_clean(test_bitmap, 64); - - cr_assert(_gnix_find_first_set_bit(test_bitmap) == -FI_EAGAIN); -} - -Test(gnix_bitmap, ffs_first_bit_set) -{ - __test_initialize_bitmap_clean(test_bitmap, 64); - - _gnix_set_bit(test_bitmap, 0); - - cr_assert(_gnix_find_first_set_bit(test_bitmap) == 0); -} - -Test(gnix_bitmap, ffs_seventeen_set) -{ - __test_initialize_bitmap_clean(test_bitmap, 64); - - _gnix_set_bit(test_bitmap, 17); - - cr_assert(_gnix_find_first_set_bit(test_bitmap) == 17); -} - -Test(gnix_bitmap, ffz_clean_bitmap) -{ - __test_initialize_bitmap_clean(test_bitmap, 64); - - cr_assert(_gnix_find_first_zero_bit(test_bitmap) == 0); -} - -Test(gnix_bitmap, ffz_full_bitmap) -{ - int i; - - __test_initialize_bitmap_clean(test_bitmap, 64); - - for (i = 0; i < test_bitmap->length; ++i) { - _gnix_set_bit(test_bitmap, i); - cr_assert(_gnix_test_bit(test_bitmap, i)); - } - - cr_assert(_gnix_find_first_zero_bit(test_bitmap) == -FI_EAGAIN); -} - -Test(gnix_bitmap, ffz_first_half_set) -{ - int i; - - __test_initialize_bitmap_clean(test_bitmap, 64); - - for (i = 0; i < 32 ; ++i) { - _gnix_set_bit(test_bitmap, i); - cr_assert(_gnix_test_bit(test_bitmap, i)); - } - - cr_expect(test_bitmap->length == 64); - cr_expect(i == 32); - cr_assert(_gnix_find_first_zero_bit(test_bitmap) == i); -} - -Test(gnix_bitmap, map_fill_0) -{ - int i; - - __test_initialize_bitmap_clean(test_bitmap, 64); - - for (i = 0; i < test_bitmap->length; ++i) { - _gnix_set_bit(test_bitmap, i); - cr_assert(_gnix_test_bit(test_bitmap, i)); - } - - cr_assert(_gnix_bitmap_full(test_bitmap)); - - 
_gnix_fill_bitmap(test_bitmap, 0); - - cr_assert(_gnix_bitmap_empty(test_bitmap)); -} - -Test(gnix_bitmap, map_fill_1) -{ - __test_initialize_bitmap_clean(test_bitmap, 64); - - _gnix_fill_bitmap(test_bitmap, 1); - - cr_assert(_gnix_bitmap_full(test_bitmap)); -} - -Test(gnix_bitmap, bitmap_load) -{ - gnix_bitmap_value_t expected = ~0; - - __test_initialize_bitmap_clean(test_bitmap, 64); - - _gnix_fill_bitmap(test_bitmap, 1); - - cr_assert(expected == __gnix_load_block(test_bitmap, 0)); -} - -Test(gnix_bitmap, bitmap_set) -{ - gnix_bitmap_value_t expected = ~0; - - __test_initialize_bitmap_clean(test_bitmap, 64); - - __gnix_set_block(test_bitmap, 0, expected); - - cr_assert(__gnix_load_block(test_bitmap, 0) == expected); -} - - -Test(gnix_bitmap, fill_bitmap_60_ffz_eagain) -{ - int i; - - __test_initialize_bitmap_clean(test_bitmap, 60); - - for (i = 0; i < 60; ++i) - _gnix_set_bit(test_bitmap, i); - - cr_assert(_gnix_find_first_zero_bit(test_bitmap) == -FI_EAGAIN); -} - -Test(gnix_bitmap, fill_bitmap_60_ffs_eagain) -{ - int i; - - __test_initialize_bitmap_clean(test_bitmap, 60); - - /* this will succeed because set_bit doesn't account for bounds of the - * bitmap as the user should be responsible for handling the bitmap - * properly. 
- */ - for (i = 60; i < 64; ++i) - _gnix_set_bit(test_bitmap, i); - - cr_assert(_gnix_find_first_set_bit(test_bitmap) == -FI_EAGAIN); -} - -TestSuite(perf_bitmap, - .init = __gnix_bitmap_test_setup, - .fini = __gnix_bitmap_test_teardown, - .disabled = true); - -Test(perf_bitmap, performance_set_test) -{ - int i, j; - int secs, usec; - struct timeval start, end; - - __test_initialize_bitmap_clean(test_bitmap, 8192); - - gettimeofday(&start, 0); - for (i = 0; i < 100000; ++i) { - j = i % 8192; - _gnix_set_bit(test_bitmap, j); - cr_assert(_gnix_test_bit(test_bitmap, j)); - _gnix_clear_bit(test_bitmap, j); - cr_assert(!_gnix_test_bit(test_bitmap, j)); - } - gettimeofday(&end, 0); - - calculate_time_difference(&start, &end, &secs, &usec); - - cr_assert(_gnix_bitmap_empty(test_bitmap)); - - cr_expect(secs < 1); -} - -Test(perf_bitmap, performance_set_test_random) -{ - int i, j; - int secs, usec; - struct timeval start, end; - - srand(time(NULL)); - - __test_initialize_bitmap_clean(test_bitmap, 8192); - - gettimeofday(&start, 0); - for (i = 0; i < 100000; ++i) { - j = rand() % 8192; - _gnix_set_bit(test_bitmap, j); - cr_assert(_gnix_test_bit(test_bitmap, j)); - _gnix_clear_bit(test_bitmap, j); - cr_assert(!_gnix_test_bit(test_bitmap, j)); - } - gettimeofday(&end, 0); - - calculate_time_difference(&start, &end, &secs, &usec); - - cr_assert(_gnix_bitmap_empty(test_bitmap)); - - cr_expect(secs < 1); -} - diff --git a/prov/gni/test/buddy_allocator.c b/prov/gni/test/buddy_allocator.c deleted file mode 100644 index d1e744784e5..00000000000 --- a/prov/gni/test/buddy_allocator.c +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All - * rights reserved. - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "gnix_buddy_allocator.h" -#include -#include "gnix_rdma_headers.h" -#include - -#define LEN (1024 * 1024) /* buddy_handle->len */ -#define MAX_LEN (LEN / 2) /* buddy_handle->max */ -#define MIN_LEN MIN_BLOCK_SIZE - -long *buf = NULL; /* buddy_handle->base */ -gnix_buddy_alloc_handle_t *buddy_handle; - -struct ptrs_t { - void *ptr; /* ptrs alloc'd by buddy_alloc */ - uint32_t size; /* size of the ptr */ -} *ptrs; - -void buddy_allocator_setup(void) -{ - int ret; - - ptrs = calloc(LEN / MIN_LEN, sizeof(struct ptrs_t)); - cr_assert(ptrs, "buddy_allocator_setup"); - - buf = calloc(LEN, sizeof(long)); - cr_assert(buf, "buddy_allocator_setup"); - - ret = _gnix_buddy_allocator_create(buf, LEN, MAX_LEN, &buddy_handle); - cr_assert(!ret, "_gnix_buddy_allocator_create"); -} - -void buddy_allocator_teardown(void) -{ - int ret; - - ret = _gnix_buddy_allocator_destroy(buddy_handle); - cr_assert(!ret, "_gnix_buddy_allocator_destroy"); - - free(ptrs); - free(buf); -} - -/* Test invalid parameters for setup */ -void buddy_allocator_setup_error(void) -{ - int ret; - - ret = _gnix_buddy_allocator_create(NULL, LEN, MAX_LEN, &buddy_handle); - cr_assert_eq(ret, -FI_EINVAL); - - ret = _gnix_buddy_allocator_create(buf, 0, MAX_LEN, &buddy_handle); - cr_assert_eq(ret, -FI_EINVAL); - - ret = _gnix_buddy_allocator_create(buf, LEN, LEN + 1, &buddy_handle); - cr_assert_eq(ret, -FI_EINVAL); - - ret = _gnix_buddy_allocator_create(buf, LEN, 0, &buddy_handle); - cr_assert_eq(ret, -FI_EINVAL); - - ret = _gnix_buddy_allocator_create(buf, LEN, MAX_LEN, NULL); - cr_assert_eq(ret, -FI_EINVAL); -} - -/* Test invalid parameters for teardown */ -void buddy_allocator_teardown_error(void) -{ - int ret; - - ret = _gnix_buddy_allocator_destroy(NULL); - cr_assert_eq(ret, -FI_EINVAL); -} - -/* Sequential alloc */ -void do_alloc(uint32_t len) -{ - uint32_t i = 0, ret; - - /* Allocate all the memory and write to each block */ - for (; i < LEN / len; i++) { - ptrs[i].size = len; - ret = 
_gnix_buddy_alloc(buddy_handle, &ptrs[i].ptr, len); - cr_assert(!ret, "_gnix_buddy_alloc"); - memset(ptrs[i].ptr, 0, len); - } - - /* Ensure that all free lists are empty */ - for (i = 0; i < buddy_handle->nlists; i++) { - ret = dlist_empty(buddy_handle->lists + i); - cr_assert_eq(ret, 1); - } -} - -/* Sequential free */ -void do_free(uint32_t len) -{ - int i = 0, ret; - - /* Free all allocated blocks */ - for (i = 0; i < LEN / len; i++) { - ret = _gnix_buddy_free(buddy_handle, ptrs[i].ptr, ptrs[i].size); - cr_assert(!ret, "_gnix_buddy_free"); - } - - /* Ensure that every free list except the last is empty */ - for (i = 0; i < buddy_handle->nlists - 1; i++) { - ret = dlist_empty(buddy_handle->lists + i); - cr_assert_eq(ret, 1); - } - ret = dlist_empty(buddy_handle->lists + i); - cr_assert_eq(ret, 0); -} - -TestSuite(buddy_allocator, .init = buddy_allocator_setup, - .fini = buddy_allocator_teardown, .disabled = false); - -/* Sequential alloc and frees */ -Test(buddy_allocator, sequential_alloc_free) -{ - uint32_t i = MIN_LEN; - - for (i = MIN_LEN; i <= MAX_LEN; i *= 2) { - do_alloc(i); - do_free(i); - } -} - -/* Pseudo random allocs and frees */ -Test(buddy_allocator, random_alloc_free) -{ - int i = 0, j = 0, ret; - - srand((unsigned) time(NULL)); - - for (j = MIN_LEN; j <= MAX_LEN; j *= 2) { - do { - ret = rand() % 100; - - if (ret <= 49) { - /* ~50% chance to alloc min size blocks*/ - ptrs[i].size = MIN_BLOCK_SIZE; - } else if (ret >= 50 && - ret <= 87) { - /* ~37% chance to alloc blocks of size - * [MIN_BLOCK_SIZE * 2, MAX_BLOCK_SIZE / 2] - */ - ptrs[i].size = OFFSET(MIN_BLOCK_SIZE, - (rand() % - (buddy_handle->nlists - - 1)) + 1); - } else { - /* ~13% chance to alloc max size blocks */ - ptrs[i].size = buddy_handle->max; - } - - ret = _gnix_buddy_alloc(buddy_handle, &ptrs[i].ptr, - ptrs[i].size); - cr_assert_neq(ret, -FI_EINVAL); - - i++; - } while (ret != -FI_ENOMEM); - - /* Free all allocated blocks */ - for (i -= 2; i >= 0; i--) { - ret = 
_gnix_buddy_free(buddy_handle, ptrs[i].ptr, - ptrs[i].size); - cr_assert(!ret, "_gnix_buddy_free"); - } - - /* Ensure that every free list except the last is empty */ - for (i = 0; i < buddy_handle->nlists - 1; i++) { - ret = dlist_empty(buddy_handle->lists + i); - cr_assert_eq(ret, 1); - } - ret = dlist_empty(buddy_handle->lists + i); - cr_assert_eq(ret, 0); - - i = 0; - } -} - -Test(buddy_allocator, alloc_free_error) -{ - int ret; - void *tmp; - - do_alloc(MIN_LEN); - - /* Request one additional block */ - ret = _gnix_buddy_alloc(buddy_handle, &tmp, MIN_LEN); - cr_assert_eq(ret, -FI_ENOMEM); - - do_free(MIN_LEN); -} - -/* Test invalid buddy alloc and free parameters */ -Test(buddy_allocator, parameter_error) -{ - int ret; - - buddy_allocator_setup_error(); - buddy_allocator_teardown_error(); - - /* BEGIN: Alloc, invalid parameters */ - ret = _gnix_buddy_alloc(NULL, ptrs->ptr, MAX_LEN); - cr_assert_eq(ret, -FI_EINVAL); - - ret = _gnix_buddy_alloc(buddy_handle, ptrs->ptr, MAX_LEN + 1); - cr_assert_eq(ret, -FI_EINVAL); - - ret = _gnix_buddy_alloc(buddy_handle, ptrs->ptr, 0); - cr_assert_eq(ret, -FI_EINVAL); - - ret = _gnix_buddy_alloc(buddy_handle, NULL, MAX_LEN); - cr_assert_eq(ret, -FI_EINVAL); - /* END: Alloc, invalid parameters */ - - /* BEGIN: Free, invalid parameters */ - ret = _gnix_buddy_free(NULL, ptrs->ptr, MAX_LEN); - cr_assert_eq(ret, -FI_EINVAL); - - ret = _gnix_buddy_free(buddy_handle, NULL, MAX_LEN); - cr_assert_eq(ret, -FI_EINVAL); - - ret = _gnix_buddy_free(buddy_handle, buf - 1, MAX_LEN); - cr_assert_eq(ret, -FI_EINVAL); - - ret = _gnix_buddy_free(buddy_handle, buf + LEN, MAX_LEN); - cr_assert_eq(ret, -FI_EINVAL); - - ret = _gnix_buddy_free(buddy_handle, buf, MAX_LEN + 1); - cr_assert_eq(ret, -FI_EINVAL); - - ret = _gnix_buddy_free(buddy_handle, buf - 1, 0); - cr_assert_eq(ret, -FI_EINVAL); - /* END: Free, invalid parameters */ -} diff --git a/prov/gni/test/cancel.c b/prov/gni/test/cancel.c deleted file mode 100644 index eb24433c015..00000000000 
--- a/prov/gni/test/cancel.c +++ /dev/null @@ -1,371 +0,0 @@ -/* - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2020 Triad National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include - -#include "gnix_vc.h" -#include "gnix_cm_nic.h" -#include "gnix_hashtable.h" -#include "gnix_rma.h" -#include "gnix_ep.h" - -#include -#include "gnix_rdma_headers.h" -#include "common.h" - -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fid_fabric *fab; -static struct fid_domain *dom; -static struct fid_ep *ep[2]; -static struct fid_av *av; -static struct fi_info *hints; -static struct fi_info *fi; -static void *ep_name[2]; -static size_t gni_addr[2]; -static struct fid_cq *msg_cq[2]; -static struct fi_cq_attr cq_attr; - -#define BUF_SZ (8*1024) -static char *target, *target_base; -static char *source, *source_base; -static struct fid_mr *rem_mr, *loc_mr; -static uint64_t mr_key; - -void cancel_setup(void) -{ - int ret = 0; - struct fi_av_attr attr; - size_t addrlen = 0; - int rem_requested_key, loc_requested_key; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->domain_attr->cq_data_size = 4; - hints->mode = mode_bits; - - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(fi_version(), NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - ret = fi_domain(fab, fi, &dom, NULL); - cr_assert(!ret, "fi_domain"); - - memset(&attr, 0, sizeof(attr)); - attr.type = FI_AV_MAP; - attr.count = 16; - - ret = fi_av_open(dom, &attr, &av, NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_endpoint(dom, fi, &ep[0], NULL); - cr_assert(!ret, "fi_endpoint"); - - cq_attr.format = FI_CQ_FORMAT_CONTEXT; - cq_attr.size = 1024; - cq_attr.wait_obj = 0; - - ret = fi_cq_open(dom, &cq_attr, &msg_cq[0], 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_cq_open(dom, &cq_attr, &msg_cq[1], 0); 
- cr_assert(!ret, "fi_cq_open"); - - ret = fi_ep_bind(ep[0], &msg_cq[0]->fid, FI_SEND | FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_getname(&ep[0]->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - ep_name[0] = malloc(addrlen); - cr_assert(ep_name[0] != NULL); - - ret = fi_getname(&ep[0]->fid, ep_name[0], &addrlen); - cr_assert(ret == FI_SUCCESS); - - ret = fi_endpoint(dom, fi, &ep[1], NULL); - cr_assert(!ret, "fi_endpoint"); - - ret = fi_ep_bind(ep[1], &msg_cq[1]->fid, FI_SEND | FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ep_name[1] = malloc(addrlen); - cr_assert(ep_name[1] != NULL); - - ret = fi_getname(&ep[1]->fid, ep_name[1], &addrlen); - cr_assert(ret == FI_SUCCESS); - - ret = fi_av_insert(av, ep_name[0], 1, &gni_addr[0], 0, - NULL); - cr_assert(ret == 1); - - ret = fi_av_insert(av, ep_name[1], 1, &gni_addr[1], 0, - NULL); - cr_assert(ret == 1); - - ret = fi_ep_bind(ep[0], &av->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(ep[1], &av->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_enable(ep[0]); - cr_assert(!ret, "fi_ep_enable"); - - ret = fi_enable(ep[1]); - cr_assert(!ret, "fi_ep_enable"); - - target_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(target_base); - target = GNIT_ALIGN_BUFFER(char *, target_base); - - source_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(source_base); - source = GNIT_ALIGN_BUFFER(char *, source_base); - - rem_requested_key = USING_SCALABLE(fi) ? 1 : 0; - loc_requested_key = USING_SCALABLE(fi) ? 
2 : 0; - - ret = fi_mr_reg(dom, - target, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - rem_requested_key, - 0, - &rem_mr, - &target); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom, - source, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - loc_requested_key, - 0, - &loc_mr, - &source); - cr_assert_eq(ret, 0); - - if (USING_SCALABLE(fi)) { - MR_ENABLE(rem_mr, target, BUF_SZ); - MR_ENABLE(loc_mr, source, BUF_SZ); - } - - mr_key = fi_mr_key(rem_mr); -} - -void cancel_teardown(void) -{ - int ret = 0; - - fi_close(&loc_mr->fid); - fi_close(&rem_mr->fid); - - free(target_base); - free(source_base); - - ret = fi_close(&ep[0]->fid); - cr_assert(!ret, "failure in closing ep."); - - ret = fi_close(&ep[1]->fid); - cr_assert(!ret, "failure in closing ep."); - - ret = fi_close(&msg_cq[0]->fid); - cr_assert(!ret, "failure in send cq."); - - ret = fi_close(&msg_cq[1]->fid); - cr_assert(!ret, "failure in recv cq."); - - ret = fi_close(&av->fid); - cr_assert(!ret, "failure in closing av."); - - ret = fi_close(&dom->fid); - cr_assert(!ret, "failure in closing domain."); - - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); - - fi_freeinfo(fi); - fi_freeinfo(hints); - free(ep_name[0]); - free(ep_name[1]); -} - -void cancel_init_data(char *buf, int len, char seed) -{ - int i; - - for (i = 0; i < len; i++) { - buf[i] = seed++; - } -} - -int cancel_check_data(char *buf1, char *buf2, int len) -{ - int i; - - for (i = 0; i < len; i++) { - if (buf1[i] != buf2[i]) { - printf("data mismatch, elem: %d, exp: %x, act: %x\n", - i, buf1[i], buf2[i]); - return 0; - } - } - - return 1; -} - -/******************************************************************************* - * Test MSG functions - ******************************************************************************/ - -TestSuite(gnix_cancel, .init = cancel_setup, .fini = cancel_teardown, - .disabled = false); - -Test(gnix_cancel, cancel_ep_send) -{ - int ret; - struct gnix_fid_ep *gnix_ep; - struct gnix_fab_req *req; - struct 
fi_cq_err_entry buf; - struct gnix_vc *vc; - void *foobar_ptr = NULL; - gnix_ht_key_t *key; - - /* simulate a posted request */ - gnix_ep = container_of(ep[0], struct gnix_fid_ep, ep_fid); - req = _gnix_fr_alloc(gnix_ep); - - req->msg.send_info[0].send_addr = 0xdeadbeef; - req->msg.cum_send_len = req->msg.send_info[0].send_len = 128; - req->user_context = foobar_ptr; - req->type = GNIX_FAB_RQ_SEND; - - /* allocate, store vc */ - ret = _gnix_vc_alloc(gnix_ep, NULL, &vc); - cr_assert(ret == FI_SUCCESS, "_gnix_vc_alloc failed"); - - key = (gnix_ht_key_t *)&gnix_ep->src_addr.gnix_addr; - ret = _gnix_ht_insert(gnix_ep->vc_ht, *key, vc); - cr_assert(!ret); - - /* make a dummy request */ - dlist_insert_head(&req->dlist, &vc->tx_queue); - - /* cancel simulated request */ - ret = fi_cancel(&ep[0]->fid, foobar_ptr); - cr_assert(ret == FI_SUCCESS, "fi_cancel failed"); - - /* check for event */ - ret = fi_cq_readerr(msg_cq[0], &buf, FI_SEND); - cr_assert(ret == 1, "did not find one error event"); - - cr_assert(buf.buf == (void *) 0xdeadbeef, "buffer mismatch"); - cr_assert(buf.data == 0, "data mismatch"); - cr_assert(buf.err == FI_ECANCELED, "error code mismatch"); - cr_assert(buf.prov_errno == FI_ECANCELED, "prov error code mismatch"); - cr_assert(buf.len == 128, "length mismatch"); -} - -Test(gnix_cancel, cancel_ep_recv) -{ - int ret; - struct fi_cq_err_entry buf; - - /* simulate a posted request */ - ret = fi_recv(ep[0], (void *) 0xdeadbeef, 128, 0, FI_ADDR_UNSPEC, - (void *) 0xcafebabe); - cr_assert(ret == FI_SUCCESS, "fi_recv failed"); - - /* cancel simulated request */ - ret = fi_cancel(&ep[0]->fid, (void *) 0xcafebabe); - cr_assert(ret == FI_SUCCESS, "fi_cancel failed"); - - /* check for event */ - ret = fi_cq_readerr(msg_cq[0], &buf, FI_RECV); - cr_assert(ret == 1, "did not find one error event"); - - cr_assert(buf.buf == (void *) 0xdeadbeef, "buffer mismatch"); - cr_assert(buf.data == 0, "data mismatch"); - cr_assert(buf.err == FI_ECANCELED, "error code mismatch"); - 
cr_assert(buf.prov_errno == FI_ECANCELED, "prov error code mismatch"); - cr_assert(buf.len == 128, "length mismatch"); -} - -Test(gnix_cancel, cancel_ep_no_event) -{ - int ret; - - ret = fi_cancel(&ep[0]->fid, NULL); - cr_assert(ret == -FI_ENOENT, "fi_cancel failed"); -} - -Test(gnix_cancel, cancel_ep_no_domain) -{ - int ret; - struct gnix_fid_ep *gnix_ep; - struct gnix_fid_domain *gnix_dom; - - /* simulate a disconnected endpoint */ - gnix_ep = container_of(ep[0], struct gnix_fid_ep, ep_fid); - gnix_dom = gnix_ep->domain; - gnix_ep->domain = NULL; - - /* run test */ - ret = fi_cancel(&ep[0]->fid, NULL); - cr_assert(ret == -FI_EDOMAIN, "fi_cancel failed"); - - /* reconnect */ - gnix_ep->domain = gnix_dom; -} diff --git a/prov/gni/test/cm.c b/prov/gni/test/cm.c deleted file mode 100644 index 0ce599c4d71..00000000000 --- a/prov/gni/test/cm.c +++ /dev/null @@ -1,480 +0,0 @@ -/* - * Copyright (c) 2016-2017 Cray Inc. All rights reserved. - * Copyright (c) 2019-2020 Triad National Security, LLC. - * All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* superset of previous */ -#include - -#include "gnix_vc.h" -#include "gnix_cm_nic.h" -#include "gnix_hashtable.h" -#include "gnix_atomic.h" -#include "gnix_cm.h" - -#include -#include "gnix_rdma_headers.h" -#include "common.h" - -#include "common.h" - -#if 1 -#define dbg_printf(...) -#else -#define dbg_printf(...) \ - do { \ - printf(__VA_ARGS__); \ - fflush(stdout); \ - } while (0) -#endif - -#define NUMEPS 2 - -#define DEF_PORT "1973" - -#define EQE_SIZE (sizeof(struct fi_eq_cm_entry) + GNIX_CM_DATA_MAX_SIZE) - - -static struct fid_fabric *cli_fab; -static struct fid_domain *cli_dom; -static struct fid_ep *cli_ep; -static struct fi_info *cli_hints; -static struct fi_info *cli_fi; -static struct fid_eq *cli_eq; -static struct fid_cq *cli_cq; -static char *cli_cm_in_data = "Hola. 
Soy cliente."; - -static struct fid_fabric *srv_fab; -static struct fid_domain *srv_dom; -static struct fid_pep *srv_pep; -static struct fid_ep *srv_ep; -static struct fi_info *srv_hints; -static struct fi_info *srv_fi; -static struct fid_eq *srv_eq; -static struct fid_cq *srv_cq; -static char *srv_cm_in_data = "Este es servidor."; - -struct fi_eq_attr eq_attr = { - .wait_obj = FI_WAIT_UNSPEC -}; - -struct fi_cq_attr cq_attr = { - .wait_obj = FI_WAIT_NONE -}; - -int cm_local_ip(struct sockaddr_in *sa) -{ - struct ifaddrs *ifap; - struct ifaddrs *ifa; - int ret = -1; - - ofi_getifaddrs(&ifap); - - ifa = ifap; - while (ifa) { - dbg_printf("IF: %s, IP ADDR: %s\n", - ifa->ifa_name, - inet_ntoa(((struct sockaddr_in *) - (ifa->ifa_addr))->sin_addr)); - /* Return first non loopback interface. */ - if (ifa->ifa_addr && - ifa->ifa_addr->sa_family == AF_INET && - !ofi_is_loopback_addr(ifa->ifa_addr)) { - ret = 0; - break; - } - ifa = ifa->ifa_next; - } - - if (!ret) { - memcpy((void *)sa, (void *)ifa->ifa_addr, - sizeof(struct sockaddr)); - } - - freeifaddrs(ifap); - - return ret; -} - -int cm_server_start(void) -{ - int ret; - struct sockaddr_in loc_sa; - - cm_local_ip(&loc_sa); - - srv_hints = fi_allocinfo(); - srv_hints->fabric_attr->name = strdup("gni"); - srv_hints->ep_attr->type = FI_EP_MSG; - srv_hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - - ret = fi_getinfo(fi_version(), inet_ntoa(loc_sa.sin_addr), - DEF_PORT, FI_SOURCE, srv_hints, &srv_fi); - cr_assert(!ret); - - ret = fi_fabric(srv_fi->fabric_attr, &srv_fab, NULL); - cr_assert(!ret); - - ret = fi_eq_open(srv_fab, &eq_attr, &srv_eq, NULL); - cr_assert(!ret); - - ret = fi_passive_ep(srv_fab, srv_fi, &srv_pep, NULL); - cr_assert(!ret); - - ret = fi_pep_bind(srv_pep, &srv_eq->fid, 0); - cr_assert(!ret); - - ret = fi_listen(srv_pep); - cr_assert(!ret); - - dbg_printf("Server start complete.\n"); - - return 0; -} - -void cm_stop_server(void) -{ - int ret; - - ret = fi_close(&srv_cq->fid); - cr_assert_eq(ret, 
FI_SUCCESS); - - ret = fi_close(&srv_ep->fid); - cr_assert_eq(ret, FI_SUCCESS); - - ret = fi_close(&srv_dom->fid); - cr_assert_eq(ret, FI_SUCCESS); - - ret = fi_close(&srv_pep->fid); - cr_assert_eq(ret, FI_SUCCESS); - - ret = fi_close(&srv_eq->fid); - cr_assert_eq(ret, FI_SUCCESS); - - ret = fi_close(&srv_fab->fid); - cr_assert_eq(ret, FI_SUCCESS); - - fi_freeinfo(srv_fi); -} - -int cm_server_accept(void) -{ - uint32_t event; - ssize_t rd; - int ret; - struct fi_eq_cm_entry *entry; - void *eqe_buf[EQE_SIZE] = {0}; - - rd = fi_eq_sread(srv_eq, &event, &eqe_buf, EQE_SIZE, -1, 0); - cr_assert(rd == (sizeof(*entry) + strlen(cli_cm_in_data))); - cr_assert(event == FI_CONNREQ); - - entry = (struct fi_eq_cm_entry *)eqe_buf; - cr_assert(!memcmp(cli_cm_in_data, entry->data, - strlen(cli_cm_in_data))); - - ret = fi_domain(srv_fab, entry->info, &srv_dom, NULL); - cr_assert(!ret); - - ret = fi_endpoint(srv_dom, entry->info, &srv_ep, NULL); - cr_assert(!ret, "fi_endpoint"); - - fi_freeinfo(entry->info); - - cq_attr.format = FI_CQ_FORMAT_TAGGED; - cq_attr.size = 1024; - cq_attr.wait_obj = 0; - - ret = fi_cq_open(srv_dom, &cq_attr, &srv_cq, &srv_cq); - cr_assert(!ret); - - ret = fi_ep_bind(srv_ep, &srv_eq->fid, 0); - cr_assert(!ret); - - ret = fi_ep_bind(srv_ep, &srv_cq->fid, FI_SEND | FI_RECV); - cr_assert(!ret); - - ret = fi_enable(srv_ep); - cr_assert(!ret); - - ret = fi_accept(srv_ep, srv_cm_in_data, GNIX_CM_DATA_MAX_SIZE+1); - cr_assert(ret == -FI_EINVAL); - - ret = fi_accept(srv_ep, srv_cm_in_data, strlen(srv_cm_in_data)); - cr_assert(!ret); - - dbg_printf("Server accept complete.\n"); - - return 0; -} - -int cm_server_finish_connect(void) -{ - uint32_t event; - struct fi_eq_cm_entry entry; - ssize_t rd; - - rd = fi_eq_read(srv_eq, &event, &entry, sizeof(entry), 0); - if (rd > 0) { - dbg_printf("got event: %d\n", event); - cr_assert(rd == sizeof(entry)); - cr_assert(event == FI_CONNECTED && entry.fid == &srv_ep->fid); - return 1; - } - - return 0; -} - -int 
cm_client_start_connect(void) -{ - int ret; - struct sockaddr_in loc_sa; - - cm_local_ip(&loc_sa); - - cli_hints = fi_allocinfo(); - cli_hints->fabric_attr->name = strdup("gni"); - cli_hints->caps = GNIX_EP_PRIMARY_CAPS; - cli_hints->ep_attr->type = FI_EP_MSG; - cli_hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - - ret = fi_getinfo(fi_version(), inet_ntoa(loc_sa.sin_addr), - DEF_PORT, 0, cli_hints, &cli_fi); - cr_assert(!ret); - - ret = fi_fabric(cli_fi->fabric_attr, &cli_fab, NULL); - cr_assert(!ret); - - ret = fi_eq_open(cli_fab, &eq_attr, &cli_eq, NULL); - cr_assert(!ret); - - ret = fi_domain(cli_fab, cli_fi, &cli_dom, NULL); - cr_assert(!ret); - - ret = fi_endpoint(cli_dom, cli_fi, &cli_ep, NULL); - cr_assert(!ret, "fi_endpoint"); - - cq_attr.format = FI_CQ_FORMAT_TAGGED; - cq_attr.size = 1024; - cq_attr.wait_obj = 0; - - ret = fi_cq_open(cli_dom, &cq_attr, &cli_cq, &cli_cq); - cr_assert(!ret); - - ret = fi_ep_bind(cli_ep, &cli_eq->fid, 0); - cr_assert(!ret); - - ret = fi_ep_bind(cli_ep, &cli_cq->fid, FI_SEND | FI_RECV); - cr_assert(!ret); - - ret = fi_enable(cli_ep); - cr_assert(!ret); - - ret = fi_connect(cli_ep, cli_fi->dest_addr, cli_cm_in_data, - GNIX_CM_DATA_MAX_SIZE+1); - cr_assert(ret == -FI_EINVAL); - - ret = fi_connect(cli_ep, cli_fi->dest_addr, cli_cm_in_data, - strlen(cli_cm_in_data)); - cr_assert(!ret); - - dbg_printf("Client connect complete.\n"); - - return 0; -} - -int cm_client_finish_connect(void) -{ - uint32_t event; - ssize_t rd; - struct fi_eq_cm_entry *entry; - void *eqe_buf[EQE_SIZE] = {0}; - - rd = fi_eq_read(cli_eq, &event, eqe_buf, EQE_SIZE, 0); - if (rd > 0) { - dbg_printf("got event: %d\n", event); - entry = (struct fi_eq_cm_entry *)eqe_buf; - cr_assert(rd == (sizeof(*entry) + strlen(srv_cm_in_data))); - cr_assert(event == FI_CONNECTED && entry->fid == &cli_ep->fid); - cr_assert(!memcmp(srv_cm_in_data, entry->data, - strlen(srv_cm_in_data))); - return 1; - } - - return 0; -} - -void cm_stop_client(void) -{ - int ret; - - ret = 
fi_close(&cli_cq->fid); - cr_assert_eq(ret, FI_SUCCESS); - - ret = fi_close(&cli_ep->fid); - cr_assert_eq(ret, FI_SUCCESS); - - ret = fi_close(&cli_dom->fid); - cr_assert_eq(ret, FI_SUCCESS); - - ret = fi_close(&cli_eq->fid); - cr_assert_eq(ret, FI_SUCCESS); - - ret = fi_close(&cli_fab->fid); - cr_assert_eq(ret, FI_SUCCESS); - - fi_freeinfo(cli_fi); -} - -void cm_basic_send(int trunc) -{ - int ret,i; - int slen = 8, rlen; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry cqe; - struct fi_cq_err_entry err_cqe = {0}; - ssize_t sz; - uint64_t source = 0xa4321234a4321234, - target = 0xb5678901b5678901, - before = target; - uint8_t *s_ptr,*t_ptr; - - rlen = trunc ? slen/2 : slen; - - sz = fi_send(cli_ep, &source, slen, 0, 0, &target); - cr_assert_eq(sz, 0); - - sz = fi_recv(srv_ep, &target, rlen, 0, 0, &source); - cr_assert_eq(sz, 0); - - /* need to progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(cli_cq, &cqe, 1); - if (ret == 1) { - cr_assert_eq(cqe.op_context, &target); - source_done = 1; - } - - ret = fi_cq_read(srv_cq, &cqe, 1); - if (ret == 1) { - cr_assert(trunc == 0); - cr_assert_eq(cqe.op_context, &source); - cr_assert_eq(cqe.len, rlen); - dest_done = 1; - } else { - if (ret == -FI_EAVAIL) { - cr_assert(trunc != 0); - ret = fi_cq_readerr(srv_cq, &err_cqe, 0); - if (ret == 1) { - cr_assert(err_cqe.olen == (slen - rlen), - "Bad error olen"); - cr_assert(err_cqe.err == FI_ETRUNC, - "Bad error errno"); - cr_assert(err_cqe.prov_errno == FI_ETRUNC, - "Bad prov errno"); - cr_assert(err_cqe.err_data == NULL, - "Bad error provider data"); - dest_done = 1; - } - } - } - - } while (!source_done || !dest_done); - - s_ptr = (uint8_t *)&source; - t_ptr = (uint8_t *)&target; - for (i = 0; i < rlen; i++) - cr_assert_eq(s_ptr[i], t_ptr[i]); - - s_ptr = (uint8_t *)&before; - for (i = rlen; i < slen; i++) - cr_assert_eq(s_ptr[i], t_ptr[i]); - - dbg_printf("Basic send/recv complete! 
(0x%lx, 0x%lx)\n", - source, target); -} - -Test(cm_basic, srv_setup, .disabled = false) -{ - int cli_connected = 0, srv_connected = 0; - int i; - - /* Start listening PEP. */ - cm_server_start(); - /* Create EP and fi_connect() to server. */ - cm_client_start_connect(); - /* Wait for EQE and fi_accept() new EP. */ - cm_server_accept(); - - /* Wait for FI_CONNECTED EQES on client and server EQ. */ - do { - if (!srv_connected) { - srv_connected += cm_server_finish_connect(); - if (srv_connected) { - dbg_printf("Server connect complete!\n"); - } - } - - if (!cli_connected) { - cli_connected += cm_client_finish_connect(); - if (cli_connected) { - dbg_printf("Client connect complete!\n"); - } - } - } while (!srv_connected || !cli_connected); - - dbg_printf("testing cm_basic_send without trunc\n"); - for (i = 0; i < 1000; i++) { - /* Perform basic send/recv. */ - cm_basic_send(0); - } - - /* Perform basic send/recv with trunc*/ - dbg_printf("testing cm_basic_send with trunc\n"); - cm_basic_send(1); - - cm_stop_server(); - cm_stop_client(); -} diff --git a/prov/gni/test/cntr.c b/prov/gni/test/cntr.c deleted file mode 100644 index 51bdf9ce08e..00000000000 --- a/prov/gni/test/cntr.c +++ /dev/null @@ -1,1096 +0,0 @@ -/* - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2020 Triad National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include - -#include "gnix_vc.h" -#include "gnix_cm_nic.h" -#include "gnix_hashtable.h" -#include "gnix_rma.h" - -#include -#include "gnix_rdma_headers.h" -#include "common.h" - -#if 1 -#define dbg_printf(...) -#else -#define dbg_printf(...) 
\ -do { \ - printf(__VA_ARGS__); \ - fflush(stdout); \ -} while (0) -#endif - -#define NUM_EPS 5 -/* NUM_THREADS must be <= NUM_EPS */ -#define NUM_THREADS NUM_EPS - -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fid_fabric *fab; -static struct fid_domain *dom[NUM_EPS]; -static struct fid_ep *ep[NUM_EPS]; -static struct fid_av *av[NUM_EPS]; -static struct fi_info *hints; -static struct fi_info *fi; -static void *ep_name[NUM_EPS]; -static size_t gni_addr[NUM_EPS][NUM_EPS]; -static struct fid_cq *send_cq; -static struct fid_cq *recv_cq; -static struct fi_cq_attr cq_attr; -static struct fid_cntr *write_cntrs[NUM_EPS]; -static struct fid_cntr *read_cntrs[NUM_EPS]; -static struct fid_cntr *recv_cntrs[NUM_EPS]; - -static struct fi_cntr_attr cntr_attr = {.events = FI_CNTR_EVENTS_COMP, - .wait_obj = FI_WAIT_UNSPEC, - .flags = 0}; - -#define BUF_SZ (64*1024) -static char *target, *target_base; -static char *source, *source_base; -static struct fid_mr *rem_mr[NUM_EPS], *loc_mr[NUM_EPS]; -static uint64_t mr_key[NUM_EPS]; - -static inline void cntr_setup_eps(const uint64_t caps, - uint32_t version, - int mr_mode) -{ - int i, ret; - struct fi_av_attr attr; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = mr_mode; - hints->domain_attr->cq_data_size = 4; - hints->mode = mode_bits; - - hints->fabric_attr->prov_name = strdup("gni"); - - hints->caps = caps; - - ret = fi_getinfo(version, NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - memset(&attr, 0, sizeof(attr)); - attr.type = FI_AV_MAP; - attr.count = 16; - - for (i = 0; i < NUM_EPS; i++) { - ret = fi_domain(fab, fi, &dom[i], NULL); - cr_assert(!ret, "fi_domain"); - - ret = fi_av_open(dom[i], &attr, &av[i], NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_endpoint(dom[i], fi, &ep[i], 
NULL); - cr_assert(!ret, "fi_endpoint"); - } -} - -/* Only used by cntr suite for now */ -static inline void cntr_setup_cqs(void) -{ - int ret; - - cq_attr.format = FI_CQ_FORMAT_CONTEXT; - cq_attr.size = 1024; - cq_attr.wait_obj = FI_WAIT_NONE; - - ret = fi_cq_open(dom[0], &cq_attr, &send_cq, 0); - cr_assert(!ret, "fi_cq_open"); - ret = fi_ep_bind(ep[0], &send_cq->fid, FI_SEND); - cr_assert(!ret, "fi_ep_bind cq"); - - ret = fi_cq_open(dom[1], &cq_attr, &recv_cq, 0); - cr_assert(!ret, "fi_cq_open"); - ret = fi_ep_bind(ep[1], &recv_cq->fid, FI_RECV); - cr_assert(!ret, "fi_ep_bind cq"); - -} - -static inline void cntr_setup_cntrs(uint64_t write_flags, - uint64_t read_flags, - uint64_t recv_flags) -{ - int i, ret; - - for (i = 0; i < NUM_EPS; i++) { - if (write_flags) { - ret = fi_cntr_open(dom[i], &cntr_attr, - &write_cntrs[i], 0); - cr_assert(!ret, "fi_cntr_open"); - ret = fi_ep_bind(ep[i], &write_cntrs[i]->fid, - write_flags); - cr_assert(!ret, "fi_ep_bind cntr"); - } else { - write_cntrs[i] = NULL; - } - - if (read_flags) { - ret = fi_cntr_open(dom[i], &cntr_attr, - &read_cntrs[i], 0); - cr_assert(!ret, "fi_cntr_open"); - ret = fi_ep_bind(ep[i], &read_cntrs[i]->fid, - read_flags); - cr_assert(!ret, "fi_ep_bind cntr"); - } else { - read_cntrs[i] = NULL; - } - - if (recv_flags) { - ret = fi_cntr_open(dom[i], &cntr_attr, - &recv_cntrs[i], 0); - cr_assert(!ret, "fi_cntr_open"); - ret = fi_ep_bind(ep[i], &recv_cntrs[i]->fid, - recv_flags); - cr_assert(!ret, "fi_ep_bind cntr"); - } else { - recv_cntrs[i] = NULL; - } - } - -} - -static inline void cntr_setup_av(void) -{ - int i, j, ret; - size_t addrlen = 0; - - ret = fi_getname(&ep[0]->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - for (i = 0; i < NUM_EPS; i++) { - ep_name[i] = malloc(addrlen); - cr_assert(ep_name[i] != NULL); - - ret = fi_getname(&ep[i]->fid, ep_name[i], &addrlen); - cr_assert(ret == FI_SUCCESS); - - for (j = 0; j < NUM_EPS; j++) { - ret = fi_av_insert(av[j], ep_name[i], 1, - &gni_addr[j][i], 0, 
NULL); - cr_assert(ret == 1); - } - } - - for (i = 0; i < NUM_EPS; i++) { - ret = fi_ep_bind(ep[i], &av[i]->fid, 0); - cr_assert(!ret, "fi_ep_bind av"); - } -} - -static inline void cntr_setup_enable_ep(void) -{ - int i, ret; - - for (i = 0; i < NUM_EPS; i++) { - ret = fi_enable(ep[i]); - cr_assert(!ret, "fi_ep_enable"); - } -} - -static inline void cntr_setup_mr(void) -{ - int i, ret; - int source_key, target_key; - - target_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(target_base); - target = GNIT_ALIGN_BUFFER(char *, target_base); - - source_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(source_base); - source = GNIT_ALIGN_BUFFER(char *, source_base); - - for (i = 0; i < NUM_EPS; i++) { - source_key = USING_SCALABLE(fi) ? (i * 2) + 1 : 0; - target_key = USING_SCALABLE(fi) ? (i * 2) + 2 : 0; - - ret = fi_mr_reg(dom[i], target, BUF_SZ, - FI_REMOTE_READ | FI_REMOTE_WRITE, - 0, target_key, 0, &rem_mr[i], &target); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom[i], source, BUF_SZ, FI_READ | FI_WRITE, - 0, source_key, 0, &loc_mr[i], &source); - cr_assert_eq(ret, 0); - - if (USING_SCALABLE(fi)) { - MR_ENABLE(rem_mr[i], target, BUF_SZ); - MR_ENABLE(loc_mr[i], source, BUF_SZ); - } - - mr_key[i] = fi_mr_key(rem_mr[i]); - } -} - -static void __cntr_setup(uint32_t version, int mr_mode) -{ - cntr_setup_eps(GNIX_EP_PRIMARY_CAPS, version, mr_mode); - cntr_setup_av(); - cntr_setup_cqs(); - cntr_setup_cntrs(FI_WRITE | FI_SEND, FI_READ, FI_RECV); - cntr_setup_enable_ep(); - cntr_setup_mr(); -} - -static void cntr_setup_basic(void) -{ - __cntr_setup(fi_version(), GNIX_MR_BASIC); -} - -static void cntr_setup_scalable(void) -{ - __cntr_setup(fi_version(), GNIX_MR_SCALABLE); -} - -static void cntr_setup_default(void) -{ - __cntr_setup(fi_version(), GNIX_DEFAULT_MR_MODE); -} - -static inline void cntr_teardown_mr(void) -{ - int i; - - for (i = 0; i < NUM_EPS; i++) { - fi_close(&loc_mr[i]->fid); - fi_close(&rem_mr[i]->fid); - } - - free(target_base); - free(source_base); -} - 
-static inline void cntr_teardown_eps(void) -{ - int i, ret; - - for (i = 0; i < NUM_EPS; i++) { - ret = fi_close(&ep[i]->fid); - cr_assert(!ret, "failure in closing ep."); - - } -} - -static inline void cntr_teardown_cqs(void) -{ - int ret; - ret = fi_close(&send_cq->fid); - cr_assert(!ret, "failure in closing send cq."); - - ret = fi_close(&recv_cq->fid); - cr_assert(!ret, "failure in closing recv cq."); - -} - -static inline void cntr_teardown_cntrs(void) -{ - int i, ret; - - for (i = 0; i < NUM_EPS; i++) { - if (write_cntrs[i]) { - ret = fi_close(&write_cntrs[i]->fid); - cr_assert(!ret, "failure in closing write_cntr."); - } - - if (read_cntrs[i]) { - ret = fi_close(&read_cntrs[i]->fid); - cr_assert(!ret, "failure in closing read_cntr."); - } - - if (recv_cntrs[i]) { - ret = fi_close(&recv_cntrs[i]->fid); - cr_assert(!ret, "failure in closing read_cntr."); - } - } - -} - -static inline void cntr_teardown_fini(void) -{ - int i, ret; - - for (i = 0; i < NUM_EPS; i++) { - ret = fi_close(&av[i]->fid); - cr_assert(!ret, "failure in closing av."); - - ret = fi_close(&dom[i]->fid); - cr_assert(!ret, "failure in closing domain."); - - free(ep_name[i]); - } - - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); - - /* This can be moved up to set up */ - fi_freeinfo(fi); - fi_freeinfo(hints); -} - -static void cntr_teardown(void) -{ - cntr_teardown_mr(); - cntr_teardown_eps(); - cntr_teardown_cqs(); - cntr_teardown_cntrs(); - cntr_teardown_fini(); -} - -static void init_data(char *buf, int len, char seed) -{ - int i; - - for (i = 0; i < len; i++) - buf[i] = seed++; -} - -static int check_data(char *buf1, char *buf2, int len) -{ - int i; - - for (i = 0; i < len; i++) { - if (buf1[i] != buf2[i]) { - printf("data mismatch, elem: %d, exp: %x, act: %x\n", - i, buf1[i], buf2[i]); - return 0; - } - } - - return 1; -} - -static void xfer_for_each_size(void (*xfer)(int len), int slen, int elen) -{ - int i; - - for (i = slen; i <= elen; i *= 2) - xfer(i); 
-} - -/******************************************************************************* - * Test RMA functions - ******************************************************************************/ -TestSuite(cntr_default, .init = cntr_setup_default, .fini = cntr_teardown, - .disabled = false); -TestSuite(cntr_basic, .init = cntr_setup_basic, .fini = cntr_teardown, - .disabled = false); -TestSuite(cntr_scalable, .init = cntr_setup_scalable, .fini = cntr_teardown, - .disabled = false); - -static void do_write(int len) -{ - uint64_t old_w_cnt, new_w_cnt; - uint64_t old_r_cnt, new_r_cnt; - ssize_t sz; - - init_data(source, len, 0xab); - init_data(target, len, 0); - - - old_w_cnt = fi_cntr_read(write_cntrs[0]); - old_r_cnt = fi_cntr_read(read_cntrs[0]); - - sz = fi_write(ep[0], source, len, loc_mr[0], gni_addr[0][1], - _REM_ADDR(fi, target, target), mr_key[1], target); - cr_assert_eq(sz, 0); - - do { - new_w_cnt = fi_cntr_read(write_cntrs[0]); - if (new_w_cnt == (old_w_cnt + 1)) - break; - pthread_yield(); - } while (1); - - cr_assert(check_data(source, target, len), "Data mismatch"); - - new_r_cnt = fi_cntr_read(read_cntrs[0]); - - /* - * no fi_read called so old and new read cnts should be equal - */ - cr_assert(new_r_cnt == old_r_cnt); -} - -Test(cntr_basic, write) -{ - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(cntr_scalable, write) -{ - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -static void do_write_wait(int len) -{ - uint64_t old_w_cnt, new_w_cnt; - uint64_t old_r_cnt, new_r_cnt; - ssize_t sz; - const int iters = 100; - int i; - - init_data(source, len, 0xab); - init_data(target, len, 0); - - old_w_cnt = fi_cntr_read(write_cntrs[0]); - old_r_cnt = fi_cntr_read(read_cntrs[0]); - - for (i = 0; i < iters; i++) { - sz = fi_write(ep[0], source, len, loc_mr[0], - gni_addr[0][1], - _REM_ADDR(fi, target, target), mr_key[1], - target); - cr_assert_eq(sz, 0); - } - - fi_cntr_wait(write_cntrs[0], old_w_cnt+iters, -1); - new_w_cnt = fi_cntr_read(write_cntrs[0]); - 
cr_assert(old_w_cnt + iters == new_w_cnt); - - cr_assert(check_data(source, target, len), "Data mismatch"); - - new_r_cnt = fi_cntr_read(read_cntrs[0]); - - /* - * no fi_read called so old and new read cnts should be equal - */ - cr_assert(new_r_cnt == old_r_cnt); -} - -Test(cntr_basic, write_wait) -{ - xfer_for_each_size(do_write_wait, 8, BUF_SZ); -} - -Test(cntr_scalable, write_wait) -{ - xfer_for_each_size(do_write_wait, 8, BUF_SZ); -} - -static void do_read(int len) -{ - ssize_t sz; - uint64_t old_w_cnt, new_w_cnt; - uint64_t old_r_cnt, new_r_cnt; - -#define READ_CTX 0x4e3dda1aULL - init_data(source, len, 0); - init_data(target, len, 0xad); - - old_w_cnt = fi_cntr_read(write_cntrs[0]); - old_r_cnt = fi_cntr_read(read_cntrs[0]); - - sz = fi_read(ep[0], source, len, loc_mr[0], - gni_addr[0][1], - _REM_ADDR(fi, target, target), mr_key[1], - (void *)READ_CTX); - cr_assert_eq(sz, 0); - - do { - new_r_cnt = fi_cntr_read(read_cntrs[0]); - if (new_r_cnt == (old_r_cnt + 1)) - break; - pthread_yield(); - } while (1); - - cr_assert(check_data(source, target, len), "Data mismatch"); - - new_w_cnt = fi_cntr_read(write_cntrs[0]); - - /* - * no fi_read called so old and new read cnts should be equal - */ - cr_assert(new_w_cnt == old_w_cnt); -} - -static void do_read_wait(int len) -{ - int i, iters = 100; - ssize_t sz; - uint64_t old_w_cnt, new_w_cnt; - uint64_t old_r_cnt; - -#define READ_CTX 0x4e3dda1aULL - init_data(source, len, 0); - init_data(target, len, 0xad); - - old_w_cnt = fi_cntr_read(write_cntrs[0]); - old_r_cnt = fi_cntr_read(read_cntrs[0]); - - for (i = 0; i < iters; i++) { - sz = fi_read(ep[0], source, len, loc_mr[0], - gni_addr[0][1], - _REM_ADDR(fi, target, target), - mr_key[1], (void *)READ_CTX); - cr_assert_eq(sz, 0); - } - - fi_cntr_wait(read_cntrs[0], old_r_cnt + iters, -1); - - cr_assert(check_data(source, target, len), "Data mismatch"); - - new_w_cnt = fi_cntr_read(write_cntrs[0]); - - /* - * no fi_read called so old and new read cnts should be equal - */ 
- cr_assert(new_w_cnt == old_w_cnt); -} - -Test(cntr_basic, read) -{ - xfer_for_each_size(do_read, 8, BUF_SZ); -} - -Test(cntr_scalable, read) -{ - xfer_for_each_size(do_read, 8, BUF_SZ); -} - -Test(cntr_basic, read_wait) -{ - xfer_for_each_size(do_read_wait, 8, BUF_SZ); -} - -Test(cntr_scalable, read_wait) -{ - xfer_for_each_size(do_read_wait, 8, BUF_SZ); -} - -static inline void __send_recv(void) -{ - int ret, i, got_r = 0; - struct fi_context r_context, s_context; - struct fi_cq_entry cqe; - uint64_t old_s_cnt, new_s_cnt; - uint64_t old_r_cnt, new_r_cnt; - char s_buffer[128], r_buffer[128]; - - old_s_cnt = fi_cntr_read(write_cntrs[0]); - old_r_cnt = fi_cntr_read(recv_cntrs[1]); - - for (i = 0; i < 16; i++) { - sprintf(s_buffer, "Hello there iter=%d", i); - memset(r_buffer, 0, 128); - ret = fi_recv(ep[1], - r_buffer, - sizeof(r_buffer), - NULL, - gni_addr[1][0], - &r_context); - cr_assert_eq(ret, FI_SUCCESS, "fi_recv"); - ret = fi_send(ep[0], - s_buffer, - strlen(s_buffer), - NULL, - gni_addr[0][1], - &s_context); - cr_assert_eq(ret, FI_SUCCESS, "fi_send"); - - while ((ret = fi_cq_read(send_cq, &cqe, 1)) == -FI_EAGAIN) - pthread_yield(); - - cr_assert((cqe.op_context == &r_context) || - (cqe.op_context == &s_context), "fi_cq_read"); - got_r = (cqe.op_context == &r_context) ? 
1 : 0; - - if (got_r) { - new_r_cnt = fi_cntr_read(recv_cntrs[1]); - old_r_cnt++; - cr_assert(new_r_cnt == old_r_cnt); - } else { - new_s_cnt = fi_cntr_read(write_cntrs[0]); - old_s_cnt++; - cr_assert(new_s_cnt == old_s_cnt); - } - - while ((ret = fi_cq_read(recv_cq, &cqe, 1)) == -FI_EAGAIN) - pthread_yield(); - if (got_r) - cr_assert((cqe.op_context == &s_context), "fi_cq_read"); - else - cr_assert((cqe.op_context == &r_context), "fi_cq_read"); - - if (got_r) { - new_s_cnt = fi_cntr_read(write_cntrs[0]); - old_s_cnt++; - cr_assert(new_s_cnt == old_s_cnt); - } else { - new_r_cnt = fi_cntr_read(recv_cntrs[1]); - old_r_cnt++; - cr_assert(new_r_cnt == old_r_cnt); - } - - cr_assert(strcmp(s_buffer, r_buffer) == 0, "check message"); - - got_r = 0; - } - -} - -Test(cntr_basic, send_recv) -{ - __send_recv(); -} - -Test(cntr_scalable, send_recv) -{ - __send_recv(); -} - -/* - * Multithreaded tests - */ - -struct tinfo { - int msg_size; - int iters; -}; - -#define get_mark(i) ((char) (((i)%255)+0x31)) - -static ofi_atomic32_t cntr_test_next_tid; -static __thread uint32_t cntr_test_tid = ~(uint32_t) 0; -#define cntr_test_get_tid() \ - ((cntr_test_tid == ~(uint32_t) 0) ? 
\ - ofi_atomic_inc32(&cntr_test_next_tid) : \ - cntr_test_tid) - - -static void __cntr_setup_mt(uint32_t version, int mr_mode) -{ - cr_assert(NUM_EPS >= NUM_THREADS); - - cntr_setup_eps(GNIX_EP_PRIMARY_CAPS, version, mr_mode); - cntr_setup_av(); - cntr_setup_cntrs(FI_WRITE | FI_SEND, FI_READ, 0x0); - cntr_setup_enable_ep(); - cntr_setup_mr(); - - ofi_atomic_initialize32(&cntr_test_next_tid, 0); -} - -static void cntr_setup_mt_basic(void) -{ - __cntr_setup_mt(fi_version(), GNIX_MR_BASIC); -} - -static void cntr_setup_mt_scalable(void) -{ - __cntr_setup_mt(fi_version(), GNIX_MR_SCALABLE); -} - -static void cntr_setup_mt_default(void) -{ - __cntr_setup_mt(fi_version(), GNIX_DEFAULT_MR_MODE); -} - -static void cntr_teardown_mt(void) -{ - cntr_teardown_mr(); - cntr_teardown_eps(); - cntr_teardown_cntrs(); - cntr_teardown_fini(); -} - -TestSuite(cntr_mt_default, - .init = cntr_setup_mt_default, - .fini = cntr_teardown_mt, - .disabled = false); - -TestSuite(cntr_mt_basic, - .init = cntr_setup_mt_basic, - .fini = cntr_teardown_mt, - .disabled = false); - -TestSuite(cntr_mt_scalable, - .init = cntr_setup_mt_scalable, - .fini = cntr_teardown_mt, - .disabled = false); - -static void *do_thread_read_wait(void *data) -{ - int i, tid, ret; - ssize_t sz; - struct tinfo *info = (struct tinfo *) data; - int msg_size = info->msg_size; - int iters = info->iters; - - tid = cntr_test_get_tid(); - - dbg_printf("%d: reading\n", tid); - for (i = 0; i < iters; i++) { - sz = fi_read(ep[tid], &source[tid*msg_size], msg_size, - loc_mr[tid], gni_addr[tid][0], - _REM_ADDR(fi, target, &target[tid*msg_size]), - mr_key[0], (void *)(READ_CTX+i)); - cr_assert_eq(sz, 0); - } - - dbg_printf("%d: waiting\n", tid); - ret = fi_cntr_wait(read_cntrs[tid], iters, -1); - cr_assert(ret == FI_SUCCESS); - - dbg_printf("%d: done\n", tid); - return NULL; -} - -static inline void __read_wait(void) -{ - int i, j; - pthread_t threads[NUM_THREADS]; - const int msg_size = 128; - struct tinfo info = { msg_size, 500 /* 
iters */}; - - cr_assert(NUM_THREADS*msg_size <= BUF_SZ); - - memset(source, 0, NUM_THREADS*msg_size); - for (i = 0; i < NUM_THREADS; i++) { - memset(&target[i*msg_size], get_mark(i), msg_size); - } - - dbg_printf("creating threads\n"); - for (i = 1; i < NUM_THREADS; i++) { - pthread_create(&threads[i], NULL, do_thread_read_wait, &info); - } - - dbg_printf("joining\n"); - - for (i = 1; i < NUM_THREADS; i++) { - pthread_join(threads[i], NULL); - } - - /* Must wait until all threads are done, since we don't know - * which thread got which id */ - for (i = 1; i < NUM_THREADS; i++) { - for (j = 0; j < msg_size; j++) { - cr_assert(source[i*msg_size+j] == get_mark(i)); - } - } - - dbg_printf("done\n"); - -} - -Test(cntr_mt_basic, read_wait) -{ - __read_wait(); -} - -Test(cntr_mt_scalable, read_wait) -{ - __read_wait(); -} - -static void *do_thread_write_wait(void *data) -{ - int i, tid, ret; - ssize_t sz; - struct tinfo *info = (struct tinfo *) data; - int msg_size = info->msg_size; - int iters = info->iters; - - tid = cntr_test_get_tid(); - - dbg_printf("%d: writing\n", tid); - for (i = 0; i < iters; i++) { - sz = fi_write(ep[tid], &source[tid*msg_size], msg_size, - loc_mr[tid], gni_addr[tid][0], - _REM_ADDR(fi, target, &target[tid*msg_size]), - mr_key[0], (void *)(READ_CTX+i)); - cr_assert_eq(sz, 0); - } - - dbg_printf("%d: waiting\n", tid); - ret = fi_cntr_wait(write_cntrs[tid], iters, -1); - cr_assert(ret == FI_SUCCESS); - - dbg_printf("%d: done\n", tid); - return NULL; -} - -static inline void __write_wait(void) -{ - int i, j; - pthread_t threads[NUM_THREADS]; - const int msg_size = 128; - struct tinfo info = { msg_size, 500 /* iters */}; - - cr_assert(NUM_THREADS*msg_size <= BUF_SZ); - - memset(target, 0, NUM_THREADS*msg_size); - for (i = 0; i < NUM_THREADS; i++) { - memset(&source[i*msg_size], get_mark(i), msg_size); - } - - dbg_printf("creating threads\n"); - for (i = 1; i < NUM_THREADS; i++) { - pthread_create(&threads[i], NULL, do_thread_write_wait, &info); - } 
- - dbg_printf("joining\n"); - - for (i = 1; i < NUM_THREADS; i++) { - pthread_join(threads[i], NULL); - } - - /* Must wait until all threads are done, since we don't know - * which thread got which id */ - for (i = 1; i < NUM_THREADS; i++) { - for (j = 0; j < msg_size; j++) { - cr_assert(target[i*msg_size+j] == get_mark(i)); - } - } - - dbg_printf("done\n"); -} - -Test(cntr_mt_basic, write_wait) -{ - __write_wait(); -} - -Test(cntr_mt_scalable, write_wait) -{ - __write_wait(); -} - -void *do_add_cntr_mt(void *arg) -{ - int i = 0, ret, iters = ((int *)arg)[0]; - - for (; i < iters; i++) { - ret = fi_cntr_add(write_cntrs[0], 1); - cr_assert(ret == FI_SUCCESS); - } - - return NULL; -} - -void *do_add_err_cntr_mt(void *arg) -{ - int i = 0, ret, iters = ((int *)arg)[0]; - - for (; i < iters; i++) { - ret = fi_cntr_adderr(write_cntrs[0], 1); - cr_assert(ret == FI_SUCCESS); - } - - return NULL; -} - -Test(cntr_mt_default, set_add_read_cntr) -{ - int iters = 128, nthreads = 4, i, ret; - uint64_t cntr_val; - pthread_t threads[nthreads]; - void *pt_ret; - - ret = fi_cntr_set(write_cntrs[0], 0); - cr_assert(ret == FI_SUCCESS); - - cntr_val = fi_cntr_read(write_cntrs[0]); - cr_assert_eq(cntr_val, 0, "write error counter is incorrect."); - - /* Create & Spawn threads */ - for (i = 0; i < nthreads; i++) { - cr_assert(!pthread_create(threads + i, NULL, do_add_cntr_mt, - (void *) &iters)); - } - - /* Wait until all threads are done */ - for (i = 0; i < nthreads; i++) { - cr_assert(!pthread_join(threads[i], &pt_ret)); - cr_assert(!pt_ret); - } - - cntr_val = fi_cntr_read(write_cntrs[0]); - cr_assert_eq(cntr_val, (iters * nthreads), "write error counter " - "is incorrect."); -} - -Test(cntr_mt_default, set_add_read_err_cntr) -{ - int iters = 128, nthreads = 4, i, ret; - uint64_t err_cntr_val; - pthread_t threads[nthreads]; - void *pt_ret; - - ret = fi_cntr_seterr(write_cntrs[0], 0); - cr_assert(ret == FI_SUCCESS); - - err_cntr_val = fi_cntr_readerr(write_cntrs[0]); - 
cr_assert_eq(err_cntr_val, 0, "write error counter is incorrect."); - - /* Create & Spawn threads */ - for (i = 0; i < nthreads; i++) { - cr_assert(!pthread_create(threads + i, NULL, do_add_err_cntr_mt, - (void *) &iters)); - } - - /* Wait until all threads are done */ - for (i = 0; i < nthreads; i++) { - cr_assert(!pthread_join(threads[i], &pt_ret)); - cr_assert(!pt_ret); - } - - err_cntr_val = fi_cntr_readerr(write_cntrs[0]); - cr_assert_eq(err_cntr_val, (iters * nthreads), "write error counter " - "is incorrect."); -} - -static void *do_thread_adderr_wait(void *data) -{ - int i, ret; - i = *((int *) data); - - dbg_printf("%d: waiting\n", i); - ret = fi_cntr_wait(write_cntrs[i], ~0, -1); - cr_assert(ret != FI_SUCCESS, "Bad return value from fi_cntr_wait"); - - dbg_printf("%d: done\n", i); - return NULL; -} - -Test(cntr_mt_default, adderr_wait) -{ - int i, ret; - pthread_t threads[NUM_THREADS]; - int thread_args[NUM_THREADS]; - void *pt_ret; - - /* Each thread waits for an err cntr change on the i'th ep_write_cntr */ - dbg_printf("creating threads\n"); - for (i = 0; i < NUM_THREADS; i++) { - thread_args[i] = i; - cr_assert(!pthread_create(&threads[i], NULL, - do_thread_adderr_wait, (void *) &thread_args[i])); - } - - dbg_printf("Adding errors\n"); - for (i = 0; i < NUM_THREADS; i++) { - ret = fi_cntr_adderr(write_cntrs[i], 1); - cr_assert(ret == FI_SUCCESS, "Bad return value from " - "fi_cntr_adderr"); - } - - for (i = 0; i < NUM_THREADS; i++) { - cr_assert(!pthread_join(threads[i], &pt_ret)); - cr_assert(!pt_ret); - } - - dbg_printf("done\n"); -} - -Test(cntr_default, adderr_wait) -{ - int ret; - - ret = fi_cntr_adderr(write_cntrs[0], 1); - cr_assert(ret == FI_SUCCESS, "Bad return value from fi_cntr_adderr"); - - ret = fi_cntr_wait(write_cntrs[0], ~0, -1); - cr_assert(ret != FI_SUCCESS, "Bad return value from fi_cntr_wait"); -} - -Test(cntr_default, set_add_read_cntr) -{ - int iters = 128, ret, i = 0, init_val = 0xabcdefab; - uint64_t cntr_val, prev_cntr_val; - - 
ret = fi_cntr_set(write_cntrs[0], init_val); - cr_assert(ret == FI_SUCCESS); - - cntr_val = fi_cntr_read(write_cntrs[0]); - cr_assert_eq(cntr_val, init_val, "write or counter is incorrect."); - - ret = fi_cntr_set(write_cntrs[0], 0); - cr_assert(ret == FI_SUCCESS); - - for (; i < iters; i++) { - prev_cntr_val = fi_cntr_read(write_cntrs[0]); - ret = fi_cntr_add(write_cntrs[0], 1); - cr_assert(ret == FI_SUCCESS); - - cntr_val = fi_cntr_read(write_cntrs[0]); - cr_assert_eq(cntr_val, prev_cntr_val + 1, "counter is " - "incorrect"); - } -} - -Test(cntr_default, set_add_read_err_cntr) -{ - int iters = 128, ret, i = 0, init_val = 0xabcdefab; - uint64_t cntr_val, prev_cntr_val; - - ret = fi_cntr_seterr(write_cntrs[0], init_val); - cr_assert(ret == FI_SUCCESS); - - cntr_val = fi_cntr_readerr(write_cntrs[0]); - cr_assert_eq(cntr_val, init_val, "write or counter is incorrect."); - - ret = fi_cntr_seterr(write_cntrs[0], 0); - cr_assert(ret == FI_SUCCESS); - - for (; i < iters; i++) { - prev_cntr_val = fi_cntr_readerr(write_cntrs[0]); - ret = fi_cntr_adderr(write_cntrs[0], 1); - cr_assert(ret == FI_SUCCESS); - - cntr_val = fi_cntr_readerr(write_cntrs[0]); - cr_assert_eq(cntr_val, prev_cntr_val + 1, "error counter is " - "incorrect"); - } -} - diff --git a/prov/gni/test/common.c b/prov/gni/test/common.c deleted file mode 100644 index 37d561894c8..00000000000 --- a/prov/gni/test/common.c +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "common.h" - -void calculate_time_difference(struct timeval *start, struct timeval *end, - int *secs_out, int *usec_out) -{ - *secs_out = end->tv_sec - start->tv_sec; - if (end->tv_usec < start->tv_usec) { - *secs_out = *secs_out - 1; - *usec_out = (1000000 + end->tv_usec) - start->tv_usec; - } else { - *usec_out = end->tv_usec - start->tv_usec; - } -} - -int dump_cq_error(struct fid_cq *cq, void *context, uint64_t flags) -{ - int ret; - struct fi_cq_err_entry err_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX, - UINT_MAX, INT_MAX, INT_MAX, - (void *) -1 }; - - ret = fi_cq_readerr(cq, &err_cqe, flags); - - if (ret > 0) { - if (context && ((uint64_t)err_cqe.op_context != - (uint64_t)context)) { - fprintf(stderr, "Bad err context: ctx %p err ctx %p\n", - context, err_cqe.op_context); - } - - fprintf(stderr, "err flags 0x%lx\n", err_cqe.flags); - fprintf(stderr, "err len %ld\n", err_cqe.len); - fprintf(stderr, "err data 0x%lx\n", err_cqe.data); - fprintf(stderr, "err tag 0x%lx\n", err_cqe.tag); - fprintf(stderr, "err olen %ld\n", err_cqe.olen); - fprintf(stderr, "err err %d\n", err_cqe.err); - fprintf(stderr, "err prov_errno %d\n", err_cqe.prov_errno); - } - - return 0; -} diff --git a/prov/gni/test/common.h b/prov/gni/test/common.h deleted file mode 100644 index 77704b17680..00000000000 --- a/prov/gni/test/common.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef PROV_GNI_TEST_COMMON_H_ -#define PROV_GNI_TEST_COMMON_H_ - -#include -#include -#include -#include -#include -#include -#include "gnix_rdma_headers.h" -#include "gnix.h" -#include "ofi_util.h" - -#define BLUE "\x1b[34m" -#define COLOR_RESET "\x1b[0m" - -#define CACHE_RO 0 -#define CACHE_RW 1 - -#define GET_DOMAIN_RO_CACHE(domain) \ - ({ domain->mr_cache_info[domain->auth_key->ptag].mr_cache_ro; }) -#define GET_DOMAIN_RW_CACHE(domain) \ - ({ domain->mr_cache_info[domain->auth_key->ptag].mr_cache_rw; }) - -/* defined in rdm_atomic.c */ -extern int supported_compare_atomic_ops[FI_ATOMIC_OP_LAST][FI_DATATYPE_LAST]; -extern int supported_fetch_atomic_ops[FI_ATOMIC_OP_LAST][FI_DATATYPE_LAST]; - -void calculate_time_difference(struct timeval *start, struct timeval *end, - int *secs_out, int *usec_out); -int dump_cq_error(struct fid_cq *cq, void *context, uint64_t flags); - -static inline struct gnix_fid_ep *get_gnix_ep(struct fid_ep *fid_ep) -{ - return container_of(fid_ep, struct gnix_fid_ep, ep_fid); -} - -#define GNIX_MR_BASIC (FI_MR_BASIC) -#define GNIX_MR_SCALABLE (FI_MR_MMU_NOTIFY) -#define GNIX_DEFAULT_MR_MODE GNIX_MR_BASIC - -#define LOC_ADDR(base, addr) (addr) -#define REM_ADDR(base, addr) \ - ((gnit_use_scalable != 0) ? \ - ((uint64_t)(addr) - (uint64_t)(base)) : (uint64_t) (addr)) - -#define GNIT_ALIGNMENT_ORDER 12 -#define GNIT_ALIGNMENT_PGSIZE (1 << GNIT_ALIGNMENT_ORDER) -#define GNIT_ALIGNMENT_MASK ((uint64_t) (GNIT_ALIGNMENT_PGSIZE - 1)) - -#define GNIT_ALIGN_LEN(len) (((uint64_t) len) + GNIT_ALIGNMENT_PGSIZE) -#define GNIT_ALIGN_BUFFER(type, addr) \ - ((((uint64_t) (addr)) & GNIT_ALIGNMENT_MASK) ? \ - (type)((((uint64_t) (addr)) + \ - GNIT_ALIGNMENT_PGSIZE) & ~(GNIT_ALIGNMENT_MASK)) \ - : (type)(addr)) - -#define USING_SCALABLE(hints) \ - (!(((hints)->domain_attr->mr_mode & FI_MR_VIRT_ADDR) || \ - ((hints->domain_attr->mr_mode == FI_MR_BASIC)))) - -#define _REM_ADDR(info, base, addr) \ - (USING_SCALABLE(info) ? 
\ - ((uint64_t)(addr) - (uint64_t)(base)) : (uint64_t) (addr)) - -#define SKIP_IF(cond, message) \ - do { \ - if (cond) \ - cr_skip_test(message); \ - } while (0) - -#define SKIP_IF_SCALABLE_LT_1_5(version, mr_mode) \ - SKIP_IF((FI_VERSION_LT((version), FI_VERSION(1, 5)) && \ - !((mr_mode) & (FI_MR_BASIC | FI_MR_VIRT_ADDR))), \ - "scalable is not supported for versions less than " \ - "FI 1.5") - -#define MR_ENABLE(mr, addr, len) \ - do { \ - struct iovec __iov = { \ - .iov_base = (void *) (addr),\ - .iov_len = (len), \ - }; \ - int enable_ret; \ - \ - enable_ret = fi_mr_refresh((mr), &__iov, 1, 0); \ - cr_assert_eq(enable_ret, FI_SUCCESS, "failed to enable mr"); \ - } while (0) - -#endif /* PROV_GNI_TEST_COMMON_H_ */ diff --git a/prov/gni/test/cq.c b/prov/gni/test/cq.c deleted file mode 100644 index 0c19c1919f3..00000000000 --- a/prov/gni/test/cq.c +++ /dev/null @@ -1,1041 +0,0 @@ -/* - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include - -#include "gnix_cq.h" -#include "gnix.h" - -#include -#include "gnix_rdma_headers.h" -#include "common.h" - -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static uint64_t old_mode_bits; -static struct fid_fabric *fab; -static struct fid_domain *dom; -static struct gnix_fid_ep *ep; -static struct fid_ep *fid_ep; -static struct fid_cq *rcq; -static struct fi_info *hints; -static struct fi_info *fi; -static struct fi_cq_attr cq_attr; -static struct gnix_fid_cq *cq_priv; - -static struct gnix_fid_wait *wait_priv; -static struct fid_wait *wait_set; -static struct fi_wait_attr wait_attr; - -static void _setup(uint32_t version) -{ - int ret = 0; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - if (FI_VERSION_LT(version, FI_VERSION(1, 5))) - hints->domain_attr->mr_mode = FI_MR_BASIC; - else - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->domain_attr->cq_data_size = 4; - hints->mode = mode_bits; - - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(version, NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - ret = fi_domain(fab, fi, &dom, NULL); - cr_assert(!ret, "fi_domain"); - - cq_attr.wait_obj = FI_WAIT_NONE; -} - -static void setup(void) -{ - 
_setup(fi_version()); -} - -static void setup_1_4(void) -{ - _setup(FI_VERSION(1, 4)); -} - -static void teardown(void) -{ - int ret = 0; - - ret = fi_close(&dom->fid); - cr_assert(!ret, "failure in closing domain."); - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); - fi_freeinfo(fi); - fi_freeinfo(hints); -} - -void cq_create(enum fi_cq_format format, enum fi_wait_obj wait_obj, - size_t size) -{ - int ret = 0; - - cq_attr.format = format; - cq_attr.size = size; - cq_attr.wait_obj = wait_obj; - - ret = fi_cq_open(dom, &cq_attr, &rcq, NULL); - cr_assert(!ret, "fi_cq_open"); - - cq_priv = container_of(rcq, struct gnix_fid_cq, cq_fid); - - if (cq_priv->wait) { - wait_priv = container_of(cq_priv->wait, struct gnix_fid_wait, - wait); - } -} - -void cq_setup(void) -{ - setup(); - cq_create(FI_CQ_FORMAT_UNSPEC, FI_WAIT_NONE, 0); -} - -void cq_setup_1_4(void) -{ - setup_1_4(); - cq_create(FI_CQ_FORMAT_UNSPEC, FI_WAIT_NONE, 0); -} - -void cq_msg_setup(void) -{ - setup(); - cq_create(FI_CQ_FORMAT_MSG, FI_WAIT_NONE, 8); -} - -void cq_data_setup(void) -{ - setup(); - cq_create(FI_CQ_FORMAT_DATA, FI_WAIT_NONE, 8); -} - -void cq_tagged_setup(void) -{ - setup(); - cq_create(FI_CQ_FORMAT_TAGGED, FI_WAIT_NONE, 8); -} - -void cq_wait_none_setup(void) -{ - setup(); - cq_create(FI_CQ_FORMAT_MSG, FI_WAIT_NONE, 8); -} - -void cq_wait_fd_setup(void) -{ - setup(); - cq_create(FI_CQ_FORMAT_MSG, FI_WAIT_FD, 8); -} - -void cq_wait_unspec_setup(void) -{ - setup(); - cq_create(FI_CQ_FORMAT_MSG, FI_WAIT_UNSPEC, 8); -} - -void cq_wait_mutex_cond_setup(void) -{ - setup(); - cq_create(FI_CQ_FORMAT_MSG, FI_WAIT_MUTEX_COND, 8); -} - -void cq_notify_setup(void) -{ - int ret; - old_mode_bits = mode_bits; - mode_bits = FI_NOTIFY_FLAGS_ONLY; - setup(); - - ret = fi_endpoint(dom, fi, &fid_ep, NULL); - cr_assert(!ret, "fi_endpoint"); - cr_assert(fid_ep != NULL); - - ep = container_of(fid_ep, struct gnix_fid_ep, ep_fid); - cr_assert(ep, "ep not allocated"); - - 
cq_create(FI_CQ_FORMAT_MSG, FI_WAIT_NONE, 0); -} - -void cq_teardown(void) -{ - cr_assert(!fi_close(&rcq->fid), "failure in closing cq."); - teardown(); -} - -void cq_notify_teardown(void) -{ - cr_assert(!fi_close(&rcq->fid), "failure in closing cq."); - cr_assert(!fi_close(&fid_ep->fid), "failure in closing ep."); - teardown(); - mode_bits = old_mode_bits; - -} - -/******************************************************************************* - * Creation Tests: - * - * Create the CQ with various parameters and make sure the fields are being - * initialized correctly. - ******************************************************************************/ - -TestSuite(creation, .init = setup, .fini = cq_teardown); - -Test(creation, format_unspec) -{ - int ret = 0; - - cq_attr.format = FI_CQ_FORMAT_UNSPEC; - - ret = fi_cq_open(dom, &cq_attr, &rcq, NULL); - cr_assert(!ret, "fi_cq_open"); - - cq_priv = container_of(rcq, struct gnix_fid_cq, cq_fid); - cr_assert(cq_priv->entry_size == sizeof(struct fi_cq_entry)); -} - -Test(creation, format_context) -{ - int ret = 0; - - cq_attr.format = FI_CQ_FORMAT_CONTEXT; - - ret = fi_cq_open(dom, &cq_attr, &rcq, NULL); - cr_assert(!ret, "fi_cq_open"); - - cq_priv = container_of(rcq, struct gnix_fid_cq, cq_fid); - cr_assert(cq_priv->entry_size == sizeof(struct fi_cq_entry)); -} - -Test(creation, format_msg) -{ - int ret = 0; - - cq_attr.format = FI_CQ_FORMAT_MSG; - - ret = fi_cq_open(dom, &cq_attr, &rcq, NULL); - cr_assert(!ret, "fi_cq_open"); - - cq_priv = container_of(rcq, struct gnix_fid_cq, cq_fid); - cr_assert(cq_priv->entry_size == sizeof(struct fi_cq_msg_entry)); -} - -Test(creation, format_data) -{ - int ret = 0; - - cq_attr.format = FI_CQ_FORMAT_DATA; - - ret = fi_cq_open(dom, &cq_attr, &rcq, NULL); - cr_assert(!ret, "fi_cq_open"); - - cq_priv = container_of(rcq, struct gnix_fid_cq, cq_fid); - cr_assert(cq_priv->entry_size == sizeof(struct fi_cq_data_entry)); -} - -Test(creation, format_tagged) -{ - int ret = 0; - - 
cq_attr.format = FI_CQ_FORMAT_TAGGED; - - ret = fi_cq_open(dom, &cq_attr, &rcq, NULL); - cr_assert(!ret, "fi_cq_open"); - - cq_priv = container_of(rcq, struct gnix_fid_cq, cq_fid); - cr_assert(cq_priv->entry_size == sizeof(struct fi_cq_tagged_entry)); -} - -TestSuite(insertion, .init = cq_setup, .fini = cq_teardown); - -Test(insertion, single) -{ - int ret = 0; - char input_ctx = 'a'; - struct fi_cq_entry entry; - - cr_assert(!cq_priv->events->item_list.head); - - _gnix_cq_add_event(cq_priv, NULL, &input_ctx, 0, 0, 0, 0, 0, 0); - - cr_assert(cq_priv->events->item_list.head); - cr_assert_eq(cq_priv->events->item_list.head, - cq_priv->events->item_list.tail); - - ret = fi_cq_read(rcq, &entry, 1); - cr_assert(ret == 1); - cr_assert(!cq_priv->events->item_list.head); - - cr_assert_eq(*(char *) entry.op_context, input_ctx, - "Expected same op_context as inserted."); -} - -Test(insertion, limit) -{ - int ret = 0; - char input_ctx = 'a'; - struct fi_cq_entry entry; - const size_t cq_size = cq_priv->attr.size; - - for (size_t i = 0; i < cq_size; i++) - _gnix_cq_add_event(cq_priv, NULL, &input_ctx, 0, 0, 0, 0, 0, 0); - - cr_assert(cq_priv->events->item_list.head); - cr_assert(!cq_priv->events->free_list.head); - - _gnix_cq_add_event(cq_priv, NULL, &input_ctx, 0, 0, 0, 0, 0, 0); - - for (size_t i = 0; i < cq_size + 1; i++) { - ret = fi_cq_read(rcq, &entry, 1); - cr_assert_eq(ret, 1); - } - - cr_assert(!cq_priv->events->item_list.head); - cr_assert(cq_priv->events->free_list.head); -} - -TestSuite(mode_bits, .init = NULL, .fini = teardown); - -Test(mode_bits, fi_notify_flags_only_1_4) -{ - old_mode_bits = mode_bits; - mode_bits = ~0; - _setup(FI_VERSION(1, 4)); - cr_assert_eq(fi->mode & FI_NOTIFY_FLAGS_ONLY, 0, "Did not clear notify flag for version 1.4"); - mode_bits = old_mode_bits; -} - -Test(mode_bits, fi_notify_flags_only) -{ - old_mode_bits = mode_bits; - mode_bits = ~0; - _setup(fi_version()); - cr_assert(fi->mode & FI_NOTIFY_FLAGS_ONLY, "Cleared the notify flag when 
we shouldn't have\n"); - mode_bits = old_mode_bits; -} - -TestSuite(reading, .init = cq_setup, .fini = cq_teardown); - -Test(reading, empty) -{ - int ret = 0; - struct fi_cq_entry entry; - - ret = fi_cq_read(rcq, &entry, 1); - cr_assert_eq(ret, -FI_EAGAIN); -} - -Test(reading, error) -{ - int ret = 0; - struct fi_cq_entry entry; - struct fi_cq_err_entry err_entry; - - char input_ctx = 'a'; - uint64_t flags = 0xb; - size_t len = sizeof(input_ctx); - void *buf = &input_ctx; - uint64_t data = 20; - uint64_t tag = 40; - size_t olen = 20; - int err = 50; - int prov_errno = 80; - - /* - * By default CQ start out with no error entries and no entries - * in the error entry free list. - */ - cr_assert(!cq_priv->errors->item_list.head); - cr_assert(!cq_priv->errors->free_list.head); - - _gnix_cq_add_error(cq_priv, &input_ctx, flags, len, buf, data, tag, - olen, err, prov_errno, 0, 0); - - cr_assert(cq_priv->errors->item_list.head); - - ret = fi_cq_read(rcq, &entry, 1); - cr_assert_eq(ret, -FI_EAVAIL); - - cr_assert(!cq_priv->events->item_list.head); - cr_assert(cq_priv->errors->item_list.head); - /* Testing err_data == NULL path set size to something - * other than 0 then verify it was set back to 0 */ - err_entry.err_data_size = 12; - err_entry.err_data = malloc(12); - ret = fi_cq_readerr(rcq, &err_entry, 0); - cr_assert_eq(ret, 1); - - /* - * Item should have been removed from error queue and placed on free - * queue. - */ - cr_assert(!cq_priv->errors->item_list.head); - cr_assert(cq_priv->errors->free_list.head); - - /* - * Compare structural items... 
- */ - cr_assert_eq(*(char *) err_entry.op_context, input_ctx); - cr_assert_eq(err_entry.flags, flags); - cr_assert_eq(err_entry.len, len); - cr_assert_eq(err_entry.buf, buf); - cr_assert_eq(err_entry.data, data); - cr_assert_eq(err_entry.tag, tag); - cr_assert_eq(err_entry.olen, olen); - cr_assert_eq(err_entry.err, err); - cr_assert_eq(err_entry.prov_errno, prov_errno); - cr_assert(err_entry.err_data != NULL); - free(err_entry.err_data); - cr_assert(err_entry.err_data_size == 0); -} - -TestSuite(reading_1_4, .init = cq_setup_1_4, .fini = cq_teardown); - -Test(reading_1_4, error) -{ - int ret = 0; - struct fi_cq_entry entry; - struct fi_cq_err_entry err_entry; - - char input_ctx = 'a'; - uint64_t flags = 0xb; - size_t len = sizeof(input_ctx); - void *buf = &input_ctx; - uint64_t data = 20; - uint64_t tag = 40; - size_t olen = 20; - int err = 50; - int prov_errno = 80; - - /* - * By default CQ start out with no error entries and no entries - * in the error entry free list. - */ - cr_assert(!cq_priv->errors->item_list.head); - cr_assert(!cq_priv->errors->free_list.head); - - _gnix_cq_add_error(cq_priv, &input_ctx, flags, len, buf, data, tag, - olen, err, prov_errno, 0, 0); - - cr_assert(cq_priv->errors->item_list.head); - - ret = fi_cq_read(rcq, &entry, 1); - cr_assert_eq(ret, -FI_EAVAIL); - - cr_assert(!cq_priv->events->item_list.head); - cr_assert(cq_priv->errors->item_list.head); - ret = fi_cq_readerr(rcq, &err_entry, 0); - cr_assert_eq(ret, 1); - - /* - * Item should have been removed from error queue and placed on free - * queue. - */ - cr_assert(!cq_priv->errors->item_list.head); - cr_assert(cq_priv->errors->free_list.head); - - /* - * Compare structural items... 
- */ - cr_assert_eq(*(char *) err_entry.op_context, input_ctx); - cr_assert_eq(err_entry.flags, flags); - cr_assert_eq(err_entry.len, len); - cr_assert_eq(err_entry.buf, buf); - cr_assert_eq(err_entry.data, data); - cr_assert_eq(err_entry.tag, tag); - cr_assert_eq(err_entry.olen, olen); - cr_assert_eq(err_entry.err, err); - cr_assert_eq(err_entry.prov_errno, prov_errno); - cr_assert(err_entry.err_data == NULL); -} - -#define ENTRY_CNT 5 -Test(reading, issue192) -{ - int ret = 0; - char input_ctx = 'a'; - struct fi_cq_entry entries[ENTRY_CNT]; - - _gnix_cq_add_event(cq_priv, NULL, &input_ctx, 0, 0, 0, 0, 0, 0); - - ret = fi_cq_read(rcq, &entries, ENTRY_CNT); - cr_assert_eq(ret, 1); - - ret = fi_cq_read(rcq, &entries, ENTRY_CNT); - cr_assert_eq(ret, -FI_EAGAIN); -} - - -static void cq_add_read_setup(enum fi_cq_format format) -{ - switch (format) { - default: - case FI_CQ_FORMAT_UNSPEC: - cq_setup(); - break; - case FI_CQ_FORMAT_MSG: - cq_msg_setup(); - break; - case FI_CQ_FORMAT_DATA: - cq_data_setup(); - break; - case FI_CQ_FORMAT_TAGGED: - cq_tagged_setup(); - break; - } -} - -static void cq_add_read_check(enum fi_cq_format format, - struct fi_cq_tagged_entry *entry, - struct fi_cq_tagged_entry *expected) -{ - cr_assert_eq(*(char *) entry->op_context, - *(char *) expected->op_context); - - if (format == FI_CQ_FORMAT_UNSPEC || - format == FI_CQ_FORMAT_CONTEXT) { - return; - } - - cr_assert_eq(entry->flags, expected->flags); - cr_assert_eq(entry->len, expected->len); - - if (format == FI_CQ_FORMAT_MSG) { - return; - } - - cr_assert_eq(entry->buf, expected->buf); - cr_assert_eq(entry->data, expected->data); - - if (format == FI_CQ_FORMAT_DATA) { - return; - } - - cr_assert_eq(entry->tag, expected->tag); -} - -/* - * Add an event and read the cq. 
- */ -static void cq_add_read(enum fi_cq_format format) -{ - int ret = 0; - char input_ctx = 'a'; - struct fi_cq_tagged_entry entry; /* biggest one */ - struct fi_cq_tagged_entry expected = { &input_ctx, 2, 4, (void *) 8, - 16, 32 }; - - cq_add_read_setup(format); - - cr_assert(!cq_priv->events->item_list.head); - - _gnix_cq_add_event(cq_priv, NULL, expected.op_context, expected.flags, - expected.len, expected.buf, expected.data, - expected.tag, 0x0); - - cr_assert(cq_priv->events->item_list.head); - - ret = fi_cq_read(rcq, &entry, 1); - cr_assert_eq(ret, 1); - - cq_add_read_check(format, &entry, &expected); - - cr_assert(!cq_priv->events->item_list.head); -} - -/* - * Create up to the size events to fill it up. Check that all the properties - * are correct, then add one more that is different. Read size items and then - * add an error and try reading. Ensure that we get back -FI_EAVAIL. Then read - * the last item and make sure it's the same values put in originally. - */ -static void cq_fill_test(enum fi_cq_format format) -{ - char input_ctx = 'a'; - struct fi_cq_tagged_entry entry; /* biggest one */ - struct fi_cq_tagged_entry expected = { &input_ctx, 2, 4, (void *) 8, - 16, 32 }; - struct fi_cq_err_entry err_entry; - int ret = 0; - uint64_t flags = 2; - size_t len = 4; - size_t cq_size; - - cq_add_read_setup(format); - - cr_assert(!cq_priv->events->item_list.head); - cr_assert(cq_priv->events->free_list.head); - - cq_size = cq_priv->attr.size; - for (size_t i = 0; i < cq_size; i++) { - _gnix_cq_add_event(cq_priv, NULL, expected.op_context, - expected.flags, expected.len, - expected.buf, expected.data, - expected.tag, 0x0); - } - - cr_assert(cq_priv->events->item_list.head); - cr_assert(!cq_priv->events->free_list.head); - - _gnix_cq_add_event(cq_priv, NULL, expected.op_context, - expected.flags, 2 * expected.len, expected.buf, - expected.data, expected.tag, 0x0); - - for (size_t i = 0; i < cq_size; i++) { - ret = fi_cq_read(rcq, &entry, 1); - cr_assert_eq(ret, 
1); - cq_add_read_check(format, &entry, &expected); - } - - /* - * If we insert an error it should return -FI_EAVAIL despite having - * something to read. - */ - - _gnix_cq_add_error(cq_priv, &input_ctx, flags, len, 0, 0, 0, 0, 0, 0, - 0, 0); - cr_assert(cq_priv->errors->item_list.head); - - ret = fi_cq_read(rcq, &entry, 1); - cr_assert_eq(ret, -FI_EAVAIL); - - ret = fi_cq_readerr(rcq, &err_entry, 0); - cr_assert_eq(ret, 1); - - /* - * Creating an error allocs an error but it is then placed in the free - * list after reading. - */ - cr_assert(cq_priv->errors->free_list.head); - cr_assert(!cq_priv->errors->item_list.head); - - ret = fi_cq_read(rcq, &entry, 1); - cr_assert_eq(ret, 1); - - cr_assert(cq_priv->events->free_list.head); - cr_assert(!cq_priv->events->item_list.head); - - expected.len *= 2; - cq_add_read_check(format, &entry, &expected); -} - -/* - * Read more than one cqe at a time. - */ -static void cq_multi_read_test(enum fi_cq_format format) -{ - int ret = 0; - size_t count = 3; - char input_ctx = 'a'; - struct fi_cq_tagged_entry entry[count]; /* biggest one */ - struct fi_cq_tagged_entry expected = { &input_ctx, 2, 4, (void *) 8, - 16, 32 }; - - cq_add_read_setup(format); - - cr_assert(cq_priv->events->free_list.head); - cr_assert(!cq_priv->events->item_list.head); - - for (size_t i = 0; i < count; i++) { - _gnix_cq_add_event(cq_priv, NULL, expected.op_context, - expected.flags, expected.len, - expected.buf, expected.data, - expected.tag, 0x0); - } - - cr_assert(cq_priv->events->item_list.head); - - ret = fi_cq_read(rcq, &entry, count); - cr_assert_eq(ret, count); - - for (size_t j = 0; j < count; j++) { - /* This is gross */ - switch (format) { - default: - case FI_CQ_FORMAT_UNSPEC: - { - struct fi_cq_entry *e = (struct fi_cq_entry *) entry; - - cq_add_read_check(format, - (struct fi_cq_tagged_entry *) &e[j], - &expected); - break; - } - case FI_CQ_FORMAT_MSG: - { - struct fi_cq_msg_entry *e = - (struct fi_cq_msg_entry *) entry; - - 
cq_add_read_check(format, - (struct fi_cq_tagged_entry *) &e[j], - &expected); - break; - } - case FI_CQ_FORMAT_DATA: - { - struct fi_cq_data_entry *e = - (struct fi_cq_data_entry *) entry; - - cq_add_read_check(format, - (struct fi_cq_tagged_entry *) &e[j], - &expected); - break; - } - case FI_CQ_FORMAT_TAGGED: - { - cq_add_read_check(format, &entry[j], &expected); - break; - } - } - } -} - -TestSuite(check_cqe, .init = NULL, .fini = cq_teardown); - -Test(check_cqe, context) { - cq_add_read(FI_CQ_FORMAT_CONTEXT); -} - -Test(check_cqe, context_fill) { - cq_fill_test(FI_CQ_FORMAT_CONTEXT); -} - -Test(check_cqe, context_multi_read) { - cq_multi_read_test(FI_CQ_FORMAT_CONTEXT); -} - -Test(check_cqe, msg) { - cq_add_read(FI_CQ_FORMAT_MSG); -} - -Test(check_cqe, msg_fill) { - cq_fill_test(FI_CQ_FORMAT_MSG); -} - -Test(check_cqe, msg_multi_read) { - cq_multi_read_test(FI_CQ_FORMAT_MSG); -} - -Test(check_cqe, data) { - cq_add_read(FI_CQ_FORMAT_DATA); -} - -Test(check_cqe, data_fill) { - cq_fill_test(FI_CQ_FORMAT_DATA); -} - -Test(check_cqe, data_multi_read) { - cq_multi_read_test(FI_CQ_FORMAT_DATA); -} - -Test(check_cqe, tagged) { - cq_add_read(FI_CQ_FORMAT_TAGGED); -} - -Test(check_cqe, tagged_fill) { - cq_fill_test(FI_CQ_FORMAT_TAGGED); -} - -Test(check_cqe, tagged_multi_read) { - cq_multi_read_test(FI_CQ_FORMAT_TAGGED); -} - -/* This test should be combined with cq_multi_read_test above when - * wait object are implemented. 
- */ -Test(cq_msg, multi_sread, .init = cq_wait_unspec_setup, .disabled = false) -{ - int ret = 0; - size_t count = 3; - struct fi_cq_msg_entry entry[count]; - - cr_assert(cq_priv->events->free_list.head); - cr_assert(!cq_priv->events->item_list.head); - - ret = fi_cq_sread(rcq, &entry, count, NULL, 100); - cr_assert_eq(ret, -FI_EAGAIN); - - for (size_t i = 0; i < count; i++) - _gnix_cq_add_event(cq_priv, NULL, 0, (uint64_t) i, 0, 0, 0, 0, 0); - - cr_assert(cq_priv->events->item_list.head); - - ret = fi_cq_sread(rcq, &entry, count, NULL, -1); - cr_assert_eq(ret, count); - - for (size_t j = 0; j < count; j++) - cr_assert_eq(entry[j].flags, (uint64_t) j); -} - -TestSuite(cq_wait_obj, .fini = cq_teardown); -TestSuite(cq_wait_control, .fini = cq_teardown, .disabled = true); -TestSuite(cq_wait_ops, .fini = cq_teardown); - -Test(cq_wait_obj, none, .init = cq_wait_none_setup) -{ - cr_expect(!wait_priv, "wait_priv is not null."); -} - -Test(cq_wait_obj, unspec, .init = cq_wait_unspec_setup) -{ - cr_expect_eq(wait_priv->type, FI_WAIT_UNSPEC); - cr_expect_eq(wait_priv->type, cq_priv->attr.wait_obj); - cr_expect_eq(wait_priv->type, cq_attr.wait_obj); - cr_expect_eq(&wait_priv->fabric->fab_fid, fab); - cr_expect_eq(wait_priv->cond_type, FI_CQ_COND_NONE); -} - -Test(cq_wait_obj, fd, .init = cq_wait_fd_setup, .disabled = true) -{ - cr_expect_eq(wait_priv->type, FI_WAIT_FD); - cr_expect_eq(wait_priv->type, cq_priv->attr.wait_obj); - cr_expect_eq(wait_priv->type, cq_attr.wait_obj); - cr_expect_eq(&wait_priv->fabric->fab_fid, fab); - cr_expect_eq(wait_priv->cond_type, FI_CQ_COND_NONE); -} - -Test(cq_wait_obj, mutex_cond, .init = cq_wait_mutex_cond_setup, .disabled = true) -{ - cr_expect_eq(wait_priv->type, FI_WAIT_MUTEX_COND); - cr_expect_eq(wait_priv->type, cq_priv->attr.wait_obj); - cr_expect_eq(wait_priv->type, cq_attr.wait_obj); - cr_expect_eq(&wait_priv->fabric->fab_fid, fab); - cr_expect_eq(wait_priv->cond_type, FI_CQ_COND_NONE); -} - -Test(cq_wait_control, none, .init = 
cq_wait_none_setup) -{ - int ret; - int fd; - - ret = fi_control(&cq_priv->cq_fid.fid, FI_GETWAIT, &fd); - cr_expect_eq(-FI_ENOSYS, ret, "fi_control exists for none."); -} - -Test(cq_wait_control, unspec, .init = cq_wait_unspec_setup, .disabled = true) -{ - int ret; - int fd; - - ret = fi_control(&cq_priv->cq_fid.fid, FI_GETWAIT, &fd); - cr_expect_eq(FI_SUCCESS, ret, "fi_control failed."); - - cr_expect_eq(wait_priv->fd[WAIT_READ], fd); -} - -Test(cq_wait_control, fd, .init = cq_wait_fd_setup, - .disabled = true) -{ - int ret; - int fd; - - ret = fi_control(&cq_priv->cq_fid.fid, FI_GETWAIT, &fd); - cr_expect_eq(FI_SUCCESS, ret, "fi_control failed."); - - cr_expect_eq(wait_priv->fd[WAIT_READ], fd); -} - -Test(cq_wait_control, mutex_cond, .init = cq_wait_mutex_cond_setup, - .disabled = true) -{ - int ret; - struct fi_mutex_cond mutex_cond; - - ret = fi_control(&cq_priv->cq_fid.fid, FI_GETWAIT, &mutex_cond); - cr_expect_eq(FI_SUCCESS, ret, "fi_control failed."); - - ret = memcmp(&wait_priv->mutex, mutex_cond.mutex, - sizeof(*mutex_cond.mutex)); - cr_expect_eq(0, ret, "mutex compare failed."); - - ret = memcmp(&wait_priv->cond, mutex_cond.cond, - sizeof(*mutex_cond.cond)); - cr_expect_eq(0, ret, "cond compare failed."); -} - -Test(cq_wait_ops, none, .init = cq_wait_none_setup) -{ - cr_expect_eq(cq_priv->cq_fid.ops->signal, fi_no_cq_signal, - "signal implementation available."); - cr_expect_eq(cq_priv->cq_fid.ops->sread, fi_no_cq_sread, - "sread implementation available."); - cr_expect_eq(cq_priv->cq_fid.ops->sreadfrom, fi_no_cq_sreadfrom, - "sreadfrom implementation available."); - cr_expect_eq(cq_priv->cq_fid.fid.ops->control, fi_no_control, - "control implementation available."); -} - -Test(cq_wait_ops, fd, .init = cq_wait_fd_setup, .disabled = true) -{ - cr_expect_neq(cq_priv->cq_fid.ops->signal, fi_no_cq_signal, - "signal implementation not available."); - cr_expect_neq(cq_priv->cq_fid.fid.ops->control, fi_no_control, - "control implementation not available."); -} 
- -Test(cq_wait_set, fd, .init = setup, .disabled = true) -{ - int ret; - int fd; - - wait_attr.wait_obj = FI_WAIT_FD; - - ret = fi_wait_open(fab, &wait_attr, &wait_set); - cr_expect_eq(FI_SUCCESS, ret, "fi_wait_open failed."); - - wait_priv = container_of(wait_set, struct gnix_fid_wait, wait); - - cq_attr.format = FI_CQ_FORMAT_MSG; - cq_attr.size = 8; - cq_attr.wait_obj = FI_WAIT_SET; - cq_attr.wait_set = wait_set; - - ret = fi_cq_open(dom, &cq_attr, &rcq, NULL); - cr_expect_eq(FI_SUCCESS, ret, "fi_cq_open failed."); - - cq_priv = container_of(rcq, struct gnix_fid_cq, cq_fid); - - ret = fi_control(&cq_priv->cq_fid.fid, FI_GETWAIT, &fd); - cr_expect_eq(FI_SUCCESS, ret, "fi_control failed."); - - cr_expect_eq(wait_priv->fd[WAIT_READ], fd); - - ret = fi_close(&rcq->fid); - cr_expect_eq(FI_SUCCESS, ret, "failure in closing cq."); - - ret = fi_close(&wait_set->fid); - cr_expect_eq(FI_SUCCESS, ret, "failure in closing waitset."); - - teardown(); -} - -TestSuite(cq_notify_flags, .init = cq_notify_setup, .fini = cq_notify_teardown); - -void do_cq_notify(uint64_t flags) -{ - ssize_t ret; - struct fi_cq_msg_entry entry; - - ret = _gnix_cq_add_event(cq_priv, ep, NULL, flags, 0, NULL, - 0, 0, 0); - cr_assert(ret == FI_SUCCESS, "failing in _gnix_cq_add_event"); - - ret = fi_cq_read(rcq, &entry, 1); - cr_assert(ret == 1, "failing in fi_cq_read"); - - if (flags & FI_RMA_EVENT) { - flags &= (FI_REMOTE_READ | FI_REMOTE_WRITE | - FI_RMA | FI_REMOTE_CQ_DATA | - FI_MULTI_RECV); - } else { - flags &= (FI_REMOTE_CQ_DATA | FI_MULTI_RECV); - } - - cr_assert_eq(flags, entry.flags, "unexpected cq entry flags"); -} - -Test(cq_notify_flags, fi_rma_event) -{ - do_cq_notify(FI_REMOTE_READ | FI_REMOTE_WRITE | - FI_RMA | FI_REMOTE_CQ_DATA | - FI_MULTI_RECV | FI_RMA_EVENT); -} - -Test(cq_notify_flags, not_fi_rma_event) -{ - do_cq_notify(~FI_RMA_EVENT); -} - - -struct test_err { - int padding_1; - struct fi_cq_err_entry_1_0 error; - int padding_2; -}; - -Test(reading_1_4, issue_ofiwg_3227) -{ - 
struct test_err error_entry; - int ret = 0; - char input_ctx = 'a'; - uint64_t flags = 0xb; - size_t len = sizeof(input_ctx); - void *buf = &input_ctx; - uint64_t data = 20; - uint64_t tag = 40; - size_t olen = 20; - int err = 50; - int prov_errno = 80; - - /* - * By default CQ start out with no error entries and no entries - * in the error entry free list. - */ - cr_assert(!cq_priv->errors->item_list.head); - cr_assert(!cq_priv->errors->free_list.head); - - _gnix_cq_add_error(cq_priv, &input_ctx, flags, len, buf, data, tag, - olen, err, prov_errno, 0, 0); - - error_entry.padding_1 = 0xcafebabe; - error_entry.padding_2 = 0xcafed00d; - - ret = fi_cq_readerr((struct fid_cq *) cq_priv, - (struct fi_cq_err_entry *) &error_entry.error, 0); - cr_assert_eq(ret, 1); - - cr_assert_eq(error_entry.padding_1, 0xcafebabe); - cr_assert_eq(error_entry.padding_2, 0xcafed00d); -} diff --git a/prov/gni/test/datagram.c b/prov/gni/test/datagram.c deleted file mode 100644 index dc5bf641168..00000000000 --- a/prov/gni/test/datagram.c +++ /dev/null @@ -1,457 +0,0 @@ -/* - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2020 Triad National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include - -#include "gnix_datagram.h" -#include "gnix_cm_nic.h" - -#include -#include "gnix_rdma_headers.h" -#include "common.h" - -#include "common.h" - -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fid_fabric *fab; -static struct fid_domain *dom; -static struct fid_ep *ep; -static struct fi_info *hints; -static struct fi_info *fi; -static struct gnix_fid_ep *ep_priv; -static const char my_cdm_id[] = "3000"; - -void dg_setup(void) -{ - int ret = 0; - char my_hostname[HOST_NAME_MAX]; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->domain_attr->cq_data_size = 4; - hints->mode = mode_bits; - - hints->fabric_attr->prov_name = strdup("gni"); - - ret = gethostname(my_hostname, sizeof(my_hostname)); - cr_assert(!ret, "gethostname"); - - ret = fi_getinfo(fi_version(), my_hostname, my_cdm_id, FI_SOURCE, - hints, &fi); - cr_assert(!ret, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - ret = fi_domain(fab, 
fi, &dom, NULL); - cr_assert(!ret, "fi_domain"); - - ret = fi_endpoint(dom, fi, &ep, NULL); - cr_assert(!ret, "fi_endpoint"); -} - -void dg_setup_prog_manual(void) -{ - int ret = 0; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->domain_attr->cq_data_size = 4; - hints->domain_attr->control_progress = FI_PROGRESS_MANUAL; - hints->mode = mode_bits; - - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(fi_version(), NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - ret = fi_domain(fab, fi, &dom, NULL); - cr_assert(!ret, "fi_domain"); - - ret = fi_endpoint(dom, fi, &ep, NULL); - cr_assert(!ret, "fi_endpoint"); -} - -void dg_teardown(void) -{ - int ret = 0; - - ret = fi_close(&ep->fid); - cr_assert(!ret, "failure in closing ep."); - ret = fi_close(&dom->fid); - cr_assert(!ret, "failure in closing domain."); - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); - fi_freeinfo(fi); - fi_freeinfo(hints); -} - -/******************************************************************************* - * Allocation Tests: - * - * try different datagram allocation/free patterns and see if something - * explodes. 
- ******************************************************************************/ - -TestSuite(dg_allocation, .init = dg_setup, .fini = dg_teardown); - -Test(dg_allocation, dgram_verify_cdm_id) -{ - uint32_t correct = atol(my_cdm_id); - struct gnix_cm_nic *cm_nic; - - ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid); - cm_nic = ep_priv->cm_nic; - cr_assert((cm_nic != NULL), "cm_nic NULL"); - cr_assert((cm_nic->my_name.gnix_addr.cdm_id == correct), "cm_nic incorrect cdm_id"); -} - - -Test(dg_allocation, dgram_alloc_wc) -{ - int ret = 0, i; - struct gnix_cm_nic *cm_nic; - struct gnix_datagram **dgram_ptr; - struct gnix_fid_fabric *fab_priv; - - ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid); - cm_nic = ep_priv->cm_nic; - cr_assert((cm_nic != NULL), "cm_nic NULL"); - - cr_assert((cm_nic->dgram_hndl != NULL), "cm_nic dgram_hndl NULL"); - - fab_priv = container_of(fab, struct gnix_fid_fabric, fab_fid); - - dgram_ptr = calloc(fab_priv->n_wc_dgrams, - sizeof(struct gnix_datagram *)); - cr_assert((dgram_ptr != NULL), "calloc failed"); - - for (i = 0; i < fab_priv->n_wc_dgrams; i++) { - ret = _gnix_dgram_alloc(cm_nic->dgram_hndl, GNIX_DGRAM_WC, - &dgram_ptr[i]); - cr_assert(!ret, "_gnix_dgram_alloc wc"); - } - - for (i = 0; i < fab_priv->n_wc_dgrams; i++) { - ret = _gnix_dgram_free(dgram_ptr[i]); - cr_assert(!ret, "_gnix_dgram_free wc"); - } - - free(dgram_ptr); -} - -Test(dg_allocation, dgram_alloc_wc_alt) -{ - int ret = 0, i; - struct gnix_cm_nic *cm_nic; - struct gnix_datagram *dgram_ptr; - struct gnix_fid_fabric *fab_priv; - - ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid); - cm_nic = ep_priv->cm_nic; - cr_assert((cm_nic != NULL), "cm_nic NULL"); - - cr_assert((cm_nic->dgram_hndl != NULL), "cm_nic dgram_hndl NULL"); - - fab_priv = container_of(fab, struct gnix_fid_fabric, fab_fid); - - for (i = 0; i < fab_priv->n_wc_dgrams; i++) { - ret = _gnix_dgram_alloc(cm_nic->dgram_hndl, GNIX_DGRAM_WC, - &dgram_ptr); - cr_assert(!ret, "_gnix_dgram_alloc wc"); 
- ret = _gnix_dgram_free(dgram_ptr); - cr_assert(!ret, "_gnix_dgram_free wc"); - } -} - -Test(dg_allocation, dgram_alloc_bnd) -{ - int ret = 0, i; - struct gnix_cm_nic *cm_nic; - struct gnix_datagram **dgram_ptr; - struct gnix_fid_fabric *fab_priv; - - ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid); - cm_nic = ep_priv->cm_nic; - cr_assert((cm_nic != NULL), "cm_nic NULL"); - - cr_assert((cm_nic->dgram_hndl != NULL), "cm_nic dgram_hndl NULL"); - - fab_priv = container_of(fab, struct gnix_fid_fabric, fab_fid); - - dgram_ptr = calloc(fab_priv->n_bnd_dgrams, - sizeof(struct gnix_datagram *)); - cr_assert((dgram_ptr != NULL), "calloc failed"); - - for (i = 0; i < fab_priv->n_bnd_dgrams; i++) { - ret = _gnix_dgram_alloc(cm_nic->dgram_hndl, GNIX_DGRAM_BND, - &dgram_ptr[i]); - cr_assert(!ret, "_gnix_dgram_alloc bnd"); - } - - for (i = 0; i < fab_priv->n_wc_dgrams; i++) { - ret = _gnix_dgram_free(dgram_ptr[i]); - cr_assert(!ret, "_gnix_dgram_free bnd"); - } - - free(dgram_ptr); -} - -Test(dg_allocation, dgram_alloc_wc_bnd) -{ - int ret = 0, i; - struct gnix_cm_nic *cm_nic; - struct gnix_datagram *dgram_ptr; - struct gnix_fid_fabric *fab_priv; - - ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid); - cm_nic = ep_priv->cm_nic; - cr_assert((cm_nic != NULL), "cm_nic NULL"); - - cr_assert((cm_nic->dgram_hndl != NULL), "cm_nic dgram_hndl NULL"); - - fab_priv = container_of(fab, struct gnix_fid_fabric, fab_fid); - - for (i = 0; i < fab_priv->n_bnd_dgrams; i++) { - ret = _gnix_dgram_alloc(cm_nic->dgram_hndl, GNIX_DGRAM_BND, - &dgram_ptr); - cr_assert(!ret, "_gnix_dgram_alloc bnd"); - ret = _gnix_dgram_free(dgram_ptr); - cr_assert(!ret, "_gnix_dgram_free bnd"); - } -} - -Test(dg_allocation, dgram_pack_unpack) -{ - int ret = 0; - ssize_t len; - struct gnix_cm_nic *cm_nic; - struct gnix_datagram *dgram_ptr; - char in_buf[] = "0xdeadbeef"; - char out_buf[GNI_DATAGRAM_MAXSIZE]; - - ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid); - cm_nic = ep_priv->cm_nic; - 
cr_assert((cm_nic != NULL), "cm_nic NULL"); - - cr_assert((cm_nic->dgram_hndl != NULL), "cm_nic dgram_hndl NULL"); - - ret = _gnix_dgram_alloc(cm_nic->dgram_hndl, GNIX_DGRAM_BND, - &dgram_ptr); - cr_assert(!ret, "_gnix_dgram_alloc bnd"); - - /* - * check pack/unpack for GNIX_DGRAM_IN_BUF - */ - - len = _gnix_dgram_pack_buf(dgram_ptr, GNIX_DGRAM_IN_BUF, - in_buf, sizeof(in_buf)); - cr_assert(len > 0); - cr_assert_eq(len, (ssize_t)sizeof(in_buf)); - - len = _gnix_dgram_unpack_buf(dgram_ptr, GNIX_DGRAM_IN_BUF, - out_buf, sizeof(in_buf)); - cr_assert(len > 0); - cr_assert_eq(len, (ssize_t)sizeof(in_buf)); - - cr_assert_eq(0, strcmp(in_buf, out_buf)); - - /* - * check pack/unpack for GNIX_DGRAM_OUT_BUF - */ - - len = _gnix_dgram_pack_buf(dgram_ptr, GNIX_DGRAM_OUT_BUF, - in_buf, sizeof(in_buf)); - cr_assert(len > 0); - cr_assert_eq(len, (ssize_t)sizeof(in_buf)); - - memset(out_buf, 0, sizeof(out_buf)); - - len = _gnix_dgram_unpack_buf(dgram_ptr, GNIX_DGRAM_OUT_BUF, - out_buf, sizeof(in_buf)); - cr_assert(len > 0); - cr_assert_eq(len, (ssize_t)sizeof(in_buf)); - - cr_assert_eq(0, strcmp(in_buf, out_buf)); - - ret = _gnix_dgram_free(dgram_ptr); - cr_assert(!ret, "_gnix_dgram_free bnd"); - -} - -static struct gnix_address local_address; -static int dgram_match; - -static int dgram_callback_fn(struct gnix_datagram *the_dgram, - struct gnix_address where_from, - gni_post_state_t dgram_state) -{ - if (dgram_state != GNI_POST_COMPLETED) { - fprintf(stderr, "dgram_state check failed %s %d\n", - __func__, __LINE__); - return -FI_EIO; - } - - if ((where_from.device_addr != local_address.device_addr) || - (where_from.cdm_id != local_address.cdm_id)) { - fprintf(stderr, "where from check failed %s %d\n", - __func__, __LINE__); - return -FI_EIO; - } - - dgram_match = 1; - return 0; -} - -Test(dg_allocation, dgram_wc_post_exchg) -{ - int ret = 0; - struct gnix_cm_nic *cm_nic; - struct gnix_datagram *dgram_wc, *dgram_bnd; - - ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid); - 
cm_nic = ep_priv->cm_nic; - cr_assert((cm_nic != NULL), "cm_nic NULL"); - - cr_assert((cm_nic->dgram_hndl != NULL), "cm_nic dgram_hndl NULL"); - - ret = _gnix_dgram_alloc(cm_nic->dgram_hndl, GNIX_DGRAM_WC, - &dgram_wc); - cr_assert(!ret, "_gnix_dgram_alloc wc"); - - dgram_wc->callback_fn = dgram_callback_fn; - ret = _gnix_dgram_wc_post(dgram_wc); - cr_assert((ret == 0), "_gnix_dgram_alloc wc"); - - ret = _gnix_dgram_alloc(cm_nic->dgram_hndl, GNIX_DGRAM_BND, - &dgram_bnd); - cr_assert((ret == 0), "_gnix_dgram_alloc bnd"); - - dgram_bnd->target_addr = cm_nic->my_name.gnix_addr; - - local_address = cm_nic->my_name.gnix_addr; - - dgram_bnd->callback_fn = dgram_callback_fn; - ret = _gnix_dgram_bnd_post(dgram_bnd); - cr_assert(ret == 0); - - /* - * progress auto, don't need to do anything - */ - while (dgram_match != 1) { - ret = _gnix_cm_nic_progress(cm_nic); - cr_assert(ret == 0); - pthread_yield(); - } - - ret = _gnix_dgram_free(dgram_bnd); - cr_assert(!ret, "_gnix_dgram_free bnd"); - - ret = _gnix_dgram_free(dgram_wc); - cr_assert(!ret, "_gnix_dgram_free wc"); - -} - -Test(dg_allocation, dgram_wc_post_exchg_manual, .init = dg_setup_prog_manual) -{ - int ret = 0; - struct gnix_cm_nic *cm_nic; - struct gnix_datagram *dgram_wc, *dgram_bnd; - - ep_priv = container_of(ep, struct gnix_fid_ep, ep_fid); - cm_nic = ep_priv->cm_nic; - cr_assert((cm_nic != NULL), "cm_nic NULL"); - - cr_assert(cm_nic->ctrl_progress == FI_PROGRESS_MANUAL); - - cr_assert((cm_nic->dgram_hndl != NULL), "cm_nic dgram_hndl NULL"); - - ret = _gnix_dgram_alloc(cm_nic->dgram_hndl, GNIX_DGRAM_WC, - &dgram_wc); - cr_assert(!ret, "_gnix_dgram_alloc wc"); - - dgram_wc->callback_fn = dgram_callback_fn; - ret = _gnix_dgram_wc_post(dgram_wc); - cr_assert((ret == 0), "_gnix_dgram_alloc wc"); - - ret = _gnix_dgram_alloc(cm_nic->dgram_hndl, GNIX_DGRAM_BND, - &dgram_bnd); - cr_assert((ret == 0), "_gnix_dgram_alloc bnd"); - - dgram_bnd->target_addr = cm_nic->my_name.gnix_addr; - - local_address = 
cm_nic->my_name.gnix_addr; - - dgram_bnd->callback_fn = dgram_callback_fn; - ret = _gnix_dgram_bnd_post(dgram_bnd); - cr_assert(ret == 0); - - /* - * progress auto, don't need to do anything - */ - while (dgram_match != 1) { - ret = _gnix_cm_nic_progress(cm_nic); - cr_assert(ret == 0); - pthread_yield(); - } - - ret = _gnix_dgram_free(dgram_bnd); - cr_assert(!ret, "_gnix_dgram_free bnd"); - - ret = _gnix_dgram_free(dgram_wc); - cr_assert(!ret, "_gnix_dgram_free wc"); - -} diff --git a/prov/gni/test/dlist-utils.c b/prov/gni/test/dlist-utils.c deleted file mode 100644 index 7a6e63d91a3..00000000000 --- a/prov/gni/test/dlist-utils.c +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include - -#include - -#include "gnix_util.h" - -#include -#include "gnix_rdma_headers.h" - -struct element { - int val; - struct dlist_entry entry; -}; - -static void setup(void) -{ - srand(time(NULL)); -} - -static void teardown(void) -{ -} - -static void generate_perm(int *perm, int len) -{ - int i; - /* good 'nuff */ - for (i = 0; i < len; i++) { - int t = perm[i]; - int j = rand() % len; - - perm[i] = perm[j]; - perm[j] = t; - } -} - -TestSuite(dlist_utils, .init = setup, .fini = teardown); - -struct dlist_test { - char dummy[3]; - struct dlist_entry le; - int x; -}; - -Test(dlist_utils, for_each) -{ - int i; - const int n = 593; - struct dlist_entry dl; - struct dlist_test dt[n]; - struct dlist_test *elem; - int perm[n]; - - for (i = 0; i < n; i++) - perm[i] = i; - - generate_perm(perm, n); - - dlist_init(&dl); - for (i = 0; i < n; i++) { - dt[perm[i]].x = i; - dlist_insert_tail(&dt[perm[i]].le, &dl); - } - - i = 0; - dlist_for_each(&dl, elem, le) { - cr_assert(elem->x == i); - ++i; - } -} - -Test(dlist_utils, for_each_empty) -{ - struct dlist_entry dl; - struct dlist_test *elem; - - dlist_init(&dl); - - dlist_for_each(&dl, elem, le) { - cr_assert(false); - } -} - -Test(dlist_utils, for_each_safe) -{ - int i; - const int n = 1129; - struct dlist_entry dl; - struct dlist_test dt[n]; - struct dlist_test *elem, *next; - int perm[n]; - - for (i = 0; i < n; i++) - perm[i] = i; - - generate_perm(perm, n); - - dlist_init(&dl); - for (i = 0; i < n; i++) { - dt[perm[i]].x = i; - dlist_insert_tail(&dt[perm[i]].le, &dl); - } - - i = 0; - dlist_for_each_safe(&dl, elem, next, le) { - cr_assert(elem->x == i); - dlist_remove(&elem->le); - ++i; - } -} - -Test(dlist_utils, 
for_each_safe_empty) -{ - struct dlist_entry dl; - struct dlist_test *elem, *next; - - dlist_init(&dl); - - dlist_for_each_safe(&dl, elem, next, le) { - cr_assert(false); - } -} - -Test(dlist_utils, dlist_splice_head_test) -{ - struct dlist_entry list1, list2; - struct element values[4], *current; - int i; - int expected[4] = {2, 3, 0, 1}; - - for (i = 0; i < 4; i++) { - values[i].val = i; - dlist_init(&values[i].entry); - } - - dlist_init(&list1); - dlist_init(&list2); - dlist_insert_tail(&values[0].entry, &list1); - dlist_insert_tail(&values[1].entry, &list1); - - dlist_insert_tail(&values[2].entry, &list2); - dlist_insert_tail(&values[3].entry, &list2); - - dlist_splice_head(&list1, &list2); - - cr_assert(dlist_empty(&list2)); - - i = 0; - dlist_for_each(&list1, current, entry) - { - cr_assert(current->val == expected[i]); - i++; - } - -} - -Test(dlist_utils, dlist_splice_tail_test) -{ - struct dlist_entry list1, list2; - struct element values[4], *current; - int i; - int expected[4] = {0, 1, 2, 3}; - - for (i = 0; i < 4; i++) { - values[i].val = i; - dlist_init(&values[i].entry); - } - - dlist_init(&list1); - dlist_init(&list2); - dlist_insert_tail(&values[0].entry, &list1); - dlist_insert_tail(&values[1].entry, &list1); - - dlist_insert_tail(&values[2].entry, &list2); - dlist_insert_tail(&values[3].entry, &list2); - - dlist_splice_tail(&list1, &list2); - - cr_assert(dlist_empty(&list2)); - - i = 0; - dlist_for_each(&list1, current, entry) - { - cr_assert(current->val == expected[i]); - i++; - } - - -} diff --git a/prov/gni/test/dom.c b/prov/gni/test/dom.c deleted file mode 100644 index 3e90f3789ac..00000000000 --- a/prov/gni/test/dom.c +++ /dev/null @@ -1,322 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#include "gnix.h" - -#include -#include "gnix_rdma_headers.h" -#include "common.h" - -#include "common.h" - -static struct fid_fabric *fabric; -static struct fi_info *fi; - -static void _setup(uint32_t version) -{ - int ret; - struct fi_info *hints; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->fabric_attr->prov_name = strdup("gni"); - if (FI_VERSION_LT(version, FI_VERSION(1, 5))) - hints->domain_attr->mr_mode = FI_MR_BASIC; - else - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - - ret = fi_getinfo(version, NULL, 0, 0, hints, &fi); - cr_assert(ret == FI_SUCCESS, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fabric, NULL); - cr_assert(ret == FI_SUCCESS, "fi_fabric"); - - fi_freeinfo(hints); -} - -static void setup(void) -{ - _setup(fi_version()); -} - -static void setup_1_0(void) -{ - _setup(FI_VERSION(1, 0)); -} - -static void teardown(void) -{ - int ret; - - ret = fi_close(&fabric->fid); - cr_assert(ret == FI_SUCCESS, "fi_close fabric"); - - fi_freeinfo(fi); -} - -TestSuite(domain, .init = setup, .fini = teardown); -TestSuite(domain_1_0, .init = setup_1_0, .fini = teardown); - -Test(domain_1_0, no_dom_auth_key_support) -{ - int ret; - struct fid_domain *dom; - void *old_auth_key = fi->domain_attr->auth_key; - int old_auth_key_size = fi->domain_attr->auth_key_size; - - fi->domain_attr->auth_key = (void *) 0xdeadbeef; - fi->domain_attr->auth_key_size = 47; - - ret = fi_domain(fabric, fi, &dom, NULL); - cr_assert(ret == -FI_EINVAL, "fi_domain, ret=%d expected=%d", - ret, -FI_EINVAL); - - fi->domain_attr->auth_key = old_auth_key; - fi->domain_attr->auth_key_size = old_auth_key_size; -} - -Test(domain_1_0, no_mr_auth_key_support) -{ - int ret; - struct fid_mr *mr; - struct fid_domain *dom; - - struct iovec iov = { - .iov_base = (void *) 0xabbaabba, - .iov_len = 1024, - }; - struct fi_mr_attr mr_attr = { - .mr_iov = &iov, - .iov_count = 1, - .access 
= (FI_REMOTE_READ | FI_REMOTE_WRITE - | FI_READ | FI_WRITE), - .offset = 0, - .requested_key = 0, - .context = NULL, - .auth_key = (void *) 0xdeadbeef, - .auth_key_size = 47, - }; - - ret = fi_domain(fabric, fi, &dom, NULL); - cr_assert(ret == FI_SUCCESS, "fi_domain, ret=%d expected=%d", - ret, FI_SUCCESS); - - ret = fi_mr_regattr(dom, &mr_attr, 0, &mr); - cr_assert(ret == -FI_EINVAL, "fi_mr_regattr, ret=%d expected=%d", - ret, -FI_EINVAL); - - ret = fi_close(&dom->fid); - cr_assert(ret == FI_SUCCESS, "fi_close, ret=%d expected=%d", - ret, FI_SUCCESS); -} - -Test(domain, many_domains) -{ - int i, ret; - const int num_doms = 7919; - struct fid_domain *doms[num_doms]; - struct gnix_fid_domain *gdom; - struct gnix_fid_fabric *gfab; - - memset(doms, 0, num_doms*sizeof(struct fid_domain *)); - - gfab = container_of(fabric, struct gnix_fid_fabric, fab_fid); - for (i = 0; i < num_doms; i++) { - ret = fi_domain(fabric, fi, &doms[i], NULL); - cr_assert(ret == FI_SUCCESS, "fi_domain"); - gdom = container_of(doms[i], struct gnix_fid_domain, - domain_fid); - cr_assert(gdom, "domain not allcoated"); - cr_assert(gdom->fabric == gfab, "Incorrect fabric"); - cr_assert(ofi_atomic_get32(&gdom->ref_cnt.references) == 1, - "Incorrect ref_cnt"); - - } - - for (i = num_doms-1; i >= 0; i--) { - ret = fi_close(&doms[i]->fid); - cr_assert(ret == FI_SUCCESS, "fi_close domain"); - } - -} - -Test(domain, open_ops) -{ - int i, ret; - const int num_doms = 11; - struct fid_domain *doms[num_doms]; - struct fi_gni_ops_domain *gni_domain_ops; - enum dom_ops_val op; - uint32_t val; - char *other_reg_type = "none"; - char *string_val; - bool xpmem_toggle = false, xpmem_check; - - memset(doms, 0, num_doms*sizeof(struct fid_domain *)); - - for (i = 0; i < num_doms; i++) { - ret = fi_domain(fabric, fi, &doms[i], NULL); - cr_assert(ret == FI_SUCCESS, "fi_domain"); - ret = fi_open_ops(&doms[i]->fid, FI_GNI_DOMAIN_OPS_1, - 0, (void **) &gni_domain_ops, NULL); - cr_assert(ret == FI_SUCCESS, "fi_open_ops"); 
- for (op = 0; op < GNI_NUM_DOM_OPS; op++) { - val = i*op+op; - switch (op) { - case GNI_MR_CACHE: - ret = gni_domain_ops->set_val(&doms[i]->fid, op, - &other_reg_type); - break; - case GNI_XPMEM_ENABLE: - ret = gni_domain_ops->set_val(&doms[i]->fid, op, - &xpmem_toggle); - break; - default: - ret = gni_domain_ops->set_val(&doms[i]->fid, op, &val); - break; - } - cr_assert(ret == FI_SUCCESS, "set_val"); - - switch (op) { - case GNI_MR_CACHE: - ret = gni_domain_ops->get_val(&doms[i]->fid, op, &string_val); - break; - case GNI_XPMEM_ENABLE: - ret = gni_domain_ops->get_val(&doms[i]->fid, op, - &xpmem_check); - break; - default: - ret = gni_domain_ops->get_val(&doms[i]->fid, op, &val); - break; - } - cr_assert(ret == FI_SUCCESS, "get_val"); - - switch (op) { - case GNI_MR_CACHE: - cr_assert_eq(strncmp(other_reg_type, string_val, - strlen(other_reg_type)), 0, "Incorrect op value"); - break; - case GNI_XPMEM_ENABLE: - cr_assert(xpmem_toggle == xpmem_check, - "Incorrect op value"); - break; - default: - cr_assert(val == i*op+op, "Incorrect op value"); - break; - } - } - ret = fi_close(&doms[i]->fid); - cr_assert(ret == FI_SUCCESS, "fi_close domain"); - } -} - -Test(domain, cache_flush_op) -{ - int i, ret; - const int num_doms = 11; - struct fid_domain *doms[num_doms]; - struct fi_gni_ops_domain *gni_domain_ops; - struct fid_mr *mr; - char *buf = calloc(1024, sizeof(char)); - int requested_key = 0; - - cr_assert(buf); - - memset(doms, 0, num_doms*sizeof(struct fid_domain *)); - - for (i = 0; i < num_doms; i++) { - requested_key = USING_SCALABLE(fi) ? 
i : 0; - - ret = fi_domain(fabric, fi, &doms[i], NULL); - cr_assert(ret == FI_SUCCESS, "fi_domain"); - ret = fi_open_ops(&doms[i]->fid, FI_GNI_DOMAIN_OPS_1, - 0, (void **) &gni_domain_ops, NULL); - cr_assert(ret == FI_SUCCESS, "fi_open_ops"); - - ret = fi_mr_reg(doms[i], - buf, - 1024, - FI_READ, - 0, - requested_key, - 0, - &mr, - NULL); - cr_assert(ret == FI_SUCCESS, "fi_reg_mr, ret=%d", ret); - - if (USING_SCALABLE(fi)) - MR_ENABLE(mr, buf, 1024); - - ret = fi_close(&mr->fid); - cr_assert(ret == FI_SUCCESS, "fi_close mr"); - - ret = gni_domain_ops->flush_cache(&doms[i]->fid); - cr_assert(ret == FI_SUCCESS, "flush cache"); - - ret = fi_close(&doms[i]->fid); - cr_assert(ret == FI_SUCCESS, "fi_close domain"); - } - - free(buf); -} - -Test(domain, invalid_open_ops) -{ - int ret; - struct fid_domain *dom; - struct fi_gni_ops_domain *gni_domain_ops; - uint32_t val = 0; - - ret = fi_domain(fabric, fi, &dom, NULL); - cr_assert(ret == FI_SUCCESS, "fi_domain"); - ret = fi_open_ops(&dom->fid, FI_GNI_DOMAIN_OPS_1, - 0, (void **) &gni_domain_ops, NULL); - cr_assert(ret == FI_SUCCESS, "fi_open_ops"); - - ret = gni_domain_ops->get_val(&dom->fid, GNI_NUM_DOM_OPS, &val); - cr_assert(ret == -FI_EINVAL, "get_val"); - - ret = gni_domain_ops->set_val(&dom->fid, GNI_NUM_DOM_OPS, &val); - cr_assert(ret == -FI_EINVAL, "set_val"); - - ret = fi_close(&dom->fid); - cr_assert(ret == FI_SUCCESS, "fi_close domain"); -} diff --git a/prov/gni/test/ep.c b/prov/gni/test/ep.c deleted file mode 100644 index 60b5d0952de..00000000000 --- a/prov/gni/test/ep.c +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#include "gnix_ep.h" -#include "gnix_cm.h" - -#include -#include "gnix_rdma_headers.h" -#include "common.h" - -static struct fi_info *hints; -static struct fi_info *fi; -static struct fid_fabric *fab; -static struct fid_domain *dom; - -static void _setup(uint32_t version) -{ - int ret; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - if (FI_VERSION_LT(version, FI_VERSION(1, 5))) - hints->domain_attr->mr_mode = FI_MR_BASIC; - else - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(version, NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - if (USING_SCALABLE(fi)) { - struct fi_gni_ops_fab *ops; - int in; - - /* open_close test opens many nics and exhausts reserved keys */ - in = 256; - - ret = fi_open_ops(&fab->fid, - FI_GNI_FAB_OPS_1, 0, (void **) &ops, NULL); - cr_assert_eq(ret, FI_SUCCESS); - cr_assert(ops); - - ret = ops->set_val(&fab->fid, - GNI_DEFAULT_PROV_REGISTRATION_LIMIT, - &in); - cr_assert_eq(ret, FI_SUCCESS); - } - - ret = fi_domain(fab, fi, &dom, NULL); - cr_assert(!ret, "fi_domain"); -} - -static void setup(void) -{ - _setup(fi_version()); -} - -static void setup_1_0(void) -{ - _setup(FI_VERSION(1, 0)); -} - -static void teardown(void) -{ - int ret; - - ret = fi_close(&dom->fid); - cr_assert(!ret, "fi_close domain"); - - ret = fi_close(&fab->fid); - cr_assert(!ret, "fi_close fabric"); - - fi_freeinfo(fi); - fi_freeinfo(hints); -} - -TestSuite(endpoint, .init = setup, .fini = teardown); -TestSuite(endpoint_1_0, .init = setup_1_0, .fini = teardown); - -Test(endpoint_1_0, no_auth_key_support) -{ - int ret; - struct fid_ep *ep; - void *old_auth_key = fi->ep_attr->auth_key; - int old_auth_key_size = fi->ep_attr->auth_key_size; - - fi->ep_attr->auth_key = (void *) 0xdeadbeef; - 
fi->ep_attr->auth_key_size = 47; - - ret = fi_endpoint(dom, fi, &ep, NULL); - cr_assert(ret == -FI_EINVAL, "fi_endpoint, ret=%d expected=%d", - ret, -FI_EINVAL); - - fi->ep_attr->auth_key = old_auth_key; - fi->ep_attr->auth_key_size = old_auth_key_size; -} - -Test(endpoint_info, info) -{ - int ret; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(fi_version(), NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - cr_assert_eq(fi->ep_attr->type, FI_EP_RDM); - cr_assert_eq(fi->next->ep_attr->type, FI_EP_DGRAM); - cr_assert_eq(fi->next->next->ep_attr->type, FI_EP_MSG); - cr_assert_neq(fi->domain_attr->cntr_cnt, 0); - cr_assert_neq(fi->domain_attr->cq_cnt, 0); - cr_assert_eq(fi->domain_attr->ep_cnt, SIZE_MAX); - - fi_freeinfo(fi); - - hints->ep_attr->type = FI_EP_RDM; - ret = fi_getinfo(fi_version(), NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - cr_assert_eq(fi->ep_attr->type, FI_EP_RDM); - - fi_freeinfo(fi); - - hints->ep_attr->type = FI_EP_DGRAM; - ret = fi_getinfo(fi_version(), NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - cr_assert_eq(fi->ep_attr->type, FI_EP_DGRAM); - - fi_freeinfo(fi); - fi_freeinfo(hints); -} - -Test(endpoint, open_close) -{ - int i, ret; - const int num_eps = 61; - struct fid_ep *eps[num_eps]; - - memset(eps, 0, num_eps*sizeof(struct fid_ep *)); - - for (i = 0; i < num_eps; i++) { - ret = fi_endpoint(dom, fi, &eps[i], NULL); - cr_assert(!ret, "fi_endpoint"); - struct gnix_fid_ep *ep = container_of(eps[i], - struct gnix_fid_ep, - ep_fid); - cr_assert(ep, "endpoint not allcoated"); - - /* Check fields (fill in as implemented) */ - cr_assert(ep->nic, "NIC not allocated"); - cr_assert(!_gnix_fl_empty(&ep->fr_freelist), - "gnix_fab_req freelist empty"); - } - - for (i = num_eps-1; i >= 0; i--) { - ret = fi_close(&eps[i]->fid); - cr_assert(!ret, "fi_close endpoint"); - } - -} - 
-Test(endpoint, getsetopt) -{ - int ret; - struct fid_ep *ep = NULL; - uint64_t val; - size_t len; - - ret = fi_endpoint(dom, fi, &ep, NULL); - cr_assert(!ret, "fi_endpoint"); - - /* Test bad params. */ - ret = fi_getopt(&ep->fid, !FI_OPT_ENDPOINT, FI_OPT_MIN_MULTI_RECV, - (void *)&val, &len); - cr_assert(ret == -FI_ENOPROTOOPT, "fi_getopt"); - - ret = fi_getopt(&ep->fid, FI_OPT_ENDPOINT, FI_OPT_CM_DATA_SIZE+1, - (void *)&val, &len); - cr_assert(ret == -FI_ENOPROTOOPT, "fi_getopt"); - - ret = fi_getopt(&ep->fid, FI_OPT_ENDPOINT, FI_OPT_MIN_MULTI_RECV, - NULL, &len); - cr_assert(ret == -FI_EINVAL, "fi_getopt"); - - ret = fi_getopt(&ep->fid, FI_OPT_ENDPOINT, FI_OPT_MIN_MULTI_RECV, - (void *)&val, NULL); - cr_assert(ret == -FI_EINVAL, "fi_getopt"); - - ret = fi_setopt(&ep->fid, !FI_OPT_ENDPOINT, FI_OPT_MIN_MULTI_RECV, - (void *)&val, sizeof(size_t)); - cr_assert(ret == -FI_ENOPROTOOPT, "fi_setopt"); - - ret = fi_setopt(&ep->fid, FI_OPT_ENDPOINT, !FI_OPT_MIN_MULTI_RECV, - (void *)&val, sizeof(size_t)); - cr_assert(ret == -FI_ENOPROTOOPT, "fi_setopt"); - - ret = fi_setopt(&ep->fid, FI_OPT_ENDPOINT, FI_OPT_MIN_MULTI_RECV, - NULL, sizeof(size_t)); - cr_assert(ret == -FI_EINVAL, "fi_setopt"); - - ret = fi_setopt(&ep->fid, FI_OPT_ENDPOINT, FI_OPT_MIN_MULTI_RECV, - (void *)&val, sizeof(size_t) - 1); - cr_assert(ret == -FI_EINVAL, "fi_setopt"); - - /* - * see issue 1120 - */ - val = 0UL; - ret = fi_setopt(&ep->fid, FI_OPT_ENDPOINT, FI_OPT_MIN_MULTI_RECV, - (void *)&val, sizeof(size_t)); - cr_assert(ret == -FI_EINVAL, "fi_setopt"); - - /* Test update. 
*/ - ret = fi_getopt(&ep->fid, FI_OPT_ENDPOINT, FI_OPT_MIN_MULTI_RECV, - (void *)&val, &len); - cr_assert(!ret, "fi_getopt"); - cr_assert(val == GNIX_OPT_MIN_MULTI_RECV_DEFAULT, "fi_getopt"); - cr_assert(len == sizeof(size_t), "fi_getopt"); - - ret = fi_getopt(&ep->fid, FI_OPT_ENDPOINT, FI_OPT_CM_DATA_SIZE, - (void *)&val, &len); - cr_assert(!ret, "fi_getopt"); - cr_assert(val == GNIX_CM_DATA_MAX_SIZE, "fi_getopt"); - cr_assert(len == sizeof(size_t), "fi_getopt"); - - val = 128; - ret = fi_setopt(&ep->fid, FI_OPT_ENDPOINT, FI_OPT_MIN_MULTI_RECV, - (void *)&val, sizeof(size_t)); - cr_assert(!ret, "fi_setopt"); - - ret = fi_getopt(&ep->fid, FI_OPT_ENDPOINT, FI_OPT_MIN_MULTI_RECV, - (void *)&val, &len); - cr_assert(!ret, "fi_getopt"); - cr_assert(val == 128, "fi_getopt"); - cr_assert(len == sizeof(size_t), "fi_getopt"); - - ret = fi_close(&ep->fid); - cr_assert(!ret, "fi_close endpoint"); -} - -/* - * size left interfaces have been deprecated - */ - -#if 0 -Test(endpoint, sizeleft) -{ - int ret; - size_t sz; - struct fid_ep *ep = NULL; - - ret = fi_endpoint(dom, fi, &ep, NULL); - cr_assert(ret == FI_SUCCESS, "fi_endpoint"); - - /* Test in disabled state. */ - sz = fi_rx_size_left(ep); - cr_assert(sz == -FI_EOPBADSTATE, "fi_rx_size_left"); - - sz = fi_tx_size_left(ep); - cr_assert(sz == -FI_EOPBADSTATE, "fi_tx_size_left"); - - ret = fi_enable(ep); - cr_assert(ret == FI_SUCCESS, "fi_enable"); - - /* Test default values. 
*/ - sz = fi_rx_size_left(ep); - cr_assert(sz == GNIX_RX_SIZE_DEFAULT, "fi_rx_size_left"); - - sz = fi_tx_size_left(ep); - cr_assert(sz == GNIX_TX_SIZE_DEFAULT, "fi_tx_size_left"); - - ret = fi_close(&ep->fid); - cr_assert(!ret, "fi_close endpoint"); -} -#endif - -Test(endpoint, getsetopt_gni_ep) -{ - int ret; - int val; - struct fid_ep *ep = NULL; - struct fi_gni_ops_ep *ep_ops; - struct gnix_fid_ep *ep_priv = NULL; - - ret = fi_endpoint(dom, fi, &ep, NULL); - cr_assert(!ret, "fi_endpoint"); - - ep_priv = (struct gnix_fid_ep *) ep; - - ret = fi_open_ops(&ep->fid, "ep ops 1", 0, (void **) &ep_ops, NULL); - cr_assert(!ret, "fi_open_ops endpoint"); - - ret = ep_ops->get_val(&ep->fid, GNI_HASH_TAG_IMPL, &val); - cr_assert(!ret, "ep_ops get_val"); - cr_assert_eq(val, 0); - cr_assert_eq(ep_priv->use_tag_hlist, 0); - - val = 1; /* set the hash implementation */ - ret = ep_ops->set_val(&ep->fid, GNI_HASH_TAG_IMPL, &val); - cr_assert(!ret, "ep_ops set_val"); - cr_assert_eq(ep_priv->use_tag_hlist, 1); - cr_assert_eq(ep_priv->unexp_recv_queue.attr.type, GNIX_TAG_HLIST); - cr_assert_eq(ep_priv->posted_recv_queue.attr.type, GNIX_TAG_HLIST); - cr_assert_eq(ep_priv->tagged_unexp_recv_queue.attr.type, GNIX_TAG_HLIST); - cr_assert_eq(ep_priv->tagged_posted_recv_queue.attr.type, GNIX_TAG_HLIST); - - val = 0; /* reset the value */ - ret = ep_ops->get_val(&ep->fid, GNI_HASH_TAG_IMPL, &val); - cr_assert(!ret, "ep_ops get_val"); - cr_assert_eq(val, 1); - cr_assert_eq(ep_priv->use_tag_hlist, 1); - - ret = fi_close(&ep->fid); - cr_assert(!ret, "fi_close endpoint"); -} diff --git a/prov/gni/test/eq.c b/prov/gni/test/eq.c deleted file mode 100644 index c624338c6f3..00000000000 --- a/prov/gni/test/eq.c +++ /dev/null @@ -1,366 +0,0 @@ -/* - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include - -#include "gnix_eq.h" -#include "gnix.h" - -#include -#include "gnix_rdma_headers.h" -#include "common.h" - -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fid_fabric *fab; -static struct fi_info *hints; -static struct fi_info *fi; - -static struct gnix_fid_eq *eq_priv; -static struct fi_eq_attr eq_attr; -static struct fid_eq *eq; - -static struct gnix_fid_wait *wait_priv; -static struct fi_wait_attr wait_attr; -static struct fid_wait *wait_set; - -void _setup(void) -{ - int ret = 0; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo failed."); - - hints->mode = mode_bits; - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(fi_version(), NULL, 0, 0, hints, &fi); - cr_assert_eq(ret, FI_SUCCESS, "fi_getinfo failed."); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert_eq(ret, FI_SUCCESS, "fi_fabric failed."); -} - -void _teardown(void) -{ - int ret = 0; - - ret = fi_close(&fab->fid); - cr_assert_eq(ret, FI_SUCCESS, "failure in closing fabric."); - - fi_freeinfo(fi); - fi_freeinfo(hints); -} - -void eq_teardown(void) -{ - cr_assert_eq(FI_SUCCESS, fi_close(&eq->fid), "failure in closing eq."); - _teardown(); -} - -void eq_create(enum fi_wait_obj wait_obj, size_t size) -{ - int ret = 0; - - eq_attr.size = size; - eq_attr.wait_obj = wait_obj; - - ret = fi_eq_open(fab, &eq_attr, &eq, NULL); - cr_assert_eq(ret, FI_SUCCESS, "fi_eq_open failed."); - - eq_priv = container_of(eq, struct gnix_fid_eq, eq_fid); - - if (eq_priv->wait) { - wait_priv = container_of(eq_priv->wait, struct gnix_fid_wait, - wait); - } -} - -void eq_wait_none_setup(void) -{ - _setup(); - eq_create(FI_WAIT_NONE, 8); -} - -void eq_wait_fd_setup(void) -{ - _setup(); - eq_create(FI_WAIT_FD, 8); -} - -void eq_wait_unspec_setup(void) -{ - _setup(); - eq_create(FI_WAIT_UNSPEC, 8); -} - -void 
eq_wait_mutex_cond_setup(void) -{ - _setup(); - eq_create(FI_WAIT_MUTEX_COND, 8); -} - -/****************************************************************************** - * Basic in/out tests - *****************************************************************************/ - -TestSuite(eq_basic, .init = eq_wait_unspec_setup, .fini = eq_teardown); - -void eq_rw(int blocking) -{ - int ret = 0, i; - uint32_t in_event = 'a'; - uint32_t out_event = 'a'; - - fid_t in_err_fid = (fid_t)0xf1d; - int64_t in_err_ctx = 0xc0413; - uint64_t in_err_idx = 10; - int in_err_err = 255; - int in_err_perrno = -13; - - char *in_buf = "12345"; - char out_buf[1024]; - const size_t eq_size = 1025; - struct fi_eq_err_entry err_buf; - char *err_data_buf = "abcd"; - int err_cnt = 0; - - for (i = 0; i < eq_size; i++) { - ret = fi_eq_write(eq, in_event + i + eq_size, in_buf, - sizeof(in_buf), 0); - cr_assert_eq(ret, sizeof(in_buf)); - -#if 0 - fid_t fid; /* fid associated with error */ - void *context; /* operation context */ - uint64_t data; /* completion-specific data */ - int err; /* positive error code */ - int prov_errno; /* provider error code */ - void *err_data; /* additional error data */ - size_t err_data_size; /* size of err_data */ -#endif - ret = _gnix_eq_write_error(eq_priv, - (fid_t)(in_err_fid + i), - (void *)(in_err_ctx + i), - in_err_idx + i, - in_err_err + i, - in_err_perrno + i, - err_data_buf, - sizeof(err_data_buf)); - cr_assert_eq(ret, FI_SUCCESS); - err_cnt++; - } - - for (i = 0; i < eq_size*2; i++) { - if (blocking) { - ret = fi_eq_sread(eq, &out_event, &out_buf, 1024, 1, 0); - } else { - ret = fi_eq_read(eq, &out_event, &out_buf, 1024, 0); - } - - if (ret == -FI_EAVAIL) { - struct fi_eq_err_entry tmp_err = { - .fid = (fid_t)(in_err_fid + i), - .context = (void *)(in_err_ctx + i), - .data = in_err_idx + i, - .err = in_err_err + i, - .prov_errno = in_err_perrno + i, - }; - - ret = fi_eq_readerr(eq, &err_buf, 0); - cr_assert(!memcmp(&tmp_err, &err_buf, - offsetof(struct 
fi_eq_err_entry, - err_data)), - "bad error fields"); - cr_assert(err_buf.err_data_size = strlen(err_data_buf), - "bad error data size"); - cr_assert(!strncmp(err_buf.err_data, err_data_buf, - err_buf.err_data_size), - "bad error data"); - err_cnt--; - } else { - cr_assert(err_cnt == 0); - cr_assert_eq(ret, sizeof(in_buf)); - cr_assert_eq(out_event, in_event + i); - } - } - - if (blocking) { - ret = fi_eq_sread(eq, &out_event, &out_buf, 1024, 1, 0); - } else { - ret = fi_eq_read(eq, &out_event, &out_buf, 1024, 0); - } - cr_assert_eq(ret, -FI_EAGAIN); -} - -Test(eq_basic, rw) -{ - eq_rw(0); -} - -Test(eq_basic, rw_blocking) -{ - eq_rw(1); -} - -/****************************************************************************** - * Wait object tests - *****************************************************************************/ - -TestSuite(eq_wait_obj, .fini = eq_teardown); - -Test(eq_wait_obj, none, .init = eq_wait_none_setup) -{ - char out_buf[1024]; - uint32_t out_event = 'a'; - int ret; - - ret = fi_eq_sread(eq, &out_event, &out_buf, 1024, 1, 0); - cr_expect_eq(ret, -FI_EINVAL); - cr_expect(!wait_priv, "wait_priv should be null."); -} - -Test(eq_wait_obj, unspec, .init = eq_wait_unspec_setup) -{ - cr_expect_eq(wait_priv->type, FI_WAIT_UNSPEC); - cr_expect_eq(wait_priv->type, eq_priv->attr.wait_obj); - cr_expect_eq(wait_priv->type, eq_attr.wait_obj); - cr_expect_eq(&wait_priv->fabric->fab_fid, fab); - cr_expect_eq(wait_priv->cond_type, FI_CQ_COND_NONE); -} - -Test(eq_wait_obj, fd, .init = eq_wait_fd_setup, - .disabled = true) -{ - cr_expect_eq(wait_priv->type, FI_WAIT_FD); - cr_expect_eq(wait_priv->type, eq_priv->attr.wait_obj); - cr_expect_eq(wait_priv->type, eq_attr.wait_obj); - cr_expect_eq(&wait_priv->fabric->fab_fid, fab); - cr_expect_eq(wait_priv->cond_type, FI_CQ_COND_NONE); -} - -Test(eq_wait_obj, mutex_cond, .init = eq_wait_mutex_cond_setup, - .disabled = true) -{ - cr_expect_eq(wait_priv->type, FI_WAIT_MUTEX_COND); - cr_expect_eq(wait_priv->type, 
eq_priv->attr.wait_obj); - cr_expect_eq(wait_priv->type, eq_attr.wait_obj); - cr_expect_eq(&wait_priv->fabric->fab_fid, fab); - cr_expect_eq(wait_priv->cond_type, FI_CQ_COND_NONE); -} - -TestSuite(eq_wait_control, .fini = eq_teardown, .disabled = true); - -/* -Test(eq_wait_control, none, .init = eq_wait_none_setup) -{ - int ret; - int fd; - - ret = fi_control(&eq_priv->eq_fid.fid, FI_GETWAIT, &fd); - cr_expect_eq(-FI_ENOSYS, ret, "fi_control exists for none."); -} -*/ - -Test(eq_wait_control, unspec, .init = eq_wait_unspec_setup) -{ - int ret; - int fd; - - ret = fi_control(&eq_priv->eq_fid.fid, FI_GETWAIT, &fd); - cr_expect_eq(FI_SUCCESS, ret, "fi_control failed."); - - cr_expect_eq(wait_priv->fd[WAIT_READ], fd); -} - -Test(eq_wait_control, fd, .init = eq_wait_fd_setup) -{ - int ret; - int fd; - - ret = fi_control(&eq_priv->eq_fid.fid, FI_GETWAIT, &fd); - cr_expect_eq(FI_SUCCESS, ret, "fi_control failed."); - - cr_expect_eq(wait_priv->fd[WAIT_READ], fd); -} - -Test(eq_wait_control, mutex_cond, .init = eq_wait_mutex_cond_setup) -{ - int ret; - struct fi_mutex_cond mutex_cond; - - ret = fi_control(&eq_priv->eq_fid.fid, FI_GETWAIT, &mutex_cond); - cr_expect_eq(FI_SUCCESS, ret, "fi_control failed."); - - ret = memcmp(&wait_priv->mutex, mutex_cond.mutex, - sizeof(*mutex_cond.mutex)); - cr_expect_eq(0, ret, "mutex compare failed."); - - ret = memcmp(&wait_priv->cond, mutex_cond.cond, - sizeof(*mutex_cond.cond)); - cr_expect_eq(0, ret, "cond compare failed."); -} - -Test(eq_wait_set, fd, .init = _setup, .fini = _teardown, .disabled = true) -{ - int ret; - int fd; - - wait_attr.wait_obj = FI_WAIT_FD; - - ret = fi_wait_open(fab, &wait_attr, &wait_set); - cr_expect_eq(FI_SUCCESS, ret, "fi_wait_open failed."); - - wait_priv = container_of(wait_set, struct gnix_fid_wait, wait); - - eq_attr.size = 8; - eq_attr.wait_obj = FI_WAIT_SET; - eq_attr.wait_set = wait_set; - - ret = fi_eq_open(fab, &eq_attr, &eq, NULL); - cr_expect_eq(FI_SUCCESS, ret, "fi_eq_open failed."); - - eq_priv 
= container_of(eq, struct gnix_fid_eq, eq_fid); - - ret = fi_control(&eq_priv->eq_fid.fid, FI_GETWAIT, &fd); - cr_expect_eq(FI_SUCCESS, ret, "fi_control failed."); - - cr_expect_eq(wait_priv->fd[WAIT_READ], fd); - - ret = fi_close(&eq->fid); - cr_expect_eq(FI_SUCCESS, ret, "failure in closing cq."); - - ret = fi_close(&wait_set->fid); - cr_expect_eq(FI_SUCCESS, ret, "failure in closing waitset."); -} diff --git a/prov/gni/test/fabric.c b/prov/gni/test/fabric.c deleted file mode 100644 index fac6bc50142..00000000000 --- a/prov/gni/test/fabric.c +++ /dev/null @@ -1,389 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#include "gnix.h" - -#include -#include "gnix_rdma_headers.h" -#include "fi_ext_gni.h" -#include "common.h" - -static struct fid_fabric *fabric; -static struct fi_info *fi; - -static void setup(void) -{ - int ret; - struct fi_info *hints; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = FI_MR_BASIC; - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(FI_VERSION(1, 0), NULL, 0, 0, hints, &fi); - cr_assert(ret == FI_SUCCESS, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fabric, NULL); - cr_assert(ret == FI_SUCCESS, "fi_fabric"); - - fi_freeinfo(hints); -} - -static void teardown(void) -{ - int ret; - - ret = fi_close(&fabric->fid); - cr_assert(ret == FI_SUCCESS, "fi_close fabric"); - - fi_freeinfo(fi); -} -TestSuite(fabric_bare); -TestSuite(fabric, .init = setup, .fini = teardown); - -Test(fabric, simple) -{ - cr_assert(fabric != NULL); -} - -Test(fabric, open_ops_1) -{ - int ret; - struct fi_gni_ops_fab *ops; - - ret = fi_open_ops(&fabric->fid, - FI_GNI_FAB_OPS_1, 0, (void **) &ops, NULL); - cr_assert_eq(ret, FI_SUCCESS); - - cr_assert(ops); -} - -Test(fabric, set_wait_timeout) -{ - int ret; - struct fi_gni_ops_fab *ops; - int old_val = 0, new_val, current_val; - - ret = fi_open_ops(&fabric->fid, - FI_GNI_FAB_OPS_1, 0, (void **) &ops, NULL); - cr_assert_eq(ret, FI_SUCCESS); - - cr_assert(ops); - - ret = ops->get_val(&fabric->fid, - GNI_WAIT_SHARED_MEMORY_TIMEOUT, &old_val); - cr_assert_eq(ret, FI_SUCCESS); - cr_assert_neq(old_val, 0); - - new_val = old_val * 2; - ret = ops->set_val(&fabric->fid, - GNI_WAIT_SHARED_MEMORY_TIMEOUT, &new_val); - cr_assert_eq(ret, FI_SUCCESS); - - ret = ops->get_val(&fabric->fid, - GNI_WAIT_SHARED_MEMORY_TIMEOUT, &current_val); - cr_assert_eq(ret, FI_SUCCESS); - cr_assert_eq(current_val, new_val); -} - -Test(fabric, open_ops_2) -{ - int ret; - struct fi_gni_auth_key_ops_fab *auth_ops; - - ret 
= fi_open_ops(&fabric->fid, - FI_GNI_FAB_OPS_2, 0, (void **) &auth_ops, NULL); - cr_assert_eq(ret, FI_SUCCESS); - - cr_assert(auth_ops); -} - -Test(fabric, set_limits) -{ - int ret; - int in; - int total; - - struct gnix_auth_key_attr default_attr = { 0 }; - struct gnix_auth_key_attr expected_attr = { 0 }; - struct gnix_auth_key_attr actual_attr = { 0 }; - - struct fi_gni_ops_fab *ops; - struct fi_gni_auth_key_ops_fab *auth_ops; - - ret = fi_open_ops(&fabric->fid, - FI_GNI_FAB_OPS_1, 0, (void **) &ops, NULL); - cr_assert_eq(ret, FI_SUCCESS); - - ret = fi_open_ops(&fabric->fid, - FI_GNI_FAB_OPS_2, 0, (void **) &auth_ops, NULL); - cr_assert_eq(ret, FI_SUCCESS); - - cr_assert(ops); - cr_assert(auth_ops); - - ret = ops->get_val(&fabric->fid, - GNI_DEFAULT_USER_REGISTRATION_LIMIT, - &default_attr.user_key_limit); - cr_assert_eq(ret, FI_SUCCESS); - - in = (default_attr.user_key_limit == 1) ? - 2 : default_attr.user_key_limit - 1; - cr_assert(in > 0); - expected_attr.user_key_limit = in; - - ret = ops->get_val(&fabric->fid, - GNI_DEFAULT_PROV_REGISTRATION_LIMIT, - &default_attr.prov_key_limit); - cr_assert_eq(ret, FI_SUCCESS); - - in = (default_attr.prov_key_limit == 1) ? 
- 2 : default_attr.prov_key_limit - 1; - cr_assert(in > 0); - expected_attr.prov_key_limit = in; - - /* set defaults */ - ret = ops->set_val(&fabric->fid, - GNI_DEFAULT_USER_REGISTRATION_LIMIT, - &expected_attr.user_key_limit); - cr_assert_eq(ret, FI_SUCCESS); - - ret = ops->set_val(&fabric->fid, - GNI_DEFAULT_PROV_REGISTRATION_LIMIT, - &expected_attr.prov_key_limit); - cr_assert_eq(ret, FI_SUCCESS); - - /* get defaults */ - ret = ops->get_val(&fabric->fid, - GNI_DEFAULT_USER_REGISTRATION_LIMIT, - &actual_attr.user_key_limit); - cr_assert_eq(ret, FI_SUCCESS); - cr_assert_eq(actual_attr.user_key_limit, expected_attr.user_key_limit); - - ret = ops->get_val(&fabric->fid, - GNI_DEFAULT_PROV_REGISTRATION_LIMIT, - &actual_attr.prov_key_limit); - cr_assert_eq(ret, FI_SUCCESS); - cr_assert_eq(actual_attr.prov_key_limit, expected_attr.prov_key_limit); - - /* clear result buffer */ - memset(&actual_attr, 0x0, sizeof(actual_attr)); - - /* ensure defaults were propogated to default key */ - ret = auth_ops->get_val(GNIX_PROV_DEFAULT_AUTH_KEY, - GNIX_PROV_DEFAULT_AUTH_KEYLEN, - GNIX_USER_KEY_LIMIT, - &actual_attr.user_key_limit); - cr_assert_eq(ret, FI_SUCCESS); - cr_assert_eq(actual_attr.user_key_limit, expected_attr.user_key_limit); - - ret = auth_ops->get_val(GNIX_PROV_DEFAULT_AUTH_KEY, - GNIX_PROV_DEFAULT_AUTH_KEYLEN, - GNIX_PROV_KEY_LIMIT, - &actual_attr.prov_key_limit); - cr_assert_eq(ret, FI_SUCCESS); - cr_assert_eq(actual_attr.prov_key_limit, expected_attr.prov_key_limit); - - ret = auth_ops->get_val(GNIX_PROV_DEFAULT_AUTH_KEY, - GNIX_PROV_DEFAULT_AUTH_KEYLEN, - GNIX_TOTAL_KEYS_NEEDED, - &total); - cr_assert_eq(ret, FI_SUCCESS); - cr_assert_eq(total, - expected_attr.prov_key_limit + expected_attr.user_key_limit); - - /* set default auth key limits to something else */ - expected_attr.user_key_limit >>= 1; - expected_attr.prov_key_limit >>= 1; - - ret = auth_ops->set_val(GNIX_PROV_DEFAULT_AUTH_KEY, - GNIX_PROV_DEFAULT_AUTH_KEYLEN, - GNIX_USER_KEY_LIMIT, - 
&expected_attr.user_key_limit); - cr_assert_eq(ret, FI_SUCCESS); - - ret = auth_ops->set_val(GNIX_PROV_DEFAULT_AUTH_KEY, - GNIX_PROV_DEFAULT_AUTH_KEYLEN, - GNIX_PROV_KEY_LIMIT, - &expected_attr.prov_key_limit); - cr_assert_eq(ret, FI_SUCCESS); - - ret = auth_ops->get_val(GNIX_PROV_DEFAULT_AUTH_KEY, - GNIX_PROV_DEFAULT_AUTH_KEYLEN, - GNIX_USER_KEY_LIMIT, - &actual_attr.user_key_limit); - cr_assert_eq(ret, FI_SUCCESS); - cr_assert_eq(actual_attr.user_key_limit, expected_attr.user_key_limit); - - ret = auth_ops->get_val(GNIX_PROV_DEFAULT_AUTH_KEY, - GNIX_PROV_DEFAULT_AUTH_KEYLEN, - GNIX_PROV_KEY_LIMIT, - &actual_attr.prov_key_limit); - cr_assert_eq(ret, FI_SUCCESS); - cr_assert_eq(actual_attr.prov_key_limit, expected_attr.prov_key_limit); -} - -Test(fabric_bare, fi_mr_basic_1_0) -{ - int ret; - struct fi_info *hints; - struct fi_info *info; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = FI_MR_BASIC; - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(FI_VERSION(1, 0), NULL, 0, 0, hints, &info); - cr_assert(ret == FI_SUCCESS, "fi_getinfo"); - cr_assert(info->domain_attr->mr_mode == FI_MR_BASIC); - - fi_freeinfo(fi); - fi_freeinfo(hints); -} - -Test(fabric_bare, fi_mr_scalable_1_0) -{ - int ret; - struct fi_info *hints; - struct fi_info *info = NULL; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = FI_MR_SCALABLE; - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(FI_VERSION(1, 0), NULL, 0, 0, hints, &info); - if (info) { - cr_assert(strcmp(info->fabric_attr->prov_name,hints->fabric_attr->prov_name), - "fi_getinfo"); - } else { - cr_assert(ret == -FI_ENODATA, "fi_getinfo"); - } - - fi_freeinfo(hints); -} - -Test(fabric_bare, fi_mr_basic_1_5) -{ - int ret; - struct fi_info *hints; - struct fi_info *info; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = FI_MR_BASIC; - 
hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(FI_VERSION(1, 5), NULL, 0, 0, hints, &info); - cr_assert(ret == FI_SUCCESS, "fi_getinfo"); - cr_assert(info->domain_attr->mr_mode == FI_MR_BASIC); - - fi_freeinfo(fi); - fi_freeinfo(hints); -} - -Test(fabric_bare, fi_mr_scalable_1_5_fail) -{ - int ret; - struct fi_info *hints; - struct fi_info *info = NULL; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = FI_MR_SCALABLE; - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(FI_VERSION(1, 5), NULL, 0, 0, hints, &info); - if (info) { - cr_assert(strcmp(info->fabric_attr->prov_name,hints->fabric_attr->prov_name), - "fi_getinfo"); - } else { - cr_assert(ret == -FI_ENODATA, "fi_getinfo"); - } - - fi_freeinfo(hints); -} - -Test(fabric_bare, fi_mr_scalable_1_5_pass) -{ - int ret; - struct fi_info *hints; - struct fi_info *info; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = FI_MR_MMU_NOTIFY; - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(FI_VERSION(1, 5), NULL, 0, 0, hints, &info); - cr_assert(ret == FI_SUCCESS, "fi_getinfo"); - cr_assert(info->domain_attr->mr_mode == FI_MR_MMU_NOTIFY); - - fi_freeinfo(fi); - fi_freeinfo(hints); -} - -Test(fabric_bare, fi_mr_basic_1_5_ofi_map) -{ - int ret; - struct fi_info *hints; - struct fi_info *info; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = OFI_MR_BASIC_MAP; - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(FI_VERSION(1, 5), NULL, 0, 0, hints, &info); - cr_assert(ret == FI_SUCCESS, "fi_getinfo"); - cr_assert(info->domain_attr->mr_mode == OFI_MR_BASIC_MAP); - - fi_freeinfo(fi); - fi_freeinfo(hints); -} diff --git a/prov/gni/test/fi_addr_str.c b/prov/gni/test/fi_addr_str.c deleted file mode 100644 index 461db8f58ca..00000000000 --- a/prov/gni/test/fi_addr_str.c +++ /dev/null @@ -1,1700 +0,0 @@ -/* - * 
Copyright (c) 2017 Los Alamos National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include "gnix_rdma_headers.h" -#include "fi_ext_gni.h" -#include "gnix.h" -#include "common.h" - -#if 1 -#define dbg_printf(...) -#else -#define dbg_printf(...) 
\ - do { \ - printf(__VA_ARGS__); \ - fflush(stdout); \ - } while (0) -#endif - -#define NUMEPS 2 -#define NUMCONTEXTS (NUMEPS * 2) - -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fid_fabric *fab; -static struct fid_domain *dom[NUMEPS]; - -static struct fid_ep *ep[NUMEPS]; -static struct fid_pep *pep[NUMEPS]; -static struct fid_ep **tx_ep[NUMEPS], **rx_ep[NUMEPS]; -static struct fid_cq **tx_cq[NUMEPS]; -static struct fid_cq **rx_cq[NUMEPS]; -static struct fid_av *av[NUMEPS]; -static struct fid_cq *msg_cq[NUMEPS]; -static fi_addr_t gni_addr[NUMEPS]; -static struct fi_info *fi[NUMEPS]; -static struct fi_info *hints; - -static struct fi_cq_attr cq_attr; -static struct fi_av_attr attr; - -static void *ep_name[NUMEPS]; -static size_t ep_name_len[NUMEPS]; - -#define BUF_SZ (1<<15) -static char *target[NUMEPS]; -static char *source[NUMEPS]; - -static struct fid_cntr *send_cntr[NUMEPS], *recv_cntr[NUMEPS]; -static struct fi_cntr_attr cntr_attr = { - .events = FI_CNTR_EVENTS_COMP, - .flags = 0 -}; -static uint64_t sends[NUMEPS] = {0}, recvs[NUMEPS] = {0}; - - -static bool use_str_fmt = true; -static enum ep_type_e { - EP, PEP, SEP -} ep_type; - -static fid_t (*get_fid[3])(int); - -static int ctx_cnt; - -/****************************************************************************** -* Begin help routines -******************************************************************************/ -static fid_t get_ep_fid(int i) -{ - return &ep[i]->fid; -} - -static fid_t get_pep_fid(int i) -{ - return &pep[i]->fid; -} - -static fid_t get_sep_fid(int i) -{ - return get_ep_fid(i); -} - -struct fid_ep *get_fid_ep(int i, void **info, void **dest_addr, - void **src_addr) -{ - struct gnix_fid_ep *ep = NULL; - struct gnix_fid_sep *sep = NULL; - struct gnix_fid_pep *pep = NULL; - - switch (ep_type) { - case EP: - ep = container_of(get_fid[ep_type](i), - struct gnix_fid_ep, - ep_fid.fid); - if 
(info) - *info = (void *) ep->info; - if (dest_addr) - *dest_addr = (void *) &ep->dest_addr; - if (src_addr) - *src_addr = (void *) &ep->src_addr; - - return &ep->ep_fid; - case SEP: - sep = container_of(get_fid[ep_type](i), - struct gnix_fid_sep, - ep_fid); - if (info) - *info = (void *) sep->info; - if (dest_addr) { - if (!sep->info->dest_addr) { - sep->info->dest_addr = malloc - (sep->info->dest_addrlen); - cr_assert(sep->info->dest_addr, "malloc " - "failed"); - } - *dest_addr = sep->info->dest_addr; - } - if (src_addr) - *src_addr = (void *) &sep->my_name; - - return &sep->ep_fid; - case PEP: - pep = container_of(get_fid[ep_type](i), - struct gnix_fid_pep, - pep_fid.fid); - if (info) - *info = (void *) pep->info; - if (dest_addr) - *dest_addr = pep->info->dest_addr; - if (src_addr) - *src_addr = (void *) &pep->src_addr; - break; - default: - cr_assert_fail("Unknown endpoint type."); - } - - return NULL; -} - -/* - * FI_ADDR_STR; "gni;NODE;SERVICE;GNIX_AV_STR_ADD_VERSION;device_addr;cdm_id; - * name_type;cm_nic_cdm_id;cookie;rx_ctx_cnt - */ -int generate_rand_fas(char **node) -{ - char rand_str[GNIX_FI_ADDR_STR_LEN] = {0}; - char byte; - size_t nbytes; - - if (node) { - /*gni:3*/ - sprintf(rand_str, "gni;"); - - /*node:[0-9]+*/ - byte = (rand() % ('Z' - 'A')) + 'A'; - nbytes = 4; - memset(rand_str + strlen(rand_str), byte, nbytes); - rand_str[strlen(rand_str)] = ';'; - dbg_printf(BLUE "rand_str = %s\n" COLOR_RESET, rand_str); - - /*service:[0-9]+*/ - byte = (rand() % ('Z' - 'A')) + 'A'; - nbytes = 4; - memset(rand_str + strlen(rand_str), byte, nbytes); - rand_str[strlen(rand_str)] = ';'; - dbg_printf(BLUE "rand_str = %s\n" COLOR_RESET, rand_str); - - /*GNIX_AV_STR_ADDR_VERSION:4*/ - sprintf(rand_str + strlen(rand_str), "%04i", - GNIX_AV_STR_ADDR_VERSION); - rand_str[strlen(rand_str)] = ';'; - dbg_printf(BLUE "rand_str = %s\n" COLOR_RESET, rand_str); - - /*device_addr:10*/ - byte = (rand() % 10) + '0'; - nbytes = 8; - sprintf(rand_str + strlen(rand_str), "0x"); 
- memset(rand_str + strlen(rand_str), byte, nbytes); - rand_str[strlen(rand_str)] = ';'; - dbg_printf(BLUE "rand_str = %s\n" COLOR_RESET, rand_str); - - /*cdm_id:10*/ - byte = (rand() % 10) + '0'; - nbytes = 8; - sprintf(rand_str + strlen(rand_str), "0x"); - memset(rand_str + strlen(rand_str), byte, nbytes); - rand_str[strlen(rand_str)] = ';'; - dbg_printf(BLUE "rand_str = %s\n" COLOR_RESET, rand_str); - - /*name_type:2*/ - byte = (rand() % ('9' - '0')) + '0'; - nbytes = 2; - memset(rand_str + strlen(rand_str), byte, nbytes); - rand_str[strlen(rand_str)] = ';'; - dbg_printf(BLUE "rand_str = %s\n" COLOR_RESET, rand_str); - - /*cm_nic_cdm_id:8*/ - byte = (rand() % 10) + '0'; - nbytes = 6; - sprintf(rand_str + strlen(rand_str), "0x"); - memset(rand_str + strlen(rand_str), byte, nbytes); - rand_str[strlen(rand_str)] = ';'; - dbg_printf(BLUE "rand_str = %s\n" COLOR_RESET, rand_str); - - /*cookie:10*/ - byte = (rand() % 10) + '0'; - nbytes = 8; - sprintf(rand_str + strlen(rand_str), "0x"); - memset(rand_str + strlen(rand_str), byte, nbytes); - rand_str[strlen(rand_str)] = ';'; - dbg_printf(BLUE "rand_str = %s\n" COLOR_RESET, rand_str); - - /*rx_ctx_cnt:3*/ - byte = (rand() % 10) + '0'; - nbytes = 3; - memset(rand_str + strlen(rand_str), byte, nbytes); - dbg_printf(BLUE "rand_str = %s\n" COLOR_RESET, rand_str); - - sprintf(node[0], rand_str); - } - return 0; -} -/****************************************************************************** -* End help routines -******************************************************************************/ - -/****************************************************************************** -* Begin setup routines -******************************************************************************/ -static void fas_setup_common(uint32_t version) -{ - int i, ret; - - srand(time(NULL)); - - get_fid[EP] = get_ep_fid; - get_fid[PEP] = get_pep_fid; - get_fid[SEP] = get_sep_fid; - - /* This is sufficient for verifying FI_ADDR_STR with seps */ - 
ctx_cnt = 1; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->domain_attr->cq_data_size = NUMEPS * 2; - hints->domain_attr->data_progress = FI_PROGRESS_AUTO; - hints->domain_attr->control_progress = FI_PROGRESS_AUTO; - hints->mode = mode_bits; - hints->fabric_attr->prov_name = strdup("gni"); - hints->addr_format = use_str_fmt ? FI_ADDR_STR : FI_ADDR_GNI; - if (ep_type == SEP) { - hints->ep_attr->tx_ctx_cnt = NUMCONTEXTS; - hints->ep_attr->rx_ctx_cnt = NUMCONTEXTS; - } - - /* Get info about fabric services with the provided hints */ - for (i = 0; i < NUMEPS; i++) { - ret = fi_getinfo(version, NULL, 0, 0, hints, - &fi[i]); - cr_assert(!ret, "fi_getinfo returned: %s", fi_strerror(-ret)); - - tx_cq[i] = calloc(ctx_cnt, sizeof(*tx_cq)); - rx_cq[i] = calloc(ctx_cnt, sizeof(*rx_cq)); - tx_ep[i] = calloc(ctx_cnt, sizeof(*tx_ep)); - rx_ep[i] = calloc(ctx_cnt, sizeof(*rx_ep)); - } - - memset(&attr, 0, sizeof(attr)); - attr.rx_ctx_bits = ctx_cnt; - attr.count = NUMEPS; - - cq_attr.format = FI_CQ_FORMAT_TAGGED; - cq_attr.size = 1024; - cq_attr.wait_obj = 0; - - for (i = 0; i < NUMEPS; i++) { - target[i] = malloc(BUF_SZ); - cr_assert(target[i], "malloc returned: %s", strerror(errno)); - - source[i] = malloc(BUF_SZ); - cr_assert(source[i], "malloc returned: %s", strerror(errno)); - } - - ret = fi_fabric(fi[0]->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric returned: %s", fi_strerror(-ret)); -} - -static void fas_ep_setup(void) -{ - int ret, i, j; - size_t addrlen = 0; - - fas_setup_common(fi_version()); - ctx_cnt = MIN(ctx_cnt, fi[0]->domain_attr->rx_ctx_cnt); - ctx_cnt = MIN(ctx_cnt, fi[0]->domain_attr->tx_ctx_cnt); - - for (i = 0; i < NUMEPS; i++) { - fi[i]->ep_attr->tx_ctx_cnt = ctx_cnt; - fi[i]->ep_attr->rx_ctx_cnt = ctx_cnt; - - ret = fi_domain(fab, fi[i], dom + i, NULL); - cr_assert(!ret, "fi_domain returned: %s", fi_strerror(-ret)); - - ret = fi_cntr_open(dom[i], &cntr_attr, send_cntr 
+ i, 0); - cr_assert(!ret, "fi_cntr_open returned: %s", fi_strerror(-ret)); - - ret = fi_cntr_open(dom[i], &cntr_attr, recv_cntr + i, 0); - cr_assert(!ret, "fi_cntr_open returned: %s", fi_strerror(-ret)); - - switch (ep_type) { - case EP: - ret = fi_endpoint(dom[i], fi[i], ep + i, NULL); - cr_assert(!ret, "fi_endpoint returned: %s", - fi_strerror(-ret)); - break; - case SEP: - ret = fi_scalable_ep(dom[i], fi[i], ep + i, - NULL); - cr_assert(!ret, "fi_endpoint returned: %s", - fi_strerror(-ret)); - break; - case PEP: - ret = fi_passive_ep(fab, fi[i], pep + i, - NULL); - cr_assert(!ret, "fi_endpoint returned: %s", - fi_strerror(-ret)); - ret = fi_getname(get_fid[ep_type](i), NULL, - &addrlen); - if (use_str_fmt) { - cr_assert(addrlen == GNIX_FI_ADDR_STR_LEN, - "fi_getname returned: %s", - fi_strerror(-ret)); - } else { - cr_assert(addrlen == - sizeof(struct gnix_ep_name), - "fi_getname returned: %s", - fi_strerror(-ret)); - } - ep_name_len[i] = addrlen; - continue; - default: - cr_assert_fail("Unknown endpoint type."); - } - - ret = fi_av_open(dom[i], &attr, av + i, NULL); - cr_assert(!ret, "fi_av_open returned: %s", fi_strerror(-ret)); - - switch (ep_type) { - case EP: - case PEP: - ret = fi_cq_open(dom[i], &cq_attr, msg_cq + i, - 0); - cr_assert(!ret, "fi_cq_open returned: %s", - fi_strerror(-ret)); - - ret = fi_ep_bind(ep[i], &msg_cq[i]->fid, - FI_SEND | FI_RECV); - cr_assert(!ret, "fi_ep_bind returned: %s", - fi_strerror(-ret)); - break; - case SEP: - dbg_printf(BLUE - "ctx_cnt = %d\n" - COLOR_RESET, - ctx_cnt); - - for (j = 0; j < ctx_cnt; j++) { - ret = fi_tx_context(ep[i], j, NULL, - &tx_ep[i][j], NULL); - cr_assert(!ret, - "fi_tx_context returned: %s", - fi_strerror(-ret)); - - ret = fi_cq_open(dom[i], &cq_attr, - &tx_cq[i][j], - NULL); - cr_assert(!ret, - "fi_cq_open returned: %s", - fi_strerror(-ret)); - - ret = fi_rx_context(ep[i], j, NULL, - &rx_ep[i][j], NULL); - cr_assert(!ret, - "fi_rx_context returned: %s", - fi_strerror(-ret)); - - ret = 
fi_cq_open(dom[i], &cq_attr, - &rx_cq[i][j], - NULL); - cr_assert(!ret, - "fi_cq_open returned: %s", - fi_strerror(-ret)); - } - break; - default: - cr_assert_fail("Unknown endpoint type."); - } - - ret = fi_getname(get_fid[ep_type](i), NULL, &addrlen); - if (use_str_fmt) { - cr_assert(addrlen > sizeof(struct gnix_ep_name), - "fi_getname returned: %s", - fi_strerror(-ret)); - } else { - cr_assert(addrlen == sizeof(struct gnix_ep_name), - "fi_getname returned: %s", - fi_strerror(-ret)); - } - - ep_name[i] = malloc(addrlen); - ep_name_len[i] = addrlen; - - dbg_printf(BLUE - "ep_name_len[%d] = %lu\n" - COLOR_RESET, i, - ep_name_len[i]); - cr_assert(ep_name[i] != NULL, "malloc returned: %s", - strerror(errno)); - - ret = fi_getname(get_fid[ep_type](i), ep_name[i], &addrlen); - cr_assert(ret == FI_SUCCESS, "fi_getname returned: %s", - fi_strerror(-ret)); - } - - /* Just testing setname / getname for passive endpoints */ - if (ep_type == PEP) - return; - - for (i = 0; i < NUMEPS; i++) { - /*Insert all gni addresses into each av*/ - for (j = 0; j < NUMEPS; j++) { - ret = fi_av_insert(av[i], ep_name[j], 1, &gni_addr[j], - 0, NULL); - cr_assert(ret == 1, "fi_av_insert returned: %s", - fi_strerror(-ret)); - } - - switch (ep_type) { - case EP: - ret = fi_ep_bind(ep[i], &av[i]->fid, 0); - cr_assert(!ret, "fi_ep_bind returned: %s", - fi_strerror(-ret)); - - ret = fi_ep_bind(ep[i], &send_cntr[i]->fid, - FI_SEND); - cr_assert(!ret, "fi_ep_bind returned: %s", - fi_strerror(-ret)); - - ret = fi_ep_bind(ep[i], &recv_cntr[i]->fid, - FI_RECV); - cr_assert(!ret, "fi_ep_bind returned: %s", - fi_strerror(-ret)); - break; - case SEP: - ret = fi_scalable_ep_bind(ep[i], &av[i]->fid, - 0); - cr_assert(!ret, - "fi_scalable_ep_bind returned: %s", - fi_strerror(-ret)); - dbg_printf(BLUE - "ctx_cnt = %d\n" - COLOR_RESET, - ctx_cnt); - for (j = 0; j < ctx_cnt; j++) { - ret = fi_ep_bind(tx_ep[i][j], - &tx_cq[i][j]->fid, - FI_TRANSMIT); - cr_assert(!ret, - "fi_ep_bind returned: %s", - 
fi_strerror(-ret)); - - ret = fi_ep_bind(tx_ep[i][j], - &send_cntr[i]->fid, - FI_SEND); - cr_assert(!ret, - "fi_ep_bind returned: %s", - fi_strerror(-ret)); - - ret = fi_enable(tx_ep[i][j]); - cr_assert(!ret, - "fi_enable returned: %s", - fi_strerror(-ret)); - - ret = fi_ep_bind(rx_ep[i][j], - &rx_cq[i][j]->fid, - FI_RECV); - cr_assert(!ret, - "fi_ep_bind returned: %s", - fi_strerror(-ret)); - - ret = fi_ep_bind(rx_ep[i][j], - &recv_cntr[i]->fid, - FI_RECV); - cr_assert(!ret, - "fi_ep_bind returned: %s", - fi_strerror(-ret)); - - ret = fi_enable(rx_ep[i][j]); - cr_assert(!ret, - "fi_enable returned: %s", - fi_strerror(-ret)); - - } - break; - case PEP: - break; - default: - cr_assert_fail("Unknown endpoint type."); - } - - ret = fi_enable(ep[i]); - cr_assert(!ret, "fi_ep_enable returned: %s", fi_strerror(-ret)); - - if (ep_type != SEP) { - ret = fi_enable(ep[i]); - cr_assert_eq(ret, -FI_EOPBADSTATE, - "fi_enable returned: %s", - fi_strerror(-ret)); - } - } -} - -static void fas_getinfo_setup(void) -{ - srand(time(NULL)); - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->domain_attr->cq_data_size = NUMEPS * 2; - hints->domain_attr->data_progress = FI_PROGRESS_AUTO; - hints->domain_attr->control_progress = FI_PROGRESS_AUTO; - hints->mode = ~0; - hints->fabric_attr->prov_name = strdup("gni"); - hints->addr_format = use_str_fmt ? 
FI_ADDR_STR : FI_ADDR_GNI; -} - -static void fas_getinfo_teardown(void) -{ - fi_freeinfo(hints); -} - -static void fas_ep_setup_gni_fmt_av_map(void) -{ - use_str_fmt = false; - ep_type = EP; - attr.type = FI_AV_MAP; - - fas_ep_setup(); -} - -static void fas_ep_setup_str_fmt_av_map(void) -{ - use_str_fmt = true; - ep_type = EP; - attr.type = FI_AV_MAP; - - fas_ep_setup(); -} - -static void fas_sep_setup_gni_fmt_av_map(void) -{ - use_str_fmt = false; - ep_type = SEP; - attr.type = FI_AV_MAP; - - fas_ep_setup(); -} - -static void fas_sep_setup_str_fmt_av_map(void) -{ - use_str_fmt = true; - ep_type = SEP; - attr.type = FI_AV_MAP; - - fas_ep_setup(); -} - -static void fas_pep_setup_gni_fmt_av_map(void) -{ - use_str_fmt = false; - ep_type = PEP; - attr.type = FI_AV_MAP; - - fas_ep_setup(); -} - -static void fas_pep_setup_str_fmt_av_map(void) -{ - use_str_fmt = true; - ep_type = PEP; - attr.type = FI_AV_MAP; - - fas_ep_setup(); -} - -static void fas_ep_setup_gni_fmt_av_tbl(void) -{ - use_str_fmt = false; - ep_type = EP; - attr.type = FI_AV_TABLE; - - fas_ep_setup(); -} - -static void fas_ep_setup_str_fmt_av_tbl(void) -{ - use_str_fmt = true; - ep_type = EP; - attr.type = FI_AV_TABLE; - - fas_ep_setup(); -} - -static void fas_sep_setup_gni_fmt_av_tbl(void) -{ - use_str_fmt = false; - ep_type = SEP; - attr.type = FI_AV_TABLE; - - fas_ep_setup(); -} - -static void fas_sep_setup_str_fmt_av_tbl(void) -{ - use_str_fmt = true; - ep_type = SEP; - attr.type = FI_AV_TABLE; - - fas_ep_setup(); -} - -static void fas_pep_setup_gni_fmt_av_tbl(void) -{ - use_str_fmt = false; - ep_type = PEP; - attr.type = FI_AV_TABLE; - - fas_ep_setup(); -} - -static void fas_pep_setup_str_fmt_av_tbl(void) -{ - use_str_fmt = true; - ep_type = PEP; - attr.type = FI_AV_TABLE; - - fas_ep_setup(); -} - -static void fas_teardown_common(void) -{ - int ret = 0, i = 0, j; - - for (; i < NUMEPS; i++) { - if (ep_type == SEP || ep_type == EP) { - ret = fi_close(&recv_cntr[i]->fid); - cr_assert(!ret, "failure in 
closing recv cntr."); - - ret = fi_close(&send_cntr[i]->fid); - cr_assert(!ret, "failure in closing send cntr."); - } - - switch (ep_type) { - case EP: - ret = fi_close(&msg_cq[i]->fid); - cr_assert(!ret, "failure in closing msg cq."); - break; - case SEP: - for (j = 0; j < ctx_cnt; j++) { - ret = fi_close(&tx_ep[i][j]->fid); - cr_assert(!ret, - "failure closing tx_ep."); - - ret = fi_close(&rx_ep[i][j]->fid); - cr_assert(!ret, - "failure closing rx_ep."); - - ret = fi_close(&tx_cq[i][j]->fid); - cr_assert(!ret, - "failure closing tx cq."); - - ret = fi_close(&rx_cq[i][j]->fid); - cr_assert(!ret, - "failure closing rx cq."); - } - break; - case PEP: - ret = fi_close(get_fid[ep_type](i)); - cr_assert(!ret, "failure in closing ep."); - continue; - break; - default: - cr_assert_fail("Unknown endpoint type."); - break; - } - - ret = fi_close(get_fid[ep_type](i)); - cr_assert(!ret, "failure in closing ep."); - - ret = fi_close(&av[i]->fid); - cr_assert(!ret, "failure in closing av."); - - ret = fi_close(&dom[i]->fid); - cr_assert(!ret, "failure in closing domain."); - - free(ep_name[i]); - free(target[i]); - free(source[i]); - free(tx_cq[i]); - free(tx_ep[i]); - free(rx_cq[i]); - free(rx_ep[i]); - - fi_freeinfo(fi[i]); - } - - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); - - fi_freeinfo(hints); -} -/****************************************************************************** -* End setup and teardown routines -******************************************************************************/ - -/****************************************************************************** -* Begin verification routines -******************************************************************************/ -static void fas_to_ep_name(char *ep_name_str, struct gnix_ep_name *rebuilt) -{ - char *buf; - - dbg_printf(BLUE - "ep_name_str(%p) = %s.\n" - COLOR_RESET, ep_name_str, ep_name_str); - - buf = strtok(ep_name_str, ";"); - - cr_assert_not_null(buf, "address family 
not found"); - - dbg_printf(BLUE - "buf = %s\nbuf_len = %lu\n" - COLOR_RESET, buf, - strlen(buf)); - cr_assert(!memcmp(buf, "gni", 3)); - - buf = strtok(NULL, ";"); - cr_assert_not_null(buf, "node not found"); - dbg_printf(BLUE - "buf = %s\nbuf_len = %lu\n" - COLOR_RESET, buf, - strlen(buf)); - - buf = strtok(NULL, ";"); - cr_assert_not_null(buf, "service not found"); - dbg_printf(BLUE - "buf = %s\nbuf_len = %lu\n" - COLOR_RESET, buf, strlen(buf)); - - - buf = strtok(NULL, ";"); - cr_assert_not_null(buf, "zeroth additional field " - "(GNIX_AV_STR_ADDR_VERSION) not found"); - dbg_printf(BLUE - "buf = %s\nbuf_len = %lu\n" - COLOR_RESET, buf, strlen(buf)); - - buf = strtok(NULL, ";"); - cr_assert_not_null(buf, "first additional field (device address) not " - "found"); - rebuilt->gnix_addr.device_addr = strtol(buf, NULL, 16); - dbg_printf(BLUE - "buf = %s\nbuf_len = %lu\n" - COLOR_RESET, buf, strlen(buf)); - - buf = strtok(NULL, ";"); - cr_assert_not_null(buf, "second additional field (cdm id) not found"); - rebuilt->gnix_addr.cdm_id = strtol(buf, NULL, 16); - dbg_printf(BLUE - "buf = %s\nbuf_len = %lu\n" - COLOR_RESET, buf, strlen(buf)); - - buf = strtok(NULL, ";"); - cr_assert_not_null(buf, "third additional field (name type) not found"); - rebuilt->name_type = strtol(buf, NULL, 10); - dbg_printf(BLUE - "buf = %s\nbuf_len = %lu\n" - COLOR_RESET, buf, strlen(buf)); - - buf = strtok(NULL, ";"); - cr_assert_not_null(buf, "forth additional field (cm_nic_cdm_id) not " - "found"); - rebuilt->cm_nic_cdm_id = strtol(buf, NULL, 16); - dbg_printf(BLUE - "buf = %s\nbuf_len = %lu\n" - COLOR_RESET, buf, strlen(buf)); - - buf = strtok(NULL, ";"); - cr_assert_not_null(buf, "fifth additional field (cookie) not found"); - rebuilt->cookie = strtol(buf, NULL, 16); - dbg_printf(BLUE - "buf = %s\nbuf_len = %lu\n" - COLOR_RESET, buf, strlen(buf)); - - buf = strtok(NULL, ";"); - cr_assert_not_null(buf, "sixth additional field (rx_ctx_cnt) not " - "found"); - rebuilt->rx_ctx_cnt = 
strtol(buf, NULL, 10); - dbg_printf(BLUE - "buf = %s\nbuf_len = %lu\n" - COLOR_RESET, buf, strlen(buf)); -} - -static void check_ep_name(struct gnix_ep_name actual, - struct gnix_ep_name expected) -{ - cr_assert_eq(expected.gnix_addr.cdm_id, actual.gnix_addr.cdm_id, - "Invalid cdm_id: expected(%x) actual(%x)", - actual.gnix_addr.cdm_id, expected.gnix_addr.cdm_id); - - cr_assert_eq(expected.name_type, actual.name_type, "Invalid name_type: " - "expected(%x) actual(%x)", actual.name_type, - expected.name_type); - - cr_assert_eq(expected.cm_nic_cdm_id, actual.cm_nic_cdm_id, "Invalid " - "cm_nic_cmd_id: expected(%x) actual(%x)", actual.cm_nic_cdm_id, - expected.cm_nic_cdm_id); - - cr_assert_eq(expected.cookie, actual.cookie, "Invalid cookie: expected" - "(%x) actual(%x)", actual.cookie, expected.cookie); - - cr_assert_eq(expected.rx_ctx_cnt, actual.rx_ctx_cnt, "Invalid " - "rx_ctx_cnt: expected(%x) actual(%x)", actual.rx_ctx_cnt, - expected.rx_ctx_cnt); -} - -static void check_ep_name_str(struct gnix_ep_name actual, void *expected, - size_t ep_name_len) -{ - char *ep_name_str; - struct gnix_ep_name rebuilt; - - ep_name_str = (char *) mem_dup(expected, ep_name_len); - - fas_to_ep_name(ep_name_str, &rebuilt); - - check_ep_name(actual, rebuilt); - - free(ep_name_str); -} - -void init_bufs(void **bufs, int nbufs, int len) -{ - int i; - char byte = (char) rand(); - - for (i = 0; i < nbufs; i++, byte = (char) rand()) { - memset(bufs[i], byte, len); - } -} - -void check_buf(char *expected, char *buf, int len) -{ - int idx; - - for (idx = 0; idx < len; idx++) { - cr_assert(expected[idx] == buf[idx], "data mismatch: " - "expected[%d] = 0x%x, buf[%d] = 0x%x", idx, - expected[idx], idx, buf[idx]); - } -} - -void check_tagged_cqe(struct fi_cq_tagged_entry expected_tcqe, - struct fi_cq_tagged_entry fi_tcqe) -{ - cr_assert_eq(expected_tcqe.op_context, fi_tcqe.op_context, - "Invalid op_context: expected(%p) actual(%p)", - expected_tcqe.op_context, fi_tcqe.op_context); - - 
cr_assert_eq(expected_tcqe.flags, fi_tcqe.flags, - "Invalid flags: expected(0x%lx) actual(0x%lx", - expected_tcqe.flags, fi_tcqe.flags); - - cr_assert_eq(expected_tcqe.len, fi_tcqe.len, - "Invalid len: expected(%lu) actual(%lu)", - expected_tcqe.len, fi_tcqe.len); - - cr_assert_eq(expected_tcqe.buf, fi_tcqe.buf, - "Invalid buf: expected(%p) actual(%p)", - expected_tcqe.buf, fi_tcqe.buf); - - cr_assert_eq(expected_tcqe.data, fi_tcqe.data, - "Invalid data: expected(0x%lx) actual(0x%lx)", - expected_tcqe.data, fi_tcqe.data); - - cr_assert_eq(expected_tcqe.tag, fi_tcqe.tag, - "Invalid tag: expected(0x%lx) actual(0x%lx)", - expected_tcqe.tag, fi_tcqe.tag); -} -/****************************************************************************** -* End verification routines -******************************************************************************/ - -/****************************************************************************** -* Begin test running routines -******************************************************************************/ -void do_getname(void) -{ - int i, ret; - size_t addrlen; - void *addr; - struct gnix_ep_name *src_addr; - - ret = fi_getname(get_fid[ep_type](0), NULL, NULL); - cr_assert(ret == -FI_EINVAL, "fi_getname returned: %s", - fi_strerror(-ret)); - - for (i = 0; i < NUMEPS; i++) { - ret = fi_getname(get_fid[ep_type](i), NULL, &addrlen); - cr_assert(ret == -FI_ETOOSMALL, "fi_getname returned: %s", - fi_strerror(-ret)); - if (use_str_fmt) { - cr_assert(addrlen == GNIX_FI_ADDR_STR_LEN, - "addrlen: %lu does not match size for " - "FI_ADDR_STR", addrlen); - } else { - cr_assert(addrlen == sizeof(struct gnix_ep_name), - "addrlen: %lu does not match the size for" - " FI_ADDR_GNI", addrlen); - } - - addr = malloc(addrlen); - ret = errno; - cr_assert_not_null(addr, "malloc returned: %s", strerror(ret)); - - ret = fi_getname(get_fid[ep_type](i), addr, &addrlen); - cr_assert(ret == FI_SUCCESS, "fi_getname returned: %s", - fi_strerror(-ret)); - - if 
(use_str_fmt) { - cr_assert(addrlen == GNIX_FI_ADDR_STR_LEN, - "addrlen: %lu does not match size for " - "FI_ADDR_STR", addrlen); - } else { - cr_assert(addrlen == sizeof(struct gnix_ep_name), - "addrlen: %lu does not match the size for " - "FI_ADDR_GNI", addrlen); - } - - get_fid_ep(i, NULL, NULL, (void **) &src_addr); - - dbg_printf(BLUE "ep_name = %p\n" COLOR_RESET, src_addr); - - if (use_str_fmt) - check_ep_name_str(*src_addr, addr, ep_name_len[i]); - free(addr); - } -} - -void do_setname(void) -{ - int i, ret; - void *addr; - struct gnix_ep_name *src_addr = NULL, rebuilt; - struct fi_info *info = NULL; - - ret = fi_setname(get_fid[ep_type](0), NULL, 0xbabbbcbd); - cr_assert(ret == -FI_EINVAL, "fi_setname returned: %s", - fi_strerror(-ret)); - - for (i = 0; i < NUMEPS; i++) { - addr = malloc(ep_name_len[i]); - ret = errno; - cr_assert_not_null(addr, "malloc returned: %s", strerror(ret)); - - if (use_str_fmt) - generate_rand_fas((char **) &addr); - else - init_bufs(&addr, 1, ep_name_len[i]); - - ret = fi_setname(get_fid[ep_type](i), addr, ep_name_len[i]); - cr_assert(ret == FI_SUCCESS, "fi_setname returned: %s", - fi_strerror(-ret)); - - get_fid_ep(i, (void **) &info, (void **) NULL, (void **) - &src_addr); - - /* Ensure that the address was set properly. 
*/ - if (use_str_fmt) { - fas_to_ep_name(addr, &rebuilt); - check_ep_name(rebuilt, *src_addr); - } else { - check_ep_name(((struct gnix_ep_name *)addr)[0], - *src_addr); - } - - - free(addr); - } -} - -void do_getpeer(void) -{ - int i = 0, ret; - size_t addrlen; - void *addr; - struct gnix_ep_name *dest_addr, rebuilt; - struct fid_ep *ep_fid; - - ret = fi_getpeer(get_fid_ep(0, NULL, NULL, NULL), NULL, NULL); - cr_assert(ret == -FI_EINVAL, "fi_getpeer returned: %s", - fi_strerror(-ret)); - - for (i = 0; i < NUMEPS; i++) { - ep_fid = get_fid_ep(i, NULL, (void **) &dest_addr, NULL); - addrlen = ep_name_len[i]; - addr = malloc(addrlen); - ret = errno; - cr_assert_not_null(addr, "malloc returned: %s", strerror(ret)); - init_bufs(&addr, 1, addrlen); - - addrlen = 0; - ret = fi_getpeer(ep_fid, addr, &addrlen); - cr_assert(ret == -FI_ETOOSMALL, "fi_getpeer returned: %s", - fi_strerror(-ret)); - - ret = fi_getpeer(ep_fid, addr, &addrlen); - cr_assert(ret == FI_SUCCESS, "fi_getpeer returned: %s", - fi_strerror(-ret)); - - if (use_str_fmt) { - dbg_printf(BLUE "strlen(addr) = %lu\n" COLOR_RESET, - strlen(addr)); - - fas_to_ep_name(addr, &rebuilt); - check_ep_name(*dest_addr, rebuilt); - } else { - check_ep_name(*dest_addr, - ((struct gnix_ep_name *) addr)[0]); - } - - free(addr); - } -} - -void do_getname_enosys(void) -{ - int ret, i; - - for (i = 0; i < NUMEPS; i++) { - ret = fi_getname(get_fid[ep_type](i), NULL, NULL); - cr_assert_eq(ret, -FI_ENOSYS, "Invalid return value: %s", - fi_strerror(-ret)); - } -} - -void do_setname_enosys(void) -{ - int ret, i; - - for (i = 0; i < NUMEPS; i++) { - ret = fi_setname(get_fid[ep_type](i), NULL, 0); - cr_assert_eq(ret, -FI_ENOSYS, "Invalid return value: %s", - fi_strerror(-ret)); - } -} - -void do_getpeer_enosys(void) -{ - int ret = 0, i; - struct gnix_fid_pep *gnix_pep; - struct fid_ep *ep_fid = NULL; - - for (i = 0; i < NUMEPS; i++) { - switch (ep_type) { - case EP: - ep_fid = get_fid_ep(i, NULL, NULL, NULL); - ret = 
fi_getpeer(ep_fid, NULL, NULL); - break; - case SEP: - ep_fid = get_fid_ep(i, NULL, NULL, NULL); - ret = fi_getpeer(ep_fid, NULL, NULL); - break; - case PEP: - gnix_pep = container_of(get_fid[ep_type](i), - struct gnix_fid_pep, - pep_fid.fid); - ret = gnix_pep->pep_fid.cm->getpeer(NULL, NULL, - NULL); - break; - default: - cr_assert_fail("Unknown endpoint type."); - } - - cr_assert_eq(ret, -FI_ENOSYS, "Invalid return value: %s", - fi_strerror(-ret)); - } -} - -void do_ep_send_recv_iter(int len) -{ - ssize_t sz; - int i = 0; - uint64_t cntr; - ssize_t ret, src_done, dest_done; - struct fi_cq_tagged_entry s_cqe = {(void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX}; - struct fi_cq_tagged_entry d_cqe = {(void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX}; - struct fi_cq_tagged_entry s_expected_cqe, d_expected_cqe; - - init_bufs((void **) source, NUMEPS, len); - init_bufs((void **) target, NUMEPS, len); - - for (i = 0; i < NUMEPS; i++) { - dbg_printf(BLUE - "From ep(%d) to ep(%d) of xfer size %d\n" - COLOR_RESET, i, NUMEPS - 1 - i, len); - - s_expected_cqe.buf = NULL; - s_expected_cqe.data = 0; - s_expected_cqe.flags = (FI_MSG | FI_SEND); - s_expected_cqe.len = 0; - s_expected_cqe.op_context = target[NUMEPS - 1 - i]; - s_expected_cqe.tag = 0; - - sz = fi_send(ep[i], source[i], len, NULL, - gni_addr[NUMEPS - 1 - i], target[NUMEPS - 1 - i]); - - cr_assert(sz == FI_SUCCESS, "Invalid return value: %s", - fi_strerror((int) -sz)); - - d_expected_cqe.buf = NULL; - d_expected_cqe.data = 0; - d_expected_cqe.flags = (FI_MSG | FI_RECV); - d_expected_cqe.len = len; - d_expected_cqe.op_context = source[i]; - d_expected_cqe.tag = 0; - - sz = fi_recv(ep[NUMEPS - 1 - i], target[NUMEPS - 1 - i], len, - NULL, gni_addr[i], source[i]); - - cr_assert(sz == FI_SUCCESS, "Invalid return value: %s", - fi_strerror((int) -sz)); - - src_done = dest_done = 0; - /* Progress sender and receiver */ - do { - ret = fi_cq_read(msg_cq[i], &s_cqe, 1); - if (ret == 1) - 
src_done = 1; - - ret = fi_cq_read(msg_cq[NUMEPS - 1 - i], - &d_cqe, 1); - if (ret == 1) - dest_done = 1; - } while (src_done != 1 || dest_done != 1); - - cntr = fi_cntr_read(send_cntr[i]); - cr_assert(cntr == ++sends[i], - "Invalid send counter: actual(%lu), expected(%lu)", - cntr, sends[i]); - - cntr = fi_cntr_read(recv_cntr[NUMEPS - 1 - i]); - cr_assert(cntr == ++recvs[NUMEPS - 1 - i], - "Invalid recv counter: actual(%lu), expected(%lu)", - cntr, recvs[NUMEPS - 1 - i]); - - check_tagged_cqe(s_expected_cqe, s_cqe); - check_tagged_cqe(d_expected_cqe, d_cqe); - - check_buf(source[i], target[NUMEPS - 1 - i], len); - } -} - -void do_sep_send_recv_iter(int idx, int len) -{ - ssize_t sz; - int i = 0; - uint64_t cntr; - ssize_t ret, src_done, dest_done; - struct fi_cq_tagged_entry s_cqe = {(void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX}; - struct fi_cq_tagged_entry d_cqe = {(void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX}; - struct fi_cq_tagged_entry s_expected_cqe, d_expected_cqe; - - init_bufs((void **) source, NUMEPS, len); - init_bufs((void **) target, NUMEPS, len); - - for (i = 0; i < NUMEPS; i++) { - dbg_printf(BLUE - "From ep(%d) to ep(%d) of xfer size %d\n" - COLOR_RESET, i, NUMEPS - 1 - i, len); - - s_expected_cqe.buf = NULL; - s_expected_cqe.data = 0; - s_expected_cqe.flags = (FI_MSG | FI_TRANSMIT/*FI_SEND*/); - s_expected_cqe.len = 0; - s_expected_cqe.op_context = target[NUMEPS - 1 - i]; - s_expected_cqe.tag = 0; - - sz = fi_send(tx_ep[i][idx], source[i], len, NULL, - gni_addr[NUMEPS - 1 - i], target[NUMEPS - 1 - i]); - - cr_assert(sz == FI_SUCCESS, "Invalid return value: %s", - fi_strerror((int) -sz)); - - d_expected_cqe.buf = NULL; - d_expected_cqe.data = 0; - d_expected_cqe.flags = (FI_MSG | FI_RECV); - d_expected_cqe.len = len; - d_expected_cqe.op_context = source[i]; - d_expected_cqe.tag = 0; - - sz = fi_recv(rx_ep[NUMEPS - 1 - i][idx], - target[NUMEPS - 1 - i], len, - NULL, gni_addr[i], source[i]); - - cr_assert(sz 
== FI_SUCCESS, "Invalid return value: %s", - fi_strerror((int) -sz)); - - src_done = dest_done = 0; - /* Progress sender and receiver */ - do { - ret = fi_cq_read(tx_cq[i][idx], &s_cqe, 1); - if (ret == 1) - src_done = 1; - - ret = fi_cq_read(rx_cq[NUMEPS - 1 - i][idx], - &d_cqe, 1); - if (ret == 1) - dest_done = 1; - } while (src_done != 1 || dest_done != 1); - - cntr = fi_cntr_read(send_cntr[i]); - cr_assert(cntr == ++sends[i], - "Invalid send counter: actual(%lu), expected(%lu)", - cntr, sends[i]); - - cntr = fi_cntr_read(recv_cntr[NUMEPS - 1 - i]); - cr_assert(cntr == ++recvs[NUMEPS - 1 - i], - "Invalid recv counter: actual(%lu), expected(%lu)", - cntr, recvs[NUMEPS - 1 - i]); - - check_tagged_cqe(s_expected_cqe, s_cqe); - check_tagged_cqe(d_expected_cqe, d_cqe); - - check_buf(source[i], target[NUMEPS - 1 - i], len); - } -} - -void do_send_recv(void) -{ - int len, i, j; - - switch (ep_type) { - case EP: - for (len = 2; len <= BUF_SZ; len *= 2) { - do_ep_send_recv_iter(len); - } - break; - - case SEP: - for (j = 0; j < ctx_cnt; j++) { - for (len = 2; len <= BUF_SZ; len *= 2) { - do_sep_send_recv_iter(j, len); - } - - for (i = 0; i < NUMEPS; i++) { - fi_cntr_set(send_cntr[i], 0); - fi_cntr_set(recv_cntr[i], 0); - } - } - break; - case PEP: - break; - - default: - cr_assert_fail("Invalid endpoint type."); - } -} - -/* - * Note: the default addr_format is FI_ADDR_STR unless use_str_fmt is otherwise - * set to false. - */ -void do_invalid_fi_getinfo(void) -{ - int i, ret; - - for (i = 0; i < NUMEPS; i++) { - /* - * This test is to ensure that gni provider fails to provide - * info if the FI_ADDR_STR format is being used and both the - * node and service parameters are non-NULL. - * - * See the fi_getinfo man page DESCRIPTION section. 
- */ - ret = fi_getinfo(FI_VERSION(1, 5), "this is a test", "testing", - 0, hints, &fi[i]); - cr_assert(ret == -FI_ENODATA, "fi_getinfo returned: %s", - fi_strerror(-ret)); - - fi_freeinfo(fi[i]); - - /* - * This test is to ensure that the gni provider does not allow - * FI_ADDR_STR to be used with api versions <= 1.5. - */ - ret = fi_getinfo(FI_VERSION(1, 0), NULL, NULL, 0, hints, - &fi[i]); - cr_assert(ret == -FI_ENODATA, "fi_getinfo returned: %s", - fi_strerror(-ret)); - - fi_freeinfo(fi[i]); - } -} - -void do_valid_fi_getinfo_with_fas(void) -{ - int i, ret; - char *fas = calloc(GNIX_FI_ADDR_STR_LEN, sizeof(char)); - struct gnix_ep_name ep_name; - - /* - * This test ensures the gni provider can set addresses properly with - * FI_ADDR_STR and no flags set. - */ - for (i = 0; i < NUMEPS; i++) { - generate_rand_fas(&fas); - - ret = fi_getinfo(fi_version(), fas, NULL, 0, hints, &fi[i]); - cr_assert(ret == FI_SUCCESS, "fi_getinfo returned: %s", - fi_strerror(-ret)); - - dbg_printf(BLUE "fi[%d]->dest_addr = %s\n" COLOR_RESET, - i, (char *) fi[i]->dest_addr); - - fas_to_ep_name(fas, &ep_name); - check_ep_name(((struct gnix_ep_name *) fi[i]->dest_addr)[0], - ep_name); - - fi_freeinfo(fi[i]); - } - - /* - * This test ensures the gni provider can set addresses properly with - * FI_ADDR_STR and the FI_SOURCE set. 
- */ - for (i = 0; i < NUMEPS; i++) { - generate_rand_fas(&fas); - - ret = fi_getinfo(fi_version(), fas, NULL, FI_SOURCE, hints, - &fi[i]); - cr_assert(ret == FI_SUCCESS, "fi_getinfo returned: %s", - fi_strerror(-ret)); - - fas_to_ep_name(fas, &ep_name); - check_ep_name(((struct gnix_ep_name *) fi[i]->src_addr)[0], - ep_name); - - fi_freeinfo(fi[i]); - } - - free(fas); -} -/****************************************************************************** -* End test running routines -******************************************************************************/ - -/****************************************************************************** -* Begin test invocation routines - FI_AV_MAP -******************************************************************************/ -TestSuite(fas_ep_str_fmt_av_map, .init = fas_ep_setup_str_fmt_av_map, - .fini = fas_teardown_common, .disabled = false); - -Test(fas_ep_str_fmt_av_map, getname) -{ - do_getname(); -} - -Test(fas_ep_str_fmt_av_map, setname) -{ - do_setname(); -} - -Test(fas_ep_str_fmt_av_map, getpeer) -{ - do_getpeer(); -} - -Test(fas_ep_str_fmt_av_map, send_recv) -{ - do_send_recv(); -} - -TestSuite(fas_ep_gni_fmt_av_map, .init = fas_ep_setup_gni_fmt_av_map, - .fini = fas_teardown_common, .disabled = false); - -Test(fas_ep_gni_fmt_av_map, getname) -{ - do_getname(); -} - -Test(fas_ep_gni_fmt_av_map, setname) -{ - do_setname(); -} - -Test(fas_ep_gni_fmt_av_map, getpeer) -{ - do_getpeer(); -} - -Test(fas_ep_gni_fmt_av_map, send_recv) -{ - do_send_recv(); -} - -TestSuite(fas_sep_str_fmt_av_map, .init = fas_sep_setup_str_fmt_av_map, - .fini = fas_teardown_common, .disabled = false); - -Test(fas_sep_str_fmt_av_map, getname) -{ - do_getname(); -} - -Test(fas_sep_str_fmt_av_map, setname) -{ - do_setname(); -} - -Test(fas_sep_str_fmt_av_map, getpeer) -{ - do_getpeer(); -} - -Test(fas_sep_str_fmt_av_map, send_recv) -{ - do_send_recv(); -} - -TestSuite(fas_sep_gni_fmt_av_map, .init = fas_sep_setup_gni_fmt_av_map, - .fini = 
fas_teardown_common, .disabled = false); - -Test(fas_sep_gni_fmt_av_map, getname) -{ - do_getname(); -} - -Test(fas_sep_gni_fmt_av_map, setname) -{ - do_setname(); -} - -Test(fas_sep_gni_fmt_av_map, getpeer) -{ - do_getpeer(); -} - -Test(fas_sep_gni_fmt_av_map, send_recv) -{ - do_send_recv(); -} - -TestSuite(fas_pep_str_fmt_av_map, .init = fas_pep_setup_str_fmt_av_map, - .fini = fas_teardown_common, .disabled = false); - -Test(fas_pep_str_fmt_av_map, getname) -{ - do_getname(); -} - -Test(fas_pep_str_fmt_av_map, setname) -{ - do_setname(); -} - -Test(fas_pep_str_fmt_av_map, getpeer) -{ - do_getpeer_enosys(); -} - -TestSuite(fas_pep_gni_fmt_av_map, .init = fas_pep_setup_gni_fmt_av_map, - .fini = fas_teardown_common, .disabled = false); - -Test(fas_pep_gni_fmt_av_map, getname) -{ - do_getname(); -} - -Test(fas_pep_gni_fmt_av_map, setname) -{ - do_setname(); -} - -Test(fas_pep_gni_fmt_av_map, getpeer) -{ - do_getpeer_enosys(); -} -/****************************************************************************** - * Begin FI_AV_TABLE - ******************************************************************************/ -TestSuite(fas_ep_str_fmt_av_tbl, .init = fas_ep_setup_str_fmt_av_tbl, - .fini = fas_teardown_common, .disabled = false); - -Test(fas_ep_str_fmt_av_tbl, getname) -{ - do_getname(); -} - -Test(fas_ep_str_fmt_av_tbl, setname) -{ - do_setname(); -} - -Test(fas_ep_str_fmt_av_tbl, getpeer) -{ - do_getpeer(); -} - -Test(fas_ep_str_fmt_av_tbl, send_recv) -{ - do_send_recv(); -} - -TestSuite(fas_ep_gni_fmt_av_tbl, .init = fas_ep_setup_gni_fmt_av_tbl, - .fini = fas_teardown_common, .disabled = false); - -Test(fas_ep_gni_fmt_av_tbl, getname) -{ - do_getname(); -} - -Test(fas_ep_gni_fmt_av_tbl, setname) -{ - do_setname(); -} - -Test(fas_ep_gni_fmt_av_tbl, getpeer) -{ - do_getpeer(); -} - -Test(fas_ep_gni_fmt_av_tbl, send_recv) -{ - do_send_recv(); -} - -TestSuite(fas_sep_str_fmt_av_tbl, .init = fas_sep_setup_str_fmt_av_tbl, - .fini = fas_teardown_common, .disabled = 
false); - -Test(fas_sep_str_fmt_av_tbl, getname) -{ - do_getname(); -} - -Test(fas_sep_str_fmt_av_tbl, setname) -{ - do_setname(); -} - -Test(fas_sep_str_fmt_av_tbl, getpeer) -{ - do_getpeer(); -} - -Test(fas_sep_str_fmt_av_tbl, send_recv) -{ - do_send_recv(); -} - -TestSuite(fas_sep_gni_fmt_av_tbl, .init = fas_sep_setup_gni_fmt_av_tbl, - .fini = fas_teardown_common, .disabled = false); - -Test(fas_sep_gni_fmt_av_tbl, getname) -{ - do_getname(); -} - -Test(fas_sep_gni_fmt_av_tbl, setname) -{ - do_setname(); -} - -Test(fas_sep_gni_fmt_av_tbl, getpeer) -{ - do_getpeer(); -} - -Test(fas_sep_gni_fmt_av_tbl, send_recv) -{ - do_send_recv(); -} - -TestSuite(fas_pep_str_fmt_av_tbl, .init = fas_pep_setup_str_fmt_av_tbl, - .fini = fas_teardown_common, .disabled = false); - -Test(fas_pep_str_fmt_av_tbl, getname) -{ - do_getname(); -} - -Test(fas_pep_str_fmt_av_tbl, setname) -{ - do_setname(); -} - -Test(fas_pep_str_fmt_av_tbl, getpeer) -{ - do_getpeer_enosys(); -} - -TestSuite(fas_pep_gni_fmt_av_tbl, .init = fas_pep_setup_gni_fmt_av_tbl, - .fini = fas_teardown_common, .disabled = false); - -Test(fas_pep_gni_fmt_av_tbl, getname) -{ - do_getname(); -} - -Test(fas_pep_gni_fmt_av_tbl, setname) -{ - do_setname(); -} - -Test(fas_pep_gni_fmt_av_tbl, getpeer) -{ - do_getpeer_enosys(); -} - -TestSuite(fas_getinfo_str_fmt, .init = fas_getinfo_setup, .fini = - fas_getinfo_teardown, .disabled = false); - -/* TODO: uncomment the ifdef below after the 1.5 release */ -#if 0 -Test(fas_getinfo_str_fmt, getinfo_invalid_param) -{ - do_invalid_fi_getinfo(); -} -#endif - -Test(fas_getinfo_str_fmt, getinfo_valid_param) -{ - do_valid_fi_getinfo_with_fas(); -} -/****************************************************************************** - * End test invocation routines - ******************************************************************************/ diff --git a/prov/gni/test/freelist.c b/prov/gni/test/freelist.c deleted file mode 100644 index 2f7aba08da3..00000000000 --- 
a/prov/gni/test/freelist.c +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Copyright (c) 2015 Cray Inc. All rights reserved. - * Copyright (c) 2017 Los Alamos National Security, LLC. - * All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include - -#include "gnix_freelist.h" - -#include -#include "gnix_rdma_headers.h" - -#if 0 -#define dbg_printf(...) -#else -#define dbg_printf(...) 
fprintf(stderr, __VA_ARGS__); fflush(stderr) -#endif - -static void setup(void) -{ - srand(time(NULL)); -} - -static void teardown(void) -{ -} - -static void generate_perm(int *perm, int len) -{ - int i; - /* good 'nuff */ - for (i = 0; i < len; i++) { - int t = perm[i]; - int j = rand() % len; - - perm[i] = perm[j]; - perm[j] = t; - } -} - -TestSuite(gnix_freelist, .init = setup, .fini = teardown); - -Test(gnix_freelist, freelist_init_destroy) -{ - const int n = 13; - struct gnix_freelist fls[n]; - int i, ret; - - /* non-optimized code may not zero structures */ - memset(fls, 0x0, n * sizeof(struct gnix_freelist)); - - for (i = 0; i < n; i++) { - ret = _gnix_fl_init(sizeof(struct dlist_entry), 0, - 2*n, n, n, 3*n, &fls[i]); - cr_assert_eq(ret, FI_SUCCESS, "Failed to initialize freelist"); - } - - for (i = n-1; i >= 0; i--) - _gnix_fl_destroy(&fls[i]); -} - -Test(gnix_freelist, freelist_refill_test) -{ - struct gnix_freelist fl; - int i, ret; - const int num_elems = 71; - struct dlist_entry *elems[num_elems]; - const int refill_size = 47; - struct dlist_entry *refill_elems[refill_size]; - - /* non-optimized code may not zero structures */ - memset(&fl, 0x0, sizeof(struct gnix_freelist)); - - ret = _gnix_fl_init(sizeof(struct dlist_entry), 0, - num_elems, refill_size, 0, 0, &fl); - cr_assert_eq(ret, FI_SUCCESS, "Failed to initialize freelist"); - - for (i = 0; i < num_elems; i++) { - ret = _gnix_fl_alloc(&elems[i], &fl); - cr_assert_eq(ret, FI_SUCCESS, "Failed to obtain dlist_entry"); - } - cr_assert(_gnix_fl_empty(&fl), "Freelist not empty"); - - for (i = 0; i < refill_size; i++) { - ret = _gnix_fl_alloc(&refill_elems[i], &fl); - cr_assert_eq(ret, FI_SUCCESS, "Failed to obtain dlist_entry"); - if (i != refill_size-1) { - /* Not the last one, so must not be empty */ - cr_assert(!_gnix_fl_empty(&fl), "Freelist empty"); - } - } - cr_assert(_gnix_fl_empty(&fl), "Freelist not empty"); - - for (i = num_elems-1; i >= 0 ; i--) - _gnix_fl_free(elems[i], &fl); - - for (i = 
refill_size-1; i >= 0 ; i--) - _gnix_fl_free(refill_elems[i], &fl); - - _gnix_fl_destroy(&fl); -} - -Test(gnix_freelist, freelist_zero_refill_test) -{ - struct gnix_freelist fl; - int i, ret; - const int num_elems = 71; - struct dlist_entry *elems[num_elems + 1]; - const int refill_size = 0; - - /* non-optimized code may not zero structures */ - memset(&fl, 0x0, sizeof(struct gnix_freelist)); - - ret = _gnix_fl_init(sizeof(struct dlist_entry), 0, - num_elems, refill_size, 0, 0, &fl); - cr_assert_eq(ret, FI_SUCCESS, "Failed to initialize freelist"); - - for (i = 0; i < num_elems; i++) { - ret = _gnix_fl_alloc(&elems[i], &fl); - cr_assert_eq(ret, FI_SUCCESS, "Failed to obtain dlist_entry"); - } - - cr_assert(_gnix_fl_empty(&fl), "Freelist not empty"); - - ret = _gnix_fl_alloc(&elems[num_elems], &fl); - cr_assert_eq(ret, -FI_ECANCELED, "Unexpected return code from " - "_gnix_fl_alloc"); - - for (i = num_elems-1; i >= 0 ; i--) - _gnix_fl_free(elems[i], &fl); - - _gnix_fl_destroy(&fl); -} - -struct list_ts { - char dummy[7]; - struct dlist_entry e; - int n; -}; - -Test(gnix_freelist, freelist_random_alloc_free) { - - struct gnix_freelist fl; - int i, ret, refill_size = 0; - const int n = 719; - int perm[n]; - struct dlist_entry *de = NULL; - struct list_ts *ts[n]; - - while (++refill_size <= 23) { - for (i = 0; i < n; i++) - perm[i] = i; - - generate_perm(perm, n); - - /* non-optimized code may not zero structures */ - memset(&fl, 0x0, sizeof(struct gnix_freelist)); - - ret = _gnix_fl_init(sizeof(struct list_ts), - offsetof(struct list_ts, e), 0, - refill_size, 0, 0, &fl); - cr_assert_eq(ret, FI_SUCCESS, "Failed to initialize " - "freelist"); - - for (i = 0; i < n; i++) { - ret = _gnix_fl_alloc(&de, &fl); - cr_assert_eq(ret, FI_SUCCESS, - "Failed to obtain valid " - "dlist_entry"); - ts[i] = container_of(de, struct list_ts, e); - ts[i]->n = perm[i]; - } - - for (i = 0; i < n; i++) { - int j = perm[i]; - - cr_assert(ts[j]->n == perm[j], "Incorrect value"); - 
_gnix_fl_free(&ts[j]->e, &fl); - ts[j] = NULL; - } - - _gnix_fl_destroy(&fl); - } -} - diff --git a/prov/gni/test/gnix_rdma_headers.h b/prov/gni/test/gnix_rdma_headers.h deleted file mode 100644 index 6e758912f0d..00000000000 --- a/prov/gni/test/gnix_rdma_headers.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2016 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef GNIX_RDMA_HEADERS_H -#define GNIX_RDMA_HEADERS_H - -#ifdef FABRIC_DIRECT_ENABLED -#define FABRIC_DIRECT -#endif /* FABRIC_DIRECT_ENABLED */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#endif /* GNIX_RDMA_HEADERS_H */ diff --git a/prov/gni/test/hashtable.c b/prov/gni/test/hashtable.c deleted file mode 100644 index 3fb8a5ec1ef..00000000000 --- a/prov/gni/test/hashtable.c +++ /dev/null @@ -1,757 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include -#include -#include -#include - -#include -#include - -#include -#include "gnix_rdma_headers.h" - -#define __GNIX_MAGIC_VALUE 0xDEADBEEF - -extern const gnix_hashtable_attr_t default_attr; - -typedef struct gnix_test_element { - uint64_t val; - uint64_t key; - uint64_t magic; -} gnix_test_element_t; - -#define GNIX_TEST_ELEMENT_INIT(_val, _key) \ - { .val = (_val), .key = (_key), .magic = (__GNIX_MAGIC_VALUE) } - -gnix_test_element_t elements[4] = { - GNIX_TEST_ELEMENT_INIT(1, 100), - GNIX_TEST_ELEMENT_INIT(2, 200), - GNIX_TEST_ELEMENT_INIT(10, 300), - GNIX_TEST_ELEMENT_INIT(777, 500000) -}; - -gnix_test_element_t *simple_element = &elements[0]; -gnix_hashtable_t *test_ht = NULL; - -void __gnix_hashtable_test_uninitialized(void) -{ - cr_assert(test_ht->ht_state == GNIX_HT_STATE_UNINITIALIZED); - cr_assert(test_ht->ht_size == 0); - cr_assert(test_ht->ht_lf_tbl == NULL); -} - -void __gnix_hashtable_test_setup_bare(void) -{ - cr_assert(test_ht == NULL); - test_ht = (gnix_hashtable_t *) calloc(1, sizeof(gnix_hashtable_t)); - cr_assert(test_ht != NULL); - - __gnix_hashtable_test_uninitialized(); -} - - -void __gnix_hashtable_test_teardown_bare(void) -{ - cr_assert(test_ht != NULL); - free(test_ht); - test_ht = NULL; -} - -void __gnix_hashtable_test_initialized(void) -{ - cr_assert(test_ht->ht_state == GNIX_HT_STATE_READY); - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == 0); - cr_assert(test_ht->ht_size == test_ht->ht_attr.ht_initial_size); - cr_assert(test_ht->ht_lf_tbl != NULL); -} - -void __gnix_hashtable_test_destroyed_clean(void) -{ - cr_assert(test_ht->ht_state == GNIX_HT_STATE_DEAD); - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == 0); - cr_assert(test_ht->ht_size == 0); - cr_assert(test_ht->ht_lf_tbl == NULL); -} - -void __gnix_hashtable_destroy(void) -{ - int ret = _gnix_ht_destroy(test_ht); - cr_assert(ret == 0); - __gnix_hashtable_test_destroyed_clean(); -} - -void __gnix_hashtable_initialize(void) -{ - int ret; - - ret = 
_gnix_ht_init(test_ht, NULL); - cr_assert(ret == 0); - - __gnix_hashtable_test_initialized(); -} - -void __gnix_hashtable_initialize_attr(gnix_hashtable_attr_t *attr) -{ - int ret; - - ret = _gnix_ht_init(test_ht, attr); - cr_assert(ret == 0); - - __gnix_hashtable_test_initialized(); -} - -void __gnix_hashtable_test_setup(void) -{ - __gnix_hashtable_test_setup_bare(); - - __gnix_hashtable_test_uninitialized(); - - __gnix_hashtable_initialize(); -} - -void __gnix_hashtable_test_setup_locked(void) -{ - gnix_hashtable_attr_t attr = default_attr; - - __gnix_hashtable_test_setup_bare(); - - __gnix_hashtable_test_uninitialized(); - - attr.ht_internal_locking = 1; - __gnix_hashtable_initialize_attr(&attr); -} - -void __gnix_hashtable_test_teardown(void) -{ - __gnix_hashtable_destroy(); - - __gnix_hashtable_test_teardown_bare(); -} - -/* - * Basic functionality tests for the gnix_hashtable_t object - */ - -TestSuite(gnix_hashtable_basic, - .init = __gnix_hashtable_test_setup_bare, - .fini = __gnix_hashtable_test_teardown_bare); - -TestSuite(gnix_hashtable_advanced, - .init = __gnix_hashtable_test_setup, - .fini = __gnix_hashtable_test_teardown); - -TestSuite(gnix_hashtable_locked, - .init = __gnix_hashtable_test_setup_locked, - .fini = __gnix_hashtable_test_teardown); - -Test(gnix_hashtable_basic, uninitialized) -{ - __gnix_hashtable_test_uninitialized(); -} - - -Test(gnix_hashtable_basic, initialize_ht) -{ - __gnix_hashtable_initialize(); - __gnix_hashtable_destroy(); -} - -Test(gnix_hashtable_basic, initialize_locked_ht) -{ - int ret; - gnix_hashtable_attr_t attr; - - memcpy(&attr, &default_attr, sizeof(gnix_hashtable_attr_t)); - - attr.ht_internal_locking = 1; - - ret = _gnix_ht_init(test_ht, &attr); - cr_assert(ret == 0); - - __gnix_hashtable_test_initialized(); - - __gnix_hashtable_destroy(); -} - -Test(gnix_hashtable_basic, err_initialize_twice) -{ - int ret; - - __gnix_hashtable_initialize(); - - ret = _gnix_ht_init(test_ht, NULL); - cr_assert(ret == -FI_EINVAL); - 
__gnix_hashtable_test_initialized(); - - __gnix_hashtable_destroy(); -} - -Test(gnix_hashtable_basic, err_invalid_initial_size_0) -{ - int ret; - gnix_hashtable_attr_t attr; - - memcpy(&attr, &default_attr, sizeof(gnix_hashtable_attr_t)); - - attr.ht_initial_size = 0; - - ret = _gnix_ht_init(test_ht, &attr); - cr_assert(ret == -FI_EINVAL); - __gnix_hashtable_test_uninitialized(); -} - -Test(gnix_hashtable_basic, err_invalid_initial_size_gt_max) -{ - int ret; - gnix_hashtable_attr_t attr; - - memcpy(&attr, &default_attr, sizeof(gnix_hashtable_attr_t)); - - attr.ht_initial_size = attr.ht_maximum_size * 2; - - ret = _gnix_ht_init(test_ht, &attr); - cr_assert(ret == -FI_EINVAL); - __gnix_hashtable_test_uninitialized(); -} - -Test(gnix_hashtable_basic, err_invalid_max_size) -{ - int ret; - gnix_hashtable_attr_t attr; - - memcpy(&attr, &default_attr, sizeof(gnix_hashtable_attr_t)); - - attr.ht_maximum_size = 0; - - ret = _gnix_ht_init(test_ht, &attr); - cr_assert(ret == -FI_EINVAL); - __gnix_hashtable_test_uninitialized(); -} - -Test(gnix_hashtable_basic, err_invalid_increase_step_all) -{ - int ret; - gnix_hashtable_attr_t attr; - - memcpy(&attr, &default_attr, sizeof(gnix_hashtable_attr_t)); - - attr.ht_increase_step = 0; - - ret = _gnix_ht_init(test_ht, &attr); - cr_assert(ret == -FI_EINVAL); - __gnix_hashtable_test_uninitialized(); -} - -Test(gnix_hashtable_basic, err_invalid_increase_step_mult) -{ - int ret; - gnix_hashtable_attr_t attr; - - memcpy(&attr, &default_attr, sizeof(gnix_hashtable_attr_t)); - - attr.ht_increase_step = 1; - attr.ht_increase_type = GNIX_HT_INCREASE_MULT; - - ret = _gnix_ht_init(test_ht, &attr); - cr_assert(ret == -FI_EINVAL); - __gnix_hashtable_test_uninitialized(); -} - -Test(gnix_hashtable_basic, err_invalid_increase_type) -{ - int ret; - gnix_hashtable_attr_t attr; - - memcpy(&attr, &default_attr, sizeof(gnix_hashtable_attr_t)); - - attr.ht_increase_type = -1; - - ret = _gnix_ht_init(test_ht, &attr); - cr_assert(ret == -FI_EINVAL); - 
__gnix_hashtable_test_uninitialized(); -} - -Test(gnix_hashtable_basic, err_invalid_collision) -{ - int ret; - gnix_hashtable_attr_t attr; - - memcpy(&attr, &default_attr, sizeof(gnix_hashtable_attr_t)); - - attr.ht_collision_thresh = 0; - - ret = _gnix_ht_init(test_ht, &attr); - cr_assert(ret == -FI_EINVAL); - __gnix_hashtable_test_uninitialized(); -} - -Test(gnix_hashtable_basic, err_destroy_uninitialized) -{ - int ret; - - ret = _gnix_ht_destroy(test_ht); - cr_assert(ret == -FI_EINVAL); - - __gnix_hashtable_test_uninitialized(); -} - -Test(gnix_hashtable_basic, destroy) -{ - __gnix_hashtable_initialize(); - - __gnix_hashtable_destroy(); -} - -Test(gnix_hashtable_basic, destroy_twice) -{ - int ret; - - __gnix_hashtable_initialize(); - - __gnix_hashtable_destroy(); - - ret = _gnix_ht_destroy(test_ht); - cr_assert(ret == -FI_EINVAL); - __gnix_hashtable_test_destroyed_clean(); -} - -Test(gnix_hashtable_advanced, insert_1) -{ - int ret; - - ret = _gnix_ht_insert(test_ht, simple_element->key, simple_element); - cr_assert(ret == 0); - - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == 1); -} - -Test(gnix_hashtable_advanced, insert_duplicate) -{ - int ret; - - ret = _gnix_ht_insert(test_ht, simple_element->key, simple_element); - cr_assert(ret == 0); - - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == 1); - - ret = _gnix_ht_insert(test_ht, simple_element->key, simple_element); - cr_assert(ret == -FI_ENOSPC); - - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == 1); -} - -Test(gnix_hashtable_advanced, insert_1_remove_1) -{ - int ret; - - srand(time(NULL)); - - ret = _gnix_ht_insert(test_ht, simple_element->key, simple_element); - cr_assert(ret == 0); - - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == 1); - - ret = _gnix_ht_remove(test_ht, simple_element->key); - cr_assert(ret == 0); - - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == 0); -} - - -Test(gnix_hashtable_advanced, insert_1024) -{ - int ret, i; - - gnix_test_element_t test_elements[1024]; 
- - srand(time(NULL)); - - for (i = 0; i < 1024; ++i) { - test_elements[i].key = i; - test_elements[i].val = rand() % (1024 * 1024); - test_elements[i].magic = __GNIX_MAGIC_VALUE; - } - - for (i = 0; i < 1024; ++i) { - ret = _gnix_ht_insert(test_ht, - test_elements[i].key, &test_elements[i]); - cr_assert(ret == 0); - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == (i + 1)); - } - - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == 1024); -} - - -Test(gnix_hashtable_advanced, insert_1024_remove_1024) -{ - int ret, i; - - gnix_test_element_t test_elements[1024]; - gnix_test_element_t *item; - - srand(time(NULL)); - - for (i = 0; i < 1024; ++i) { - item = &test_elements[i]; - item->key = i; - item->val = rand() % (1024 * 1024); - item->magic = __GNIX_MAGIC_VALUE; - } - - for (i = 0; i < 1024; ++i) { - item = &test_elements[i]; - ret = _gnix_ht_insert(test_ht, - item->key, item); - cr_assert(ret == 0); - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == (i + 1)); - } - - for (i = 1023; i >= 0; --i) { - item = &test_elements[i]; - cr_assert(i == item->key); - - ret = _gnix_ht_remove(test_ht, - item->key); - cr_assert(ret == 0); - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == i); - } - - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == 0); -} - -Test(gnix_hashtable_advanced, insert_2048_remove_all_resize_down) -{ - int ret, i; - int nelem = 2048; - gnix_test_element_t test_elements[2048]; - gnix_test_element_t *item; - - srand(time(NULL)); - - for (i = 0; i < nelem; ++i) { - item = &test_elements[i]; - item->key = i; - item->val = rand() % (1024 * 1024); - item->magic = __GNIX_MAGIC_VALUE; - } - - for (i = 0; i < nelem; ++i) { - item = &test_elements[i]; - ret = _gnix_ht_insert(test_ht, - item->key, item); - cr_assert(ret == 0); - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == (i + 1)); - } - - cr_assert(test_ht->ht_size > test_ht->ht_attr.ht_initial_size); - - for (i = nelem - 1; i >= 0; --i) { - item = &test_elements[i]; - cr_assert(i == 
item->key); - - ret = _gnix_ht_remove(test_ht, - item->key); - cr_assert(ret == 0); - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == i); - } - - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == 0); - /* on default settings, the hash table should resize to initial on - * removal of all elements - */ - cr_assert(test_ht->ht_size == test_ht->ht_attr.ht_initial_size); -} - - -Test(gnix_hashtable_advanced, insert_1_lookup_pass) -{ - int ret; - gnix_test_element_t *found = NULL; - - ret = _gnix_ht_insert(test_ht, - simple_element->key, simple_element); - cr_assert(ret == 0); - - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == 1); - - found = _gnix_ht_lookup(test_ht, simple_element->key); - cr_assert(found == simple_element); - cr_assert(found->magic == __GNIX_MAGIC_VALUE); -} - -Test(gnix_hashtable_advanced, insert_1_lookup_fail) -{ - int ret; - gnix_test_element_t *found = NULL; - - ret = _gnix_ht_insert(test_ht, - simple_element->key, simple_element); - cr_assert(ret == 0); - - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == 1); - - found = _gnix_ht_lookup(test_ht, simple_element->key - 1); - cr_assert(found != simple_element); - cr_assert(found == NULL); -} - -Test(gnix_hashtable_advanced, insert_1024_lookup_all) -{ - int ret, i; - gnix_test_element_t test_elements[1024]; - gnix_test_element_t *item; - gnix_test_element_t *found = NULL; - - srand(time(NULL)); - - for (i = 0; i < 1024; ++i) { - item = &test_elements[i]; - - item->key = i; - item->val = rand() % (1024 * 1024); - item->magic = __GNIX_MAGIC_VALUE; - } - - for (i = 0; i < 1024; ++i) { - item = &test_elements[i]; - - ret = _gnix_ht_insert(test_ht, - item->key, item); - cr_assert(ret == 0); - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == (i + 1)); - } - - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == 1024); - - for (i = 0; i < 1024; ++i) { - found = _gnix_ht_lookup(test_ht, test_elements[i].key); - cr_assert(found != NULL); - cr_assert(found == &test_elements[i]); - 
cr_assert(found->magic == __GNIX_MAGIC_VALUE); - } -} - -Test(gnix_hashtable_advanced, insert_1024_lookup_random) -{ - int ret, i; - gnix_test_element_t test_elements[1024]; - gnix_test_element_t *found = NULL, *to_find = NULL; - gnix_test_element_t *item; - - srand(time(NULL)); - - for (i = 0; i < 1024; ++i) { - item = &test_elements[i]; - - item->key = i; - item->val = rand() % (1024 * 1024); - item->magic = __GNIX_MAGIC_VALUE; - } - - for (i = 0; i < 1024; ++i) { - item = &test_elements[i]; - - ret = _gnix_ht_insert(test_ht, - item->key, item); - cr_assert(ret == 0); - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == (i + 1)); - } - - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == 1024); - - for (i = 0; i < 1024; ++i) { - to_find = &test_elements[rand() % 1024]; - found = _gnix_ht_lookup(test_ht, to_find->key); - cr_assert(found != NULL); - cr_assert(found == to_find); - cr_assert(found->magic == __GNIX_MAGIC_VALUE); - } -} - -Test(gnix_hashtable_advanced, insert_8K_lookup_128K_random) -{ - int ret, i, index; - gnix_test_element_t *test_elements; - gnix_test_element_t *found = NULL, *to_find = NULL; - gnix_test_element_t *item; - gnix_bitmap_t allocated = {0}; - int test_size = 8 * 1024; - int bitmap_size = 64 * test_size; - int lookups = 128 * 1024; - - test_elements = calloc(test_size, sizeof(gnix_test_element_t)); - cr_assert(test_elements != NULL); - - ret = _gnix_alloc_bitmap(&allocated, bitmap_size, NULL); - cr_assert(ret == 0); - - srand(time(NULL)); - - for (i = 0; i < test_size; ++i) { - do { - index = rand() % bitmap_size; - } while (_gnix_test_and_set_bit(&allocated, index)); - - item = &test_elements[i]; - - item->key = index; - item->val = rand() % lookups; - item->magic = __GNIX_MAGIC_VALUE; - } - - for (i = 0; i < test_size; ++i) { - item = &test_elements[i]; - - ret = _gnix_ht_insert(test_ht, - item->key, item); - cr_assert(ret == 0); - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == (i + 1)); - } - - 
cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == test_size); - - for (i = 0; i < lookups; ++i) { - to_find = &test_elements[rand() % test_size]; - found = _gnix_ht_lookup(test_ht, to_find->key); - cr_assert(found != NULL); - cr_assert(found == to_find); - cr_assert(found->magic == __GNIX_MAGIC_VALUE); - } - - ret = _gnix_free_bitmap(&allocated); - cr_expect(ret == 0); - - free(test_elements); -} - -Test(gnix_hashtable_advanced, iterate) -{ - int ret, i; - - gnix_test_element_t test_elements[1024]; - gnix_test_element_t *item; - char test_elements_found[1024] = {0}; - - srand(time(NULL)); - - for (i = 0; i < 1024; ++i) { - item = &test_elements[i]; - item->key = i; - item->val = rand() % (1024 * 1024); - item->magic = __GNIX_MAGIC_VALUE; - } - - for (i = 0; i < 1024; ++i) { - item = &test_elements[i]; - ret = _gnix_ht_insert(test_ht, - item->key, item); - cr_assert(ret == 0); - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == (i + 1)); - } - - { - GNIX_HASHTABLE_ITERATOR(test_ht, iter); - - for (i = 0; i < 1024; ++i) { - item = (gnix_test_element_t *) - _gnix_ht_iterator_next(&iter); - cr_assert(item); - cr_assert(!test_elements_found[item->key]); - test_elements_found[item->key] = 1; - } - } - - for (i = 1023; i >= 0; --i) { - item = &test_elements[i]; - cr_assert(i == item->key); - - ret = _gnix_ht_remove(test_ht, - item->key); - cr_assert(ret == 0); - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == i); - } - - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == 0); -} - -Test(gnix_hashtable_locked, iterate) -{ - int ret, i; - - gnix_test_element_t test_elements[1024]; - gnix_test_element_t *item; - char test_elements_found[1024] = {0}; - - srand(time(NULL)); - - for (i = 0; i < 1024; ++i) { - item = &test_elements[i]; - item->key = i; - item->val = rand() % (1024 * 1024); - item->magic = __GNIX_MAGIC_VALUE; - } - - for (i = 0; i < 1024; ++i) { - item = &test_elements[i]; - ret = _gnix_ht_insert(test_ht, - item->key, item); - cr_assert(ret == 0); - 
cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == (i + 1)); - } - - { - GNIX_HASHTABLE_ITERATOR(test_ht, iter); - - for (i = 0; i < 1024; ++i) { - item = (gnix_test_element_t *) - _gnix_ht_iterator_next(&iter); - cr_assert(item); - cr_assert(!test_elements_found[item->key]); - test_elements_found[item->key] = 1; - } - } - - for (i = 1023; i >= 0; --i) { - item = &test_elements[i]; - cr_assert(i == item->key); - - ret = _gnix_ht_remove(test_ht, - item->key); - cr_assert(ret == 0); - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == i); - } - - cr_assert(ofi_atomic_get32(&test_ht->ht_elements) == 0); -} diff --git a/prov/gni/test/mr.c b/prov/gni/test/mr.c deleted file mode 100644 index beaf5ed4769..00000000000 --- a/prov/gni/test/mr.c +++ /dev/null @@ -1,1854 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2018 Los Alamos National Security, LLC. All - * rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gnix.h" -#include "common.h" - -#include -#include "gnix_rdma_headers.h" -#include "gnix.h" -#include "gnix_mr.h" -#include "common.h" - -#define CHECK_HOOK(name, args...) \ - ({ \ - int __hook_return_val = 0; \ - if (hooks->name) \ - __hook_return_val = hooks->name(__func__, \ - __LINE__, ##args); \ - __hook_return_val; }) -#define HOOK_PRESENT(name) (hooks->name != NULL) - -#if 0 -#define MR_DBG(fmt, args...) fprintf(stderr, fmt, ##args) -#define HOOK_DEBUG(message, args...) \ - MR_DBG("%s:%d - " message, func, line, ##args) -#define HOOK_ASSERT(cond, message, args...) \ - do { \ - if (!(cond)) \ - HOOK_DEBUG(message, args); \ - } while (0) -#else -#define MR_DBG(fmt, args...) -#define HOOK_DEBUG(message, args...) do { } while (0) -#define HOOK_ASSERT(cond, message, args...) 
do { } while (0) -#endif - -#define GNIX_ASSUMED_ALIGNMENT 0xfff - -static struct fid_fabric *fab; -static struct fid_domain *dom; -static struct fid_mr *mr; -static struct fi_info *hints; -static struct fi_info *fi; - -#define __BUF_LEN 4096 -static unsigned char *buf; -static int buf_len = __BUF_LEN * sizeof(unsigned char); -static struct gnix_fid_domain *domain; -static uint8_t ptag; -static gnix_mr_cache_t *cache; - -static uint64_t default_access = (FI_REMOTE_READ | FI_REMOTE_WRITE - | FI_READ | FI_WRITE); -static uint64_t ro_access = (FI_REMOTE_READ | FI_WRITE); - -static uint64_t default_flags; -static uint64_t default_req_key; -static uint64_t default_offset; - -static int regions; - -struct timeval s1, s2; - -struct _mr_test_hooks { - int (*init_hook)(const char *func, int line); - int (*post_reg_hook)(const char *func, int line, - int cache_type, int inuse, int stale); - int (*post_dereg_hook)(const char *func, int line, - int inuse, int stale); - int (*get_lazy_dereg_limit)(const char *func, int line); -}; - -#define HOOK_DECL struct _mr_test_hooks *hooks -struct _mr_test_hooks empty_hooks = {NULL}; - -#define DEFAULT_REGION_COUNT 1024 -#define DEFAULT_SCALABLE_REGION_COUNT 128 - -/* this helper function doesn't work for string ops */ -static void _set_check_domain_op_value(int op, int value) -{ - int ret; - struct fi_gni_ops_domain *gni_domain_ops; - int32_t get_val, val; - - ret = fi_open_ops(&domain->domain_fid.fid, FI_GNI_DOMAIN_OPS_1, - 0, (void **) &gni_domain_ops, NULL); - cr_assert(ret == FI_SUCCESS, "fi_open_ops"); - - val = value; - ret = gni_domain_ops->set_val(&domain->domain_fid.fid, - op, &val); - cr_assert(ret == FI_SUCCESS, "set val"); - - ret = gni_domain_ops->get_val(&domain->domain_fid.fid, - op, &get_val); - cr_assert(val == get_val, "get val"); -} - -static void _set_lazy_deregistration(int val) -{ - _set_check_domain_op_value(GNI_MR_CACHE_LAZY_DEREG, val); -} - -static void _mr_setup(uint32_t version, int mr_mode) -{ - int ret = 0; 
- - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = mr_mode; - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(version, NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - ret = fi_domain(fab, fi, &dom, NULL); - cr_assert(!ret, "fi_domain"); - - buf = calloc(__BUF_LEN, sizeof(unsigned char)); - cr_assert(buf, "buffer allocation"); - - domain = container_of(dom, struct gnix_fid_domain, domain_fid.fid); - ptag = domain->auth_key->ptag; - - regions = 1024; -} - -static void mr_teardown(void) -{ - int ret = 0; - - ret = fi_close(&dom->fid); - cr_assert(!ret, "failure in closing domain."); - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); - fi_freeinfo(fi); - fi_freeinfo(hints); - - domain = NULL; - cache = NULL; - - free(buf); -} - -static void udreg_setup(void) -{ - _mr_setup(fi_version(), GNIX_MR_BASIC); - - _gnix_open_cache(domain, GNIX_MR_TYPE_UDREG); - - _set_lazy_deregistration(1); -} - -static void udreg_setup_nld(void) -{ - _mr_setup(fi_version(), GNIX_MR_BASIC); - - _gnix_open_cache(domain, GNIX_MR_TYPE_UDREG); - - _set_lazy_deregistration(0); -} - -static void internal_mr_setup(void) -{ - _mr_setup(fi_version(), GNIX_MR_BASIC); - - _gnix_open_cache(domain, GNIX_MR_TYPE_INTERNAL); - - _set_lazy_deregistration(1); -} - -static void internal_mr_setup_nld(void) -{ - _mr_setup(fi_version(), GNIX_MR_BASIC); - - _gnix_open_cache(domain, GNIX_MR_TYPE_INTERNAL); - - _set_lazy_deregistration(0); -} - -static void no_cache_scalable_setup(void) -{ - _mr_setup(fi_version(), GNIX_MR_SCALABLE); - - _gnix_open_cache(domain, GNIX_MR_TYPE_NONE); -} - -static void no_cache_basic_setup(void) -{ - _mr_setup(fi_version(), GNIX_MR_BASIC); - - _gnix_open_cache(domain, GNIX_MR_TYPE_NONE); -} - -#if HAVE_KDREG -# define KDREG_CHECK false -#else -# define KDREG_CHECK true -#endif - -/* bare tests */ 
-TestSuite(mr_internal_bare, - .init = internal_mr_setup, - .fini = mr_teardown); - -/* simple tests with lazy deregistration */ -TestSuite(mr_internal_cache, - .init = internal_mr_setup, - .fini = mr_teardown, - .disabled = KDREG_CHECK); - -#ifdef HAVE_UDREG -/* - * mr_udreg_cache doesn't work if KDREG is enabled - * since by the time this testsuite is run, the kdreg device - * has been opened as part of the criterion test suite - * run. - * /dev/kdreg should really be fixed, but that's probably - * not going to happen. - */ -TestSuite(mr_udreg_cache, - .init = udreg_setup, - .fini = mr_teardown, - .disabled = ~KDREG_CHECK); -#endif - -TestSuite(mr_no_cache_basic, - .init = no_cache_basic_setup, - .fini = mr_teardown); -TestSuite(mr_no_cache_scalable, - .init = no_cache_scalable_setup, - .fini = mr_teardown); - -/* simple tests without lazy deregistration */ -TestSuite(mr_internal_cache_nld, - .init = internal_mr_setup_nld, - .fini = mr_teardown); - - -#ifdef HAVE_UDREG -TestSuite(mr_udreg_cache_nld, - .init = udreg_setup_nld, - .fini = mr_teardown); -#endif - - -/* performance tests */ -TestSuite(perf_mr_internal, - .init = internal_mr_setup, - .fini = mr_teardown, - .disabled = true); - -#ifdef HAVE_UDREG -TestSuite(perf_mr_udreg, - .init = udreg_setup, - .fini = mr_teardown, - .disabled = true); -#endif - -TestSuite(perf_mr_no_cache, - .init = no_cache_basic_setup, - .fini = mr_teardown, - .disabled = true); - -/* test hooks */ - -static int __simple_init_hook(const char *func, int line) -{ - cr_assert(GET_DOMAIN_RO_CACHE(domain)->state == GNIX_MRC_STATE_READY); - cr_assert(GET_DOMAIN_RW_CACHE(domain)->state == GNIX_MRC_STATE_READY); - - return 0; -} - -static int __simple_post_reg_hook(const char *func, int line, int cache_type, - int expected_inuse, - int expected_stale) -{ - if (cache_type == CACHE_RO) - cache = GET_DOMAIN_RO_CACHE(domain); - else - cache = GET_DOMAIN_RW_CACHE(domain); - - cr_assert(ofi_atomic_get32(&cache->inuse.elements) == expected_inuse, 
- "%s:%d failed expected inuse condition, actual=%d expected=%d\n", - func, line, ofi_atomic_get32(&cache->inuse.elements), expected_inuse); - cr_assert(ofi_atomic_get32(&cache->stale.elements) == expected_stale, - "%s:%d failed expected stale condition, actual=%d expected=%d\n", - func, line, ofi_atomic_get32(&cache->stale.elements), expected_stale); - - return 0; -} - -static int __simple_post_dereg_hook(const char *func, int line, - int expected_inuse, - int expected_stale) -{ - cache = GET_DOMAIN_RW_CACHE(domain); - cr_assert(ofi_atomic_get32(&cache->inuse.elements) == expected_inuse); - cr_assert(ofi_atomic_get32(&cache->stale.elements) == expected_stale); - - return 0; -} - - -/* We won't do a very of the 'no cache' tests with nld "no lazy-dereg' as it - just doesn't make sense */ - -/* Test simple init, register and deregister */ -Test(mr_internal_bare, basic_init) -{ - int ret; - - /* ensure that the memory registration key is the right size */ - cr_assert_eq(sizeof(gnix_mr_key_t), 8); - - ret = fi_mr_reg(dom, (void *) buf, buf_len, default_access, - default_offset, default_req_key, - default_flags, &mr, NULL); - cr_assert(ret == FI_SUCCESS); - - ret = fi_close(&mr->fid); - cr_assert(ret == FI_SUCCESS); -} - -/* Test simple init, register and deregister */ -Test(mr_no_cache_scalable, basic_init_update) -{ - int ret; - struct fi_gni_ops_domain *gni_domain_ops; - void *base, *addr; - int len; - uint64_t align_len = 0; - struct iovec iov; - - ret = fi_open_ops(&dom->fid, FI_GNI_DOMAIN_OPS_1, - 0, (void **) &gni_domain_ops, NULL); - cr_assert(ret == FI_SUCCESS, "fi_open_ops"); - - /* registrations must be made on aligned boundaries */ - base = sbrk(0); - if ((uint64_t) base & GNIX_ASSUMED_ALIGNMENT) { - align_len = 0x1000 - ((uint64_t) base & GNIX_ASSUMED_ALIGNMENT); - sbrk(align_len); - base = (void *) ((uint64_t)base + align_len); - } - - len = 1 << 16; - - ret = fi_mr_reg(dom, base, len*4, FI_REMOTE_READ, - default_offset, default_req_key, - default_flags, 
&mr, NULL); - cr_assert(ret == FI_SUCCESS, "actual=%d expected=%d", ret, FI_SUCCESS); - - /* assume we'll get the memory */ - ret = brk((void *)((uint64_t) base + (uint64_t) (len * 2))); - cr_assert(ret == 0); - - addr = (void *) ((uint64_t) base + (uint64_t) len); - - iov.iov_base = addr; - iov.iov_len = len; - - ret = fi_mr_refresh(mr, &iov, 1, 0); - cr_assert_eq(ret, FI_SUCCESS, "expected=%d actual=%d", FI_SUCCESS, ret); - - /* retract memory and assume we didn't step on anyone */ - addr = sbrk(-len); - addr = sbrk(align_len); - cr_assert(addr >= base); - - ret = fi_close(&mr->fid); - cr_assert(ret == FI_SUCCESS); -} - - -/* Test simple init, register and deregister */ -Test(mr_internal_bare, basic_init_regv) -{ - int ret; - const struct iovec iov = { - .iov_base = buf, - .iov_len = buf_len, - }; - - ret = fi_mr_regv(dom, &iov, 1, default_access, - default_offset, default_req_key, - default_flags, &mr, NULL); - cr_assert(ret == FI_SUCCESS); - - ret = fi_close(&mr->fid); - cr_assert(ret == FI_SUCCESS); -} - - -/* Test simple init, register and deregister */ -Test(mr_internal_bare, basic_init_regattr) -{ - int ret; - const struct iovec iov = { - .iov_base = buf, - .iov_len = buf_len, - }; - struct fi_mr_attr attr = { - .mr_iov = &iov, - .iov_count = 1, - .access = default_access, - .offset = default_offset, - .requested_key = default_req_key, - .context = NULL, - }; - - ret = fi_mr_regattr(dom, &attr, default_flags, &mr); - cr_assert(ret == FI_SUCCESS); - - ret = fi_close(&mr->fid); - cr_assert(ret == FI_SUCCESS); -} - - -/* Test simple init, register and deregister, no NIC present */ -Test(mr_internal_bare, bug_1086) -{ - struct gnix_fid_mem_desc *g_mr; - struct gnix_nic *g_nic; - int ret; - - _set_lazy_deregistration(0); - - ret = fi_mr_reg(dom, (void *) buf, buf_len, default_access, - default_offset, default_req_key, - default_flags, &mr, NULL); - cr_assert(ret == FI_SUCCESS); - - g_mr = (struct gnix_fid_mem_desc *) container_of(mr, \ - struct 
gnix_fid_mem_desc, mr_fid); - - g_nic = g_mr->nic; - cr_assert(ofi_atomic_get32(&g_nic->ref_cnt.references) > 0); - - ret = fi_close(&mr->fid); - cr_assert(ret == FI_SUCCESS); -} - -/* Test invalid flags to fi_mr_reg */ -Test(mr_internal_bare, invalid_flags) -{ - int ret; - - ret = fi_mr_reg(dom, (void *) buf, buf_len, default_access, - default_offset, default_req_key, - ~0, &mr, NULL); - cr_assert(ret == -FI_EBADFLAGS); -} - -/* Test invalid access param to fi_mr_reg */ -Test(mr_internal_bare, invalid_access) -{ - int ret; - - ret = fi_mr_reg(dom, (void *) buf, buf_len, 0, - default_offset, default_req_key, - default_flags, &mr, NULL); - cr_assert(ret == -FI_EINVAL); -} - -/* Test invalid offset param to fi_mr_reg */ -Test(mr_internal_bare, invalid_offset) -{ - int ret; - - ret = fi_mr_reg(dom, (void *) buf, buf_len, default_access, - ~0, default_req_key, default_flags, - &mr, NULL); - cr_assert(ret == -FI_EINVAL); -} - -/* Test invalid buf param to fi_mr_reg */ -Test(mr_internal_bare, invalid_buf) -{ - int ret; - - ret = fi_mr_reg(dom, NULL, buf_len, default_access, - default_offset, default_req_key, default_flags, - &mr, NULL); - cr_assert(ret == -FI_EINVAL); -} - -/* Test invalid mr_o param to fi_mr_reg */ -Test(mr_internal_bare, invalid_mr_ptr) -{ - int ret; - - ret = fi_mr_reg(dom, (void *) buf, buf_len, default_access, - default_offset, default_req_key, default_flags, - NULL, NULL); - cr_assert(ret == -FI_EINVAL); -} - - -Test(mr_internal_bare, invalid_attr) -{ - int ret; - - ret = fi_mr_regattr(dom, NULL, default_flags, &mr); - cr_assert(ret == -FI_EINVAL); -} - - -Test(mr_internal_bare, invalid_iov_count) -{ - int ret; - const struct iovec iov = { - .iov_base = buf, - .iov_len = buf_len, - }; - - ret = fi_mr_regv(dom, &iov, 0, default_access, - default_offset, default_req_key, default_flags, - &mr, NULL); - cr_assert(ret == -FI_EINVAL); -} - -Test(mr_internal_bare, invalid_iov) -{ - int ret; - - ret = fi_mr_regv(dom, NULL, 1, default_access, - 
default_offset, default_req_key, default_flags, - &mr, NULL); - cr_assert(ret == -FI_EINVAL); -} - -Test(mr_internal_bare, unsupported_iov_count) -{ - int ret; - const struct iovec iov[2] = { - { - .iov_base = buf, - .iov_len = buf_len >> 2, - }, - { - .iov_base = buf + (buf_len >> 1), - .iov_len = buf_len >> 2, - }, - }; - - ret = fi_mr_regv(dom,(const struct iovec *) &iov, 2, default_access, - default_offset, default_req_key, default_flags, - &mr, NULL); - cr_assert(ret == -FI_EOPNOTSUPP); -} - -/* Test invalid fid param to fi_mr_reg */ -Test(mr_internal_bare, invalid_fid_class) -{ - int ret; - size_t old_class = dom->fid.fclass; - - dom->fid.fclass = FI_CLASS_UNSPEC; - - ret = fi_mr_reg(dom, (void *) buf, buf_len, default_access, - default_offset, default_req_key, default_flags, - &mr, NULL); - cr_assert(ret == -FI_EINVAL); - - /* restore old fclass for teardown */ - dom->fid.fclass = old_class; -} - -/* Test invalid access param to fi_mr_reg */ -Test(mr_internal_bare, invalid_requested_key) -{ - int ret; - - ret = fi_mr_reg(dom, (void *) buf, buf_len, default_access, - default_offset, 1000, - default_flags, &mr, NULL); - cr_assert(ret == FI_SUCCESS, "ret=%d\n", ret); - - cr_assert(fi_mr_key(mr) != 1000); -} - -Test(mr_no_cache_scalable, invalid_user_requested_key) -{ - int ret; - - ret = fi_mr_reg(dom, (void *) buf, buf_len, default_access, - default_offset, 1000, - default_flags, &mr, NULL); - cr_assert(ret == -FI_EKEYREJECTED); -} - -Test(mr_no_cache_scalable, invalid_key_already_assigned) -{ - int ret; - struct fid_mr *invalid; - - ret = fi_mr_reg(dom, (void *) buf, buf_len, default_access, - default_offset, 1, - default_flags, &mr, NULL); - - cr_assert(ret == FI_SUCCESS); - - ret = fi_mr_reg(dom, (void *) buf, buf_len, default_access, - default_offset, 1, - default_flags, &invalid, NULL); - cr_assert(ret == -FI_ENOKEY); - - ret = fi_close(&mr->fid); - cr_assert(ret == FI_SUCCESS); -} - -/* more advanced test setups */ -static struct _mr_test_hooks 
__simple_test_hooks = { - .init_hook = __simple_init_hook, - .post_reg_hook = __simple_post_reg_hook, - .post_dereg_hook = __simple_post_dereg_hook, -}; - -static void __simple_init_test(HOOK_DECL) -{ - int ret; - - ret = fi_mr_reg(dom, (void *) buf, buf_len, default_access, - default_offset, default_req_key, - default_flags, &mr, NULL); - cr_assert(ret == FI_SUCCESS); - - if (USING_SCALABLE(fi)) - MR_ENABLE(mr, buf, buf_len); - - CHECK_HOOK(init_hook); - - CHECK_HOOK(post_reg_hook, CACHE_RW, 1, 0); - CHECK_HOOK(post_reg_hook, CACHE_RO, 0, 0); - - ret = fi_close(&mr->fid); - cr_assert(ret == FI_SUCCESS); - - CHECK_HOOK(post_dereg_hook, 0, 1); - CHECK_HOOK(post_reg_hook, CACHE_RO, 0, 0); -} - -static void __simple_init_ro_test(HOOK_DECL) -{ - int ret; - - ret = fi_mr_reg(dom, (void *) buf, buf_len, ro_access, - default_offset, default_req_key, - default_flags, &mr, NULL); - cr_assert(ret == FI_SUCCESS); - - if (USING_SCALABLE(fi)) - MR_ENABLE(mr, buf, buf_len); - - CHECK_HOOK(init_hook); - - CHECK_HOOK(post_reg_hook, CACHE_RW, 0, 0); - CHECK_HOOK(post_reg_hook, CACHE_RO, 1, 0); - - ret = fi_close(&mr->fid); - cr_assert(ret == FI_SUCCESS); - - CHECK_HOOK(post_dereg_hook, 0, 0); - CHECK_HOOK(post_reg_hook, CACHE_RO, 0, 1); -} - -Test(mr_internal_cache, change_hard_soft_limits) -{ - int ret; - struct fi_gni_ops_domain *gni_domain_ops; - uint32_t val, get_val; - - ret = fi_open_ops(&domain->domain_fid.fid, FI_GNI_DOMAIN_OPS_1, - 0, (void **) &gni_domain_ops, NULL); - cr_assert(ret == FI_SUCCESS, "fi_open_ops"); - - - val = 8192; - ret = gni_domain_ops->set_val(&domain->domain_fid.fid, - GNI_MR_HARD_REG_LIMIT, &val); - cr_assert(ret == FI_SUCCESS); - - ret = gni_domain_ops->get_val(&domain->domain_fid.fid, - GNI_MR_HARD_REG_LIMIT, &get_val); - cr_assert(ret == FI_SUCCESS); - cr_assert(val == get_val); - - val = 4096; - ret = gni_domain_ops->set_val(&domain->domain_fid.fid, - GNI_MR_SOFT_REG_LIMIT, &val); - cr_assert(ret == FI_SUCCESS); - - ret = 
gni_domain_ops->get_val(&domain->domain_fid.fid, - GNI_MR_SOFT_REG_LIMIT, &get_val); - cr_assert(ret == FI_SUCCESS); - cr_assert(val == get_val); - - val = 256; - ret = gni_domain_ops->set_val(&domain->domain_fid.fid, - GNI_MR_HARD_STALE_REG_LIMIT, &val); - cr_assert(ret == FI_SUCCESS); - - ret = gni_domain_ops->get_val(&domain->domain_fid.fid, - GNI_MR_HARD_STALE_REG_LIMIT, &get_val); - cr_assert(ret == FI_SUCCESS); - cr_assert(val == get_val); - - ret = fi_mr_reg(dom, (void *) buf, buf_len, default_access, - default_offset, default_req_key, - default_flags, &mr, NULL); - cr_assert(ret == FI_SUCCESS); - - cache = GET_DOMAIN_RW_CACHE(domain); - cr_assert(cache->state == GNIX_MRC_STATE_READY); - cr_assert(cache->attr.hard_reg_limit == 8192); - cr_assert(cache->attr.soft_reg_limit == 4096); - cr_assert(cache->attr.hard_stale_limit == 256); - - cr_assert(ofi_atomic_get32(&cache->inuse.elements) == 1); - cr_assert(ofi_atomic_get32(&cache->stale.elements) == 0); - - ret = fi_close(&mr->fid); - cr_assert(ret == FI_SUCCESS); - - cr_assert(ofi_atomic_get32(&cache->inuse.elements) == 0); - cr_assert(ofi_atomic_get32(&cache->stale.elements) == 1); -} - -/* Test duplicate registration. 
Since this is a valid operation, we - * provide a unique fid_mr but internally, a second reference to the same - * entry is provided to prevent expensive calls to GNI_MemRegister - */ -static void __simple_duplicate_registration_test(HOOK_DECL) -{ - int ret; - struct fid_mr *f_mr; - - ret = fi_mr_reg(dom, (void *) buf, buf_len, default_access, - default_offset, default_req_key, - default_flags, &mr, NULL); - cr_assert(ret == FI_SUCCESS); - - CHECK_HOOK(init_hook); - - CHECK_HOOK(post_reg_hook, CACHE_RW, 1, 0); - CHECK_HOOK(post_reg_hook, CACHE_RO, 0, 0); - - ret = fi_mr_reg(dom, (void *) buf, buf_len, default_access, - default_offset, default_req_key, - default_flags, &f_mr, NULL); - cr_assert(ret == FI_SUCCESS); - - CHECK_HOOK(post_reg_hook, CACHE_RW, 1, 0); - - ret = fi_close(&mr->fid); - cr_assert(ret == FI_SUCCESS); - - ret = fi_close(&f_mr->fid); - cr_assert(ret == FI_SUCCESS); - - CHECK_HOOK(post_reg_hook, CACHE_RW, 0, 1); -} - -static int __post_dereg_greater_or_equal(const char *func, int line, - int expected_inuse, - int expected_stale) -{ - cache = GET_DOMAIN_RW_CACHE(domain); - - cr_assert(ofi_atomic_get32(&cache->inuse.elements) == expected_inuse, - "failed expected inuse test, actual=%d expected=%d\n", - ofi_atomic_get32(&cache->inuse.elements), - expected_inuse); - cr_assert(ofi_atomic_get32(&cache->stale.elements) >= expected_stale, - "failed expected stale test, actual=%d expected=%d\n", - ofi_atomic_get32(&cache->stale.elements), - expected_stale); - - return 0; -} - -static struct _mr_test_hooks __simple_rdr_hooks = { - .post_reg_hook = __simple_post_reg_hook, - .post_dereg_hook = __post_dereg_greater_or_equal, -}; - -/* Test registration of 1024 elements, all distinct. 
Cache element counts - * should meet expected values - */ -static void __simple_register_distinct_regions(HOOK_DECL, int regions) -{ - int ret; - uint64_t **buffers; - char *buffer; - struct fid_mr **mr_arr; - int i; - - mr_arr = calloc(regions, sizeof(struct fid_mr *)); - cr_assert(mr_arr); - - buffers = calloc(regions, sizeof(uint64_t *)); - cr_assert(buffers, "failed to allocate array of buffers"); - - buffer = calloc(regions * 4 * __BUF_LEN, sizeof(char)); - cr_assert(buffer); - - for (i = 0; i < regions; ++i) { - buffers[i] = (uint64_t *) (buffer + ((i * 4) * __BUF_LEN)); - } - - for (i = 0; i < regions; ++i) { - ret = fi_mr_reg(dom, (void *) buffers[i], __BUF_LEN, - default_access, default_offset, default_req_key, - default_flags, &mr_arr[i], NULL); - cr_assert(ret == FI_SUCCESS); - } - - CHECK_HOOK(post_reg_hook, CACHE_RW, regions, 0); - - for (i = 0; i < regions; ++i) { - ret = fi_close(&mr_arr[i]->fid); - cr_assert(ret == FI_SUCCESS); - } - - free(buffers); - buffers = NULL; - - free(mr_arr); - mr_arr = NULL; - - free(buffer); - buffer = NULL; - - CHECK_HOOK(post_dereg_hook, 0, 0); -} - -static int __post_dereg_greater_than(const char *func, int line, - int expected_inuse, - int expected_stale) -{ - cache = GET_DOMAIN_RW_CACHE(domain); - cr_assert(ofi_atomic_get32(&cache->inuse.elements) == expected_inuse); - cr_assert(ofi_atomic_get32(&cache->stale.elements) > expected_stale); - - return 0; -} - -static struct _mr_test_hooks __simple_rnur_hooks = { - .post_reg_hook = __simple_post_reg_hook, - .post_dereg_hook = __post_dereg_greater_than, -}; - -/* Test registration of 1024 registrations backed by the same initial - * registration. 
There should only be a single registration in the cache - */ -static void __simple_register_non_unique_regions_test(HOOK_DECL, int regions) -{ - int ret; - char *hugepage; - struct fid_mr *hugepage_mr; - char **buffers; - struct fid_mr **mr_arr; - int i; - - mr_arr = calloc(regions, sizeof(struct fid_mr *)); - cr_assert(mr_arr); - - buffers = calloc(regions, sizeof(uint64_t *)); - cr_assert(buffers, "failed to allocate array of buffers"); - - hugepage = calloc(regions * regions, sizeof(char)); - cr_assert(hugepage); - - for (i = 0; i < regions; ++i) { - buffers[i] = &hugepage[i * regions]; - cr_assert(buffers[i]); - } - - ret = fi_mr_reg(dom, (void *) hugepage, - regions * regions * sizeof(char), - default_access, default_offset, default_req_key, - default_flags, &hugepage_mr, NULL); - cr_assert(ret == FI_SUCCESS); - - for (i = 0; i < regions; ++i) { - ret = fi_mr_reg(dom, (void *) buffers[i], regions, - default_access, default_offset, default_req_key, - default_flags, &mr_arr[i], NULL); - cr_assert(ret == FI_SUCCESS); - } - - CHECK_HOOK(post_reg_hook, CACHE_RW, 1, 0); - - for (i = 0; i < regions; ++i) { - ret = fi_close(&mr_arr[i]->fid); - cr_assert(ret == FI_SUCCESS); - } - - ret = fi_close(&hugepage_mr->fid); - cr_assert(ret == FI_SUCCESS); - - free(hugepage); - hugepage = NULL; - - free(buffers); - buffers = NULL; - - free(mr_arr); - mr_arr = NULL; - - CHECK_HOOK(post_dereg_hook, 0, 0); -} - -static int __get_lazy_dereg_limit(const char *func, int line) -{ - cache = GET_DOMAIN_RW_CACHE(domain); - - return cache->attr.hard_stale_limit; -} - -static struct _mr_test_hooks __simple_lazy_hooks = { - .post_reg_hook = __simple_post_reg_hook, - .post_dereg_hook = __simple_post_dereg_hook, - .get_lazy_dereg_limit = __get_lazy_dereg_limit, -}; - - -/* Test registration of 128 regions that will be cycled in and out of the - * inuse and stale trees. 
inuse + stale should never exceed 128 - */ -static void __simple_cyclic_register_distinct_regions(HOOK_DECL, int regions) -{ - int ret; - char **buffers; - char *hugepage; - struct fid_mr **mr_arr; - int i; - int buf_size = __BUF_LEN * sizeof(char); - int lazy_limit = 0; - - mr_arr = calloc(regions, sizeof(struct fid_mr *)); - cr_assert(mr_arr); - - buffers = calloc(regions, sizeof(char *)); - cr_assert(buffers, "failed to allocate array of buffers"); - - hugepage = calloc(regions * 4 * __BUF_LEN, sizeof(char)); - cr_assert(hugepage); - - for (i = 0; i < regions; ++i) { - buffers[i] = (char *) (hugepage + ((i * 4) * __BUF_LEN)); - } - - /* create the initial memory registrations */ - for (i = 0; i < regions; ++i) { - ret = fi_mr_reg(dom, (void *) buffers[i], buf_size, - default_access, default_offset, default_req_key, - default_flags, &mr_arr[i], NULL); - cr_assert(ret == FI_SUCCESS); - } - - if (HOOK_PRESENT(get_lazy_dereg_limit)) { - lazy_limit = CHECK_HOOK(get_lazy_dereg_limit); - } - - /* all registrations should now be 'in-use' */ - CHECK_HOOK(post_reg_hook, CACHE_RW, regions, 0); - - for (i = 0; i < regions; ++i) { - ret = fi_close(&mr_arr[i]->fid); - cr_assert(ret == FI_SUCCESS); - } - - /* all registrations should now be 'stale' */ - CHECK_HOOK(post_dereg_hook, 0, lazy_limit); - - for (i = 0; i < regions; ++i) { - ret = fi_mr_reg(dom, (void *) buffers[i], buf_size, - default_access, default_offset, default_req_key, - default_flags, &mr_arr[i], NULL); - cr_assert(ret == FI_SUCCESS); - - CHECK_HOOK(post_reg_hook, CACHE_RW, i + 1, regions - (i + 1)); - } - - /* all registrations should have been moved from 'stale' to 'in-use' */ - CHECK_HOOK(post_reg_hook, CACHE_RW, regions, 0); - - for (i = 0; i < regions; ++i) { - ret = fi_close(&mr_arr[i]->fid); - cr_assert(ret == FI_SUCCESS); - } - - /* all registrations should now be 'stale' */ - CHECK_HOOK(post_dereg_hook, 0, regions); - - free(buffers); - buffers = NULL; - - free(mr_arr); - mr_arr = NULL; - - 
free(hugepage); - hugepage = NULL; -} - -static int __test_stale_lt_or_equal(const char *func, int line, - int cache_type, - int expected_inuse, - int expected_stale) -{ - if (cache_type == CACHE_RO) - cache = GET_DOMAIN_RO_CACHE(domain); - else - cache = GET_DOMAIN_RW_CACHE(domain); - - cr_assert(ofi_atomic_get32(&cache->inuse.elements) == expected_inuse); - cr_assert(ofi_atomic_get32(&cache->stale.elements) <= expected_stale); - - return 0; -} - -static struct _mr_test_hooks __simple_sais_hooks = { - .post_reg_hook = __test_stale_lt_or_equal, - .post_dereg_hook = __simple_post_dereg_hook, - .get_lazy_dereg_limit = __get_lazy_dereg_limit, -}; - - -/* Test repeated registration of a memory region with the same base - * address, increasing the size each time.. This is an explicit - * version of what the test rdm_sr::send_autoreg_uncached does under - * the covers (currently). - */ -static void __simple_same_addr_incr_size_test(HOOK_DECL) -{ - int ret; - int i; - - for (i = 2; i <= buf_len; i *= 2) { - ret = fi_mr_reg(dom, (void *) buf, i, default_access, - default_offset, default_req_key, - default_flags, &mr, NULL); - cr_assert(ret == FI_SUCCESS); - - CHECK_HOOK(init_hook); - - CHECK_HOOK(post_reg_hook, CACHE_RW, 1, 1); - - ret = fi_close(&mr->fid); - cr_assert(ret == FI_SUCCESS); - - CHECK_HOOK(post_dereg_hook, 0, 1); - } -} - -/* Same as above, except with decreasing sizes */ -static void __simple_same_addr_decr_size_test(HOOK_DECL) -{ - int ret; - int i; - - for (i = buf_len; i >= 2; i /= 2) { - ret = fi_mr_reg(dom, (void *) buf, i, default_access, - default_offset, default_req_key, - default_flags, &mr, NULL); - cr_assert(ret == FI_SUCCESS); - - CHECK_HOOK(init_hook); - - CHECK_HOOK(post_reg_hook, CACHE_RW, 1, 0); - - ret = fi_close(&mr->fid); - cr_assert(ret == FI_SUCCESS); - - CHECK_HOOK(post_dereg_hook, 0, 1); - } -} - -/* Test simple cache initialization */ -Test(mr_internal_cache, basic_init) -{ - __simple_init_test(&__simple_test_hooks); -} - 
-Test(mr_internal_cache, basic_init_ro) -{ - __simple_init_ro_test(&__simple_test_hooks); -} - -/* Test duplicate registration. Since this is a valid operation, we - * provide a unique fid_mr but internally, a second reference to the same - * entry is provided to prevent expensive calls to GNI_MemRegister - */ -Test(mr_internal_cache, duplicate_registration) -{ - __simple_duplicate_registration_test(&__simple_test_hooks); -} - -/* Test registration of 1024 elements, all distinct. Cache element counts - * should meet expected values - */ -Test(mr_internal_cache, register_1024_distinct_regions) -{ - __simple_register_distinct_regions(&__simple_rdr_hooks, - DEFAULT_REGION_COUNT); -} - -/* Test registration of 1024 registrations backed by the same initial - * registration. There should only be a single registration in the cache - */ -Test(mr_internal_cache, register_1024_non_unique_regions) -{ - __simple_register_non_unique_regions_test(&__simple_rnur_hooks, - DEFAULT_REGION_COUNT); -} - -/* Test registration of 128 regions that will be cycled in and out of the - * inuse and stale trees. inuse + stale should never exceed 128 - */ -Test(mr_internal_cache, cyclic_register_128_distinct_regions) -{ - __simple_cyclic_register_distinct_regions(&__simple_lazy_hooks, 128); -} - -/* Test repeated registration of a memory region with the same base - * address, increasing the size each time.. This is an explicit - * version of what the test rdm_sr::send_autoreg_uncached does under - * the covers (currently). 
- */ -Test(mr_internal_cache, same_addr_incr_size) -{ - __simple_same_addr_incr_size_test(&__simple_sais_hooks); -} - -/* Same as above, except with decreasing sizes */ -Test(mr_internal_cache, same_addr_decr_size) -{ - __simple_same_addr_decr_size_test(&__simple_test_hooks); -} - -Test(mr_internal_cache, lru_evict_first_entry) -{ - int ret; - char **buffers; - char *hugepage; - struct fid_mr **mr_arr; - int i; - int buf_size = __BUF_LEN * sizeof(char); - int regions; - - regions = domain->mr_cache_attr.hard_stale_limit << 1; - cr_assert(regions > 0); - mr_arr = calloc(regions, sizeof(struct fid_mr *)); - cr_assert(mr_arr); - - buffers = calloc(regions, sizeof(char *)); - cr_assert(buffers, "failed to allocate array of buffers"); - - hugepage = calloc(regions * 4 * __BUF_LEN, sizeof(char)); - cr_assert(hugepage); - - for (i = 0; i < regions; ++i) { - buffers[i] = (char *) (hugepage + ((i * 4) * __BUF_LEN)); - } - - /* create the initial memory registrations */ - for (i = 0; i < regions; ++i) { - ret = fi_mr_reg(dom, (void *) buffers[i], buf_size, - default_access, default_offset, default_req_key, - default_flags, &mr_arr[i], NULL); - cr_assert(ret == FI_SUCCESS); - } - - /* all registrations should now be 'in-use' */ - cache = GET_DOMAIN_RW_CACHE(domain); - cr_assert(ofi_atomic_get32(&cache->inuse.elements) == regions); - cr_assert(ofi_atomic_get32(&cache->stale.elements) == 0); - - /* deregister cache->stale_reg_limit + 1 to test if the first region was - * deregistered - */ - for (i = 0; i < cache->attr.hard_stale_limit + 1; ++i) { - ret = fi_close(&mr_arr[i]->fid); - cr_assert(ret == FI_SUCCESS); - } - - for (i = 1; i < MIN(cache->attr.hard_stale_limit + 1, regions); ++i) { - ret = fi_mr_reg(dom, (void *) buffers[i], buf_size, - default_access, default_offset, default_req_key, - default_flags, &mr_arr[i], NULL); - cr_assert(ret == FI_SUCCESS); - } - - /* all registrations should now be 'stale' */ - cr_assert(ofi_atomic_get32(&cache->inuse.elements) == regions - 
1); - cr_assert(ofi_atomic_get32(&cache->stale.elements) == 0); - - for (i = 1; i < regions; ++i) { - ret = fi_close(&mr_arr[i]->fid); - cr_assert(ret == FI_SUCCESS); - } - - /* all registrations should now be 'stale' */ - cr_assert(ofi_atomic_get32(&cache->inuse.elements) == 0); - cr_assert(ofi_atomic_get32(&cache->stale.elements) == MIN(regions - 1, - cache->attr.hard_stale_limit)); - - free(buffers); - buffers = NULL; - - free(mr_arr); - mr_arr = NULL; - - free(hugepage); - hugepage = NULL; -} - -Test(mr_internal_cache, lru_evict_middle_entry) -{ - int ret; - char **buffers; - char *hugepage; - struct fid_mr **mr_arr; - int i, limit; - int buf_size = __BUF_LEN * sizeof(char); - int regions; - - regions = domain->mr_cache_attr.hard_stale_limit << 1; - cr_assert(regions > 0); - mr_arr = calloc(regions, sizeof(struct fid_mr *)); - cr_assert(mr_arr); - - buffers = calloc(regions, sizeof(char *)); - cr_assert(buffers, "failed to allocate array of buffers"); - - hugepage = calloc(regions * 4 * __BUF_LEN, sizeof(char)); - cr_assert(hugepage); - - for (i = 0; i < regions; ++i) { - buffers[i] = (char *) (hugepage + ((i * 4) * __BUF_LEN)); - } - - /* create the initial memory registrations */ - for (i = 0; i < regions; ++i) { - ret = fi_mr_reg(dom, (void *) buffers[i], buf_size, - default_access, default_offset, default_req_key, - default_flags, &mr_arr[i], NULL); - cr_assert(ret == FI_SUCCESS); - } - - /* all registrations should now be 'in-use' */ - cache = GET_DOMAIN_RW_CACHE(domain); - limit = cache->attr.hard_stale_limit; - cr_assert(limit < regions); - - cr_assert(ofi_atomic_get32(&cache->inuse.elements) == regions); - cr_assert(ofi_atomic_get32(&cache->stale.elements) == 0); - - /* deregister cache->stale_reg_limit + 1 to test if the first region was - * deregistered - */ - for (i = 0; i < limit + 1; ++i) { - ret = fi_close(&mr_arr[i]->fid); - cr_assert(ret == FI_SUCCESS); - } - - cr_assert(ofi_atomic_get32(&cache->inuse.elements) == (regions - (limit + 1))); - 
cr_assert(ofi_atomic_get32(&cache->stale.elements) == limit); - - /* re-register this region in the middle to test removal */ - i = (regions >> 2); - ret = fi_mr_reg(dom, (void *) buffers[i], buf_size, - default_access, default_offset, default_req_key, - default_flags, &mr_arr[i], NULL); - cr_assert(ret == FI_SUCCESS); - - cr_assert(ofi_atomic_get32(&cache->inuse.elements) == (regions - limit)); - cr_assert(ofi_atomic_get32(&cache->stale.elements) == (limit - 1)); - - for (i = limit + 1; i < regions; ++i) { - ret = fi_close(&mr_arr[i]->fid); - cr_assert(ret == FI_SUCCESS); - } - - cr_assert(ofi_atomic_get32(&cache->inuse.elements) == 1); - cr_assert(ofi_atomic_get32(&cache->stale.elements) == limit); - - i = (regions >> 2); - ret = fi_close(&mr_arr[i]->fid); - cr_assert(ret == FI_SUCCESS); - - /* all registrations should now be 'stale' */ - cr_assert(ofi_atomic_get32(&cache->inuse.elements) == 0); - cr_assert(ofi_atomic_get32(&cache->stale.elements) == limit); - - free(buffers); - buffers = NULL; - - free(mr_arr); - mr_arr = NULL; - - free(hugepage); - hugepage = NULL; -} - -static inline void _repeated_registration(const char *label) -{ - int ret, i; - int region_len = 1 << 24; - int registrations = 4096 * 16; - unsigned char *region = calloc(region_len, sizeof(unsigned char)); - struct fid_mr **f_mr; - int reg_time, dereg_time, seconds; - - cr_assert(region != NULL); - - f_mr = calloc(registrations, sizeof(*f_mr)); - cr_assert(f_mr != NULL); - - ret = fi_mr_reg(dom, (void *) region, - region_len, default_access, - default_offset, default_req_key, - default_flags, &mr, NULL); - cr_assert(ret == FI_SUCCESS); - - cache = GET_DOMAIN_RW_CACHE(domain); - - gettimeofday(&s1, 0); - for (i = 0; i < registrations; i++) { - ret = fi_mr_reg(dom, (void *) region, - region_len, default_access, - default_offset, default_req_key, - default_flags, &f_mr[i], NULL); - cr_assert(ret == FI_SUCCESS); - } - gettimeofday(&s2, 0); - - calculate_time_difference(&s1, &s2, &seconds, 
&reg_time); - reg_time += seconds * 1000000; - - gettimeofday(&s1, 0); - for (i = 0; i < registrations; i++) { - ret = fi_close(&f_mr[i]->fid); - cr_assert(ret == FI_SUCCESS); - } - gettimeofday(&s2, 0); - - calculate_time_difference(&s1, &s2, &seconds, &dereg_time); - dereg_time += seconds * 1000000; - - ret = fi_close(&mr->fid); - cr_assert(ret == FI_SUCCESS); - - fprintf(stderr, "[%s] best(repeated) case: reg_time=%.3f " - "usec dereg_time=%.3f usec\n", label, - reg_time / (registrations * 1.0), - dereg_time / (registrations * 1.0)); - - free(region); -} - - - -static inline void _single_large_registration(const char *label) -{ - int ret, i; - int region_len = 1 << 24; - int registration_width = 1 << 12; - int registrations = region_len / registration_width; - unsigned char *region = calloc(region_len, sizeof(unsigned char)); - struct fid_mr **f_mr; - int reg_time, dereg_time, seconds; - - cr_assert(region != NULL); - - f_mr = calloc(registrations, sizeof(*f_mr)); - cr_assert(f_mr != NULL); - - ret = fi_mr_reg(dom, (void *) region, - region_len, default_access, - default_offset, default_req_key, - default_flags, &mr, NULL); - - cache = GET_DOMAIN_RW_CACHE(domain); - - gettimeofday(&s1, 0); - for (i = 0; i < registrations; i++) { - ret = fi_mr_reg(dom, (void *) (region + - (registration_width * i)), - registration_width, default_access, - default_offset, default_req_key, - default_flags, &f_mr[i], NULL); - cr_assert(ret == FI_SUCCESS); - } - gettimeofday(&s2, 0); - - calculate_time_difference(&s1, &s2, &seconds, &reg_time); - reg_time += seconds * 1000000; - - gettimeofday(&s1, 0); - for (i = 0; i < registrations; i++) { - ret = fi_close(&f_mr[i]->fid); - cr_assert(ret == FI_SUCCESS); - } - gettimeofday(&s2, 0); - - calculate_time_difference(&s1, &s2, &seconds, &dereg_time); - dereg_time += seconds * 1000000; - - ret = fi_close(&mr->fid); - cr_assert(ret == FI_SUCCESS); - - fprintf(stderr, "[%s] best(overlap) case: reg_time=%.3f " - "usec dereg_time=%.3f usec\n", 
label, - reg_time / (registrations * 1.0), - dereg_time / (registrations * 1.0)); - - free(region); -} - -static inline void _random_analysis(const char *label) -{ - int ret, i; - int region_len = 1 << 24; - int registration_width = 1 << 12; - int registrations = region_len / registration_width; - unsigned char *region = calloc(region_len, sizeof(unsigned char)); - struct fid_mr **f_mr; - int reg_time, dereg_time, seconds; - void *ptr; - uint64_t ptr_len; - - srand(0xDEADBEEF); - cr_assert(region != NULL); - - f_mr = calloc(registrations, sizeof(*f_mr)); - cr_assert(f_mr != NULL); - - /* prep the cache by adding and removing an entry before timing */ - ret = fi_mr_reg(dom, (void *) buf, buf_len, default_access, - default_offset, default_req_key, - default_flags, &mr, NULL); - cr_assert(ret == FI_SUCCESS); - - ret = fi_close(&mr->fid); - cr_assert(ret == FI_SUCCESS); - - gettimeofday(&s1, 0); - for (i = 0; i < registrations; i++) { - ptr = region + rand() % region_len; - ptr_len = registration_width; - if ((uint64_t) ((char *) ptr + ptr_len) > - (uint64_t) (region + region_len)) { - ptr_len = ((uint64_t) region + region_len) - (uint64_t) ptr; - } - - ret = fi_mr_reg(dom, (void *) ptr, - ptr_len, default_access, - default_offset, default_req_key, - default_flags, &f_mr[i], NULL); - cr_assert(ret == FI_SUCCESS); - } - gettimeofday(&s2, 0); - - calculate_time_difference(&s1, &s2, &seconds, &reg_time); - reg_time += seconds * 1000000; - - gettimeofday(&s1, 0); - for (i = 0; i < registrations; i++) { - ret = fi_close(&f_mr[i]->fid); - cr_assert(ret == FI_SUCCESS); - } - gettimeofday(&s2, 0); - - calculate_time_difference(&s1, &s2, &seconds, &dereg_time); - dereg_time += seconds * 1000000; - - fprintf(stderr, "[%s] random case: reg_time=%.3f usec " - "dereg_time=%.3f usec\n", label, - reg_time / (registrations * 1.0), - dereg_time / (registrations * 1.0)); - - free(region); -} - -Test(perf_mr_internal, repeated_registration) -{ - _repeated_registration("internal"); -} - 
-Test(perf_mr_internal, single_large_registration) -{ - _single_large_registration("internal"); -} - -Test(perf_mr_internal, random_analysis) -{ - _random_analysis("internal"); -} - -/* - * This test exercises the ability of the cache to drop registrations that - * have been subsumed by other registrations - */ -Test(mr_internal_cache, regression_615) -{ - int ret; - struct fid_mr *f_mr; - char *buffer = calloc(1 << 19, sizeof(char)); - - cr_assert(buffer != NULL); - - /* set up stale cache */ - ret = fi_mr_reg(dom, (void *) (buffer + 0x18000), 0x8000, - default_access, default_offset, default_req_key, - default_flags, &f_mr, NULL); - cr_assert(ret == FI_SUCCESS); - - ret = fi_close(&f_mr->fid); - cr_assert(ret == FI_SUCCESS); - - ret = fi_mr_reg(dom, (void *) (buffer + 0x0), 0x80000, - default_access, default_offset, default_req_key, - default_flags, &f_mr, NULL); - cr_assert(ret == FI_SUCCESS); - - ret = fi_close(&f_mr->fid); - cr_assert(ret == FI_SUCCESS); - - /* set up inuse */ - ret = fi_mr_reg(dom, (void *) (buffer + 0x28000), 0x4000, - default_access, default_offset, default_req_key, - default_flags, &f_mr, NULL); - cr_assert(ret == FI_SUCCESS); - - cache = GET_DOMAIN_RW_CACHE(domain); - - cr_assert(ofi_atomic_get32(&cache->inuse.elements) == 1); - cr_assert(ofi_atomic_get32(&cache->stale.elements) == 0); - - ret = fi_close(&f_mr->fid); - cr_assert(ret == FI_SUCCESS); - - free(buffer); -} - -void simple_register_distinct_regions(int regions) -{ - int ret; - uint64_t **buffers; - char *buffer; - struct fid_mr **mr_arr; - int i; - int requested_key; - - mr_arr = calloc(regions, sizeof(struct fid_mr *)); - cr_assert(mr_arr); - - buffers = calloc(regions, sizeof(uint64_t *)); - cr_assert(buffers, "failed to allocate array of buffers"); - - buffer = calloc(regions * 4 * __BUF_LEN, sizeof(char)); - cr_assert(buffer); - - for (i = 0; i < regions; ++i) { - buffers[i] = (uint64_t *) (buffer + ((i * 4) * __BUF_LEN)); - } - - for (i = 0; i < regions; ++i) { - 
requested_key = USING_SCALABLE(fi) ? i : default_req_key; - ret = fi_mr_reg(dom, (void *) buffers[i], __BUF_LEN, - default_access, default_offset, requested_key, - default_flags, &mr_arr[i], NULL); - cr_assert(ret == FI_SUCCESS); - } - - for (i = 0; i < regions; ++i) { - ret = fi_close(&mr_arr[i]->fid); - cr_assert(ret == FI_SUCCESS); - } - - free(buffers); - buffers = NULL; - - free(mr_arr); - mr_arr = NULL; - - free(buffer); - buffer = NULL; -} - -void simple_register_non_unique_regions(int regions) -{ - int ret; - char *hugepage; - struct fid_mr *hugepage_mr; - char **buffers; - struct fid_mr **mr_arr; - int i; - int requested_key; - - mr_arr = calloc(regions, sizeof(struct fid_mr *)); - cr_assert(mr_arr); - - buffers = calloc(regions, sizeof(uint64_t *)); - cr_assert(buffers, "failed to allocate array of buffers"); - - hugepage = calloc(regions * regions, sizeof(char)); - cr_assert(hugepage); - - for (i = 0; i < regions; ++i) { - buffers[i] = &hugepage[i * regions]; - cr_assert(buffers[i]); - } - - ret = fi_mr_reg(dom, (void *) hugepage, - regions * regions * sizeof(char), - default_access, default_offset, default_req_key, - default_flags, &hugepage_mr, NULL); - cr_assert(ret == FI_SUCCESS); - - for (i = 0; i < regions; ++i) { - requested_key = USING_SCALABLE(fi) ? - i + 1 : default_req_key; - ret = fi_mr_reg(dom, (void *) buffers[i], regions, - default_access, default_offset, - requested_key, - default_flags, &mr_arr[i], NULL); - cr_assert(ret == FI_SUCCESS); - } - - for (i = 0; i < regions; ++i) { - ret = fi_close(&mr_arr[i]->fid); - cr_assert(ret == FI_SUCCESS); - } - - ret = fi_close(&hugepage_mr->fid); - cr_assert(ret == FI_SUCCESS); - - free(hugepage); - hugepage = NULL; - - free(buffers); - buffers = NULL; - - free(mr_arr); - mr_arr = NULL; -} - -#ifdef HAVE_UDREG -/* Test registration of 1024 elements, all distinct. 
Cache element counts - * should meet expected values - */ -Test(mr_udreg_cache, register_1024_distinct_regions) -{ - simple_register_distinct_regions(DEFAULT_REGION_COUNT); -} - -/* Test registration of 1024 registrations backed by the same initial - * registration. There should only be a single registration in the cache - */ -Test(mr_udreg_cache, register_1024_non_unique_regions) -{ - simple_register_non_unique_regions(DEFAULT_REGION_COUNT); -} - -/* performance tests */ -Test(perf_mr_udreg, repeated_registration) -{ - _repeated_registration("udreg"); -} - -Test(perf_mr_udreg, single_large_registration) -{ - _single_large_registration("udreg"); -} - -Test(perf_mr_udreg, random_analysis) -{ - _random_analysis("udreg"); -} - -/* no lazy dereg tests */ -Test(mr_udreg_cache_nld, register_1024_distinct_regions) -{ - simple_register_distinct_regions(DEFAULT_REGION_COUNT); -} - -Test(mr_udreg_cache_nld, register_1024_non_unique_regions) -{ - simple_register_non_unique_regions(DEFAULT_REGION_COUNT); -} - -#endif - - -Test(mr_no_cache_basic, register_1024_distinct_regions) -{ - int nbufs = DEFAULT_REGION_COUNT; - - simple_register_distinct_regions(nbufs); -} - -Test(mr_no_cache_scalable, register_1024_distinct_regions) -{ - int nbufs = DEFAULT_SCALABLE_REGION_COUNT; - - simple_register_distinct_regions(nbufs); -} - -/* Test registration of 1024 registrations backed by the same initial - * registration. 
There should only be a single registration in the cache - */ -Test(mr_no_cache_basic, register_1024_non_unique_regions) -{ - int nbufs = DEFAULT_REGION_COUNT; - - simple_register_non_unique_regions(nbufs); -} - -Test(mr_no_cache_scalable, register_1024_non_unique_regions) -{ - int nbufs = DEFAULT_SCALABLE_REGION_COUNT; - - simple_register_non_unique_regions(nbufs); -} - -Test(perf_mr_no_cache, repeated_registration) -{ - _repeated_registration("no caching"); -} - -Test(perf_mr_no_cache, single_large_registration) -{ - _single_large_registration("no caching"); -} - -Test(perf_mr_no_cache, random_analysis) -{ - _random_analysis("no caching"); -} - - -/* simple tests without lazy deregistration. Empty hooks can be used with all - * of the tests */ - -/* Test simple cache initialization */ -Test(mr_internal_cache_nld, basic_init) -{ - __simple_init_test(&empty_hooks); -} - -/* Test duplicate registration. Since this is a valid operation, we - * provide a unique fid_mr but internally, a second reference to the same - * entry is provided to prevent expensive calls to GNI_MemRegister - */ -Test(mr_internal_cache_nld, duplicate_registration) -{ - __simple_duplicate_registration_test(&empty_hooks); -} - -/* Test registration of 1024 elements, all distinct. Cache element counts - * should meet expected values - */ -Test(mr_internal_cache_nld, register_1024_distinct_regions) -{ - __simple_register_distinct_regions(&empty_hooks, DEFAULT_REGION_COUNT); -} - -/* Test registration of 1024 registrations backed by the same initial - * registration. There should only be a single registration in the cache - */ -Test(mr_internal_cache_nld, register_1024_non_unique_regions) -{ - __simple_register_non_unique_regions_test(&empty_hooks, - DEFAULT_REGION_COUNT); -} - -/* Test registration of 128 regions that will be cycled in and out of the - * inuse and stale trees. 
inuse + stale should never exceed 128 - */ -Test(mr_internal_cache_nld, cyclic_register_128_distinct_regions) -{ - __simple_cyclic_register_distinct_regions(&empty_hooks, 128); -} - -/* Test repeated registration of a memory region with the same base - * address, increasing the size each time.. This is an explicit - * version of what the test rdm_sr::send_autoreg_uncached does under - * the covers (currently). - */ -Test(mr_internal_cache_nld, same_addr_incr_size) -{ - __simple_same_addr_incr_size_test(&empty_hooks); -} - -/* Same as above, except with decreasing sizes */ -Test(mr_internal_cache_nld, same_addr_decr_size) -{ - __simple_same_addr_decr_size_test(&empty_hooks); -} diff --git a/prov/gni/test/mr_notifier.c b/prov/gni/test/mr_notifier.c deleted file mode 100644 index b90dfc8a2c1..00000000000 --- a/prov/gni/test/mr_notifier.c +++ /dev/null @@ -1,503 +0,0 @@ -/* - * Copyright (c) 2016-2018 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - - -#include -#include -#include -#include "gnix_mr_notifier.h" - -#include "common.h" -#include -#include "common.h" - -static struct gnix_mr_notifier *mr_notifier; -static void mr_notifier_setup(void) -{ - int ret; - - ret = _gnix_notifier_init(); - cr_assert(ret == 0, "_gnix_notifier_init failed"); - - ret = _gnix_notifier_open(&mr_notifier); - cr_assert(ret == 0, "_gnix_notifier_open failed"); -} - -static void mr_notifier_teardown(void) -{ - int ret; - - ret = _gnix_notifier_close(mr_notifier); - cr_assert(ret == 0, "_gnix_notifier_close failed"); -} - -TestSuite(mr_notifier, - .init = mr_notifier_setup, - .fini = mr_notifier_teardown); - -static void -monitor_single(size_t len) { - int ret; - uint64_t cookie; - char *mem = mmap(NULL, len, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - cr_assert_neq(mem, MAP_FAILED, "Could not allocate %ld bytes\n", len); - - ret = _gnix_notifier_monitor(mr_notifier, mem, len, (uint64_t) mem); - cr_assert(ret == 0, "_gnix_notifier_monitor failed"); - - munmap(mem, len); - - ret = _gnix_notifier_get_event(mr_notifier, &cookie, sizeof(cookie)); - if (ret >= 0) { - cr_assert(cookie == (uint64_t) mem, - "Unexpected cookie (got %lu, expected %lu)", - cookie, (uint64_t) mem); - cr_assert(ret == sizeof(cookie), - "Unexpected number of bytes (got %d, expected %lu)", - ret, sizeof(cookie)); - } else if (ret == -FI_EAGAIN) { - /* Nothing to read, ok */ - ret = _gnix_notifier_unmonitor(mr_notifier, (uint64_t) mem); - cr_assert(ret == 0, "_gnix_notifier_unmonitor failed"); - } else { - cr_assert(0, "Unexpected error"); - } -} - -Test(mr_notifier, single) -{ - monitor_single(131); - monitor_single(4099); - monitor_single((1<<22) + 1); - 
monitor_single((1<<26) - 1); -} - -static void -monitor_multiple(size_t *lens, size_t num_lens) -{ - int i, ret; - uint64_t cookie; - char *mems[num_lens]; - bool unmapped[num_lens]; - - for (i = 0; i < num_lens; i++) { - mems[i] = mmap(NULL, lens[i], PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - cr_assert_neq(mems[i], MAP_FAILED, - "Could not allocate %ld bytes\n", lens[i]); - - ret = _gnix_notifier_monitor(mr_notifier, mems[i], lens[i], - (uint64_t) &mems[i]); - cr_assert(ret == 0, "_gnix_notifier_monitor failed"); - unmapped[i] = false; - } - - for (i = 0; i < num_lens; i++) { - munmap(mems[i], lens[i]); - } - - for (ret = _gnix_notifier_get_event(mr_notifier, - &cookie, sizeof(cookie)); - ret > 0; - ret = _gnix_notifier_get_event(mr_notifier, - &cookie, sizeof(cookie))) { - i = (int) (cookie - (uint64_t) mems)/8; - cr_assert(cookie == (uint64_t) &mems[i], - "Unexpected cookie (got %lu, expected %lu)", - cookie, (uint64_t) &mems[i]); - cr_assert(ret == sizeof(cookie), - "Unexpected number of bytes (got %d, expected %lu)", - ret, sizeof(cookie)); - unmapped[i] = true; - } - - for (i = 0; i < num_lens; i++) { - if (unmapped[i] == false) { - ret = _gnix_notifier_unmonitor(mr_notifier, - (uint64_t) &mems[i]); - cr_assert(ret == 0, "_gnix_notifier_unmonitor failed"); - } - } -} - -Test(mr_notifier, multiple) -{ - const int num_lens = 11; - size_t lens[num_lens]; - - lens[0] = 131; - lens[1] = 4099; - lens[2] = 8096; - lens[3] = (1<<15)-1; - lens[4] = 777; - lens[5] = (1<<19)+1; - lens[6] = 1<<20; - lens[7] = 1<<20; - lens[8] = 42; - lens[9] = (1<<14)-1; - lens[10] = (1<<21)+1; - - monitor_multiple(lens+5, 2); - monitor_multiple(lens, num_lens); - -} - -#include -#include "gnix_rdma_headers.h" -static struct fi_info *fi; -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fid_fabric *fab; -static struct fid_domain *dom; -static uint64_t default_access = 
(FI_REMOTE_READ | FI_REMOTE_WRITE | - FI_READ | FI_WRITE); -static uint64_t default_flags; -static uint64_t default_req_key; -static uint64_t default_offset; - -static pthread_t freer; - -/* simple bounded buffer for 2 threads */ -#include "ofi_atom.h" -#define buflen 23 -static void *to_free_buf[buflen]; -static ofi_atomic32_t head, tail; - -static inline int next_head(void) -{ - int val = ofi_atomic_inc32(&head); - - while (ofi_atomic_get32(&tail)-buflen >= val) { - pthread_yield(); - } - return val%buflen; -} - -static inline int next_tail(void) -{ - int val = ofi_atomic_inc32(&tail); - - while (ofi_atomic_get32(&head) <= val) { - pthread_yield(); - } - return val%buflen; -} - -static void *do_free(void *data) -{ - int loc = next_tail(); - void *addr = to_free_buf[loc]; - - while (addr != MAP_FAILED) { - if (addr) { - free(addr); - } - - pthread_yield(); - - loc = next_tail(); - addr = to_free_buf[loc]; - } - return NULL; -} - -static void mr_stressor_setup_common(void) -{ - int ret = 0; - struct fi_info *hints; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->domain_attr->cq_data_size = 4; - hints->mode = mode_bits; - - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(fi_version(), NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - - fi_freeinfo(hints); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - ret = fi_domain(fab, fi, &dom, NULL); - cr_assert(!ret, "fi_domain"); - -} - -static void mr_notifier_stressor_setup(void) -{ - int ret = 0; - - mr_stressor_setup_common(); - - ofi_atomic_initialize32(&head, 0); - ofi_atomic_initialize32(&tail, -1); - - ret = pthread_create(&freer, NULL, do_free, NULL); - cr_assert_eq(ret, 0, "Could not create pthread"); - - srand(0); /* want repeatable sequence, I think */ -} - -static void mr_stressor_teardown_common(void) -{ - int ret = 0; - - ret = fi_close(&dom->fid); - 
cr_assert_eq(ret, 0, "Could not close domain"); - - ret = fi_close(&fab->fid); - cr_assert_eq(ret, 0, "Could not close fabric"); - - fi_freeinfo(fi); -} - -static void mr_notifier_stressor_teardown(void) -{ - int ret = 0; - - ret = pthread_join(freer, NULL); - cr_assert_eq(ret, 0, "Could not join pthread"); - - mr_stressor_teardown_common(); -} - -TestSuite(mr_notifier_stressor, - .init = mr_notifier_stressor_setup, - .fini = mr_notifier_stressor_teardown, - .disabled = true); - -/* good 'nuff */ -static int get_len(int min_len, int max_len) -{ - int r; - int m = max_len - min_len; - - cr_assert(m > 0); - r = rand()%m; - - return min_len+r; - -} - -static void do_notifier_stressor(int num_allocs, int min_len, int max_len, - bool free_imm, bool close_imm) -{ - int i, len, ret; - char **r = calloc(num_allocs, sizeof(char *)); - struct fid_mr **mr = calloc(num_allocs, sizeof(struct fid_mr)); - int loc = ofi_atomic_get32(&head); - - for (i = 0; i < num_allocs; i++) { - len = get_len(min_len, max_len); - r[i] = calloc(len, sizeof(char)); - cr_assert_neq(r[i], NULL, - "Could not allocate %d bytes\n", len); - - ret = fi_mr_reg(dom, (void *) r[i], len, default_access, - default_offset, default_req_key, - default_flags, &mr[i], NULL); - cr_assert_eq(ret, FI_SUCCESS); - - MR_ENABLE(mr[i], r[i], len); - - to_free_buf[loc] = free_imm ? 
r[i] : 0x0; - - loc = next_head(); - pthread_yield(); - - if (close_imm) { - fi_close(&mr[i]->fid); - } - } - - to_free_buf[loc] = MAP_FAILED; - loc = next_head(); - - for (i = 0; i < num_allocs; i++) { - if (!free_imm) { - free(r[i]); - } - if (!close_imm) { - fi_close(&mr[i]->fid); - } - } - - free(r); - free(mr); -} - -Test(mr_notifier_stressor, free_and_close) -{ - do_notifier_stressor(300, 1<<10, 1<<25, true, true); -} - -Test(mr_notifier_stressor, close_only) -{ - do_notifier_stressor(300, 1<<10, 1<<25, false, true); -} - -Test(mr_notifier_stressor, free_only) -{ - do_notifier_stressor(300, 1<<10, 1<<25, true, false); -} - -Test(mr_notifier_stressor, no_free_no_close) -{ - do_notifier_stressor(300, 1<<10, 1<<25, false, false); -} - -Test(mr_notifier_stressor, small_free_and_close) -{ - do_notifier_stressor(3000, 1<<4, 1<<14, true, true); -} - -Test(mr_notifier_stressor, small_close_only) -{ - do_notifier_stressor(3000, 1<<4, 1<<14, false, true); -} - -Test(mr_notifier_stressor, small_free_only) -{ - do_notifier_stressor(3000, 1<<4, 1<<14, true, false); -} - -Test(mr_notifier_stressor, small_no_free_no_close) -{ - do_notifier_stressor(3000, 1<<4, 1<<14, false, false); -} - - -static void mr_reuse_stressor_setup(void) -{ - mr_stressor_setup_common(); -} - -static void mr_reuse_stressor_teardown(void) -{ - mr_stressor_teardown_common(); -} - -TestSuite(mr_reuse_stressor, - .init = mr_reuse_stressor_setup, - .fini = mr_reuse_stressor_teardown, - .disabled = true); - -static void do_reuse(int num_allocs, int num_reuse, - int min_len, int max_len) -{ - int i, a, len, ret; - - cr_assert(num_allocs > 2*num_reuse); - - char **r = calloc(num_reuse, sizeof(char *)); - - cr_assert_neq(r, NULL); - - int *r_len = calloc(num_reuse, sizeof(int)); - - cr_assert_neq(r_len, NULL); - - for (i = 0; i < num_reuse; i++) { - len = get_len(min_len, max_len); - r[i] = calloc(len, sizeof(char)); - cr_assert_neq(r[i], NULL, - "Could not allocate %d bytes\n", len); - r_len[i] = len; - } 
- - int nr = 0; - - for (a = 0; a < num_allocs; a++) { - char *buf; - struct fid_mr *buf_mr; - bool free_me; - - if ((nr < num_reuse) && (a % 2)) { - buf = r[nr]; - len = r_len[nr]; - nr++; - free_me = false; - } else { - len = get_len(min_len, max_len); - buf = calloc(len, sizeof(char)); - cr_assert_neq(buf, NULL, - "Could not allocate %d bytes\n", len); - free_me = true; - } - ret = fi_mr_reg(dom, (void *) buf, len, default_access, - default_offset, default_req_key, - default_flags, &buf_mr, NULL); - cr_assert_eq(ret, FI_SUCCESS); - - MR_ENABLE(buf_mr, buf, len); - - fi_close(&buf_mr->fid); - - if (free_me) { - free(buf); - } - } - - for (i = 0; i < num_reuse; i++) { - free(r[i]); - } - - free(r); - free(r_len); -} - -Test(mr_reuse_stressor, few_small) -{ - do_reuse(2347, 11, 1<<8, 1<<12); -} - -Test(mr_reuse_stressor, few_large) -{ - do_reuse(2347, 11, 1<<18, 1<<24); -} - -Test(mr_reuse_stressor, lots_small) -{ - do_reuse(2347, 491, 1<<8, 1<<12); -} - -Test(mr_reuse_stressor, lots_large) -{ - do_reuse(2347, 491, 1<<18, 1<<24); -} - diff --git a/prov/gni/test/nic.c b/prov/gni/test/nic.c deleted file mode 100644 index 5a931585bc1..00000000000 --- a/prov/gni/test/nic.c +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include - - -#include "gnix.h" -#include "gnix_nic.h" - -#include -#include "gnix_rdma_headers.h" -#include "common.h" - -static struct fi_info *hints; -static struct fi_info *fi; -static struct fid_fabric *fab; -static struct fid_domain *dom; - -static void __setup(uint32_t version, int mr_mode) -{ - int ret; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = mr_mode; - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(version, NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - if (USING_SCALABLE(fi)) { - struct fi_gni_ops_fab *ops; - int in; - - /* nic test opens many nics and exhausts reserved keys */ - in = 256; - - ret = fi_open_ops(&fab->fid, - FI_GNI_FAB_OPS_1, 0, (void **) &ops, NULL); - cr_assert_eq(ret, FI_SUCCESS); - cr_assert(ops); - - ret = ops->set_val(&fab->fid, - GNI_DEFAULT_PROV_REGISTRATION_LIMIT, - &in); - cr_assert_eq(ret, FI_SUCCESS); - } - - ret = fi_domain(fab, fi, &dom, NULL); - cr_assert(!ret, "fi_domain"); -} - -static void setup_basic(void) -{ - __setup(fi_version(), GNIX_MR_BASIC); -} - -static void 
setup_scalable(void) -{ - __setup(fi_version(), GNIX_MR_SCALABLE); -} - -static void teardown(void) -{ - int ret; - - ret = fi_close(&dom->fid); - cr_assert(!ret, "fi_close domain"); - - ret = fi_close(&fab->fid); - cr_assert(!ret, "fi_close fabric"); - - fi_freeinfo(fi); - fi_freeinfo(hints); -} - -TestSuite(nic_basic, .init = setup_basic, .fini = teardown); -TestSuite(nic_scalable, .init = setup_scalable, .fini = teardown); - -static inline void __alloc_free(void) -{ - int i, ret; - const int num_nics = 79; - struct gnix_nic *nics[num_nics]; - struct gnix_fid_domain *domain = container_of( - dom, struct gnix_fid_domain, domain_fid); - struct gnix_auth_key *auth_key = domain->auth_key; - struct gnix_nic_attr nic_attr = {0}; - - nic_attr.auth_key = auth_key; - - for (i = 0; i < num_nics; i++) { - ret = gnix_nic_alloc(container_of(dom, struct gnix_fid_domain, - domain_fid), - &nic_attr, - &nics[i]); - cr_assert_eq(ret, FI_SUCCESS, "Could not allocate nic"); - } - - for (i = 0; i < num_nics; i++) { - ret = _gnix_nic_free(nics[i]); - cr_assert_eq(ret, FI_SUCCESS, "Could not free nic"); - } -} - -Test(nic_basic, alloc_free) -{ - __alloc_free(); -} - -Test(nic_scalable, alloc_free) -{ - __alloc_free(); -} diff --git a/prov/gni/test/pmi_utils.c b/prov/gni/test/pmi_utils.c deleted file mode 100644 index dda646eebb0..00000000000 --- a/prov/gni/test/pmi_utils.c +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2015 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include "pmi.h" -#include "criterion/criterion.h" - -static int pmi_initialized; - -ReportHook(PRE_ALL)(struct criterion_test_set *test) -{ - int rc, spawned; - - rc = PMI_Init(&spawned); - if (rc != PMI_SUCCESS) { - fprintf(stderr, "PMI_Init failed - returned %d.\n", rc); - fprintf(stderr, "Debugging support may not be available\n"); - } else - pmi_initialized = 1; -} - -ReportHook(POST_ALL)(struct criterion_global_stats *stats) -{ - if (pmi_initialized == 1) - PMI_Finalize(); -} diff --git a/prov/gni/test/queue.c b/prov/gni/test/queue.c deleted file mode 100644 index 83f96633e02..00000000000 --- a/prov/gni/test/queue.c +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright (c) 2015 Los Alamos National Security, LLC. 
All rights reserved. - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include - -#include "gnix_queue.h" - -#include -#include "gnix_rdma_headers.h" - -struct gnix_queue *queue; - -struct int_entry { - int x; - struct slist_entry item; -}; - -static struct slist_entry *alloc_int_entry(size_t size) -{ - (void) size; - struct int_entry *entry = malloc(sizeof(*entry)); - - if (!entry) - return NULL; - - return &entry->item; -} - -static void free_int_entry(struct slist_entry *list_entry) -{ - struct int_entry *entry; - - entry = container_of(list_entry, struct int_entry, item); - - free(entry); -} - -void setup_count_eight(void) -{ - int ret; - - ret = _gnix_queue_create(&queue, alloc_int_entry, free_int_entry, - 0, 8); - cr_assert_eq(ret, FI_SUCCESS, "failed to create queue."); -} - -void setup_count_zero(void) -{ - int ret; - - ret = _gnix_queue_create(&queue, alloc_int_entry, free_int_entry, - 0, 0); - cr_assert_eq(ret, FI_SUCCESS, "failed to create queue."); -} - -void teardown_queue(void) -{ - _gnix_queue_destroy(queue); -} - -TestSuite(empty, .init = setup_count_zero, .fini = teardown_queue); - -Test(empty, null_read) -{ - struct slist_entry *list_entry; - - list_entry = _gnix_queue_dequeue(queue); - cr_expect(!list_entry, "non null read on empty queue."); - - list_entry = _gnix_queue_dequeue_free(queue); - cr_expect(!list_entry, "non null read on free list of empty queue."); - - list_entry = _gnix_queue_peek(queue); - cr_expect(!list_entry, "non null peek on empty queue."); -} - -Test(empty, single_write) -{ - struct slist_entry *list_entry; - struct int_entry *entry; - - /* - * Write single entry with value 4 to queue. - */ - list_entry = _gnix_queue_get_free(queue); - cr_expect(list_entry, "null entry from queue get free function."); - - entry = container_of(list_entry, struct int_entry, item); - - entry->x = 4; - - _gnix_queue_enqueue(queue, &entry->item); - - /* - * Read back entry with value 4. 
- */ - list_entry = _gnix_queue_dequeue(queue); - cr_expect(list_entry, "null entry from queue after enqueue."); - - entry = container_of(list_entry, struct int_entry, item); - - cr_expect_eq(4, entry->x, "entry does not contain assigned value."); - - /* - * Add to free list. - */ - _gnix_queue_enqueue_free(queue, &entry->item); - - /* - * Read from now empty queue. - */ - list_entry = _gnix_queue_dequeue(queue); - cr_expect(!list_entry, "entry read from empty queue is non-null."); - - /* - * Read from free and make sure it's the same. - */ - list_entry = _gnix_queue_get_free(queue); - cr_expect(list_entry, - "null entry from free queue after adding to free."); - - entry = container_of(list_entry, struct int_entry, item); - - cr_expect_eq(4, entry->x, "entry does not contain assigned value."); - - /* - * Completely empty list. Shouldn't seg fault on teardown. - */ - queue->free_item(&entry->item); -} - -TestSuite(eight, .init = setup_count_eight, .fini = teardown_queue); - -Test(eight, read_nine) -{ - struct slist_entry *list_entry; - struct int_entry *entry; - - /* - * Fill the queue. The value of each will be the counter position. - */ - for (size_t i = 0; i < 8; i++) { - list_entry = _gnix_queue_get_free(queue); - cr_expect(list_entry, - "null entry from queue get free function."); - - entry = container_of(list_entry, struct int_entry, item); - - entry->x = i; - - _gnix_queue_enqueue(queue, &entry->item); - } - - /* - * Peek and make sure the top of queue is 0. - */ - list_entry = _gnix_queue_peek(queue); - cr_expect(list_entry, "null entry from peek."); - - entry = container_of(list_entry, struct int_entry, item); - - cr_expect_eq(0, entry->x, "value of peek isn't first added to queue."); - - /* - * Peek again and make sure it's still 0. 
- */ - list_entry = _gnix_queue_peek(queue); - cr_expect(list_entry, "null entry from peek."); - - entry = container_of(list_entry, struct int_entry, item); - - cr_expect_eq(0, entry->x, "value of peek isn't first added to queue."); - - /* - * Read it back. - */ - for (size_t i = 0; i < 8; i++) { - list_entry = _gnix_queue_dequeue(queue); - cr_expect(list_entry, "null entry from queue dequeue."); - - entry = container_of(list_entry, struct int_entry, item); - - cr_expect_eq(i, entry->x, "value not same as assigned."); - - _gnix_queue_enqueue_free(queue, &entry->item); - } - - /* - * Read an extra item. Should return null. - */ - list_entry = _gnix_queue_dequeue(queue); - cr_expect(!list_entry, "entry from empty queue not null."); -} diff --git a/prov/gni/test/rdm_addr_str_sr.c b/prov/gni/test/rdm_addr_str_sr.c deleted file mode 100644 index 9f600618b03..00000000000 --- a/prov/gni/test/rdm_addr_str_sr.c +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Copyright (c) 2020 Triad National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gnix_vc.h" -#include "gnix_cm_nic.h" -#include "gnix_hashtable.h" -#include "gnix_rma.h" -#include "gnix_mr.h" -#include "common.h" - -#include -#include "gnix_rdma_headers.h" -#include "common.h" - -#if 1 -#define dbg_printf(...) -#else -#define dbg_printf(...) fprintf(stderr, __VA_ARGS__); fflush(stderr) -#endif - -#define NUMEPS 2 - -static struct fid_fabric *fab; -static struct fid_domain *dom[NUMEPS]; -static struct fid_ep *ep[NUMEPS]; -static struct fid_av *av[NUMEPS]; -static void *ep_name[NUMEPS]; -static fi_addr_t gni_addr[NUMEPS]; -static struct fid_cq *msg_cq[NUMEPS]; -static struct fi_info *fi[NUMEPS]; -static struct fi_cq_attr cq_attr; -static struct fi_info *hints; -static size_t addrlen = 0; - -#define BUF_SZ (1<<20) -#define BUF_RNDZV (1<<14) - -static char *target, *target_base; -static char *source, *source_base; - -void rdm_str_addr_sr_setup_common(void) -{ - int ret = 0, i = 0, j = 0; - struct fi_av_attr attr; - - memset(&attr, 0, sizeof(attr)); - attr.type = FI_AV_MAP; - attr.count = NUMEPS; - - cq_attr.format = FI_CQ_FORMAT_TAGGED; - cq_attr.size = 1024; - cq_attr.wait_obj = 0; - - target_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(target_base); - target = GNIT_ALIGN_BUFFER(char *, target_base); - - source_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(source_base); - source = GNIT_ALIGN_BUFFER(char *, source_base); - - ret 
= fi_fabric(fi[0]->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - for (i = 0; i < NUMEPS; i++) { - ret = fi_domain(fab, fi[i], dom + i, NULL); - cr_assert(!ret, "fi_domain"); - - ret = fi_av_open(dom[i], &attr, av + i, NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_endpoint(dom[i], fi[i], ep + i, NULL); - cr_assert(!ret, "fi_endpoint"); - - ret = fi_cq_open(dom[i], &cq_attr, msg_cq + i, 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_ep_bind(ep[i], &msg_cq[i]->fid, FI_SEND | FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_getname(&ep[i]->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - ep_name[i] = malloc(addrlen); - cr_assert(ep_name[i] != NULL); - - ret = fi_getname(&ep[i]->fid, ep_name[i], &addrlen); - cr_assert(ret == FI_SUCCESS); - } - - for (i = 0; i < NUMEPS; i++) { - /* - * To test API-1.1: Reporting of unknown source addresses -- - * only insert addresses into the sender's av - */ - if (i < (NUMEPS / 2)) { - for (j = 0; j < NUMEPS; j++) { - dbg_printf("Only does src EP insertions\n"); - ret = fi_av_insert(av[i], ep_name[j], 1, - &gni_addr[j], - 0, NULL); - cr_assert(ret == 1); - } - } - - ret = fi_ep_bind(ep[i], &av[i]->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_enable(ep[i]); - cr_assert(!ret, "fi_ep_enable"); - - } -} - - -void rdm_str_addr_sr_setup(void) -{ - int ret = 0, i = 0; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->domain_attr->cq_data_size = NUMEPS * 2; - hints->mode = FI_CONTEXT; - hints->caps = FI_SOURCE | FI_MSG | FI_SOURCE | FI_SOURCE_ERR; - hints->fabric_attr->prov_name = strdup("gni"); - hints->addr_format = FI_ADDR_STR; - - /* Get info about fabric services with the provided hints */ - for (; i < NUMEPS; i++) { - ret = fi_getinfo(fi_version(), NULL, 0, 0, hints, &fi[i]); - cr_assert(!ret, "fi_getinfo"); - } - - rdm_str_addr_sr_setup_common(); -} - -static void rdm_str_addr_sr_teardown(void) -{ - int ret = 0, i 
= 0; - - for (; i < NUMEPS; i++) { - ret = fi_close(&ep[i]->fid); - cr_assert(!ret, "failure in closing ep."); - - ret = fi_close(&msg_cq[i]->fid); - cr_assert(!ret, "failure in send cq."); - - ret = fi_close(&av[i]->fid); - cr_assert(!ret, "failure in closing av."); - - ret = fi_close(&dom[i]->fid); - cr_assert(!ret, "failure in closing domain."); - - fi_freeinfo(fi[i]); - - free(ep_name[i]); - } - - free(target_base); - free(source_base); - - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); - - fi_freeinfo(hints); -} - -void rdm_str_addr_sr_init_data(char *buf, int len, char seed) -{ - int i; - - for (i = 0; i < len; i++) { - buf[i] = seed++; - } -} - -static inline int rdm_str_addr_sr_check_data(char *buf1, char *buf2, int len) -{ - int i; - - for (i = 0; i < len; i++) { - if (buf1[i] != buf2[i]) { - printf("data mismatch, elem: %d, exp: %hhx, act: %hhx\n" - , i, buf1[i], buf2[i]); - return 0; - } - } - - return 1; -} - -void rdm_str_addr_sr_xfer_for_each_size(void (*xfer)(int len), int slen, int elen) -{ - int i; - - for (i = slen; i <= elen; i *= 2) { - xfer(i); - } -} - -static inline int rdm_str_addr_sr_check_err_cqe(struct fid_cq *cq) -{ - int ret = FI_SUCCESS, cnt; - struct fi_cq_err_entry ee; - size_t name_size; - char *buffer; - fi_addr_t fi_addr; - - /*application provided error_data buffer and length*/ - ee.err_data_size = addrlen; - ee.err_data = malloc(addrlen); - cr_assert((ee.err_data != NULL), "malloc failed"); - buffer = malloc(addrlen); - cr_assert((buffer != NULL), "malloc failed"); - - cnt = fi_cq_readerr(cq, &ee, 0); - cr_assert((cnt == 1), "fi_cq_readerr didn't return entry"); - - if ((hints->caps & FI_SOURCE_ERR) && ee.err == FI_EADDRNOTAVAIL) { - ret = fi_av_insert(av[1], ee.err_data, 1, &fi_addr, - 0, NULL); - cr_assert(ret == 1, "fi_av_insert failed"); - name_size = addrlen; - ret = fi_av_lookup(av[1], fi_addr, - buffer, &name_size); - cr_assert(ret == FI_SUCCESS, "fi_av_lookup failed"); - cr_assert(name_size 
== addrlen); - cr_assert(strncmp((char *)buffer, - (char *)ee.err_data, - addrlen) == 0); - } - return ret; -} - -/******************************************************************************* - * Test MSG functions - ******************************************************************************/ - -TestSuite(rdm_str_addr_sr, - .init = rdm_str_addr_sr_setup, - .fini = rdm_str_addr_sr_teardown, - .disabled = false); - -/* - * ssize_t fi_send(struct fid_ep *ep, void *buf, size_t len, - * void *desc, fi_addr_t dest_addr, void *context); - * - * ssize_t fi_recv(struct fid_ep *ep, void * buf, size_t len, - * void *desc, fi_addr_t src_addr, void *context); - */ -static void do_send(int len) -{ - int ret; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry s_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry d_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_err_entry d_err_cqe; - struct fi_cq_err_entry s_err_cqe; - - ssize_t sz; - - memset(&d_err_cqe, -1, sizeof(struct fi_cq_err_entry)); - memset(&s_err_cqe, -1, sizeof(struct fi_cq_err_entry)); - - rdm_str_addr_sr_init_data(source, len, 0xab); - rdm_str_addr_sr_init_data(target, len, 0); - - sz = fi_send(ep[0], source, len, NULL, gni_addr[1], target); - cr_assert_eq(sz, 0); - - sz = fi_recv(ep[1], target, len, NULL, FI_ADDR_UNSPEC, source); - cr_assert_eq(sz, 0); - - /* need to progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - if (ret == 1) { - dest_done = 1; - } - - if (ret == -FI_EAVAIL) { - ret = rdm_str_addr_sr_check_err_cqe(msg_cq[1]); - cr_assert((ret == FI_SUCCESS), "Err CQE processing failed"); - dest_done = 1; - } - } while (!(source_done && dest_done)); - - cr_assert(rdm_str_addr_sr_check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_str_addr_sr, 
send) -{ - rdm_str_addr_sr_xfer_for_each_size(do_send, 1, BUF_SZ); -} diff --git a/prov/gni/test/rdm_atomic.c b/prov/gni/test/rdm_atomic.c deleted file mode 100644 index 99d237ee361..00000000000 --- a/prov/gni/test/rdm_atomic.c +++ /dev/null @@ -1,5519 +0,0 @@ -/* - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2019-2020 Triad National Security, LLC. - * All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include - -#include "gnix_vc.h" -#include "gnix_cm_nic.h" -#include "gnix_hashtable.h" -#include "gnix_atomic.h" -#include "gnix_util.h" - -#include -#include "gnix_rdma_headers.h" -#include "common.h" - -#if 1 -#define dbg_printf(...) -#else -#define dbg_printf(...) \ - do { \ - printf(__VA_ARGS__); \ - fflush(stdout); \ - } while (0) -#endif - -#define NUMEPS 2 - -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fid_fabric *fab; -static struct fid_domain *dom[NUMEPS]; -static struct fi_gni_ops_domain *gni_domain_ops[NUMEPS]; -static struct fid_ep *ep[NUMEPS]; -static struct fid_av *av[NUMEPS]; -static struct fi_info *hints; -static struct fi_info *fi; -static void *ep_name[NUMEPS]; -static size_t gni_addr[NUMEPS]; -static struct fid_cq *send_cq[NUMEPS]; -static struct fid_cq *recv_cq[NUMEPS]; -static struct fi_cq_attr cq_attr; - -#define BUF_SZ (64*1024) -static char *target, *target_base; -static char *source, *source_base; -static char *uc_source, *uc_source_base; -static struct fid_mr *rem_mr[NUMEPS], *loc_mr[NUMEPS]; -static uint64_t mr_key[NUMEPS]; - -static struct fid_cntr *write_cntr[NUMEPS], *read_cntr[NUMEPS]; -static struct fid_cntr *rwrite_cntr; -static struct fid_cntr *rread_cntr; -static struct fi_cntr_attr cntr_attr = { - .events = FI_CNTR_EVENTS_COMP, - .flags = 0 -}; -static uint64_t writes[NUMEPS] = {0}, reads[NUMEPS] = {0}, - write_errs[NUMEPS] = {0}, read_errs[NUMEPS] = {0}; - -static void common_atomic_setup(uint32_t version, int mr_mode) -{ - int ret = 0, i = 0, j = 0; - struct fi_av_attr attr; - size_t addrlen = 0; - - memset(&attr, 0, sizeof(attr)); - attr.type = FI_AV_MAP; - attr.count = NUMEPS; - - cq_attr.format = FI_CQ_FORMAT_TAGGED; - cq_attr.size = 1024; - cq_attr.wait_obj = 0; - - hints->ep_attr->type = FI_EP_RDM; - 
hints->domain_attr->mr_mode = mr_mode; - hints->domain_attr->cq_data_size = 4; - hints->mode = mode_bits; - hints->fabric_attr->prov_name = strdup("gni"); - hints->caps |= FI_ATOMIC | FI_READ | FI_REMOTE_READ | - FI_WRITE | FI_REMOTE_WRITE; - - target_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(target_base); - - source_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(source_base); - - uc_source_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(uc_source_base); - - target = GNIT_ALIGN_BUFFER(char *, target_base); - source = GNIT_ALIGN_BUFFER(char *, source_base); - uc_source = GNIT_ALIGN_BUFFER(char *, uc_source_base); - - ret = fi_getinfo(version, NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - for (; i < NUMEPS; i++) { - ret = fi_domain(fab, fi, dom + i, NULL); - cr_assert(!ret, "fi_domain"); - - ret = fi_open_ops(&dom[i]->fid, FI_GNI_DOMAIN_OPS_1, - 0, (void **) (gni_domain_ops + i), NULL); - cr_assert(!ret, "fi_open_ops"); - - ret = fi_av_open(dom[i], &attr, av + i, NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_endpoint(dom[i], fi, &ep[i], NULL); - cr_assert(!ret, "fi_endpoint"); - - ret = fi_getname(&ep[i]->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - ep_name[i] = malloc(addrlen); - cr_assert(ep_name[i] != NULL); - - ret = fi_getname(&ep[i]->fid, ep_name[i], &addrlen); - cr_assert(addrlen > 0); - - ret = fi_cq_open(dom[i], &cq_attr, send_cq + i, 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_ep_bind(ep[i], &send_cq[i]->fid, FI_TRANSMIT); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cq_open(dom[i], &cq_attr, recv_cq + i, 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_ep_bind(ep[i], &recv_cq[i]->fid, FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - } - - for (i = 0; i < NUMEPS; i++) { - int target_requested_key = USING_SCALABLE(fi) ? (i * 2) : 0; - int source_requested_key = USING_SCALABLE(fi) ? 
(i * 2) + 1 : 0; - - for (j = 0; j < NUMEPS; j++) { - ret = fi_av_insert(av[i], ep_name[j], 1, &gni_addr[j], - 0, NULL); - cr_assert(ret == 1); - } - - ret = fi_ep_bind(ep[i], &av[i]->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_mr_reg(dom[i], - target, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - target_requested_key, - 0, - rem_mr + i, - &target); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom[i], - source, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - source_requested_key, - 0, - loc_mr + i, - &source); - cr_assert_eq(ret, 0); - - if (USING_SCALABLE(fi)) { - MR_ENABLE(rem_mr[i], target, BUF_SZ); - MR_ENABLE(loc_mr[i], source, BUF_SZ); - } - - mr_key[i] = fi_mr_key(rem_mr[i]); - - ret = fi_cntr_open(dom[i], &cntr_attr, write_cntr + i, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[i], &write_cntr[i]->fid, FI_WRITE); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[i], &cntr_attr, read_cntr + i, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[i], &read_cntr[i]->fid, FI_READ); - cr_assert(!ret, "fi_ep_bind"); - - if (i != 1) { - ret = fi_enable(ep[i]); - cr_assert(!ret, "fi_ep_enable"); - } - } - - if (hints->caps & FI_RMA_EVENT) { - ret = fi_cntr_open(dom[1], &cntr_attr, &rwrite_cntr, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[1], &rwrite_cntr->fid, FI_REMOTE_WRITE); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[1], &cntr_attr, &rread_cntr, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[1], &rread_cntr->fid, FI_REMOTE_READ); - cr_assert(!ret, "fi_ep_bind"); - - } - - ret = fi_enable(ep[1]); - cr_assert(!ret, "fi_ep_enable"); - -} - -static inline void __rdm_atomic_setup(uint32_t version, int mr_mode) -{ - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - common_atomic_setup(version, mr_mode); -} - -static void rdm_atomic_default_setup(void) -{ - __rdm_atomic_setup(fi_version(), GNIX_DEFAULT_MR_MODE); -} - -static void rdm_atomic_basic_setup(void) -{ - 
__rdm_atomic_setup(fi_version(), GNIX_MR_BASIC); -} - -static void rdm_atomic_scalable_setup(void) -{ - __rdm_atomic_setup(fi_version(), GNIX_MR_SCALABLE); -} - -static inline void __rdm_atomic_rcntr_setup(uint32_t version, int mr_mode) -{ - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - hints->caps = FI_RMA_EVENT; - common_atomic_setup(version, mr_mode); -} - -static void rdm_atomic_rcntr_basic_setup(void) -{ - __rdm_atomic_rcntr_setup(fi_version(), GNIX_MR_BASIC); -} - -static void rdm_atomic_rcntr_scalable_setup(void) -{ - __rdm_atomic_rcntr_setup(fi_version(), GNIX_MR_SCALABLE); -} - -void rdm_atomic_teardown(void) -{ - int ret = 0, i = 0; - - if (hints->caps & FI_RMA_EVENT) { - ret = fi_close(&rwrite_cntr->fid); - cr_assert(!ret, "failure in closing dom[1] rwrite counter."); - - ret = fi_close(&rread_cntr->fid); - cr_assert(!ret, "failure in closing dom[1] rread counter."); - } - - for (; i < NUMEPS; i++) { - ret = fi_close(&read_cntr[i]->fid); - cr_assert(!ret, "failure in closing read counter."); - - ret = fi_close(&write_cntr[i]->fid); - cr_assert(!ret, "failure in closing write counter."); - - ret = fi_close(&loc_mr[i]->fid); - cr_assert(!ret, "failure in closing av."); - - ret = fi_close(&rem_mr[i]->fid); - cr_assert(!ret, "failure in closing av."); - - ret = fi_close(&ep[i]->fid); - cr_assert(!ret, "failure in closing ep."); - - ret = fi_close(&recv_cq[i]->fid); - cr_assert(!ret, "failure in recv cq."); - - ret = fi_close(&send_cq[i]->fid); - cr_assert(!ret, "failure in send cq."); - - ret = fi_close(&av[i]->fid); - cr_assert(!ret, "failure in closing av."); - - ret = fi_close(&dom[i]->fid); - cr_assert(!ret, "failure in closing domain."); - - free(ep_name[i]); - } - - free(target_base); - free(source_base); - free(uc_source_base); - - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); - - fi_freeinfo(fi); - fi_freeinfo(hints); -} - -void rdm_atomic_check_tcqe(struct fi_cq_tagged_entry *tcqe, void *ctx, - uint64_t 
flags, uint64_t data) -{ - cr_assert(tcqe->op_context == ctx, "CQE Context mismatch"); - cr_assert(tcqe->flags == flags, "CQE flags mismatch"); - - if (flags & FI_REMOTE_CQ_DATA) { - cr_assert(tcqe->data == data, "CQE data invalid"); - } else { - cr_assert(tcqe->data == 0, "CQE data invalid"); - } - - cr_assert(tcqe->len == 0, "CQE length mismatch"); - cr_assert(tcqe->buf == 0, "CQE address mismatch"); - cr_assert(tcqe->tag == 0, "CQE tag invalid"); -} - -void rdm_atomic_check_cntrs(uint64_t w[], uint64_t r[], uint64_t w_e[], - uint64_t r_e[]) -{ - int i = 0; - - for (; i < NUMEPS; i++) { - writes[i] += w[i]; - reads[i] += r[i]; - write_errs[i] += w_e[i]; - read_errs[i] += r_e[i]; - - cr_assert(fi_cntr_read(write_cntr[i]) == writes[i], - "Bad write count"); - cr_assert(fi_cntr_read(read_cntr[i]) == reads[i], - "Bad read count"); - cr_assert(fi_cntr_readerr(write_cntr[i]) == write_errs[i], - "Bad write err count"); - cr_assert(fi_cntr_readerr(read_cntr[i]) == read_errs[i], - "Bad read err count"); - } - - if (hints->caps & FI_RMA_EVENT) { - cr_assert(fi_cntr_read(rwrite_cntr) == writes[0], - "Bad rwrite count"); - cr_assert(fi_cntr_read(rread_cntr) == reads[0], - "Bad rread count"); - cr_assert(fi_cntr_readerr(rwrite_cntr) == 0, - "Bad rwrite err count"); - cr_assert(fi_cntr_readerr(rread_cntr) == 0, - "Bad rread err count"); - } -} - -void rdm_atomic_xfer_for_each_size(void (*xfer)(int len), int slen, int elen) -{ - int i; - - for (i = slen; i <= elen; i *= 2) { - xfer(i); - } -} - -void rdm_atomic_err_inject_enable(void) -{ - int ret, err_count_val = 1, i = 0; - - for (; i < NUMEPS; i++) { - ret = gni_domain_ops[i]->set_val(&dom[i]->fid, - GNI_ERR_INJECT_COUNT, - &err_count_val); - cr_assert(!ret, "setval(GNI_ERR_INJECT_COUNT)"); - } -} - -/******************************************************************************* - * Test RMA functions - ******************************************************************************/ - -TestSuite(rdm_atomic_default, - .init = 
rdm_atomic_default_setup, - .fini = rdm_atomic_teardown, - .disabled = false); - -TestSuite(rdm_atomic_basic, - .init = rdm_atomic_basic_setup, - .fini = rdm_atomic_teardown, - .disabled = false); - -TestSuite(rdm_atomic_scalable, - .init = rdm_atomic_scalable_setup, - .fini = rdm_atomic_teardown, - .disabled = false); - -#if 1 -#define SOURCE_DATA 0xBBBB0000CCCCULL -#define TARGET_DATA 0xAAAA0000DDDDULL -#define SOURCE_DATA_FP 0.83203125 -#define TARGET_DATA_FP 0.83984375 -#else -/* swapped */ -#define TARGET_DATA 0xB0000000CULL -#define SOURCE_DATA 0xA0000000DULL -#define TARGET_DATA_FP 0.83203125 -#define SOURCE_DATA_FP 0.83984375 -#endif -#define FETCH_SOURCE_DATA 0xACEDACEDULL -#define DATA_MASK 0xa5a5a5a5a5a5a5a5 -#define U32_MASK 0xFFFFFFFFULL - -#define ALL_GNI_DATATYPES_SUPPORTED { 0,0,0,0,1,1,1,1,1,1 } -#define GNI_DATATYPES_NO_FP_SUPPORTED { 0,0,0,0,1,1,1,1,0,0 } -#define NO_DATATYPES_SUPPORTED { } - -/****************************************************************************** - * - * Basic atomics - * - *****************************************************************************/ - -int supported_atomic_ops[FI_ATOMIC_OP_LAST][FI_DATATYPE_LAST] = { - [FI_MIN] = { 0,0,0,0,1,0,1,0,1,1 }, - [FI_MAX] = { 0,0,0,0,1,0,1,0,1,1 }, - [FI_SUM] = { 0,0,0,0,1,1,1,1,1,0 }, /* GNI DP sum is broken */ - [FI_PROD] = NO_DATATYPES_SUPPORTED, - [FI_LOR] = NO_DATATYPES_SUPPORTED, - [FI_LAND] = NO_DATATYPES_SUPPORTED, - [FI_BOR] = GNI_DATATYPES_NO_FP_SUPPORTED, - [FI_BAND] = GNI_DATATYPES_NO_FP_SUPPORTED, - [FI_LXOR] = NO_DATATYPES_SUPPORTED, - [FI_BXOR] = GNI_DATATYPES_NO_FP_SUPPORTED, - [FI_ATOMIC_READ] = NO_DATATYPES_SUPPORTED, - [FI_ATOMIC_WRITE] = ALL_GNI_DATATYPES_SUPPORTED, - [FI_CSWAP] = NO_DATATYPES_SUPPORTED, - [FI_CSWAP_NE] = NO_DATATYPES_SUPPORTED, - [FI_CSWAP_LE] = NO_DATATYPES_SUPPORTED, - [FI_CSWAP_LT] = NO_DATATYPES_SUPPORTED, - [FI_CSWAP_GE] = NO_DATATYPES_SUPPORTED, - [FI_CSWAP_GT] = NO_DATATYPES_SUPPORTED, - [FI_MSWAP] = NO_DATATYPES_SUPPORTED, -}; - 
-void do_invalid_atomic(enum fi_datatype dt, enum fi_op op) -{ - ssize_t sz; - size_t count; - - if (!supported_atomic_ops[op][dt]) { - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], dt, op, target); - - cr_assert(sz == -FI_EOPNOTSUPP); - - sz = fi_atomicvalid(ep[0], dt, op, &count); - cr_assert(sz == -FI_EOPNOTSUPP, "fi_atomicvalid() succeeded\n"); - } else { - sz = fi_atomicvalid(ep[0], dt, op, &count); - cr_assert(!sz, "fi_atomicvalid() failed\n"); - cr_assert(count == 1, "fi_atomicvalid(): bad count \n"); - } -} - -Test(rdm_atomic_default, invalid_atomic) -{ - int i, j; - - for(i = 0; i < FI_ATOMIC_OP_LAST; i++) { - for(j = 0; j < FI_DATATYPE_LAST; j++) { - do_invalid_atomic(j, i); - } - } -} - -void do_min(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t min; - float min_fp; - double min_dp; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* i64 */ - *((int64_t *)source) = SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_INT64, FI_MIN, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - - w[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min = ((int64_t)SOURCE_DATA < (int64_t)TARGET_DATA) ? 
- SOURCE_DATA : TARGET_DATA; - ret = *((int64_t *)target) == min; - cr_assert(ret, "Data mismatch"); - - /* i32 */ - *((int64_t *)source) = SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_INT32, FI_MIN, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min = ((int32_t)SOURCE_DATA < (int32_t)TARGET_DATA) ? - SOURCE_DATA : TARGET_DATA; - min = (min & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((int64_t *)target) == min; - cr_assert(ret, "Data mismatch"); - - /* float */ - *((float *)source) = SOURCE_DATA_FP; - *((float *)target) = TARGET_DATA_FP; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_FLOAT, FI_MIN, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min_fp = (float)SOURCE_DATA_FP < (float)TARGET_DATA_FP ? 
- SOURCE_DATA_FP : TARGET_DATA_FP; - ret = *((float *)target) == min_fp; - cr_assert(ret, "Data mismatch"); - - /* double */ - *((double *)source) = SOURCE_DATA_FP; - *((double *)target) = TARGET_DATA_FP; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_DOUBLE, FI_MIN, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min_dp = (double)SOURCE_DATA_FP < (double)TARGET_DATA_FP ? - SOURCE_DATA_FP : TARGET_DATA_FP; - ret = *((double *)target) == min_dp; - cr_assert(ret, "Data mismatch"); -} - -Test(rdm_atomic_basic, min) -{ - rdm_atomic_xfer_for_each_size(do_min, 1, 1); -} - -Test(rdm_atomic_scalable, min) -{ - rdm_atomic_xfer_for_each_size(do_min, 1, 1); -} - -void do_max(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t min; - float min_fp; - double min_dp; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* i64 */ - *((int64_t *)source) = SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_INT64, FI_MAX, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - - w[0] = 1; - 
rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min = ((int64_t)SOURCE_DATA > (int64_t)TARGET_DATA) ? - SOURCE_DATA : TARGET_DATA; - ret = *((int64_t *)target) == min; - cr_assert(ret, "Data mismatch"); - - /* i32 */ - *((int64_t *)source) = SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_INT32, FI_MAX, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min = ((int32_t)SOURCE_DATA > (int32_t)TARGET_DATA) ? - SOURCE_DATA : TARGET_DATA; - min = (min & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((int64_t *)target) == min; - cr_assert(ret, "Data mismatch"); - - /* float */ - *((float *)source) = SOURCE_DATA_FP; - *((float *)target) = TARGET_DATA_FP; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_FLOAT, FI_MAX, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min_fp = (float)SOURCE_DATA_FP > (float)TARGET_DATA_FP ? 
- SOURCE_DATA_FP : TARGET_DATA_FP; - ret = *((float *)target) == min_fp; - cr_assert(ret, "Data mismatch"); - - /* double */ - *((double *)source) = SOURCE_DATA_FP; - *((double *)target) = TARGET_DATA_FP; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_DOUBLE, FI_MAX, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min_dp = (double)SOURCE_DATA_FP > (double)TARGET_DATA_FP ? - SOURCE_DATA_FP : TARGET_DATA_FP; - ret = *((double *)target) == min_dp; - cr_assert(ret, "Data mismatch"); -} - -Test(rdm_atomic_basic, max) -{ - rdm_atomic_xfer_for_each_size(do_max, 1, 1); -} - -Test(rdm_atomic_scalable, max) -{ - rdm_atomic_xfer_for_each_size(do_max, 1, 1); -} - -void do_sum(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* u64 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_UINT64, FI_SUM, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - - w[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got 
write context event!\n"); - - ret = *((uint64_t *)target) == (SOURCE_DATA + TARGET_DATA); - cr_assert(ret, "Data mismatch"); - - /* U32 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_UINT32, FI_SUM, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == - (uint64_t)((SOURCE_DATA & U32_MASK) + TARGET_DATA); - cr_assert(ret, "Data mismatch"); - - /* i64 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_INT64, FI_SUM, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == (SOURCE_DATA + TARGET_DATA); - cr_assert(ret, "Data mismatch"); - - /* i32 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_INT32, FI_SUM, target); - 
cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == - (uint64_t)((SOURCE_DATA & U32_MASK) + TARGET_DATA); - cr_assert(ret, "Data mismatch"); - - /* float */ - *((float *)source) = SOURCE_DATA_FP; - *((float *)target) = TARGET_DATA_FP; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_FLOAT, FI_SUM, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((float *)target) == - (float)(SOURCE_DATA_FP + TARGET_DATA_FP); - cr_assert(ret, "Data mismatch"); -} - -Test(rdm_atomic_basic, sum) -{ - rdm_atomic_xfer_for_each_size(do_sum, 1, 1); -} - -Test(rdm_atomic_scalable, sum) -{ - rdm_atomic_xfer_for_each_size(do_sum, 1, 1); -} - -void do_bor(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t res; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* u64 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - 
loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_UINT64, FI_BOR, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - - w[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA | TARGET_DATA; - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); - - /* U32 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_UINT32, FI_BOR, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA | TARGET_DATA; - res = (res & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); - - /* i64 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_INT64, FI_BOR, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - 
dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA | TARGET_DATA; - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); - - /* i32 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_INT32, FI_BOR, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA | TARGET_DATA; - res = (res & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); -} - -Test(rdm_atomic_basic, bor) -{ - rdm_atomic_xfer_for_each_size(do_bor, 1, 1); -} - -Test(rdm_atomic_scalable, bor) -{ - rdm_atomic_xfer_for_each_size(do_bor, 1, 1); -} - -void do_band(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t res; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* u64 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_UINT64, FI_BAND, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, 
"fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - - w[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA & TARGET_DATA; - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); - - /* U32 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_UINT32, FI_BAND, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA & TARGET_DATA; - res = (res & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); - - /* i64 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_INT64, FI_BAND, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA & TARGET_DATA; - ret = *((uint64_t 
*)target) == res; - cr_assert(ret, "Data mismatch"); - - /* i32 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_INT32, FI_BAND, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA & TARGET_DATA; - res = (res & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); -} - -Test(rdm_atomic_basic, band) -{ - rdm_atomic_xfer_for_each_size(do_band, 1, 1); -} - -Test(rdm_atomic_scalable, band) -{ - rdm_atomic_xfer_for_each_size(do_band, 1, 1); -} - -void do_bxor(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t res; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* u64 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_UINT64, FI_BXOR, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - - w[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = 
SOURCE_DATA ^ TARGET_DATA; - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); - - /* U32 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_UINT32, FI_BXOR, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA ^ TARGET_DATA; - res = (res & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); - - /* i64 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], - FI_INT64, FI_BXOR, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA ^ TARGET_DATA; - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); - - /* i32 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], 
- FI_INT32, FI_BXOR, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA ^ TARGET_DATA; - res = (res & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); -} - -Test(rdm_atomic_basic, bxor) -{ - rdm_atomic_xfer_for_each_size(do_bxor, 1, 1); -} - -Test(rdm_atomic_scalable, bxor) -{ - rdm_atomic_xfer_for_each_size(do_bxor, 1, 1); -} - -#define AX_S_MASK 0x00000000FFFFFFFFUL -#define AX_OP1 0x000000000FF0FFFFUL -#define AX_OP2 0xFFFF0000FFFF0000UL -#define AX_TGT_DATA 0x00FFFF0000FFFF00UL -void do_axor(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t exp; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - struct fi_gni_ops_ep *ep_ops; - uint64_t operand[2]; - - ret = fi_open_ops(&ep[0]->fid, FI_GNI_EP_OPS_1, 0, - (void **) &ep_ops, NULL); - cr_assert(!ret, "fi_open_ops endpoint"); - - /* u64 */ - operand[0] = AX_OP1; /* AND operand */ - operand[1] = AX_OP2; /* XOR operand */ - *((uint64_t *)target) = AX_TGT_DATA; - dbg_printf("initial %016lx\n", *((uint64_t *)target)); - - sz = ep_ops->native_amo(ep[0], operand, 1, NULL, NULL, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_LONG_DOUBLE, - GNIX_FAB_RQ_NAMO_AX, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d 
%d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - - w[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("result %016lx\n", *((uint64_t *)target)); - exp = (AX_OP1 & AX_TGT_DATA) ^ AX_OP2; - ret = *((uint64_t *)target) == exp; - cr_assert(ret, "Data mismatch"); - - /* U32 */ - operand[0] = AX_OP1; - operand[1] = AX_OP2; - *((uint64_t *)target) = AX_TGT_DATA; - - dbg_printf("initial %016lx\n", *((uint64_t *)target)); - dbg_printf("AX_OP1 %016lx\n", AX_OP1); - dbg_printf("AX_OP2 %016lx\n", AX_OP2); - - sz = ep_ops->native_amo(ep[0], operand, 1, NULL, NULL, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_UINT64, - GNIX_FAB_RQ_NAMO_AX_S, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("AX_TGT_DATA & (AX_OP1 | ~AX_S_MASK) %016lx\n", - AX_TGT_DATA & (AX_OP1 | ~AX_S_MASK)); - dbg_printf("AX_OP2 %016lx\n", AX_OP2); - exp = (AX_TGT_DATA & (AX_OP1 | ~AX_S_MASK)) ^ (AX_OP2 & AX_S_MASK); - ret = *((uint64_t *)target) == exp; - cr_assert(ret, "Data mismatch expected %016lx: result %016lx", - exp, *((uint64_t *)target)); - dbg_printf("result %016lx\n", *((uint64_t *)target)); - - /* fetching u64 */ - operand[0] = AX_OP1; /* AND operand */ - operand[1] = AX_OP2; /* XOR operand */ - *((uint64_t *)target) = AX_TGT_DATA; - *((uint64_t *)source) = 0; - dbg_printf("initial %016lx\n", *((uint64_t *)target)); - - sz = ep_ops->native_amo(ep[0], operand, 1, NULL, source, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_UINT64, - GNIX_FAB_RQ_NAMO_FAX, 
target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - - w[0] = 0; - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("result %016lx\n", *((uint64_t *)target)); - exp = (AX_OP1 & AX_TGT_DATA) ^ AX_OP2; - ret = *((uint64_t *)target) == exp; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == AX_TGT_DATA; - dbg_printf("fetchv %016lx\n", *((uint64_t *)source)); - cr_assert(ret, "Data mismatch expected %016lx: fetchv %016lx", - AX_TGT_DATA, *((uint64_t *)source)); - cr_assert(ret, "Data mismatch"); - - /* fetching U32 */ - operand[0] = AX_OP1; - operand[1] = AX_OP2; - *((uint64_t *)target) = AX_TGT_DATA; - *((uint64_t *)source) = 0; - - dbg_printf("initial %016lx\n", *((uint64_t *)target)); - dbg_printf("source %016lx\n", *((uint64_t *)source)); - dbg_printf("AX_OP1 %016lx\n", AX_OP1); - dbg_printf("AX_OP2 %016lx\n", AX_OP2); - - sz = ep_ops->native_amo(ep[0], operand, 1, NULL, source, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_UINT32, - GNIX_FAB_RQ_NAMO_FAX_S, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("AX_TGT_DATA & (AX_OP1 | ~AX_S_MASK) %016lx\n", - AX_TGT_DATA & (AX_OP1 | ~AX_S_MASK)); - dbg_printf("AX_OP2 %016lx\n", AX_OP2); - exp = (AX_TGT_DATA & 
(AX_OP1 | ~AX_S_MASK)) ^ (AX_OP2 & AX_S_MASK); - ret = *((uint64_t *)target) == exp; - cr_assert(ret, "Data mismatch expected %016lx: result %016lx", - exp, *((uint64_t *)target)); - dbg_printf("result %016lx\n", *((uint64_t *)target)); - /* 32 bit fetch */ - ret = *((uint64_t *)source) == (AX_TGT_DATA & AX_S_MASK); - dbg_printf("fetchv %016lx\n", *((uint64_t *)source)); - cr_assert(ret, "Data mismatch expected %016lx: fetchv %016lx", - AX_TGT_DATA & AX_S_MASK, *((uint64_t *)source)); -} - -Test(rdm_atomic_basic, axor) -{ - rdm_atomic_xfer_for_each_size(do_axor, 1, 1); -} - -Test(rdm_atomic_scalable, axor) -{ - rdm_atomic_xfer_for_each_size(do_axor, 1, 1); -} - -void do_atomic_write(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* u64 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_UINT64, FI_ATOMIC_WRITE, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - - w[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == SOURCE_DATA; - cr_assert(ret, "Data mismatch"); - - /* U32 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_UINT32, FI_ATOMIC_WRITE, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = 
UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == - (uint64_t)((SOURCE_DATA & U32_MASK) | - (TARGET_DATA & (U32_MASK << 32))); - cr_assert(ret, "Data mismatch"); - - /* i64 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_INT64, FI_ATOMIC_WRITE, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == SOURCE_DATA; - cr_assert(ret, "Data mismatch"); - - /* i32 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_INT32, FI_ATOMIC_WRITE, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write 
context event!\n"); - - ret = *((uint64_t *)target) == - (uint64_t)((SOURCE_DATA & U32_MASK) | - (TARGET_DATA & (U32_MASK << 32))); - cr_assert(ret, "Data mismatch"); - - /* float */ - *((float *)source) = SOURCE_DATA_FP; - *((float *)target) = TARGET_DATA_FP; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_FLOAT, FI_ATOMIC_WRITE, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((float *)target) == (float)SOURCE_DATA_FP; - cr_assert(ret, "Data mismatch"); - - /* double */ - *((double *)source) = SOURCE_DATA_FP; - *((double *)target) = TARGET_DATA_FP; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_DOUBLE, FI_ATOMIC_WRITE, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((double *)target) == (double)SOURCE_DATA_FP; - cr_assert(ret, "Data mismatch"); -} - -Test(rdm_atomic_basic, write) -{ - rdm_atomic_xfer_for_each_size(do_atomic_write, 1, 1); -} - -Test(rdm_atomic_scalable, write) -{ - rdm_atomic_xfer_for_each_size(do_atomic_write, 1, 1); -} - -void 
do_min_buf(void *s, void *t) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t min; - float min_fp; - double min_dp; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* i64 */ - *((int64_t *)s) = SOURCE_DATA; - *((int64_t *)t) = TARGET_DATA; - sz = fi_atomic(ep[0], s, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, t), mr_key[1], - FI_INT64, FI_MIN, t); - if ((uint64_t)t & 0x7) { - cr_assert_eq(sz, -FI_EINVAL); - } else { - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], t, 0)); - rdm_atomic_check_tcqe(&cqe, t, FI_ATOMIC | FI_WRITE, 0); - - w[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min = ((int64_t)SOURCE_DATA < (int64_t)TARGET_DATA) ? - SOURCE_DATA : TARGET_DATA; - ret = *((int64_t *)t) == min; - cr_assert(ret, "Data mismatch"); - } - - /* i32 */ - *((int64_t *)s) = SOURCE_DATA; - *((int64_t *)t) = TARGET_DATA; - sz = fi_atomic(ep[0], s, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, t), mr_key[1], - FI_INT32, FI_MIN, t); - if ((uint64_t)t & 0x3) { - cr_assert_eq(sz, -FI_EINVAL); - } else { - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], t, 0)); - rdm_atomic_check_tcqe(&cqe, t, FI_ATOMIC | FI_WRITE, 0); - - w[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min = ((int32_t)SOURCE_DATA < (int32_t)TARGET_DATA) ? 
- SOURCE_DATA : TARGET_DATA; - min = (min & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((int64_t *)t) == min; - cr_assert(ret, "Data mismatch"); - } - - /* float */ - *((float *)s) = SOURCE_DATA_FP; - *((float *)t) = TARGET_DATA_FP; - sz = fi_atomic(ep[0], s, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, t), mr_key[1], - FI_FLOAT, FI_MIN, t); - if ((uint64_t)t & 0x3) { - cr_assert_eq(sz, -FI_EINVAL); - } else { - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], t, 0)); - rdm_atomic_check_tcqe(&cqe, t, FI_ATOMIC | FI_WRITE, 0); - - w[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min_fp = (float)SOURCE_DATA_FP < (float)TARGET_DATA_FP ? - SOURCE_DATA_FP : TARGET_DATA_FP; - ret = *((float *)t) == min_fp; - cr_assert(ret, "Data mismatch"); - } - - /* double */ - *((double *)s) = SOURCE_DATA_FP; - *((double *)t) = TARGET_DATA_FP; - sz = fi_atomic(ep[0], s, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, t), mr_key[1], - FI_DOUBLE, FI_MIN, t); - if ((uint64_t)t & 0x7) { - cr_assert_eq(sz, -FI_EINVAL); - } else { - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], t, 0)); - rdm_atomic_check_tcqe(&cqe, t, FI_ATOMIC | FI_WRITE, 0); - - w[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min_dp = (double)SOURCE_DATA_FP < (double)TARGET_DATA_FP ? 
- SOURCE_DATA_FP : TARGET_DATA_FP; - ret = *((double *)t) == min_dp; - cr_assert(ret, "Data mismatch"); - } -} - -static inline void __atomic_alignment(void) -{ - int s_off, t_off; - - for (s_off = 0; s_off < 7; s_off++) { - for (t_off = 0; t_off < 7; t_off++) { - do_min_buf(source + s_off, target + t_off); - } - } -} - -Test(rdm_atomic_basic, atomic_alignment) -{ - __atomic_alignment(); -} - -Test(rdm_atomic_scalable, atomic_alignment) -{ - __atomic_alignment(); -} - -static inline void __atomicv(void) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t min; - float min_fp; - double min_dp; - struct fi_ioc iov; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - iov.addr = source; - iov.count = 1; - - /* i64 */ - *((int64_t *)source) = SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - sz = fi_atomicv(ep[0], &iov, (void **)loc_mr, 1, gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT64, FI_MIN, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - - w[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min = ((int64_t)SOURCE_DATA < (int64_t)TARGET_DATA) ? 
- SOURCE_DATA : TARGET_DATA; - ret = *((int64_t *)target) == min; - cr_assert(ret, "Data mismatch"); - - /* i32 */ - *((int64_t *)source) = SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - sz = fi_atomicv(ep[0], &iov, (void **)loc_mr, 1, gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT32, FI_MIN, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min = ((int32_t)SOURCE_DATA < (int32_t)TARGET_DATA) ? - SOURCE_DATA : TARGET_DATA; - min = (min & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((int64_t *)target) == min; - cr_assert(ret, "Data mismatch"); - - /* float */ - *((float *)source) = SOURCE_DATA_FP; - *((float *)target) = TARGET_DATA_FP; - sz = fi_atomicv(ep[0], &iov, (void **)loc_mr, 1, gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_FLOAT, FI_MIN, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min_fp = (float)SOURCE_DATA_FP < (float)TARGET_DATA_FP ? 
- SOURCE_DATA_FP : TARGET_DATA_FP; - ret = *((float *)target) == min_fp; - cr_assert(ret, "Data mismatch"); - - /* double */ - *((double *)source) = SOURCE_DATA_FP; - *((double *)target) = TARGET_DATA_FP; - sz = fi_atomicv(ep[0], &iov, (void **)loc_mr, 1, gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_DOUBLE, FI_MIN, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min_dp = (double)SOURCE_DATA_FP < (double)TARGET_DATA_FP ? - SOURCE_DATA_FP : TARGET_DATA_FP; - ret = *((double *)target) == min_dp; - cr_assert(ret, "Data mismatch"); -} - -Test(rdm_atomic_basic, atomicv) -{ - __atomicv(); -} - -Test(rdm_atomic_scalable, atomicv) -{ - __atomicv(); -} - -static inline void __atomicmsg(void) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t min; - float min_fp; - double min_dp; - struct fi_msg_atomic msg; - struct fi_ioc msg_iov; - struct fi_rma_ioc rma_iov; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - msg_iov.addr = source; - msg_iov.count = 1; - msg.msg_iov = &msg_iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = gni_addr[1]; - rma_iov.addr = _REM_ADDR(fi, target, target); - rma_iov.count = 1; - rma_iov.key = mr_key[1]; - msg.rma_iov = &rma_iov; - msg.context = target; - msg.op = FI_MIN; - - /* i64 */ - *((int64_t *)source) = SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - msg.datatype = FI_INT64; - sz = fi_atomicmsg(ep[0], &msg, 0); - 
cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], NULL, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - - w[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min = ((int64_t)SOURCE_DATA < (int64_t)TARGET_DATA) ? - SOURCE_DATA : TARGET_DATA; - ret = *((int64_t *)target) == min; - cr_assert(ret, "Data mismatch"); - - /* i32 */ - *((int64_t *)source) = SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - msg.datatype = FI_INT32; - sz = fi_atomicmsg(ep[0], &msg, 0); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], NULL, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min = ((int32_t)SOURCE_DATA < (int32_t)TARGET_DATA) ? 
- SOURCE_DATA : TARGET_DATA; - min = (min & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((int64_t *)target) == min; - cr_assert(ret, "Data mismatch"); - - /* float */ - *((float *)source) = SOURCE_DATA_FP; - *((float *)target) = TARGET_DATA_FP; - msg.datatype = FI_FLOAT; - sz = fi_atomicmsg(ep[0], &msg, 0); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], NULL, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min_fp = (float)SOURCE_DATA_FP < (float)TARGET_DATA_FP ? - SOURCE_DATA_FP : TARGET_DATA_FP; - ret = *((float *)target) == min_fp; - cr_assert(ret, "Data mismatch"); - - /* double */ - *((double *)source) = SOURCE_DATA_FP; - *((double *)target) = TARGET_DATA_FP; - msg.datatype = FI_DOUBLE; - sz = fi_atomicmsg(ep[0], &msg, 0); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], NULL, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min_dp = (double)SOURCE_DATA_FP < (double)TARGET_DATA_FP ? 
- SOURCE_DATA_FP : TARGET_DATA_FP; - ret = *((double *)target) == min_dp; - cr_assert(ret, "Data mismatch"); -} - -Test(rdm_atomic_basic, atomicmsg) -{ - __atomicmsg(); -} - -Test(rdm_atomic_scalable, atomicmsg) -{ - __atomicmsg(); -} - -static inline void __atomicinject(void) -{ - int ret, loops; - ssize_t sz; - struct fi_cq_tagged_entry cqe; - uint64_t min; - float min_fp; - double min_dp; - static gnix_mr_cache_t *cache; - struct gnix_fid_ep *ep_priv; - int already_registered = 0; - - /* i64 */ - *((int64_t *)source) = SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - - - if (!USING_SCALABLE(fi)) { - ep_priv = container_of(ep[0], struct gnix_fid_ep, ep_fid); - cache = GET_DOMAIN_RW_CACHE(ep_priv->domain); - cr_assert(cache != NULL); - already_registered = ofi_atomic_get32(&cache->inuse.elements); - } - - sz = fi_inject_atomic(ep[0], source, 1, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_INT64, FI_MIN); - cr_assert_eq(sz, 0); - - if (!USING_SCALABLE(fi)) { - /* - * shouldn't have registered the source buffer, - * trust but verify - */ - cr_assert(ofi_atomic_get32(&cache->inuse.elements) - == already_registered); - } - - min = ((int64_t)SOURCE_DATA < (int64_t)TARGET_DATA) ? - SOURCE_DATA : TARGET_DATA; - loops = 0; - while (*((int64_t *)target) != min) { - ret = fi_cq_read(send_cq[0], &cqe, 1); /* for progress */ - cr_assert(ret == -FI_EAGAIN, - "Received unexpected event\n"); - - pthread_yield(); - cr_assert(++loops < 10000, "Data mismatch"); - } - - /* i32 */ - *((int64_t *)source) = SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - sz = fi_inject_atomic(ep[0], source, 1, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_INT32, FI_MIN); - cr_assert_eq(sz, 0); - - min = ((int32_t)SOURCE_DATA < (int32_t)TARGET_DATA) ? 
- SOURCE_DATA : TARGET_DATA; - min = (min & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - loops = 0; - while (*((int64_t *)target) != min) { - ret = fi_cq_read(send_cq[0], &cqe, 1); /* for progress */ - cr_assert(ret == -FI_EAGAIN, - "Received unexpected event\n"); - - pthread_yield(); - cr_assert(++loops < 10000, "Data mismatch"); - } - - /* float */ - *((float *)source) = SOURCE_DATA_FP; - *((float *)target) = TARGET_DATA_FP; - sz = fi_inject_atomic(ep[0], source, 1, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_FLOAT, FI_MIN); - cr_assert_eq(sz, 0); - - min_fp = (float)SOURCE_DATA_FP < (float)TARGET_DATA_FP ? - SOURCE_DATA_FP : TARGET_DATA_FP; - loops = 0; - while (*((float *)target) != min_fp) { - ret = fi_cq_read(send_cq[0], &cqe, 1); /* for progress */ - cr_assert(ret == -FI_EAGAIN, - "Received unexpected event\n"); - - pthread_yield(); - cr_assert(++loops < 10000, "Data mismatch"); - } - - /* double */ - *((double *)source) = SOURCE_DATA_FP; - *((double *)target) = TARGET_DATA_FP; - sz = fi_inject_atomic(ep[0], source, 1, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_DOUBLE, FI_MIN); - cr_assert_eq(sz, 0); - - min_dp = (double)SOURCE_DATA_FP < (double)TARGET_DATA_FP ? 
- SOURCE_DATA_FP : TARGET_DATA_FP; - loops = 0; - while (*((double *)target) != min_dp) { - ret = fi_cq_read(send_cq[0], &cqe, 1); /* for progress */ - cr_assert(ret == -FI_EAGAIN, - "Received unexpected event\n"); - - pthread_yield(); - cr_assert(++loops < 10000, "Data mismatch"); - } -} - -Test(rdm_atomic_basic, atomicinject) -{ - __atomicinject(); -} - -Test(rdm_atomic_scalable, atomicinject) -{ - __atomicinject(); -} - -/****************************************************************************** - * - * Fetching atomics - * - *****************************************************************************/ - -int supported_fetch_atomic_ops[FI_ATOMIC_OP_LAST][FI_DATATYPE_LAST] = { - [FI_MIN] = { 0,0,0,0,1,0,1,0,1,1 }, - [FI_MAX] = { 0,0,0,0,1,0,1,0,1,1 }, - [FI_SUM] = { 0,0,0,0,1,1,1,1,1,0 }, /* GNI DP sum is broken */ - [FI_PROD] = NO_DATATYPES_SUPPORTED, - [FI_LOR] = NO_DATATYPES_SUPPORTED, - [FI_LAND] = NO_DATATYPES_SUPPORTED, - [FI_BOR] = GNI_DATATYPES_NO_FP_SUPPORTED, - [FI_BAND] = GNI_DATATYPES_NO_FP_SUPPORTED, - [FI_LXOR] = NO_DATATYPES_SUPPORTED, - [FI_BXOR] = GNI_DATATYPES_NO_FP_SUPPORTED, - [FI_ATOMIC_READ] = ALL_GNI_DATATYPES_SUPPORTED, - [FI_ATOMIC_WRITE] = ALL_GNI_DATATYPES_SUPPORTED, - [FI_CSWAP] = NO_DATATYPES_SUPPORTED, - [FI_CSWAP_NE] = NO_DATATYPES_SUPPORTED, - [FI_CSWAP_LE] = NO_DATATYPES_SUPPORTED, - [FI_CSWAP_LT] = NO_DATATYPES_SUPPORTED, - [FI_CSWAP_GE] = NO_DATATYPES_SUPPORTED, - [FI_CSWAP_GT] = NO_DATATYPES_SUPPORTED, - [FI_MSWAP] = NO_DATATYPES_SUPPORTED, -}; - -void do_invalid_fetch_atomic(enum fi_datatype dt, enum fi_op op) -{ - ssize_t sz; - size_t count; - uint64_t operand; - - if (!supported_fetch_atomic_ops[op][dt]) { - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], - gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], - dt, op, target); - cr_assert(sz == -FI_EOPNOTSUPP); - - sz = fi_fetch_atomicvalid(ep[0], dt, op, &count); - cr_assert(sz == -FI_EOPNOTSUPP, "fi_atomicvalid() succeeded\n"); - } else { - 
sz = fi_fetch_atomicvalid(ep[0], dt, op, &count); - cr_assert(!sz, "fi_atomicvalid() failed\n"); - cr_assert(count == 1, "fi_atomicvalid(): bad count \n"); - } -} - -Test(rdm_atomic_default, invalid_fetch_atomic) -{ - int i, j; - - for(i = 0; i < FI_ATOMIC_OP_LAST; i++) { - for(j = 0; j < FI_DATATYPE_LAST; j++) { - do_invalid_fetch_atomic(j, i); - } - } -} - -void do_fetch_min(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t min; - float min_fp; - double min_dp; - uint64_t operand = SOURCE_DATA; - float operand_fp; - double operand_dp; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* i64 */ - *((int64_t *)source) = FETCH_SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT64, FI_MIN, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min = ((int64_t)SOURCE_DATA < (int64_t)TARGET_DATA) ? 
- SOURCE_DATA : TARGET_DATA; - ret = *((int64_t *)target) == min; - cr_assert(ret, "Data mismatch"); - ret = *((int64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - - /* i32 */ - *((int64_t *)source) = FETCH_SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT32, FI_MIN, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min = ((int32_t)SOURCE_DATA < (int32_t)TARGET_DATA) ? - SOURCE_DATA : TARGET_DATA; - min = (min & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((int64_t *)target) == min; - cr_assert(ret, "Data mismatch"); - min = (TARGET_DATA & U32_MASK) | (FETCH_SOURCE_DATA & (U32_MASK << 32)); - ret = *((int64_t *)source) == min; - cr_assert(ret, "Fetch data mismatch"); - - /* float */ - *((float *)&operand_fp) = SOURCE_DATA_FP; - *((float *)source) = FETCH_SOURCE_DATA; - *((float *)target) = TARGET_DATA_FP; - sz = fi_fetch_atomic(ep[0], &operand_fp, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_FLOAT, FI_MIN, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - 
rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min_fp = (float)SOURCE_DATA_FP < (float)TARGET_DATA_FP ? - SOURCE_DATA_FP : TARGET_DATA_FP; - ret = *((float *)target) == min_fp; - cr_assert(ret, "Data mismatch"); - ret = *((float *)source) == (float)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); - - /* double */ - *((double *)&operand_dp) = SOURCE_DATA_FP; - *((double *)source) = SOURCE_DATA_FP; - *((double *)target) = TARGET_DATA_FP; - sz = fi_fetch_atomic(ep[0], &operand_dp, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_DOUBLE, FI_MIN, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min_dp = (double)SOURCE_DATA_FP < (double)TARGET_DATA_FP ? 
- SOURCE_DATA_FP : TARGET_DATA_FP; - ret = *((double *)target) == min_dp; - cr_assert(ret, "Data mismatch"); - ret = *((double *)source) == (double)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); -} - -Test(rdm_atomic_basic, fetch_min) -{ - rdm_atomic_xfer_for_each_size(do_fetch_min, 1, 1); -} - -Test(rdm_atomic_scalable, fetch_min) -{ - rdm_atomic_xfer_for_each_size(do_fetch_min, 1, 1); -} - -void do_fetch_max(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t min; - float min_fp; - double min_dp; - uint64_t operand = SOURCE_DATA; - float operand_fp; - double operand_dp; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* i64 */ - *((int64_t *)source) = FETCH_SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT64, FI_MAX, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min = ((int64_t)SOURCE_DATA > (int64_t)TARGET_DATA) ? 
- SOURCE_DATA : TARGET_DATA; - ret = *((int64_t *)target) == min; - cr_assert(ret, "Data mismatch"); - ret = *((int64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - - /* i32 */ - *((int64_t *)source) = FETCH_SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT32, FI_MAX, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min = ((int32_t)SOURCE_DATA > (int32_t)TARGET_DATA) ? - SOURCE_DATA : TARGET_DATA; - min = (min & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((int64_t *)target) == min; - cr_assert(ret, "Data mismatch"); - min = (TARGET_DATA & U32_MASK) | (FETCH_SOURCE_DATA & (U32_MASK << 32)); - ret = *((int64_t *)source) == min; - cr_assert(ret, "Fetch data mismatch"); - - /* float */ - *((float *)&operand_fp) = SOURCE_DATA_FP; - *((float *)source) = FETCH_SOURCE_DATA; - *((float *)target) = TARGET_DATA_FP; - sz = fi_fetch_atomic(ep[0], &operand_fp, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_FLOAT, FI_MAX, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - 
rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min_fp = (float)SOURCE_DATA_FP > (float)TARGET_DATA_FP ? - SOURCE_DATA_FP : TARGET_DATA_FP; - ret = *((float *)target) == min_fp; - cr_assert(ret, "Data mismatch"); - ret = *((float *)source) == (float)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); - - /* double */ - *((double *)&operand_dp) = SOURCE_DATA_FP; - *((double *)source) = SOURCE_DATA_FP; - *((double *)target) = TARGET_DATA_FP; - sz = fi_fetch_atomic(ep[0], &operand_dp, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_DOUBLE, FI_MAX, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min_dp = (double)SOURCE_DATA_FP > (double)TARGET_DATA_FP ? 
- SOURCE_DATA_FP : TARGET_DATA_FP; - ret = *((double *)target) == min_dp; - cr_assert(ret, "Data mismatch"); - ret = *((double *)source) == (double)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); -} - -Test(rdm_atomic_basic, fetch_max) -{ - rdm_atomic_xfer_for_each_size(do_fetch_max, 1, 1); -} - -Test(rdm_atomic_scalable, fetch_max) -{ - rdm_atomic_xfer_for_each_size(do_fetch_max, 1, 1); -} - -void do_fetch_sum(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t operand = SOURCE_DATA; - float operand_fp; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* u64 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_UINT64, FI_SUM, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == (SOURCE_DATA + TARGET_DATA); - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - - /* U32 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_UINT32, FI_SUM, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - 
pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == - (uint64_t)((SOURCE_DATA & U32_MASK) + TARGET_DATA); - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == - (uint64_t)((TARGET_DATA & U32_MASK) | - (FETCH_SOURCE_DATA & (U32_MASK << 32))); - cr_assert(ret, "Fetch data mismatch"); - - /* i64 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT64, FI_SUM, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == (SOURCE_DATA + TARGET_DATA); - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - - /* i32 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT32, FI_SUM, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read 
returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == - (uint64_t)((SOURCE_DATA & U32_MASK) + TARGET_DATA); - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == - (uint64_t)((TARGET_DATA & U32_MASK) | - (FETCH_SOURCE_DATA & (U32_MASK << 32))); - cr_assert(ret, "Fetch data mismatch"); - - /* float */ - *((float *)&operand_fp) = SOURCE_DATA_FP; - *((float *)source) = FETCH_SOURCE_DATA; - *((float *)target) = TARGET_DATA_FP; - sz = fi_fetch_atomic(ep[0], &operand_fp, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_FLOAT, FI_SUM, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((float *)target) == - (float)(SOURCE_DATA_FP + TARGET_DATA_FP); - cr_assert(ret, "Data mismatch"); - ret = *((float *)source) == (float)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); -} - -Test(rdm_atomic_basic, fetch_sum) -{ - rdm_atomic_xfer_for_each_size(do_fetch_sum, 1, 1); -} - -Test(rdm_atomic_scalable, fetch_sum) -{ - rdm_atomic_xfer_for_each_size(do_fetch_sum, 1, 1); -} - -void do_fetch_bor(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t res; - uint64_t operand = SOURCE_DATA; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* u64 */ 
- *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_UINT64, FI_BOR, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA | TARGET_DATA; - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - - /* U32 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_UINT32, FI_BOR, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA | TARGET_DATA; - res = (res & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); - res = (TARGET_DATA & U32_MASK) | (FETCH_SOURCE_DATA & (U32_MASK << 32)); - ret = *((uint64_t *)source) == res; - cr_assert(ret, "Fetch data mismatch"); - - /* i64 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = 
fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT64, FI_BOR, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA | TARGET_DATA; - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - - /* i32 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT32, FI_BOR, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA | TARGET_DATA; - res = (res & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); - res = (TARGET_DATA & U32_MASK) | (FETCH_SOURCE_DATA & (U32_MASK << 32)); - ret = *((uint64_t *)source) == res; - cr_assert(ret, "Fetch data mismatch"); -} - -Test(rdm_atomic_basic, fetch_bor) -{ - rdm_atomic_xfer_for_each_size(do_fetch_bor, 1, 1); -} - 
-Test(rdm_atomic_scalable, fetch_bor) -{ - rdm_atomic_xfer_for_each_size(do_fetch_bor, 1, 1); -} - -void do_fetch_band(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t res; - uint64_t operand = SOURCE_DATA; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* u64 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_UINT64, FI_BAND, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA & TARGET_DATA; - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - - /* U32 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_UINT32, FI_BAND, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA & 
TARGET_DATA; - res = (res & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); - res = (TARGET_DATA & U32_MASK) | (FETCH_SOURCE_DATA & (U32_MASK << 32)); - ret = *((uint64_t *)source) == res; - cr_assert(ret, "Fetch data mismatch"); - - /* i64 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT64, FI_BAND, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA & TARGET_DATA; - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - - /* i32 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT32, FI_BAND, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA & 
TARGET_DATA; - res = (res & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); - res = (TARGET_DATA & U32_MASK) | (FETCH_SOURCE_DATA & (U32_MASK << 32)); - ret = *((uint64_t *)source) == res; - cr_assert(ret, "Fetch data mismatch"); -} - -Test(rdm_atomic_basic, fetch_band) -{ - rdm_atomic_xfer_for_each_size(do_fetch_band, 1, 1); -} - -Test(rdm_atomic_scalable, fetch_band) -{ - rdm_atomic_xfer_for_each_size(do_fetch_band, 1, 1); -} - -void do_fetch_bxor(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t res; - uint64_t operand = SOURCE_DATA; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* u64 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_UINT64, FI_BXOR, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA ^ TARGET_DATA; - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - - /* U32 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_UINT32, FI_BXOR, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = 
cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA ^ TARGET_DATA; - res = (res & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); - res = (TARGET_DATA & U32_MASK) | (FETCH_SOURCE_DATA & (U32_MASK << 32)); - ret = *((uint64_t *)source) == res; - cr_assert(ret, "Fetch data mismatch"); - - /* i64 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT64, FI_BXOR, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA ^ TARGET_DATA; - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - - /* i32 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT32, FI_BXOR, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len 
= cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = SOURCE_DATA ^ TARGET_DATA; - res = (res & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); - res = (TARGET_DATA & U32_MASK) | (FETCH_SOURCE_DATA & (U32_MASK << 32)); - ret = *((uint64_t *)source) == res; - cr_assert(ret, "Fetch data mismatch"); -} - -Test(rdm_atomic_basic, fetch_bxor) -{ - rdm_atomic_xfer_for_each_size(do_fetch_bxor, 1, 1); -} - -Test(rdm_atomic_scalable, fetch_bxor) -{ - rdm_atomic_xfer_for_each_size(do_fetch_bxor, 1, 1); -} - -void do_fetch_atomic_write(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t operand = SOURCE_DATA; - float operand_fp; - double operand_dp; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* u64 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_UINT64, FI_ATOMIC_WRITE, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == SOURCE_DATA; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) 
== TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - - /* U32 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_UINT32, FI_ATOMIC_WRITE, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == - (uint64_t)((SOURCE_DATA & U32_MASK) | - (TARGET_DATA & (U32_MASK << 32))); - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == - (uint64_t)((TARGET_DATA & U32_MASK) | - (FETCH_SOURCE_DATA & (U32_MASK << 32))); - cr_assert(ret, "Fetch data mismatch"); - - /* i64 */ - *((uint64_t *)source) = 0; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT64, FI_ATOMIC_WRITE, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == SOURCE_DATA; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data 
mismatch"); - - /* i32 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT32, FI_ATOMIC_WRITE, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == - (uint64_t)((SOURCE_DATA & U32_MASK) | - (TARGET_DATA & (U32_MASK << 32))); - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == - (uint64_t)((TARGET_DATA & U32_MASK) | - (FETCH_SOURCE_DATA & (U32_MASK << 32))); - cr_assert(ret, "Fetch data mismatch"); - - /* float */ - *((float *)&operand_fp) = SOURCE_DATA_FP; - *((float *)source) = FETCH_SOURCE_DATA; - *((float *)target) = TARGET_DATA_FP; - sz = fi_fetch_atomic(ep[0], &operand_fp, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_FLOAT, FI_ATOMIC_WRITE, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((float *)target) == (float)SOURCE_DATA_FP; - cr_assert(ret, "Data mismatch"); - ret = *((float *)source) == (float)TARGET_DATA_FP; - 
cr_assert(ret, "Fetch data mismatch"); - - /* double */ - *(double *)&operand_dp = SOURCE_DATA_FP; - *((double *)source) = FETCH_SOURCE_DATA; - *((double *)target) = TARGET_DATA_FP; - sz = fi_fetch_atomic(ep[0], &operand_dp, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_DOUBLE, FI_ATOMIC_WRITE, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((double *)target) == (double)SOURCE_DATA_FP; - cr_assert(ret, "Data mismatch"); - ret = *((double *)source) == (double)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); -} - -Test(rdm_atomic_basic, fetch_atomic_write) -{ - rdm_atomic_xfer_for_each_size(do_fetch_atomic_write, 1, 1); -} - -Test(rdm_atomic_scalable, fetch_atomic_write) -{ - rdm_atomic_xfer_for_each_size(do_fetch_atomic_write, 1, 1); -} - -void do_fetch_atomic_read(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - float operand_fp; - double operand_dp; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* u64 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], NULL, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_UINT64, FI_ATOMIC_READ, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - 
dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == TARGET_DATA; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == (uint64_t)TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - - /* U32 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], NULL, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_UINT32, FI_ATOMIC_READ, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == TARGET_DATA; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == - (uint64_t)((TARGET_DATA & U32_MASK) | - (FETCH_SOURCE_DATA & (U32_MASK << 32))); - cr_assert(ret, "Fetch data mismatch"); - - /* i64 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], NULL, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT64, FI_ATOMIC_READ, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | 
FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == TARGET_DATA; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - - /* i32 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], NULL, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT32, FI_ATOMIC_READ, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == TARGET_DATA; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == - (uint64_t)((TARGET_DATA & U32_MASK) | - (FETCH_SOURCE_DATA & (U32_MASK << 32))); - cr_assert(ret, "Fetch data mismatch"); - - /* float */ - *((float *)&operand_fp) = SOURCE_DATA_FP; - *((float *)source) = FETCH_SOURCE_DATA; - *((float *)target) = TARGET_DATA_FP; - sz = fi_fetch_atomic(ep[0], NULL, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_FLOAT, FI_ATOMIC_READ, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - 
dbg_printf("got write context event!\n"); - - ret = *((float *)target) == (float)TARGET_DATA_FP; - cr_assert(ret, "Data mismatch"); - ret = *((float *)source) == (float)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); - - /* double */ - *(double *)&operand_dp = SOURCE_DATA_FP; - *((double *)source) = FETCH_SOURCE_DATA; - *((double *)target) = TARGET_DATA_FP; - sz = fi_fetch_atomic(ep[0], NULL, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_DOUBLE, FI_ATOMIC_READ, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((double *)target) == (double)TARGET_DATA_FP; - cr_assert(ret, "Data mismatch"); - ret = *((double *)source) == (double)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); -} - -Test(rdm_atomic_basic, fetch_atomic_read) -{ - rdm_atomic_xfer_for_each_size(do_fetch_atomic_read, 1, 1); -} - -Test(rdm_atomic_scalable, fetch_atomic_read) -{ - rdm_atomic_xfer_for_each_size(do_fetch_atomic_read, 1, 1); -} - -void do_fetch_min_buf(void *s, void *t) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t min; - float min_fp; - double min_dp; - uint64_t operand = SOURCE_DATA; - float operand_fp; - double operand_dp; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* i64 */ - *((int64_t *)s) = FETCH_SOURCE_DATA; - *((int64_t *)t) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - s, loc_mr[0], gni_addr[1], - 
_REM_ADDR(fi, target, t), mr_key[1], - FI_INT64, FI_MIN, t); - if ((uint64_t)s & 0x7 || (uint64_t)t & 0x7) { - cr_assert_eq(sz, -FI_EINVAL); - } else { - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], t, 0)); - rdm_atomic_check_tcqe(&cqe, t, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min = ((int64_t)SOURCE_DATA < (int64_t)TARGET_DATA) ? - SOURCE_DATA : TARGET_DATA; - ret = *((int64_t *)t) == min; - cr_assert(ret, "Data mismatch"); - ret = *((int64_t *)s) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - } - - /* i32 */ - *((int64_t *)s) = FETCH_SOURCE_DATA; - *((int64_t *)t) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - s, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, t), mr_key[1], - FI_INT32, FI_MIN, t); - if ((uint64_t)s & 0x3 || (uint64_t)t & 0x3) { - cr_assert_eq(sz, -FI_EINVAL); - } else { - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], t, 0)); - rdm_atomic_check_tcqe(&cqe, t, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min = ((int32_t)SOURCE_DATA < (int32_t)TARGET_DATA) ? 
- SOURCE_DATA : TARGET_DATA; - min = (min & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((int64_t *)t) == min; - cr_assert(ret, "Data mismatch"); - min = (TARGET_DATA & U32_MASK) | - (FETCH_SOURCE_DATA & (U32_MASK << 32)); - ret = *((int64_t *)s) == min; - cr_assert(ret, "Fetch data mismatch"); - } - - /* float */ - *((float *)&operand_fp) = SOURCE_DATA_FP; - *((float *)s) = FETCH_SOURCE_DATA; - *((float *)t) = TARGET_DATA_FP; - sz = fi_fetch_atomic(ep[0], &operand_fp, 1, NULL, - s, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, t), mr_key[1], - FI_FLOAT, FI_MIN, t); - if ((uint64_t)s & 0x3 || (uint64_t)t & 0x3) { - cr_assert_eq(sz, -FI_EINVAL); - } else { - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], t, 0)); - rdm_atomic_check_tcqe(&cqe, t, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min_fp = (float)SOURCE_DATA_FP < (float)TARGET_DATA_FP ? 
- SOURCE_DATA_FP : TARGET_DATA_FP; - ret = *((float *)t) == min_fp; - cr_assert(ret, "Data mismatch"); - ret = *((float *)s) == (float)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); - } - - /* double */ - *((double *)&operand_dp) = SOURCE_DATA_FP; - *((double *)s) = SOURCE_DATA_FP; - *((double *)t) = TARGET_DATA_FP; - sz = fi_fetch_atomic(ep[0], &operand_dp, 1, NULL, - s, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, t), mr_key[1], - FI_DOUBLE, FI_MIN, t); - if ((uint64_t)s & 0x7 || (uint64_t)t & 0x7) { - cr_assert_eq(sz, -FI_EINVAL); - } else { - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], t, 0)); - rdm_atomic_check_tcqe(&cqe, t, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min_dp = (double)SOURCE_DATA_FP < (double)TARGET_DATA_FP ? 
- SOURCE_DATA_FP : TARGET_DATA_FP; - ret = *((double *)t) == min_dp; - cr_assert(ret, "Data mismatch"); - ret = *((double *)s) == (double)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); - } -} - -static inline void __atomic_fetch_alignment(void) -{ - int s_off, t_off; - - for (s_off = 0; s_off < 7; s_off++) { - for (t_off = 0; t_off < 7; t_off++) { - do_fetch_min_buf(source + s_off, target + t_off); - } - } -} - -Test(rdm_atomic_basic, atomic_fetch_alignment) -{ - __atomic_fetch_alignment(); -} - -Test(rdm_atomic_scalable, atomic_fetch_alignment) -{ - __atomic_fetch_alignment(); -} - -static inline void __fatomicv(void) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t min; - float min_fp; - double min_dp; - uint64_t operand = SOURCE_DATA; - float operand_fp; - double operand_dp; - struct fi_ioc iov, r_iov; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - iov.count = 1; - r_iov.count = 1; - - /* i64 */ - *((int64_t *)source) = FETCH_SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - iov.addr = &operand; - r_iov.addr = source; - sz = fi_fetch_atomicv(ep[0], &iov, NULL, 1, - &r_iov, (void **)loc_mr, 1, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_INT64, FI_MIN, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min = ((int64_t)SOURCE_DATA < (int64_t)TARGET_DATA) ? 
- SOURCE_DATA : TARGET_DATA; - ret = *((int64_t *)target) == min; - cr_assert(ret, "Data mismatch"); - ret = *((int64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - - /* i32 */ - *((int64_t *)source) = FETCH_SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - iov.addr = &operand; - r_iov.addr = source; - sz = fi_fetch_atomicv(ep[0], &iov, NULL, 1, - &r_iov, (void **)loc_mr, 1, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_INT32, FI_MIN, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], NULL, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min = ((int32_t)SOURCE_DATA < (int32_t)TARGET_DATA) ? 
- SOURCE_DATA : TARGET_DATA; - min = (min & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((int64_t *)target) == min; - cr_assert(ret, "Data mismatch"); - min = (TARGET_DATA & U32_MASK) | (FETCH_SOURCE_DATA & (U32_MASK << 32)); - ret = *((int64_t *)source) == min; - cr_assert(ret, "Fetch data mismatch"); - - /* float */ - *((float *)&operand_fp) = SOURCE_DATA_FP; - *((float *)source) = FETCH_SOURCE_DATA; - *((float *)target) = TARGET_DATA_FP; - iov.addr = &operand_fp; - r_iov.addr = source; - sz = fi_fetch_atomicv(ep[0], &iov, NULL, 1, - &r_iov, (void **)loc_mr, 1, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_FLOAT, FI_MIN, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min_fp = (float)SOURCE_DATA_FP < (float)TARGET_DATA_FP ? 
- SOURCE_DATA_FP : TARGET_DATA_FP; - ret = *((float *)target) == min_fp; - cr_assert(ret, "Data mismatch"); - ret = *((float *)source) == (float)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); - - /* double */ - *((double *)&operand_dp) = SOURCE_DATA_FP; - *((double *)source) = SOURCE_DATA_FP; - *((double *)target) = TARGET_DATA_FP; - iov.addr = &operand_dp; - r_iov.addr = source; - sz = fi_fetch_atomicv(ep[0], &iov, NULL, 1, - &r_iov, (void **)loc_mr, 1, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_DOUBLE, FI_MIN, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min_dp = (double)SOURCE_DATA_FP < (double)TARGET_DATA_FP ? 
- SOURCE_DATA_FP : TARGET_DATA_FP; - ret = *((double *)target) == min_dp; - cr_assert(ret, "Data mismatch"); - ret = *((double *)source) == (double)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); -} - -Test(rdm_atomic_basic, fatomicv) -{ - __fatomicv(); -} - -Test(rdm_atomic_scalable, fatomicv) -{ - __fatomicv(); -} - -static inline void __fatomicmsg(void) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t min; - float min_fp; - double min_dp; - uint64_t operand = SOURCE_DATA; - float operand_fp; - double operand_dp; - struct fi_msg_atomic msg; - struct fi_ioc msg_iov, res_iov; - struct fi_rma_ioc rma_iov; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - msg_iov.count = 1; - msg.msg_iov = &msg_iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = gni_addr[1]; - rma_iov.addr = _REM_ADDR(fi, target, target); - rma_iov.count = 1; - rma_iov.key = mr_key[1]; - msg.rma_iov = &rma_iov; - msg.context = target; - msg.op = FI_MIN; - - res_iov.addr = source; - res_iov.count = 1; - - /* i64 */ - *((int64_t *)source) = FETCH_SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - msg_iov.addr = &operand; - msg.datatype = FI_INT64; - sz = fi_fetch_atomicmsg(ep[0], &msg, &res_iov, (void **)loc_mr, 1, 0); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], NULL, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min = ((int64_t)SOURCE_DATA < (int64_t)TARGET_DATA) ? 
- SOURCE_DATA : TARGET_DATA; - ret = *((int64_t *)target) == min; - cr_assert(ret, "Data mismatch"); - ret = *((int64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - - /* i32 */ - *((int64_t *)source) = FETCH_SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - msg_iov.addr = &operand; - msg.datatype = FI_INT32; - sz = fi_fetch_atomicmsg(ep[0], &msg, &res_iov, (void **)loc_mr, 1, 0); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], NULL, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min = ((int32_t)SOURCE_DATA < (int32_t)TARGET_DATA) ? - SOURCE_DATA : TARGET_DATA; - min = (min & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((int64_t *)target) == min; - cr_assert(ret, "Data mismatch"); - min = (TARGET_DATA & U32_MASK) | (FETCH_SOURCE_DATA & (U32_MASK << 32)); - ret = *((int64_t *)source) == min; - cr_assert(ret, "Fetch data mismatch"); - - /* float */ - *((float *)&operand_fp) = SOURCE_DATA_FP; - *((float *)source) = FETCH_SOURCE_DATA; - *((float *)target) = TARGET_DATA_FP; - msg_iov.addr = &operand_fp; - msg.datatype = FI_FLOAT; - sz = fi_fetch_atomicmsg(ep[0], &msg, &res_iov, (void **)loc_mr, 1, 0); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], NULL, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context 
event!\n"); - - min_fp = (float)SOURCE_DATA_FP < (float)TARGET_DATA_FP ? - SOURCE_DATA_FP : TARGET_DATA_FP; - ret = *((float *)target) == min_fp; - cr_assert(ret, "Data mismatch"); - ret = *((float *)source) == (float)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); - - /* double */ - *((double *)&operand_dp) = SOURCE_DATA_FP; - *((double *)source) = SOURCE_DATA_FP; - *((double *)target) = TARGET_DATA_FP; - msg_iov.addr = &operand_dp; - msg.datatype = FI_DOUBLE; - sz = fi_fetch_atomicmsg(ep[0], &msg, &res_iov, (void **)loc_mr, 1, 0); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], NULL, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - min_dp = (double)SOURCE_DATA_FP < (double)TARGET_DATA_FP ? 
- SOURCE_DATA_FP : TARGET_DATA_FP; - ret = *((double *)target) == min_dp; - cr_assert(ret, "Data mismatch"); - ret = *((double *)source) == (double)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); -} - -Test(rdm_atomic_basic, fatomicmsg) -{ - __fatomicmsg(); -} - -Test(rdm_atomic_scalable, fatomicmsg) -{ - __fatomicmsg(); -} - -/****************************************************************************** - * - * Compare atomics - * - *****************************************************************************/ - -int supported_compare_atomic_ops[FI_ATOMIC_OP_LAST][FI_DATATYPE_LAST] = { - [FI_MIN] = NO_DATATYPES_SUPPORTED, - [FI_MAX] = NO_DATATYPES_SUPPORTED, - [FI_SUM] = NO_DATATYPES_SUPPORTED, - [FI_PROD] = NO_DATATYPES_SUPPORTED, - [FI_LOR] = NO_DATATYPES_SUPPORTED, - [FI_LAND] = NO_DATATYPES_SUPPORTED, - [FI_BOR] = NO_DATATYPES_SUPPORTED, - [FI_BAND] = NO_DATATYPES_SUPPORTED, - [FI_LXOR] = NO_DATATYPES_SUPPORTED, - [FI_BXOR] = NO_DATATYPES_SUPPORTED, - [FI_ATOMIC_READ] = NO_DATATYPES_SUPPORTED, - [FI_ATOMIC_WRITE] = NO_DATATYPES_SUPPORTED, - [FI_CSWAP] = ALL_GNI_DATATYPES_SUPPORTED, - [FI_CSWAP_NE] = NO_DATATYPES_SUPPORTED, - [FI_CSWAP_LE] = NO_DATATYPES_SUPPORTED, - [FI_CSWAP_LT] = NO_DATATYPES_SUPPORTED, - [FI_CSWAP_GE] = NO_DATATYPES_SUPPORTED, - [FI_CSWAP_GT] = NO_DATATYPES_SUPPORTED, - [FI_MSWAP] = ALL_GNI_DATATYPES_SUPPORTED, -}; - -void do_invalid_compare_atomic(enum fi_datatype dt, enum fi_op op) -{ - ssize_t sz; - size_t count; - uint64_t operand, op2; - - if (!supported_compare_atomic_ops[op][dt]) { - sz = fi_compare_atomic(ep[0], &operand, 1, NULL, &op2, NULL, - source, loc_mr, - gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], - dt, op, target); - cr_assert(sz == -FI_EOPNOTSUPP); - - sz = fi_compare_atomicvalid(ep[0], dt, op, &count); - cr_assert(sz == -FI_EOPNOTSUPP, "fi_atomicvalid() succeeded\n"); - } else { - sz = fi_compare_atomicvalid(ep[0], dt, op, &count); - cr_assert(!sz, "fi_atomicvalid() failed\n"); - cr_assert(count 
== 1, "fi_atomicvalid(): bad count \n"); - } -} - -Test(rdm_atomic_default, invalid_compare_atomic) -{ - int i, j; - - for(i = 0; i < FI_ATOMIC_OP_LAST; i++) { - for(j = 0; j < FI_DATATYPE_LAST; j++) { - do_invalid_compare_atomic(j, i); - } - } -} - -void do_cswap(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t operand = SOURCE_DATA, op2 = TARGET_DATA; - float operand_fp, op2_fp; - double operand_dp, op2_dp; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* u64 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_compare_atomic(ep[0], &operand, 1, NULL, &op2, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_UINT64, FI_CSWAP, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == SOURCE_DATA; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - - /* U32 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_compare_atomic(ep[0], &operand, 1, NULL, &op2, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_UINT32, FI_CSWAP, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], 
&cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == - (uint64_t)((SOURCE_DATA & U32_MASK) | - (TARGET_DATA & (U32_MASK << 32))); - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == - (uint64_t)((TARGET_DATA & U32_MASK) | - (FETCH_SOURCE_DATA & (U32_MASK << 32))); - cr_assert(ret, "Fetch data mismatch"); - - /* i64 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_compare_atomic(ep[0], &operand, 1, NULL, &op2, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT64, FI_CSWAP, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == SOURCE_DATA; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - - /* i32 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_compare_atomic(ep[0], &operand, 1, NULL, &op2, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT32, FI_CSWAP, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) 
{ - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == - (uint64_t)((SOURCE_DATA & U32_MASK) | - (TARGET_DATA & (U32_MASK << 32))); - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == - (uint64_t)((TARGET_DATA & U32_MASK) | - (FETCH_SOURCE_DATA & (U32_MASK << 32))); - cr_assert(ret, "Fetch data mismatch"); - - /* float */ - *((float *)&operand_fp) = SOURCE_DATA_FP; - *((float *)&op2_fp) = TARGET_DATA_FP; - *((float *)source) = FETCH_SOURCE_DATA; - *((float *)target) = TARGET_DATA_FP; - sz = fi_compare_atomic(ep[0], &operand_fp, 1, NULL, &op2_fp, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_FLOAT, FI_CSWAP, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((float *)target) == (float)SOURCE_DATA_FP; - cr_assert(ret, "Data mismatch"); - ret = *((float *)source) == (float)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); - - /* double */ - *((double *)&operand_dp) = SOURCE_DATA_FP; - *((double *)&op2_dp) = TARGET_DATA_FP; - *((double *)source) = FETCH_SOURCE_DATA; - *((double *)target) = TARGET_DATA_FP; - sz = fi_compare_atomic(ep[0], &operand_dp, 1, NULL, &op2_dp, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_DOUBLE, FI_CSWAP, target); - cr_assert_eq(sz, 0); - - /* 
reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((double *)target) == (double)SOURCE_DATA_FP; - cr_assert(ret, "Data mismatch"); - ret = *((double *)source) == (double)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); -} - -Test(rdm_atomic_basic, cswap) -{ - rdm_atomic_xfer_for_each_size(do_cswap, 1, 1); -} - -Test(rdm_atomic_scalable, cswap) -{ - rdm_atomic_xfer_for_each_size(do_cswap, 1, 1); -} - -void do_mswap(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t res; - uint64_t operand = SOURCE_DATA, op2 = DATA_MASK; - float operand_fp, op2_fp; - double operand_dp, op2_dp; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* u64 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_compare_atomic(ep[0], &operand, 1, NULL, &op2, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_UINT64, FI_MSWAP, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = (SOURCE_DATA & DATA_MASK) | (TARGET_DATA & ~DATA_MASK); - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); - ret = 
*((uint64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - - /* U32 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_compare_atomic(ep[0], &operand, 1, NULL, &op2, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_UINT32, FI_MSWAP, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = (SOURCE_DATA & DATA_MASK) | (TARGET_DATA & ~DATA_MASK); - ret = *((uint64_t *)target) == - (uint64_t)((res & U32_MASK) | - (TARGET_DATA & (U32_MASK << 32))); - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == - (uint64_t)((TARGET_DATA & U32_MASK) | - (FETCH_SOURCE_DATA & (U32_MASK << 32))); - cr_assert(ret, "Fetch data mismatch"); - - /* i64 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_compare_atomic(ep[0], &operand, 1, NULL, &op2, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT64, FI_MSWAP, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = (SOURCE_DATA & DATA_MASK) | (TARGET_DATA & 
~DATA_MASK); - ret = *((uint64_t *)target) == res; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - - /* i32 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_compare_atomic(ep[0], &operand, 1, NULL, &op2, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT32, FI_MSWAP, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - res = (SOURCE_DATA & DATA_MASK) | (TARGET_DATA & ~DATA_MASK); - ret = *((uint64_t *)target) == - (uint64_t)((res & U32_MASK) | - (TARGET_DATA & (U32_MASK << 32))); - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == - (uint64_t)((TARGET_DATA & U32_MASK) | - (FETCH_SOURCE_DATA & (U32_MASK << 32))); - cr_assert(ret, "Fetch data mismatch"); - - /* float */ - *((float *)&operand_fp) = SOURCE_DATA_FP; - *((float *)&op2_fp) = TARGET_DATA_FP; - *((float *)source) = FETCH_SOURCE_DATA; - *((float *)target) = TARGET_DATA_FP; - sz = fi_compare_atomic(ep[0], &operand_fp, 1, NULL, &op2_fp, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_FLOAT, FI_MSWAP, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - 
rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((float *)target) == (float)SOURCE_DATA_FP; - cr_assert(ret, "Data mismatch"); - ret = *((float *)source) == (float)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); - - /* double */ - *((double *)&operand_dp) = SOURCE_DATA_FP; - *((double *)&op2_dp) = TARGET_DATA_FP; - *((double *)source) = FETCH_SOURCE_DATA; - *((double *)target) = TARGET_DATA_FP; - sz = fi_compare_atomic(ep[0], &operand_dp, 1, NULL, &op2_dp, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_DOUBLE, FI_MSWAP, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((double *)target) == (double)SOURCE_DATA_FP; - cr_assert(ret, "Data mismatch"); - ret = *((double *)source) == (double)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); -} - -Test(rdm_atomic_basic, mswap) -{ - rdm_atomic_xfer_for_each_size(do_mswap, 1, 1); -} - -Test(rdm_atomic_scalable, mswap) -{ - rdm_atomic_xfer_for_each_size(do_mswap, 1, 1); -} - -void do_cswap_buf(void *s, void *t) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t operand = SOURCE_DATA, op2 = TARGET_DATA; - float operand_fp, op2_fp; - double operand_dp, op2_dp; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* u64 */ - *((uint64_t *)s) = FETCH_SOURCE_DATA; - *((uint64_t *)t) = 
TARGET_DATA; - sz = fi_compare_atomic(ep[0], &operand, 1, NULL, &op2, NULL, - s, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, t), - mr_key[1], FI_UINT64, FI_CSWAP, t); - if ((uint64_t)s & 0x7 || (uint64_t)t & 0x7) { - cr_assert_eq(sz, -FI_EINVAL); - } else { - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], t, 0)); - rdm_atomic_check_tcqe(&cqe, t, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)t) == SOURCE_DATA; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)s) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - } - - /* U32 */ - *((uint64_t *)s) = FETCH_SOURCE_DATA; - *((uint64_t *)t) = TARGET_DATA; - sz = fi_compare_atomic(ep[0], &operand, 1, NULL, &op2, NULL, - s, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, t), - mr_key[1], FI_UINT32, FI_CSWAP, t); - if ((uint64_t)s & 0x3 || (uint64_t)t & 0x3) { - cr_assert_eq(sz, -FI_EINVAL); - } else { - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], t, 0)); - rdm_atomic_check_tcqe(&cqe, t, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)t) == - (uint64_t)((SOURCE_DATA & U32_MASK) | - (TARGET_DATA & (U32_MASK << 32))); - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)s) == - (uint64_t)((TARGET_DATA & U32_MASK) | - (FETCH_SOURCE_DATA & - (U32_MASK << 32))); - cr_assert(ret, "Fetch data mismatch"); - } - - /* i64 */ - *((uint64_t *)s) = FETCH_SOURCE_DATA; - *((uint64_t *)t) = 
TARGET_DATA; - sz = fi_compare_atomic(ep[0], &operand, 1, NULL, &op2, NULL, - s, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, t), - mr_key[1], FI_INT64, FI_CSWAP, t); - if ((uint64_t)s & 0x7 || (uint64_t)t & 0x7) { - cr_assert_eq(sz, -FI_EINVAL); - } else { - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], t, 0)); - rdm_atomic_check_tcqe(&cqe, t, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)t) == SOURCE_DATA; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)s) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - } - - /* i32 */ - *((uint64_t *)s) = FETCH_SOURCE_DATA; - *((uint64_t *)t) = TARGET_DATA; - sz = fi_compare_atomic(ep[0], &operand, 1, NULL, &op2, NULL, - s, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, t), - mr_key[1], FI_INT32, FI_CSWAP, t); - if ((uint64_t)s & 0x3 || (uint64_t)t & 0x3) { - cr_assert_eq(sz, -FI_EINVAL); - } else { - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], t, 0)); - rdm_atomic_check_tcqe(&cqe, t, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)t) == - (uint64_t)((SOURCE_DATA & U32_MASK) | - (TARGET_DATA & (U32_MASK << 32))); - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)s) == - (uint64_t)((TARGET_DATA & U32_MASK) | - (FETCH_SOURCE_DATA & - (U32_MASK << 32))); - 
cr_assert(ret, "Fetch data mismatch"); - } - - /* float */ - *((float *)&operand_fp) = SOURCE_DATA_FP; - *((float *)&op2_fp) = TARGET_DATA_FP; - *((float *)s) = FETCH_SOURCE_DATA; - *((float *)t) = TARGET_DATA_FP; - sz = fi_compare_atomic(ep[0], &operand_fp, 1, NULL, &op2_fp, NULL, - s, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, t), - mr_key[1], FI_FLOAT, FI_CSWAP, t); - if ((uint64_t)s & 0x3 || (uint64_t)t & 0x3) { - cr_assert_eq(sz, -FI_EINVAL); - } else { - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], t, 0)); - rdm_atomic_check_tcqe(&cqe, t, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((float *)t) == (float)SOURCE_DATA_FP; - cr_assert(ret, "Data mismatch"); - ret = *((float *)s) == (float)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); - } - - /* double */ - *((double *)&operand_dp) = SOURCE_DATA_FP; - *((double *)&op2_dp) = TARGET_DATA_FP; - *((double *)s) = FETCH_SOURCE_DATA; - *((double *)t) = TARGET_DATA_FP; - sz = fi_compare_atomic(ep[0], &operand_dp, 1, NULL, &op2_dp, NULL, - s, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, t), - mr_key[1], FI_DOUBLE, FI_CSWAP, t); - if ((uint64_t)s & 0x7 || (uint64_t)t & 0x7) { - cr_assert_eq(sz, -FI_EINVAL); - } else { - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], t, 0)); - rdm_atomic_check_tcqe(&cqe, t, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); 
- - dbg_printf("got write context event!\n"); - - ret = *((double *)t) == (double)SOURCE_DATA_FP; - cr_assert(ret, "Data mismatch"); - ret = *((double *)s) == (double)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); - } -} - -static inline void __atomic_compare_alignment(void) -{ - int s_off, t_off; - - for (s_off = 0; s_off < 7; s_off++) { - for (t_off = 0; t_off < 7; t_off++) { - do_cswap_buf(source + s_off, target + t_off); - } - } -} - -Test(rdm_atomic_basic, atomic_compare_alignment) -{ - __atomic_compare_alignment(); -} - -Test(rdm_atomic_scalable, atomic_compare_alignment) -{ - __atomic_compare_alignment(); -} - -static inline void __catomicv(void) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t operand = SOURCE_DATA, op2 = TARGET_DATA; - float operand_fp, op2_fp; - double operand_dp, op2_dp; - struct fi_ioc iov, r_iov, c_iov; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - iov.count = 1; - r_iov.count = 1; - c_iov.count = 1; - - /* u64 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - iov.addr = &operand; - r_iov.addr = source; - c_iov.addr = &op2; - sz = fi_compare_atomicv(ep[0], - &iov, NULL, 1, - &c_iov, NULL, 1, - &r_iov, (void **)loc_mr, 1, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_UINT64, FI_CSWAP, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == SOURCE_DATA; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data 
mismatch"); - - /* U32 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - iov.addr = &operand; - r_iov.addr = source; - c_iov.addr = &op2; - sz = fi_compare_atomicv(ep[0], - &iov, NULL, 1, - &c_iov, NULL, 1, - &r_iov, (void **)loc_mr, 1, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_UINT32, FI_CSWAP, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == - (uint64_t)((SOURCE_DATA & U32_MASK) | - (TARGET_DATA & (U32_MASK << 32))); - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == - (uint64_t)((TARGET_DATA & U32_MASK) | - (FETCH_SOURCE_DATA & (U32_MASK << 32))); - cr_assert(ret, "Fetch data mismatch"); - - /* i64 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - iov.addr = &operand; - r_iov.addr = source; - c_iov.addr = &op2; - sz = fi_compare_atomicv(ep[0], - &iov, NULL, 1, - &c_iov, NULL, 1, - &r_iov, (void **)loc_mr, 1, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_INT64, FI_CSWAP, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = 
*((uint64_t *)target) == SOURCE_DATA; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - - /* i32 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - iov.addr = &operand; - r_iov.addr = source; - c_iov.addr = &op2; - sz = fi_compare_atomicv(ep[0], - &iov, NULL, 1, - &c_iov, NULL, 1, - &r_iov, (void **)loc_mr, 1, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_INT32, FI_CSWAP, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == - (uint64_t)((SOURCE_DATA & U32_MASK) | - (TARGET_DATA & (U32_MASK << 32))); - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == - (uint64_t)((TARGET_DATA & U32_MASK) | - (FETCH_SOURCE_DATA & (U32_MASK << 32))); - cr_assert(ret, "Fetch data mismatch"); - - /* float */ - *((float *)&operand_fp) = SOURCE_DATA_FP; - *((float *)&op2_fp) = TARGET_DATA_FP; - *((float *)source) = FETCH_SOURCE_DATA; - *((float *)target) = TARGET_DATA_FP; - iov.addr = &operand_fp; - r_iov.addr = source; - c_iov.addr = &op2_fp; - sz = fi_compare_atomicv(ep[0], - &iov, NULL, 1, - &c_iov, NULL, 1, - &r_iov, (void **)loc_mr, 1, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_FLOAT, FI_CSWAP, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, 
"fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((float *)target) == (float)SOURCE_DATA_FP; - cr_assert(ret, "Data mismatch"); - ret = *((float *)source) == (float)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); - - /* double */ - *((double *)&operand_dp) = SOURCE_DATA_FP; - *((double *)&op2_dp) = TARGET_DATA_FP; - *((double *)source) = FETCH_SOURCE_DATA; - *((double *)target) = TARGET_DATA_FP; - iov.addr = &operand_dp; - r_iov.addr = source; - c_iov.addr = &op2_dp; - sz = fi_compare_atomicv(ep[0], - &iov, NULL, 1, - &c_iov, NULL, 1, - &r_iov, (void **)loc_mr, 1, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_DOUBLE, FI_CSWAP, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], target, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((double *)target) == (double)SOURCE_DATA_FP; - cr_assert(ret, "Data mismatch"); - ret = *((double *)source) == (double)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); -} - -Test(rdm_atomic_basic, catomicv) -{ - __catomicv(); -} - -Test(rdm_atomic_scalable, catomicv) -{ - __catomicv(); -} - -static inline void __catomicmsg(void) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t operand = SOURCE_DATA, op2 = TARGET_DATA; - float operand_fp, op2_fp; - double operand_dp, op2_dp; - struct fi_msg_atomic msg; - struct fi_ioc msg_iov, res_iov, 
cmp_iov; - struct fi_rma_ioc rma_iov; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - msg_iov.count = 1; - msg.msg_iov = &msg_iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = gni_addr[1]; - rma_iov.addr = _REM_ADDR(fi, target, target); - rma_iov.count = 1; - rma_iov.key = mr_key[1]; - msg.rma_iov = &rma_iov; - msg.context = target; - msg.op = FI_CSWAP; - - res_iov.count = 1; - cmp_iov.count = 1; - - /* u64 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - msg_iov.addr = &operand; - msg.datatype = FI_UINT64; - res_iov.addr = source; - cmp_iov.addr = &op2; - sz = fi_compare_atomicmsg(ep[0], &msg, &cmp_iov, NULL, 1, - &res_iov, (void **)loc_mr, 1, 0); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], NULL, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - - r[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == SOURCE_DATA; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - - /* U32 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - msg_iov.addr = &operand; - msg.datatype = FI_UINT32; - res_iov.addr = source; - cmp_iov.addr = &op2; - sz = fi_compare_atomicmsg(ep[0], &msg, &cmp_iov, NULL, 1, - &res_iov, (void **)loc_mr, 1, 0); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], NULL, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); 
- rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == - (uint64_t)((SOURCE_DATA & U32_MASK) | - (TARGET_DATA & (U32_MASK << 32))); - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == - (uint64_t)((TARGET_DATA & U32_MASK) | - (FETCH_SOURCE_DATA & (U32_MASK << 32))); - cr_assert(ret, "Fetch data mismatch"); - - /* i64 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - msg_iov.addr = &operand; - msg.datatype = FI_INT64; - res_iov.addr = source; - cmp_iov.addr = &op2; - sz = fi_compare_atomicmsg(ep[0], &msg, &cmp_iov, NULL, 1, - &res_iov, (void **)loc_mr, 1, 0); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], NULL, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == SOURCE_DATA; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); - - /* i32 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - msg_iov.addr = &operand; - msg.datatype = FI_INT32; - res_iov.addr = source; - cmp_iov.addr = &op2; - sz = fi_compare_atomicmsg(ep[0], &msg, &cmp_iov, NULL, 1, - &res_iov, (void **)loc_mr, 1, 0); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], NULL, 0)); - rdm_atomic_check_tcqe(&cqe, target, 
FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((uint64_t *)target) == - (uint64_t)((SOURCE_DATA & U32_MASK) | - (TARGET_DATA & (U32_MASK << 32))); - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == - (uint64_t)((TARGET_DATA & U32_MASK) | - (FETCH_SOURCE_DATA & (U32_MASK << 32))); - cr_assert(ret, "Fetch data mismatch"); - - /* float */ - *((float *)&operand_fp) = SOURCE_DATA_FP; - *((float *)&op2_fp) = TARGET_DATA_FP; - *((float *)source) = FETCH_SOURCE_DATA; - *((float *)target) = TARGET_DATA_FP; - msg_iov.addr = &operand_fp; - msg.datatype = FI_FLOAT; - res_iov.addr = source; - cmp_iov.addr = &op2_fp; - sz = fi_compare_atomicmsg(ep[0], &msg, &cmp_iov, NULL, 1, - &res_iov, (void **)loc_mr, 1, 0); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], NULL, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((float *)target) == (float)SOURCE_DATA_FP; - cr_assert(ret, "Data mismatch"); - ret = *((float *)source) == (float)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); - - /* double */ - *((double *)&operand_dp) = SOURCE_DATA_FP; - *((double *)&op2_dp) = TARGET_DATA_FP; - *((double *)source) = FETCH_SOURCE_DATA; - *((double *)target) = TARGET_DATA_FP; - msg_iov.addr = &operand_dp; - msg.datatype = FI_DOUBLE; - res_iov.addr = source; - cmp_iov.addr = &op2_dp; - sz = fi_compare_atomicmsg(ep[0], &msg, &cmp_iov, NULL, 1, - &res_iov, (void **)loc_mr, 1, 0); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - 
while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1, "fi_cq_read returned %d %d", ret, - dump_cq_error(send_cq[0], NULL, 0)); - rdm_atomic_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0); - rdm_atomic_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - ret = *((double *)target) == (double)SOURCE_DATA_FP; - cr_assert(ret, "Data mismatch"); - ret = *((double *)source) == (double)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); -} - -Test(rdm_atomic_basic, catomicmsg) -{ - __catomicmsg(); -} - -Test(rdm_atomic_scalable, catomicmsg) -{ - __catomicmsg(); -} - -/****************************************************************************** - * - * Other - * - *****************************************************************************/ - -Test(rdm_atomic_default, atomic_err) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe; - struct fi_cq_err_entry err_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX, - UINT_MAX, INT_MAX, INT_MAX, - (void *) NULL, 0 }; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - rdm_atomic_err_inject_enable(); - - /* i64 */ - *((int64_t *)source) = SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - sz = fi_atomic(ep[0], source, 1, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - FI_INT64, FI_MIN, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - cr_assert_eq(ret, -FI_EAVAIL); - cr_assert_eq(err_cqe.err_data_size, 0); - ret = fi_cq_readerr(send_cq[0], &err_cqe, 0); - cr_assert_eq(ret, 1); - - cr_assert((uint64_t)err_cqe.op_context == (uint64_t)target, - "Bad error context"); - cr_assert(err_cqe.flags == (FI_ATOMIC | FI_WRITE)); - cr_assert(err_cqe.len == 0, "Bad error len"); - cr_assert(err_cqe.buf == 0, "Bad error buf"); - cr_assert(err_cqe.data == 0, "Bad error data"); - 
cr_assert(err_cqe.tag == 0, "Bad error tag"); - cr_assert(err_cqe.olen == 0, "Bad error olen"); - cr_assert(err_cqe.err == FI_ECANCELED, "Bad error errno"); - cr_assert(err_cqe.prov_errno == gnixu_to_fi_errno(GNI_RC_TRANSACTION_ERROR), - "Bad prov errno"); - cr_assert(err_cqe.err_data_size == 0); - cr_assert(err_cqe.err_data == NULL, "Bad error provider data"); - - w_e[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); -} - -Test(rdm_atomic_default, fetch_atomic_err) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe; - uint64_t operand = SOURCE_DATA; - struct fi_cq_err_entry err_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX, - UINT_MAX, INT_MAX, INT_MAX, - (void *) NULL, 0 }; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - rdm_atomic_err_inject_enable(); - - /* i64 */ - *((int64_t *)source) = FETCH_SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_INT64, FI_MIN, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - cr_assert_eq(ret, -FI_EAVAIL); - - ret = fi_cq_readerr(send_cq[0], &err_cqe, 0); - cr_assert_eq(ret, 1); - - cr_assert((uint64_t)err_cqe.op_context == (uint64_t)target, - "Bad error context"); - cr_assert(err_cqe.flags == (FI_ATOMIC | FI_READ)); - cr_assert(err_cqe.len == 0, "Bad error len"); - cr_assert(err_cqe.buf == 0, "Bad error buf"); - cr_assert(err_cqe.data == 0, "Bad error data"); - cr_assert(err_cqe.tag == 0, "Bad error tag"); - cr_assert(err_cqe.olen == 0, "Bad error olen"); - cr_assert(err_cqe.err == FI_ECANCELED, "Bad error errno"); - cr_assert(err_cqe.prov_errno == gnixu_to_fi_errno(GNI_RC_TRANSACTION_ERROR), - "Bad prov errno"); - cr_assert(err_cqe.err_data == NULL, "Bad error provider data"); - - r_e[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); -} 
- -Test(rdm_atomic_default, compare_atomic_err) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe; - uint64_t operand = SOURCE_DATA, op2 = TARGET_DATA; - struct fi_cq_err_entry err_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX, - UINT_MAX, INT_MAX, INT_MAX, - (void *) NULL, 0}; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - rdm_atomic_err_inject_enable(); - - /* u64 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_compare_atomic(ep[0], &operand, 1, NULL, &op2, NULL, - source, loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), - mr_key[1], FI_UINT64, FI_CSWAP, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - cr_assert_eq(ret, -FI_EAVAIL); - - ret = fi_cq_readerr(send_cq[0], &err_cqe, 0); - cr_assert_eq(ret, 1); - - cr_assert((uint64_t)err_cqe.op_context == (uint64_t)target, - "Bad error context"); - cr_assert(err_cqe.flags == (FI_ATOMIC | FI_READ)); - cr_assert(err_cqe.len == 0, "Bad error len"); - cr_assert(err_cqe.buf == 0, "Bad error buf"); - cr_assert(err_cqe.data == 0, "Bad error data"); - cr_assert(err_cqe.tag == 0, "Bad error tag"); - cr_assert(err_cqe.olen == 0, "Bad error olen"); - cr_assert(err_cqe.err == FI_ECANCELED, "Bad error errno"); - cr_assert(err_cqe.prov_errno == gnixu_to_fi_errno(GNI_RC_TRANSACTION_ERROR), - "Bad prov errno"); - cr_assert(err_cqe.err_data == NULL, "Bad error provider data"); - - r_e[0] = 1; - rdm_atomic_check_cntrs(w, r, w_e, r_e); -} - -TestSuite(rdm_atomic_rcntr_basic, .init = rdm_atomic_rcntr_basic_setup, - .fini = rdm_atomic_teardown, .disabled = false); - -TestSuite(rdm_atomic_rcntr_scalable, .init = rdm_atomic_rcntr_scalable_setup, - .fini = rdm_atomic_teardown, .disabled = false); - -Test(rdm_atomic_rcntr_basic, amo_rcntr) -{ - rdm_atomic_xfer_for_each_size(do_min, 1, 1); -} - -Test(rdm_atomic_rcntr_basic, 
famo_rcntr) -{ - rdm_atomic_xfer_for_each_size(do_fetch_min, 1, 1); -} - -Test(rdm_atomic_rcntr_basic, camo_rcntr) -{ - rdm_atomic_xfer_for_each_size(do_cswap, 1, 1); -} - -Test(rdm_atomic_rcntr_scalable, amo_rcntr) -{ - rdm_atomic_xfer_for_each_size(do_min, 1, 1); -} - -Test(rdm_atomic_rcntr_scalable, famo_rcntr) -{ - rdm_atomic_xfer_for_each_size(do_fetch_min, 1, 1); -} - -Test(rdm_atomic_rcntr_scalable, camo_rcntr) -{ - rdm_atomic_xfer_for_each_size(do_cswap, 1, 1); -} - diff --git a/prov/gni/test/rdm_dgram_rma.c b/prov/gni/test/rdm_dgram_rma.c deleted file mode 100644 index f07cc8497d4..00000000000 --- a/prov/gni/test/rdm_dgram_rma.c +++ /dev/null @@ -1,3235 +0,0 @@ -/* - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2015-2018 Cray Inc. All rights reserved. - * Copyright (c) 2019-2020 Triad National Security, LLC. - * All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include - -#include "gnix_vc.h" -#include "gnix_cm_nic.h" -#include "gnix_hashtable.h" -#include "gnix_rma.h" - -#include -#include "gnix_rdma_headers.h" -#include "common.h" - -#if 1 -#define dbg_printf(...) -#else -#define dbg_printf(...) \ - do { \ - printf(__VA_ARGS__); \ - fflush(stdout); \ - } while (0) -#endif - -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fid_fabric *fab; -static struct fid_domain *dom[2]; -static struct fi_gni_ops_domain *gni_domain_ops[2]; -static struct fid_ep *ep[2]; -static struct fid_av *av[2]; -static struct fi_info *hints; -static struct fi_info *fi; -static void *ep_name[2]; -static size_t gni_addr[2]; -static struct fid_cq *send_cq[2]; -static struct fid_cq *recv_cq[2]; -static struct fi_cq_attr cq_attr[2]; - -#define BUF_SZ (64*1024) -static char *target, *target_base; -static char *target2, *target2_base; -static char *source, *source_base; -static char *source2, *source2_base; -static char *uc_source; -static struct fid_mr *rem_mr[2], *loc_mr[2], *rem_mr2[2], *loc_mr2[2]; -static uint64_t mr_key[2], mr_key2[2]; - -static struct fid_cntr *write_cntr[2], *read_cntr[2]; -static struct fid_cntr *rwrite_cntr; -static struct fid_cntr *rread_cntr; -static struct fi_cntr_attr cntr_attr = { - .events = FI_CNTR_EVENTS_COMP, - .flags = 0 -}; -static uint64_t writes[2] = {0}, reads[2] = {0}, write_errs[2] = {0}, - read_errs[2] = {0}; -#define MLOOPS 1000 -static int dgm_fail; -static bool fi_more_set; - -static void __common_setup(uint32_t version, int 
mr_mode) -{ - int ret = 0; - struct fi_av_attr attr; - size_t addrlen = 0; - uint64_t requested_key[2][4] = { - {0, 0, 0, 0}, - {0, 0, 0, 0} }; - int i, j; - - dgm_fail = 0; - - hints->domain_attr->mr_mode = mr_mode; - hints->domain_attr->cq_data_size = 4; - hints->mode = mode_bits; - hints->caps |= FI_RMA | FI_READ | FI_REMOTE_READ | - FI_WRITE | FI_REMOTE_WRITE | FI_MSG | - FI_SEND | FI_RECV; - - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(version, NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - ret = fi_domain(fab, fi, dom, NULL); - cr_assert(!ret, "fi_domain"); - - ret = fi_open_ops(&dom[0]->fid, FI_GNI_DOMAIN_OPS_1, - 0, (void **) gni_domain_ops, NULL); - - memset(&attr, 0, sizeof(attr)); - attr.type = FI_AV_MAP; - attr.count = 2; - - ret = fi_av_open(dom[0], &attr, av, NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_endpoint(dom[0], fi, &ep[0], NULL); - cr_assert(!ret, "fi_endpoint"); - - cq_attr[0].format = FI_CQ_FORMAT_TAGGED; - cq_attr[0].size = 1024; - cq_attr[0].wait_obj = 0; - - ret = fi_cq_open(dom[0], cq_attr, send_cq, 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_cq_open(dom[0], cq_attr, recv_cq, 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_domain(fab, fi, dom + 1, NULL); - cr_assert(!ret, "fi_domain"); - - ret = fi_open_ops(&dom[1]->fid, FI_GNI_DOMAIN_OPS_1, - 0, (void **) gni_domain_ops + 1, NULL); - - ret = fi_av_open(dom[1], &attr, av + 1, NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_endpoint(dom[1], fi, &ep[1], NULL); - cr_assert(!ret, "fi_endpoint"); - - cq_attr[1].format = FI_CQ_FORMAT_TAGGED; - cq_attr[1].size = 1024; - cq_attr[1].wait_obj = 0; - - ret = fi_cq_open(dom[1], cq_attr + 1, send_cq + 1, 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_cq_open(dom[1], cq_attr + 1, recv_cq + 1, 0); - cr_assert(!ret, "fi_cq_open"); - - /* - * imitate shmem, etc. 
use FI_WRITE for bind - * flag - */ - ret = fi_ep_bind(ep[0], &send_cq[0]->fid, FI_TRANSMIT); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(ep[0], &recv_cq[0]->fid, FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_getname(&ep[0]->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - ep_name[0] = malloc(addrlen); - cr_assert(ep_name[0] != NULL); - - ret = fi_getname(&ep[0]->fid, ep_name[0], &addrlen); - cr_assert(ret == FI_SUCCESS); - - ret = fi_ep_bind(ep[1], &send_cq[1]->fid, FI_TRANSMIT); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(ep[1], &recv_cq[1]->fid, FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_getname(&ep[1]->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - ep_name[1] = malloc(addrlen); - cr_assert(ep_name[1] != NULL); - - ret = fi_getname(&ep[1]->fid, ep_name[1], &addrlen); - cr_assert(ret == FI_SUCCESS); - - ret = fi_av_insert(av[0], ep_name[0], 1, &gni_addr[0], 0, - NULL); - cr_assert(ret == 1); - ret = fi_av_insert(av[0], ep_name[1], 1, &gni_addr[1], 0, - NULL); - cr_assert(ret == 1); - - ret = fi_av_insert(av[1], ep_name[0], 1, &gni_addr[0], 0, - NULL); - cr_assert(ret == 1); - ret = fi_av_insert(av[1], ep_name[1], 1, &gni_addr[1], 0, - NULL); - cr_assert(ret == 1); - - ret = fi_ep_bind(ep[0], &av[0]->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(ep[1], &av[1]->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - if (USING_SCALABLE(fi)) { - for (i = 0; i < 2; i++) - for (j = 0; j < 4; j++) - requested_key[i][j] = (i * 4) + j; - } - - - target_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(target_base); - target = GNIT_ALIGN_BUFFER(char *, target_base); - - target2_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(target2_base); - target2 = GNIT_ALIGN_BUFFER(char *, target2_base); - - source_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(source_base); - source = GNIT_ALIGN_BUFFER(char *, source_base); - - source2_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(source2_base); - source2 = 
GNIT_ALIGN_BUFFER(char *, source2_base); - - ret = fi_mr_reg(dom[0], - target, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - requested_key[0][0], - 0, - &rem_mr[0], - &target); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom[1], - target, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - requested_key[1][0], - 0, - &rem_mr[1], - &target); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom[0], - target2, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - requested_key[0][1], - 0, - &rem_mr2[0], - &target2); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom[1], - target2, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - requested_key[1][1], - 0, - &rem_mr2[1], - &target2); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom[0], - source, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - requested_key[0][2], - 0, - &loc_mr[0], - &source); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom[1], - source, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - requested_key[1][2], - 0, - &loc_mr[1], - &source); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom[0], - source2, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - requested_key[0][3], - 0, - &loc_mr2[0], - &source2); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom[1], - source2, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - requested_key[1][3], - 0, - &loc_mr2[1], - &source2); - cr_assert_eq(ret, 0); - - for (i = 0; i < 2; i++) { - if (USING_SCALABLE(fi)) { - MR_ENABLE(rem_mr[i], target, BUF_SZ); - MR_ENABLE(rem_mr2[i], target2, BUF_SZ); - MR_ENABLE(loc_mr[i], source, BUF_SZ); - MR_ENABLE(loc_mr2[i], source2, BUF_SZ); - } - } - - uc_source = malloc(BUF_SZ); - assert(uc_source); - - mr_key[0] = fi_mr_key(rem_mr[0]); - mr_key[1] = fi_mr_key(rem_mr[1]); - mr_key2[0] = fi_mr_key(rem_mr2[0]); - mr_key2[1] = fi_mr_key(rem_mr2[1]); - - ret = fi_cntr_open(dom[0], &cntr_attr, write_cntr, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[0], &write_cntr[0]->fid, FI_SEND | FI_WRITE); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[0], &cntr_attr, read_cntr, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = 
fi_ep_bind(ep[0], &read_cntr[0]->fid, FI_RECV | FI_READ); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[1], &cntr_attr, write_cntr + 1, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[1], &write_cntr[1]->fid, FI_SEND | FI_WRITE); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[1], &cntr_attr, read_cntr + 1, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[1], &read_cntr[1]->fid, FI_RECV | FI_READ); - cr_assert(!ret, "fi_ep_bind"); - - if (hints->caps & FI_RMA_EVENT) { - ret = fi_cntr_open(dom[1], &cntr_attr, &rwrite_cntr, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[1], &rwrite_cntr->fid, FI_REMOTE_WRITE); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[1], &cntr_attr, &rread_cntr, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[1], &rread_cntr->fid, FI_REMOTE_READ); - cr_assert(!ret, "fi_ep_bind"); - } - - ret = fi_enable(ep[0]); - cr_assert(!ret, "fi_ep_enable"); - - ret = fi_enable(ep[1]); - cr_assert(!ret, "fi_ep_enable"); - -} - -static void __common_setup_1dom(uint32_t version, int mr_mode) -{ - int ret = 0; - struct fi_av_attr attr; - size_t addrlen = 0; - - dgm_fail = 0; - - hints->domain_attr->mr_mode = mr_mode; - hints->domain_attr->cq_data_size = 4; - hints->mode = mode_bits; - hints->caps |= FI_RMA | FI_READ | FI_REMOTE_READ | - FI_WRITE | FI_REMOTE_WRITE; - - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(version, NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - ret = fi_domain(fab, fi, dom, NULL); - cr_assert(!ret, "fi_domain"); - - ret = fi_open_ops(&dom[0]->fid, FI_GNI_DOMAIN_OPS_1, - 0, (void **) gni_domain_ops, NULL); - - memset(&attr, 0, sizeof(attr)); - attr.type = FI_AV_MAP; - attr.count = 2; - - ret = fi_av_open(dom[0], &attr, av, NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_endpoint(dom[0], fi, &ep[0], NULL); - 
cr_assert(!ret, "fi_endpoint"); - - cq_attr[0].format = FI_CQ_FORMAT_TAGGED; - cq_attr[0].size = 1024; - cq_attr[0].wait_obj = 0; - - ret = fi_cq_open(dom[0], cq_attr, send_cq, 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_cq_open(dom[0], cq_attr, recv_cq, 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_endpoint(dom[0], fi, &ep[1], NULL); - cr_assert(!ret, "fi_endpoint"); - - cq_attr[1].format = FI_CQ_FORMAT_TAGGED; - cq_attr[1].size = 1024; - cq_attr[1].wait_obj = 0; - - ret = fi_cq_open(dom[0], cq_attr + 1, send_cq + 1, 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_cq_open(dom[0], cq_attr + 1, recv_cq + 1, 0); - cr_assert(!ret, "fi_cq_open"); - - /* - * imitate shmem, etc. use FI_WRITE for bind - * flag - */ - ret = fi_ep_bind(ep[0], &send_cq[0]->fid, FI_TRANSMIT); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(ep[0], &recv_cq[0]->fid, FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_getname(&ep[0]->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - ep_name[0] = malloc(addrlen); - cr_assert(ep_name[0] != NULL); - - ret = fi_getname(&ep[0]->fid, ep_name[0], &addrlen); - cr_assert(ret == FI_SUCCESS); - - ret = fi_ep_bind(ep[1], &send_cq[1]->fid, FI_TRANSMIT); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(ep[1], &recv_cq[1]->fid, FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_getname(&ep[1]->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - ep_name[1] = malloc(addrlen); - cr_assert(ep_name[1] != NULL); - - ret = fi_getname(&ep[1]->fid, ep_name[1], &addrlen); - cr_assert(ret == FI_SUCCESS); - - ret = fi_av_insert(av[0], ep_name[0], 1, &gni_addr[0], 0, - NULL); - cr_assert(ret == 1); - ret = fi_av_insert(av[0], ep_name[1], 1, &gni_addr[1], 0, - NULL); - cr_assert(ret == 1); - - ret = fi_ep_bind(ep[0], &av[0]->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(ep[1], &av[0]->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - target_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(target_base); - target = 
GNIT_ALIGN_BUFFER(char *, target_base); - - source_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(source_base); - source = GNIT_ALIGN_BUFFER(char *, source_base); - - ret = fi_mr_reg(dom[0], - target, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - (USING_SCALABLE(fi) ? 1 : 0), - 0, - &rem_mr[0], - &target); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom[0], - source, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - (USING_SCALABLE(fi) ? 2 : 0), - 0, - &loc_mr[0], - &source); - cr_assert_eq(ret, 0); - - if (USING_SCALABLE(fi)) { - MR_ENABLE(rem_mr[0], target, BUF_SZ); - MR_ENABLE(loc_mr[0], source, BUF_SZ); - } - - uc_source = malloc(BUF_SZ); - assert(uc_source); - - mr_key[0] = fi_mr_key(rem_mr[0]); - mr_key[1] = mr_key[0]; - - ret = fi_cntr_open(dom[0], &cntr_attr, write_cntr, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[0], &write_cntr[0]->fid, FI_WRITE); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[0], &cntr_attr, read_cntr, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[0], &read_cntr[0]->fid, FI_READ); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[0], &cntr_attr, write_cntr + 1, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[1], &write_cntr[1]->fid, FI_WRITE); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[0], &cntr_attr, read_cntr + 1, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[1], &read_cntr[1]->fid, FI_READ); - cr_assert(!ret, "fi_ep_bind"); - - if (hints->caps & FI_RMA_EVENT) { - ret = fi_cntr_open(dom[0], &cntr_attr, &rwrite_cntr, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[1], &rwrite_cntr->fid, FI_REMOTE_WRITE); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[0], &cntr_attr, &rread_cntr, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[1], &rread_cntr->fid, FI_REMOTE_READ); - cr_assert(!ret, "fi_ep_bind"); - } - - ret = fi_enable(ep[0]); - cr_assert(!ret, "fi_ep_enable"); - - ret = fi_enable(ep[1]); - cr_assert(!ret, 
"fi_ep_enable"); - -} - -void rdm_rma_setup_basic(void) -{ - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - hints->ep_attr->type = FI_EP_RDM; - hints->caps = FI_RMA_EVENT; - __common_setup(fi_version(), GNIX_MR_BASIC); -} - -void more_rdm_rma_setup_basic(void) -{ - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - hints->ep_attr->type = FI_EP_RDM; - __common_setup(fi_version(), GNIX_MR_BASIC); -} - -void dgram_setup_basic(void) -{ - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - hints->ep_attr->type = FI_EP_DGRAM; - hints->caps = FI_RMA_EVENT; - __common_setup(fi_version(), GNIX_MR_BASIC); -} - -void dgram_setup_1dom_basic(void) -{ - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - hints->ep_attr->type = FI_EP_DGRAM; - hints->caps = FI_RMA_EVENT; - __common_setup_1dom(fi_version(), GNIX_MR_BASIC); -} - -void rdm_rma_rcntr_setup_basic(void) -{ - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - hints->ep_attr->type = FI_EP_RDM; - hints->caps = FI_RMA_EVENT; - __common_setup(fi_version(), GNIX_MR_BASIC); -} - -void rdm_rma_setup_scalable(void) -{ - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - hints->ep_attr->type = FI_EP_RDM; - hints->caps = FI_RMA_EVENT; - __common_setup(fi_version(), GNIX_MR_SCALABLE); -} - -void more_rdm_rma_setup_scalable(void) -{ - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - hints->ep_attr->type = FI_EP_RDM; - __common_setup(fi_version(), GNIX_MR_SCALABLE); -} - -void dgram_setup_scalable(void) -{ - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - hints->ep_attr->type = FI_EP_DGRAM; - hints->caps = FI_RMA_EVENT; - __common_setup(fi_version(), GNIX_MR_SCALABLE); -} - -void dgram_setup_1dom_scalable(void) -{ - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - hints->ep_attr->type = FI_EP_DGRAM; - hints->caps = FI_RMA_EVENT; - __common_setup_1dom(fi_version(), GNIX_MR_SCALABLE); -} - -void rdm_rma_rcntr_setup_scalable(void) 
-{ - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - hints->ep_attr->type = FI_EP_RDM; - hints->caps = FI_RMA_EVENT; - __common_setup(fi_version(), GNIX_MR_SCALABLE); -} - -void rdm_rma_teardown(void) -{ - int ret = 0; - - if (hints->caps & FI_RMA_EVENT) { - ret = fi_close(&rwrite_cntr->fid); - cr_assert(!ret, "failure in closing dom[1] rwrite counter."); - - ret = fi_close(&rread_cntr->fid); - cr_assert(!ret, "failure in closing dom[1] rread counter."); - } - - ret = fi_close(&read_cntr[0]->fid); - cr_assert(!ret, "failure in closing dom[0] read counter."); - - ret = fi_close(&read_cntr[1]->fid); - cr_assert(!ret, "failure in closing dom[1] read counter."); - - ret = fi_close(&write_cntr[0]->fid); - cr_assert(!ret, "failure in closing dom[0] write counter."); - - ret = fi_close(&write_cntr[1]->fid); - cr_assert(!ret, "failure in closing dom[1] write counter."); - - free(uc_source); - - ret = fi_close(&loc_mr[0]->fid); - cr_assert(!ret, "failure in closing dom[0] local mr."); - - if (loc_mr[1] != NULL) { - ret = fi_close(&loc_mr[1]->fid); - cr_assert(!ret, "failure in closing dom[1] local mr."); - } - - if (loc_mr2[0] != NULL) { - ret = fi_close(&loc_mr2[0]->fid); - cr_assert(!ret, "failure in closing dom[0] local mr."); - } - - if (loc_mr2[1] != NULL) { - ret = fi_close(&loc_mr2[1]->fid); - cr_assert(!ret, "failure in closing dom[1] local mr."); - } - - ret = fi_close(&rem_mr[0]->fid); - cr_assert(!ret, "failure in closing dom[0] remote mr."); - - if (rem_mr[1] != NULL) { - ret = fi_close(&rem_mr[1]->fid); - cr_assert(!ret, "failure in closing dom[1] remote mr."); - } - - if (rem_mr2[0] != NULL) { - ret = fi_close(&rem_mr2[0]->fid); - cr_assert(!ret, "failure in closing dom[0] remote mr."); - } - - if (rem_mr2[1] != NULL) { - ret = fi_close(&rem_mr2[1]->fid); - cr_assert(!ret, "failure in closing dom[1] remote mr."); - } - - free(target_base); - free(target2_base); - free(source_base); - free(source2_base); - - ret = fi_close(&ep[0]->fid); - 
cr_assert(!ret, "failure in closing ep[0]."); - - ret = fi_close(&ep[1]->fid); - cr_assert(!ret, "failure in closing ep[1]."); - - ret = fi_close(&recv_cq[0]->fid); - cr_assert(!ret, "failure in dom[0] recv cq."); - - ret = fi_close(&recv_cq[1]->fid); - cr_assert(!ret, "failure in dom[1] recv cq."); - - ret = fi_close(&send_cq[0]->fid); - cr_assert(!ret, "failure in dom[0] send cq."); - - ret = fi_close(&send_cq[1]->fid); - cr_assert(!ret, "failure in dom[1] send cq."); - - ret = fi_close(&av[0]->fid); - cr_assert(!ret, "failure in closing dom[0] av."); - - if (av[1] != NULL) { - ret = fi_close(&av[1]->fid); - cr_assert(!ret, "failure in closing dom[1] av."); - } - - ret = fi_close(&dom[0]->fid); - cr_assert(!ret, "failure in closing domain dom[0]."); - - if (dom[1] != NULL) { - ret = fi_close(&dom[1]->fid); - cr_assert(!ret, - "failure in closing domain dom[1]."); - } - - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); - - fi_freeinfo(fi); - fi_freeinfo(hints); - hints = NULL; - dgm_fail = 0; - free(ep_name[0]); - free(ep_name[1]); -} - -void init_data(char *buf, int len, char seed) -{ - int i; - - for (i = 0; i < len; i++) { - buf[i] = seed++; - } -} - -int check_data(char *buf1, char *buf2, int len) -{ - int i; - - for (i = 0; i < len; i++) { - if (buf1[i] != buf2[i]) { - printf("data mismatch, elem: %d, b1: 0x%hhx, b2: 0x%hhx, len: %d\n", - i, buf1[i], buf2[i], len); - return 0; - } - } - - return 1; -} - -void rdm_rma_check_tcqe(struct fi_cq_tagged_entry *tcqe, void *ctx, - uint64_t flags, uint64_t data, struct fid_ep *fid_ep) -{ - struct gnix_fid_ep *gnix_ep = get_gnix_ep(fid_ep); - - cr_assert(tcqe->op_context == ctx, "CQE Context mismatch"); - cr_assert(tcqe->flags == flags, "CQE flags mismatch"); - - /* TODO: Remove GNIX_ALLOW_FI_REMOTE_CQ_DATA and only check flags for FI_RMA_EVENT */ - if (GNIX_ALLOW_FI_REMOTE_CQ_DATA(flags, gnix_ep->caps)) { - cr_assert(tcqe->data == data, "CQE data invalid"); - } else { - cr_assert(tcqe->data 
== 0, "CQE data invalid"); - } - - cr_assert(tcqe->len == 0, "CQE length mismatch"); - cr_assert(tcqe->buf == 0, "CQE address mismatch"); - cr_assert(tcqe->tag == 0, "CQE tag invalid"); -} - -void rdm_rma_check_cntrs(uint64_t w[2], uint64_t r[2], uint64_t w_e[2], - uint64_t r_e[2]) -{ - /* Domain 0 */ - writes[0] += w[0]; - reads[0] += r[0]; - write_errs[0] += w_e[0]; - read_errs[0] += r_e[0]; - /*dbg_printf("%ld, %ld\n", fi_cntr_read(write_cntr[0]), writes[0]);*/ - cr_assert(fi_cntr_read(write_cntr[0]) == writes[0], "Bad write count"); - cr_assert(fi_cntr_read(read_cntr[0]) == reads[0], "Bad read count"); - cr_assert(fi_cntr_readerr(write_cntr[0]) == write_errs[0], - "Bad write err count"); - cr_assert(fi_cntr_readerr(read_cntr[0]) == read_errs[0], - "Bad read err count"); - - /* Domain 1 */ - writes[1] += w[1]; - reads[1] += r[1]; - write_errs[1] += w_e[1]; - read_errs[1] += r_e[1]; - cr_assert(fi_cntr_read(write_cntr[1]) == writes[1], "Bad write count"); - cr_assert(fi_cntr_read(read_cntr[1]) == reads[1], "Bad read count"); - cr_assert(fi_cntr_readerr(write_cntr[1]) == write_errs[1], - "Bad write err count"); - cr_assert(fi_cntr_readerr(read_cntr[1]) == read_errs[1], - "Bad read err count"); - - /* - * These tests should be refactored and all occurrences of - * fi_more_set should be removed. - * - * When fi_more_set is true, we do not want to check the rwrite and - * rread counters since they are not being used within the provider. 
- */ - if (hints->caps & FI_RMA_EVENT && fi_more_set == false) { - cr_assert(fi_cntr_read(rwrite_cntr) == writes[0], - "Bad rwrite count: expected(%lu) actual(%lu)", - writes[0], fi_cntr_read(rwrite_cntr)); - cr_assert(fi_cntr_read(rread_cntr) == reads[0], - "Bad rread count: expected(%lu) actual(%lu)", - reads[0], fi_cntr_read(rread_cntr)); - cr_assert(fi_cntr_readerr(rwrite_cntr) == 0, - "Bad rwrite err count"); - cr_assert(fi_cntr_readerr(rread_cntr) == 0, - "Bad rread err count"); - } -} - -void xfer_for_each_size(void (*xfer)(int len), int slen, int elen) -{ - int i; - - for (i = slen; i <= elen; i *= 2) { - xfer(i); - } -} - -void err_inject_enable(void) -{ - int ret, err_count_val = 1; - - ret = gni_domain_ops[0]->set_val(&dom[0]->fid, GNI_ERR_INJECT_COUNT, - &err_count_val); - cr_assert(!ret, "setval(GNI_ERR_INJECT_COUNT)"); - - if (gni_domain_ops[1] != NULL) { - ret = gni_domain_ops[1]->set_val(&dom[1]->fid, - GNI_ERR_INJECT_COUNT, - &err_count_val); - cr_assert(!ret, "setval(GNI_ERR_INJECT_COUNT)"); - } -} - -/******************************************************************************* - * Test RMA functions - ******************************************************************************/ - -TestSuite(dgram_rma_basic, - .init = dgram_setup_basic, - .fini = rdm_rma_teardown, - .disabled = false); - -TestSuite(rdm_rma_basic, - .init = rdm_rma_setup_basic, - .fini = rdm_rma_teardown, - .disabled = false); - -TestSuite(more_rdm_rma_basic, - .init = more_rdm_rma_setup_basic, - .fini = rdm_rma_teardown, - .disabled = false); - -TestSuite(dgram_rma_1dom_basic, - .init = dgram_setup_1dom_basic, - .fini = rdm_rma_teardown, - .disabled = false); - -TestSuite(dgram_rma_scalable, - .init = dgram_setup_scalable, - .fini = rdm_rma_teardown, - .disabled = false); - -TestSuite(rdm_rma_scalable, - .init = rdm_rma_setup_scalable, - .fini = rdm_rma_teardown, - .disabled = false); - -TestSuite(more_rdm_rma_scalable, - .init = more_rdm_rma_setup_scalable, - .fini = 
rdm_rma_teardown, - .disabled = false); - -TestSuite(dgram_rma_1dom_scalable, - .init = dgram_setup_1dom_scalable, - .fini = rdm_rma_teardown, - .disabled = false); - -void do_write(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_err_entry cq_err; - int errors_to_read = (dgm_fail) ? 1 : 0; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - init_data(source, len, 0xab); - init_data(target, len, 0); - - sz = fi_write(ep[0], source, len, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - target); - cr_assert_eq(sz, 0); - - do { - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - if (dgm_fail) { - cr_assert_eq(ret, -FI_EAVAIL); - - ret = fi_cq_readerr(send_cq[0], &cq_err, 0); - cr_assert_eq(ret, 1); - - errors_to_read--; - } - } while (errors_to_read > 0); - - if (dgm_fail) - return; - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]); - - w[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - cr_assert(check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_rma_basic, write) -{ - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(rdm_rma_basic, write_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(dgram_rma_basic, write) -{ - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(dgram_rma_basic, write_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_basic, write) -{ - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_basic, write_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -/* scalable */ -Test(rdm_rma_scalable, write) -{ - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(rdm_rma_scalable, write_retrans) -{ - 
err_inject_enable(); - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(dgram_rma_scalable, write) -{ - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(dgram_rma_scalable, write_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_scalable, write) -{ - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_scalable, write_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -void do_writev(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct iovec iov; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - iov.iov_base = source; - iov.iov_len = len; - - init_data(source, len, 0x25); - init_data(target, len, 0); - - sz = fi_writev(ep[0], &iov, (void **)loc_mr, 1, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - if (dgm_fail) { - cr_assert_eq(ret, -FI_EAVAIL); - return; - } - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]); - - w[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - cr_assert(check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_rma_basic, writev) -{ - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -Test(rdm_rma_basic, writev_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -Test(dgram_rma_basic, writev) -{ - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -Test(dgram_rma_basic, writev_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_basic, writev) -{ - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_basic, writev_retrans) -{ - dgm_fail = 1; - 
err_inject_enable(); - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -/* scalable */ -Test(rdm_rma_scalable, writev) -{ - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -Test(rdm_rma_scalable, writev_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -Test(dgram_rma_scalable, writev) -{ - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -Test(dgram_rma_scalable, writev_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_scalable, writev) -{ - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_scalable, writev_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -void do_writemsg(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct iovec iov; - struct fi_msg_rma msg; - struct fi_rma_iov rma_iov; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - iov.iov_base = source; - iov.iov_len = len; - - rma_iov.addr = _REM_ADDR(fi, target, target); - rma_iov.len = len; - rma_iov.key = mr_key[1]; - - msg.msg_iov = &iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = gni_addr[1]; - msg.rma_iov = &rma_iov; - msg.rma_iov_count = 1; - msg.context = target; - msg.data = (uint64_t)target; - - init_data(source, len, 0xef); - init_data(target, len, 0); - sz = fi_writemsg(ep[0], &msg, 0); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - if (dgm_fail) { - cr_assert_eq(ret, -FI_EAVAIL); - return; - } - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]); - - w[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - cr_assert(check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_rma_basic, writemsg) -{ - xfer_for_each_size(do_writemsg, 8, 
BUF_SZ); -} - -Test(rdm_rma_basic, writemsg_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - -Test(dgram_rma_basic, writemsg) -{ - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - -Test(dgram_rma_basic, writemsg_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_basic, writemsg) -{ - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_basic, writemsg_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - -/* scalable */ - -Test(rdm_rma_scalable, writemsg) -{ - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - -Test(rdm_rma_scalable, writemsg_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - -Test(dgram_rma_scalable, writemsg) -{ - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - -Test(dgram_rma_scalable, writemsg_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_scalable, writemsg) -{ - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_scalable, writemsg_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - -void do_writemsg_more(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct iovec iov, iov2; - struct fi_msg_rma msg, msg2; - struct fi_rma_iov rma_iov, rma_iov2; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - iov.iov_base = source; - iov.iov_len = len; - - iov2.iov_base = source2; - iov2.iov_len = len; - - rma_iov.addr = _REM_ADDR(fi, target, target); - rma_iov.len = len; - rma_iov.key = mr_key[1]; - - rma_iov2.addr = _REM_ADDR(fi, target2, target2); - rma_iov2.len = len; - rma_iov2.key = mr_key2[1]; /* use different mr_key? 
*/ - - msg.msg_iov = &iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = gni_addr[1]; - msg.rma_iov = &rma_iov; - msg.rma_iov_count = 1; - msg.context = target; - msg.data = (uint64_t)target; - - msg2.msg_iov = &iov2; - msg2.desc = (void **)loc_mr2; - msg2.iov_count = 1; - msg2.addr = gni_addr[1]; - msg2.rma_iov = &rma_iov2; - msg2.rma_iov_count = 1; - msg2.context = target2; - msg2.data = (uint64_t)target2; - - init_data(source, len, 0xef); - init_data(target, len, 0); - init_data(source2, len, 0xef); - init_data(target2, len, 0); - - /* Write first message, with FI_MORE */ - sz = fi_writemsg(ep[0], &msg, FI_MORE); - cr_assert_eq(sz, 0); - - /* If FI_RMA_EVENT is a capability, check if the FI_MORE chain is interrupted */ - if (hints->caps & FI_RMA_EVENT) { - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]); - } - - /* Write second message */ - sz = fi_writemsg(ep[0], &msg2, 0); - cr_assert_eq(sz, 0); - - /* If FI_RMA_EVENT is a capability, check cq now */ - if (hints->caps & FI_RMA_EVENT) { - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target2, FI_RMA | FI_WRITE, 0, ep[0]); - /* Otherwise, check for both events now */ - } else { - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target2, FI_RMA | FI_WRITE, 0, ep[0]); - } - - w[0] = 2; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got 2 write context events!\n"); - - cr_assert(check_data(source, target, len), "Data mismatch"); - cr_assert(check_data(source2, target2, len), "Data mismatch2"); - -} - 
-Test(more_rdm_rma_basic, writemsgmore) -{ - fi_more_set = true; - xfer_for_each_size(do_writemsg_more, 8, BUF_SZ); - fi_more_set = false; -} - -Test(rdm_rma_basic, writemsgmore) -{ - fi_more_set = true; - xfer_for_each_size(do_writemsg_more, 8, BUF_SZ); - fi_more_set = false; -} - -Test(more_rdm_rma_scalable, writemsgmore) -{ - fi_more_set = true; - xfer_for_each_size(do_writemsg_more, 8, BUF_SZ); - fi_more_set = false; -} - -Test(rdm_rma_scalable, writemsgmore) -{ - fi_more_set = true; - xfer_for_each_size(do_writemsg_more, 8, BUF_SZ); - fi_more_set = false; -} - -void do_mixed_more(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct iovec iov, iov2; - struct fi_msg_rma msg; - struct fi_msg msg2; - struct fi_rma_iov rma_iov; - - iov.iov_base = source; - iov.iov_len = len; - - rma_iov.addr = _REM_ADDR(fi, target, target); - rma_iov.len = len; - rma_iov.key = mr_key[1]; - - msg.msg_iov = &iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = gni_addr[1]; - msg.rma_iov = &rma_iov; - msg.rma_iov_count = 1; - msg.context = target; - msg.data = (uint64_t)target; - - iov2.iov_base = source2; - iov2.iov_len = len; - - msg2.msg_iov = &iov2; - msg2.desc = (void **)loc_mr2; - msg2.iov_count = 1; - msg2.addr = gni_addr[1]; - msg2.context = target2; - msg2.data = (uint64_t)target2; - - init_data(source, len, 0xef); - init_data(target, len, 0); - init_data(source2, len, 0xef); - init_data(target2, len, 0); - - sz = fi_writemsg(ep[0], &msg, FI_MORE); - cr_assert_eq(sz, 0); - - sz = fi_recv(ep[1], target2, len, rem_mr2[0], FI_ADDR_UNSPEC, source2); - cr_assert_eq(sz, 0); - - sz = fi_sendmsg(ep[0], &msg2, 0); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - cr_assert_eq(ret, 1); - - while ((ret = fi_cq_read(recv_cq[1], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - cr_assert_eq(ret, 1); - - while 
((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - cr_assert_eq(ret, 1); - - cr_assert(check_data(source, target, len), "Data mismatch"); - cr_assert(check_data(source2, target2, len), "Data mismatch2"); - -} - -Test(more_rdm_rma_basic, mixedmore) -{ - xfer_for_each_size(do_mixed_more, 8, BUF_SZ); -} -Test(rdm_rma_basic, mixedmore) -{ - xfer_for_each_size(do_mixed_more, 8, BUF_SZ); -} - -Test(more_rdm_rma_scalable, mixedmore) -{ - xfer_for_each_size(do_mixed_more, 8, BUF_SZ); -} -Test(rdm_rma_scalable, mixedmore) -{ - xfer_for_each_size(do_mixed_more, 8, BUF_SZ); -} - -/* - * write_fence should be validated by inspecting debug. - * - * The following sequence of events should be seen: - * - * TX request processed: A - * TX request queue stalled on FI_FENCE request: B - * Added event: A - * TX request processed: B - * - */ - -void do_write_fence(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct iovec iov; - struct fi_msg_rma msg; - struct fi_rma_iov rma_iov; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - iov.iov_base = source; - iov.iov_len = len; - - rma_iov.addr = _REM_ADDR(fi, target, target); - rma_iov.len = sizeof(target); - rma_iov.key = mr_key[1]; - - msg.msg_iov = &iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = gni_addr[1]; - msg.rma_iov = &rma_iov; - msg.rma_iov_count = 1; - msg.context = target; - msg.data = (uint64_t)target; - - init_data(source, len, 0xef); - init_data(target, len, 0); - - /* write A */ - sz = fi_writemsg(ep[0], &msg, 0); - cr_assert_eq(sz, 0); - - /* write B */ - sz = fi_writemsg(ep[0], &msg, FI_FENCE); - cr_assert_eq(sz, 0); - - /* event A */ - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - if (dgm_fail) { - cr_assert_eq(ret, -FI_EAVAIL); - return; - } - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | 
FI_WRITE, 0, ep[0]); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - - /* event B */ - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]); - - w[0] = 2; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - cr_assert(check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_rma_basic, write_fence) -{ - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -Test(rdm_rma_basic, write_fence_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -Test(dgram_rma_basic, write_fence) -{ - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -Test(dgram_rma_basic, write_fence_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_basic, write_fence) -{ - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_basic, write_fence_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -/* scalable */ -Test(rdm_rma_scalable, write_fence) -{ - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -Test(rdm_rma_scalable, write_fence_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -Test(dgram_rma_scalable, write_fence) -{ - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -Test(dgram_rma_scalable, write_fence_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_scalable, write_fence) -{ - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_scalable, write_fence_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -#define INJECT_SIZE 64 -void do_inject_write(int len) -{ - ssize_t sz; - int ret, 
i, loops = 0; - struct fi_cq_tagged_entry cqe; - static gnix_mr_cache_t *cache; - struct gnix_fid_ep *ep_priv; - int already_registered = 0; - - init_data(source, len, 0x23); - init_data(target, len, 0); - - ep_priv = container_of(ep[0], struct gnix_fid_ep, ep_fid); - if (!USING_SCALABLE(fi)) { - cache = GET_DOMAIN_RW_CACHE(ep_priv->domain); - cr_assert(cache != NULL); - - already_registered = ofi_atomic_get32(&cache->inuse.elements); - } - - sz = fi_inject_write(ep[0], source, len, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1]); - cr_assert_eq(sz, 0); - - /* - * shouldn't have registeredd the source buffer, trust but verify - */ - if (!USING_SCALABLE(fi)) { - cr_assert(ofi_atomic_get32(&cache->inuse.elements) - == already_registered); - } - - for (i = 0; i < len; i++) { - loops = 0; - while (source[i] != target[i]) { - /* for progress */ - ret = fi_cq_read(send_cq[0], &cqe, 1); - cr_assert(ret == -FI_EAGAIN || ret == -FI_EAVAIL, - "Received unexpected event\n"); - - pthread_yield(); - cr_assert(++loops < MLOOPS || dgm_fail, - "Data mismatch"); - if (dgm_fail && loops > MLOOPS) - break; - } - } - cr_assert(!dgm_fail || (dgm_fail && loops >= MLOOPS), "Should fail"); -} - -Test(rdm_rma_basic, inject_write) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - -Test(rdm_rma_basic, inject_write_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - -Test(dgram_rma_basic, inject_write) -{ - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - -Test(dgram_rma_basic, inject_write_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - -Test(dgram_rma_1dom_basic, inject_write) -{ - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - -Test(dgram_rma_1dom_basic, inject_write_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_inject_write, 8, 
INJECT_SIZE); -} - -/* scalable */ -Test(rdm_rma_scalable, inject_write) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - -Test(rdm_rma_scalable, inject_write_retrans) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - err_inject_enable(); - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - -Test(dgram_rma_scalable, inject_write) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - -Test(dgram_rma_scalable, inject_write_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - -Test(dgram_rma_1dom_scalable, inject_write) -{ - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - -Test(dgram_rma_1dom_scalable, inject_write_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - -void do_writedata(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry dcqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - -#define WRITE_DATA 0x5123da1a145 - init_data(source, len, 0x23); - init_data(target, len, 0); - sz = fi_writedata(ep[0], source, len, loc_mr[0], WRITE_DATA, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - if (dgm_fail) { - cr_assert_eq(ret, -FI_EAVAIL); - return; - } - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]); - - w[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - 
cr_assert(check_data(source, target, len), "Data mismatch"); - - while ((ret = fi_cq_read(recv_cq[1], &dcqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - cr_assert(ret != FI_SUCCESS, "Missing remote data"); - - rdm_rma_check_tcqe(&dcqe, NULL, - (FI_RMA | FI_REMOTE_WRITE | FI_REMOTE_CQ_DATA), - WRITE_DATA, ep[1]); -} - -Test(rdm_rma_basic, writedata) -{ - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -Test(rdm_rma_basic, writedata_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -Test(dgram_rma_basic, writedata) -{ - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -Test(dgram_rma_basic, writedata_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_basic, writedata) -{ - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_basic, writedata_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -/* scalable */ - -Test(rdm_rma_scalable, writedata) -{ - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -Test(rdm_rma_scalable, writedata_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -Test(dgram_rma_scalable, writedata) -{ - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -Test(dgram_rma_scalable, writedata_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_scalable, writedata) -{ - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_scalable, writedata_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -#define INJECTWRITE_DATA 0xdededadadeadbeaf -void do_inject_writedata(int len) -{ - ssize_t sz; - int ret, i, loops = 0; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry dcqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, 
UINT_MAX, UINT_MAX }; - - init_data(source, len, 0x23); - init_data(target, len, 0); - sz = fi_inject_writedata(ep[0], source, len, INJECTWRITE_DATA, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1]); - cr_assert_eq(sz, 0); - - for (i = 0; i < len; i++) { - loops = 0; - while (source[i] != target[i]) { - /* for progress */ - ret = fi_cq_read(send_cq[0], &cqe, 1); - cr_assert(ret == -FI_EAGAIN || ret == -FI_EAVAIL, - "Received unexpected event\n"); - - pthread_yield(); - cr_assert(++loops < MLOOPS || dgm_fail, - "Data mismatch"); - if (dgm_fail && loops > MLOOPS) - break; - } - } - cr_assert(!dgm_fail || (dgm_fail && loops >= MLOOPS), "Should fail"); - if (dgm_fail && loops >= MLOOPS) - return; - - while ((ret = fi_cq_read(recv_cq[1], &dcqe, 1)) == -FI_EAGAIN) { - ret = fi_cq_read(send_cq[0], &cqe, 1); /* for progress */ - pthread_yield(); - } - cr_assert(ret != FI_SUCCESS, "Missing remote data"); - - rdm_rma_check_tcqe(&dcqe, NULL, - (FI_RMA | FI_REMOTE_WRITE | FI_REMOTE_CQ_DATA), - INJECTWRITE_DATA, ep[1]); -} - -Test(rdm_rma_basic, inject_writedata) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -Test(rdm_rma_basic, inject_writedata_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -Test(dgram_rma_basic, inject_writedata) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -Test(dgram_rma_basic, inject_writedata_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -Test(dgram_rma_1dom_basic, inject_writedata) -{ - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -Test(dgram_rma_1dom_basic, inject_writedata_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -/* 
scalable */ -Test(rdm_rma_scalable, inject_writedata) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -Test(rdm_rma_scalable, inject_writedata_retrans) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - err_inject_enable(); - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -Test(dgram_rma_scalable, inject_writedata) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -Test(dgram_rma_scalable, inject_writedata_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -Test(dgram_rma_1dom_scalable, inject_writedata) -{ - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -Test(dgram_rma_1dom_scalable, inject_writedata_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -void do_read(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - -#define READ_CTX 0x4e3dda1aULL - init_data(source, len, 0); - init_data(target, len, 0xad); - - /* domain 0 from domain 1 */ - sz = fi_read(ep[0], source, len, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - (void *)READ_CTX); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, (void *)READ_CTX, FI_RMA | FI_READ, 0, ep[0]); - - r[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got read context event!\n"); - - cr_assert(check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_rma_basic, read) -{ - xfer_for_each_size(do_read, 8, BUF_SZ); 
-} - -Test(rdm_rma_basic, read_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_read, 8, BUF_SZ); -} - -Test(dgram_rma_basic, read) -{ - xfer_for_each_size(do_read, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_basic, read) -{ - xfer_for_each_size(do_read, 8, BUF_SZ); -} - -/* scalable */ - -Test(rdm_rma_scalable, read) -{ - xfer_for_each_size(do_read, 8, BUF_SZ); -} - -Test(rdm_rma_scalable, read_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_read, 8, BUF_SZ); -} - -Test(dgram_rma_scalable, read) -{ - xfer_for_each_size(do_read, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_scalable, read) -{ - xfer_for_each_size(do_read, 8, BUF_SZ); -} - -void do_readv(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct iovec iov; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - iov.iov_base = source; - iov.iov_len = len; - - init_data(target, len, 0x25); - init_data(source, len, 0); - sz = fi_readv(ep[0], &iov, (void **)loc_mr, 1, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_READ, 0, ep[0]); - - r[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - cr_assert(check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_rma_basic, readv) -{ - xfer_for_each_size(do_readv, 8, BUF_SZ); -} - -Test(rdm_rma_basic, readv_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_readv, 8, BUF_SZ); -} - -Test(dgram_rma_basic, readv) -{ - xfer_for_each_size(do_readv, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_basic, readv) -{ - xfer_for_each_size(do_readv, 8, BUF_SZ); -} - -/* scalable */ - -Test(rdm_rma_scalable, readv) -{ - xfer_for_each_size(do_readv, 8, BUF_SZ); -} - -Test(rdm_rma_scalable, readv_retrans) -{ - 
err_inject_enable(); - xfer_for_each_size(do_readv, 8, BUF_SZ); -} - -Test(dgram_rma_scalable, readv) -{ - xfer_for_each_size(do_readv, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_scalable, readv) -{ - xfer_for_each_size(do_readv, 8, BUF_SZ); -} - -void do_readmsg(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct iovec iov; - struct fi_msg_rma msg; - struct fi_rma_iov rma_iov; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - iov.iov_base = source; - iov.iov_len = len; - - rma_iov.addr = _REM_ADDR(fi, target, target); - rma_iov.len = len; - rma_iov.key = mr_key[1]; - - msg.msg_iov = &iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = gni_addr[1]; - msg.rma_iov = &rma_iov; - msg.rma_iov_count = 1; - msg.context = target; - msg.data = (uint64_t)target; - - init_data(target, len, 0xef); - init_data(source, len, 0); - sz = fi_readmsg(ep[0], &msg, 0); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_READ, 0, ep[0]); - - r[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - cr_assert(check_data(source, target, len), "Data mismatch"); - - iov.iov_base = source; - iov.iov_len = len; - - rma_iov.addr = _REM_ADDR(fi, target, target); - rma_iov.len = len; - rma_iov.key = mr_key[0]; - - msg.msg_iov = &iov; - msg.desc = (void **)(loc_mr + 1); - msg.iov_count = 1; - msg.addr = gni_addr[0]; - msg.rma_iov = &rma_iov; - msg.rma_iov_count = 1; - msg.context = target; - msg.data = (uint64_t)target; -} - -Test(rdm_rma_basic, readmsg) -{ - xfer_for_each_size(do_readmsg, 8, BUF_SZ); -} - -Test(rdm_rma_basic, readmsg_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_readmsg, 8, BUF_SZ); -} - -Test(dgram_rma_basic, readmsg) -{ - xfer_for_each_size(do_readmsg, 8, BUF_SZ); 
-} - -Test(dgram_rma_1dom_basic, readmsg) -{ - xfer_for_each_size(do_readmsg, 8, BUF_SZ); -} - -/* scalable */ - -Test(rdm_rma_scalable, readmsg) -{ - xfer_for_each_size(do_readmsg, 8, BUF_SZ); -} - -Test(rdm_rma_scalable, readmsg_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_readmsg, 8, BUF_SZ); -} - -Test(dgram_rma_scalable, readmsg) -{ - xfer_for_each_size(do_readmsg, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_scalable, readmsg) -{ - xfer_for_each_size(do_readmsg, 8, BUF_SZ); -} - -void do_readmsg_more(int len, void *s, void *t, int len2, void *s2, void *t2) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct iovec iov, iov2; - struct fi_msg_rma msg, msg2; - struct fi_rma_iov rma_iov, rma_iov2; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - iov.iov_base = s; - iov.iov_len = len; - - rma_iov.addr = _REM_ADDR(fi, target, t); - rma_iov.len = len; - rma_iov.key = mr_key[1]; - - msg.msg_iov = &iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = gni_addr[1]; - msg.rma_iov = &rma_iov; - msg.rma_iov_count = 1; - msg.context = t; - msg.data = (uint64_t)t; - - iov2.iov_base = s2; - iov2.iov_len = len2; - - rma_iov2.addr = _REM_ADDR(fi, target2, t2); - rma_iov2.len = len2; - rma_iov2.key = mr_key2[1]; - - msg2.msg_iov = &iov2; - msg2.desc = (void **)loc_mr2; - msg2.iov_count = 1; - msg2.addr = gni_addr[1]; - msg2.rma_iov = &rma_iov2; - msg2.rma_iov_count = 1; - msg2.context = t2; - msg2.data = (uint64_t)t2; - - - init_data(t, len, 0xef); - init_data(t2, len2, 0xff); - init_data(s, len, 0); - init_data(s2, len2, 0); - - /* Read first message, with FI_MORE */ - sz = fi_readmsg(ep[0], &msg, FI_MORE); - cr_assert_eq(sz, 0); - - /* If FI_RMA_EVENT is a capability, check if the FI_MORE chain is interrupted */ - if (hints->caps & FI_RMA_EVENT) { - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - 
cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, t, FI_RMA | FI_READ, 0, ep[0]); - } - - /* Read second message */ - sz = fi_readmsg(ep[0], &msg2, 0); - cr_assert_eq(sz, 0); - - /* If FI_RMA_EVENT is a capability, check cq now */ - if (hints->caps & FI_RMA_EVENT) { - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, t2, FI_RMA | FI_READ, 0, ep[0]); - /* Otherwise, check for both events now */ - } else { - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, t, FI_RMA | FI_READ, 0, ep[0]); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, t2, FI_RMA | FI_READ, 0, ep[0]); - } - r[0] = 2; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - cr_assert(check_data(s, t, len), "Data mismatch1"); - cr_assert(check_data(s2, t2, len2), "Data mismatch2"); -} - -void do_read_alignment_more(void) -{ - /* Size below GNIX_RMA_UREAD_CHAINED_THRESH size. All of these should - * have the same behavior. Code related to head/tail buffers should - * be bypassed */ - do_readmsg_more(8, source, target, 8, source2, target2); - dbg_printf("MORE All Aligned\n"); - - do_readmsg_more(8, source, target, 9, source2, target2); - dbg_printf("MORE M1 A M2 TUA INDIRECT\n"); - - do_readmsg_more(9, source, target, 8, source2, target2); - dbg_printf("MORE M1 UA M2 A INDIRECT\n"); - - do_readmsg_more(8, source, target, 8, source2, target2+2); - dbg_printf("MORE M1 A M2 H&TUA INDIRECT\n"); - - /* Size above GNIX_RMA_UREAD_CHAINED_THRESH 'H/T buffers will actually - * be used. Have one test for each possible combination of 2 messages - * with 4 possible paths to follow. 
- * 'Aligned', 'Head Unaligned', 'Tail Unaligned', 'H&T Unaligned' */ - - /* M1 Aligned */ - do_readmsg_more(64, source, target, 64, source2, target2); - dbg_printf("MORE M1 A M2 A\n"); - - do_readmsg_more(64, source, target, 65, source2, target2); - dbg_printf("MORE M1 A M2 TUA\n"); - - do_readmsg_more(64, source, target, 65, source2, target2+3); - dbg_printf("MORE M1 A M2 HUA\n"); - - do_readmsg_more(64, source, target, 64, source2, target2+2); - dbg_printf("MORE M1 A M2 H&TUA\n"); - - - /* M1 Tail Unaligned */ - do_readmsg_more(65, source, target, 64, source2, target2); - dbg_printf("MORE M1 TUA M2 A\n"); - - do_readmsg_more(65, source, target, 65, source2, target2); - dbg_printf("MORE M1 TUA M2 TUA\n"); - - do_readmsg_more(65, source, target, 65, source2, target2+3); - dbg_printf("MORE M1 TUA M2 HUA\n"); - - do_readmsg_more(65, source, target, 64, source2, target2+2); - dbg_printf("MORE M1 TUA M2 H&TUA\n"); - - /* M1 Head Unaligned */ - do_readmsg_more(65, source, target+3, 64, source2, target2); - dbg_printf("MORE M1 HUA M2 A\n"); - - do_readmsg_more(65, source, target+3, 65, source2, target2); - dbg_printf("MORE M1 HUA M2 TUA\n"); - - do_readmsg_more(65, source, target+3, 65, source2, target2+3); - dbg_printf("MORE M1 HUA M2 HUA\n"); - - do_readmsg_more(65, source, target+3, 64, source2, target2+2); - dbg_printf("MORE M1 HUA M2 H&TUA\n"); - - - /* M1 Head&Tail Unaligned */ - do_readmsg_more(64, source, target+2, 64, source2, target2); - dbg_printf("MORE M1 H&TUA M2 A\n"); - - do_readmsg_more(64, source, target+2, 65, source2, target2); - dbg_printf("MORE M1 H&TUA M2 TUA\n"); - - do_readmsg_more(64, source, target+2, 65, source2, target2+3); - dbg_printf("MORE M1 H&TUA M2 HUA\n"); - - do_readmsg_more(64, source, target+2, 64, source2, target2+2); - dbg_printf("MORE M1 H&TUA M2 H&TUA\n"); - -} - -Test(more_rdm_rma_basic, readmsgmore) -{ - fi_more_set = true; - do_read_alignment_more(); - fi_more_set = false; -} - -Test(rdm_rma_basic, readmsgmore) -{ - 
fi_more_set = true; - do_read_alignment_more(); - fi_more_set = false; -} - -/* scalable */ - -Test(more_rdm_rma_scalable, readmsgmore) -{ - fi_more_set = true; - do_read_alignment_more(); - fi_more_set = false; -} - -Test(rdm_rma_scalable, readmsgmore) -{ - fi_more_set = true; - do_read_alignment_more(); - fi_more_set = false; -} - -void inject_common(void) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct iovec iov; - struct fi_msg_rma msg; - struct fi_rma_iov rma_iov; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - iov.iov_base = source; - iov.iov_len = GNIX_INJECT_SIZE; - - rma_iov.addr = _REM_ADDR(fi, target, target); - rma_iov.len = GNIX_INJECT_SIZE; - rma_iov.key = mr_key[1]; - - msg.msg_iov = &iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = gni_addr[1]; - msg.rma_iov = &rma_iov; - msg.rma_iov_count = 1; - msg.context = target; - msg.data = (uint64_t)target; - - init_data(source, GNIX_INJECT_SIZE, 0xef); - init_data(target, GNIX_INJECT_SIZE, 0); - - sz = fi_writemsg(ep[0], &msg, FI_INJECT); - cr_assert_eq(sz, 0); - - iov.iov_len = GNIX_INJECT_SIZE+1; - sz = fi_writemsg(ep[0], &msg, FI_INJECT); - cr_assert_eq(sz, -FI_EINVAL); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]); - - w[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - cr_assert(check_data(source, target, GNIX_INJECT_SIZE), - "Data mismatch"); -} - -Test(rdm_rma_basic, inject) -{ - inject_common(); -} - -Test(dgram_rma_basic, inject) -{ - inject_common(); -} - -Test(dgram_rma_1dom_basic, inject) -{ - inject_common(); -} - -Test(rdm_rma_scalable, inject) -{ - inject_common(); -} - -Test(dgram_rma_scalable, inject) -{ - inject_common(); -} - -Test(dgram_rma_1dom_scalable, inject) -{ - 
inject_common(); -} - -void do_write_autoreg(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - init_data(source, len, 0xab); - init_data(target, len, 0); - sz = fi_write(ep[0], source, len, - NULL, gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]); - - w[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - cr_assert(check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_rma_basic, write_autoreg) -{ - xfer_for_each_size(do_write_autoreg, 8, BUF_SZ); -} - -Test(dgram_rma_basic, write_autoreg) -{ - xfer_for_each_size(do_write_autoreg, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_basic, write_autoreg) -{ - xfer_for_each_size(do_write_autoreg, 8, BUF_SZ); -} - -/* scalable */ - -Test(rdm_rma_scalable, write_autoreg) -{ - xfer_for_each_size(do_write_autoreg, 8, BUF_SZ); -} - -Test(dgram_rma_scalable, write_autoreg) -{ - xfer_for_each_size(do_write_autoreg, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_scalable, write_autoreg) -{ - xfer_for_each_size(do_write_autoreg, 8, BUF_SZ); -} - -void do_write_autoreg_uncached(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - init_data(uc_source, len, 0xab); - init_data(target, len, 0); - sz = fi_write(ep[0], uc_source, len, - NULL, gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - 
rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]); - - w[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - cr_assert(check_data(uc_source, target, len), "Data mismatch"); -} - -Test(rdm_rma_basic, write_autoreg_uncached) -{ - xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ); -} - -Test(dgram_rma_basic, write_autoreg_uncached) -{ - xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_basic, write_autoreg_uncached) -{ - xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ); -} - -/* scalable */ - -Test(rdm_rma_scalable, write_autoreg_uncached) -{ - xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ); -} - -Test(dgram_rma_scalable, write_autoreg_uncached) -{ - xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_scalable, write_autoreg_uncached) -{ - xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ); -} - -void do_write_error(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe; - struct fi_cq_err_entry err_cqe = {0}; - - err_cqe.err_data_size = 0; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - init_data(source, len, 0xab); - init_data(target, len, 0); - sz = fi_write(ep[0], source, len, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, -FI_EAVAIL); - - ret = fi_cq_readerr(send_cq[0], &err_cqe, 0); - cr_assert_eq(ret, 1); - - cr_assert((uint64_t)err_cqe.op_context == (uint64_t)target, - "Bad error context"); - cr_assert(err_cqe.flags == (FI_RMA | FI_WRITE)); - cr_assert(err_cqe.len == 0, "Bad error len"); - cr_assert(err_cqe.buf == 0, "Bad error buf"); - cr_assert(err_cqe.data == 0, "Bad error data"); - cr_assert(err_cqe.tag == 0, "Bad error tag"); - cr_assert(err_cqe.olen == 0, "Bad error olen"); - cr_assert(err_cqe.err == 
FI_ECANCELED, "Bad error errno"); - cr_assert(err_cqe.prov_errno == gnixu_to_fi_errno(GNI_RC_TRANSACTION_ERROR), - "Bad prov errno"); - cr_assert(err_cqe.err_data == NULL, "Bad error provider data"); - - w_e[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); -} - -static inline void __write_error(void) -{ - int ret, max_retrans_val = 1; - - ret = gni_domain_ops[0]->set_val(&dom[0]->fid, GNI_MAX_RETRANSMITS, - &max_retrans_val); - cr_assert(!ret, "setval(GNI_MAX_RETRANSMITS)"); - - ret = gni_domain_ops[1]->set_val(&dom[1]->fid, GNI_MAX_RETRANSMITS, - &max_retrans_val); - cr_assert(!ret, "setval(GNI_MAX_RETRANSMITS)"); - err_inject_enable(); - - xfer_for_each_size(do_write_error, 8, BUF_SZ); -} - -Test(rdm_rma_basic, write_error) -{ - __write_error(); -} - -Test(rdm_rma_scalable, write_error) -{ - __write_error(); -} - -Test(dgram_rma_basic, write_error) -{ - __write_error(); -} - -Test(dgram_rma_scalable, write_error) -{ - __write_error(); -} - -void do_read_error(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe; - struct fi_cq_err_entry err_cqe = {0}; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - init_data(source, len, 0); - init_data(target, len, 0xad); - sz = fi_read(ep[0], source, len, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - (void *)READ_CTX); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, -FI_EAVAIL); - - ret = fi_cq_readerr(send_cq[0], &err_cqe, 0); - cr_assert_eq(ret, 1); - - cr_assert((uint64_t)err_cqe.op_context == (uint64_t)READ_CTX, - "Bad error context"); - cr_assert(err_cqe.flags == (FI_RMA | FI_READ)); - cr_assert(err_cqe.len == 0, "Bad error len"); - cr_assert(err_cqe.buf == 0, "Bad error buf"); - cr_assert(err_cqe.data == 0, "Bad error data"); - cr_assert(err_cqe.tag == 0, "Bad error tag"); - cr_assert(err_cqe.olen == 0, "Bad error olen"); - cr_assert(err_cqe.err == FI_ECANCELED, "Bad error 
errno"); - cr_assert(err_cqe.prov_errno == gnixu_to_fi_errno(GNI_RC_TRANSACTION_ERROR), - "Bad prov errno"); - cr_assert(err_cqe.err_data == NULL, "Bad error provider data"); - - r_e[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); -} - -static inline void __read_error(void) -{ - int ret, max_retrans_val = 1; - - ret = gni_domain_ops[0]->set_val(&dom[0]->fid, GNI_MAX_RETRANSMITS, - &max_retrans_val); - cr_assert(!ret, "setval(GNI_MAX_RETRANSMITS)"); - - ret = gni_domain_ops[1]->set_val(&dom[1]->fid, GNI_MAX_RETRANSMITS, - &max_retrans_val); - cr_assert(!ret, "setval(GNI_MAX_RETRANSMITS)"); - err_inject_enable(); - - xfer_for_each_size(do_read_error, 8, BUF_SZ); -} - -Test(rdm_rma_basic, read_error) -{ - __read_error(); -} - -Test(rdm_rma_scalable, read_error) -{ - __read_error(); -} - -void do_read_buf(void *s, void *t, int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - -#define READ_CTX 0x4e3dda1aULL - init_data(s, len, 0); - init_data(t, len, 0xad); - sz = fi_read(ep[0], s, len, NULL, gni_addr[1], - _REM_ADDR(fi, target, t), mr_key[1], - (void *)READ_CTX); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, (void *)READ_CTX, FI_RMA | FI_READ, 0, ep[0]); - - r[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got read context event!\n"); - - cr_assert(check_data(s, t, len), "Data mismatch"); -} - -void do_read_alignment(int len) -{ - int s_off, t_off, l_off; - - for (s_off = 0; s_off < 7; s_off++) { - for (t_off = 0; t_off < 7; t_off++) { - for (l_off = 0; l_off < 7; l_off++) { - do_read_buf(source + s_off, - target + t_off, - len + l_off); - } - } - } -} - -Test(rdm_rma_basic, read_chained) -{ - do_read_buf(source, target, 60); -} -Test(rdm_rma_basic, read_alignment) -{ - 
xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1)); -} - -Test(rdm_rma_basic, read_alignment_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_basic, read_alignment) -{ - xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_1dom_basic, read_alignment) -{ - xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1)); -} - -/* scalable */ - -Test(rdm_rma_scalable, read_chained) -{ - do_read_buf(source, target, 60); -} -Test(rdm_rma_scalable, read_alignment) -{ - xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1)); -} - -Test(rdm_rma_scalable, read_alignment_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_scalable, read_alignment) -{ - xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_1dom_scalable, read_alignment) -{ - xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1)); -} - -void do_write_buf(void *s, void *t, int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_err_entry cq_err; - int errors_to_read = (dgm_fail) ? 
1 : 0; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - init_data(s, len, 0xab); - init_data(t, len, 0); - sz = fi_write(ep[0], s, len, NULL, gni_addr[1], - _REM_ADDR(fi, target, t), mr_key[1], - t); - cr_assert_eq(sz, 0); - - do { - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - if (dgm_fail) { - cr_assert_eq(ret, -FI_EAVAIL); - - ret = fi_cq_readerr(send_cq[0], &cq_err, 0); - cr_assert_eq(ret, 1); - - errors_to_read--; - } - } while (errors_to_read > 0); - - if (dgm_fail) - return; - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, t, FI_RMA | FI_WRITE, 0, ep[0]); - - w[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - cr_assert(check_data(s, t, len), "Data mismatch"); -} - -void do_write_alignment(int len) -{ - int s_off, t_off, l_off; - - for (s_off = 0; s_off < 7; s_off++) { - for (t_off = 0; t_off < 7; t_off++) { - for (l_off = 0; l_off < 7; l_off++) { - do_write_buf(source + s_off, - target + t_off, - len + l_off); - } - } - } -} - -Test(rdm_rma_basic, write_alignment) -{ - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -Test(rdm_rma_basic, write_alignment_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_basic, write_alignment) -{ - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_basic, write_alignment_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_1dom_basic, write_alignment) -{ - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_1dom_basic, write_alignment_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - - - -/* scalable */ - -Test(rdm_rma_scalable, write_alignment) -{ - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -Test(rdm_rma_scalable, 
write_alignment_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_scalable, write_alignment) -{ - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_scalable, write_alignment_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_1dom_scalable, write_alignment) -{ - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_1dom_scalable, write_alignment_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -void do_trigger(int len) -{ - int ret, i; - ssize_t sz; - struct fi_cq_tagged_entry cqe; - struct fi_msg_rma msg[4]; - struct iovec iov; - struct fi_rma_iov rma_iov; - struct fi_triggered_context t_ctx[4]; - void *ctxs[4]; - - iov.iov_base = source; - iov.iov_len = len; - - rma_iov.addr = _REM_ADDR(fi, target, target); - rma_iov.len = len; - rma_iov.key = mr_key[1]; - - msg[0].msg_iov = &iov; - msg[0].desc = (void **)loc_mr; - msg[0].iov_count = 1; - msg[0].addr = gni_addr[1]; - msg[0].rma_iov = &rma_iov; - msg[0].rma_iov_count = 1; - msg[0].data = (uint64_t)target; - msg[1] = msg[2] = msg[3] = msg[0]; - - /* XXX: Req 0 is guaranteed to be sent before req 2, but req 2 will - * race req 0 through the network. Fix race if needed. 
*/ - t_ctx[0].trigger.threshold.threshold = 1; - t_ctx[1].trigger.threshold.threshold = 2; - t_ctx[2].trigger.threshold.threshold = 1; - t_ctx[3].trigger.threshold.threshold = 0; - ctxs[0] = &t_ctx[3]; - ctxs[1] = &t_ctx[0]; - ctxs[2] = &t_ctx[2]; - ctxs[3] = &t_ctx[1]; - - for (i = 0; i < 4; i++) { - t_ctx[i].event_type = FI_TRIGGER_THRESHOLD; - t_ctx[i].trigger.threshold.cntr = write_cntr[0]; - msg[i].context = &t_ctx[i]; - - sz = fi_writemsg(ep[0], &msg[i], FI_TRIGGER); - cr_assert_eq(sz, 0); - } - - for (i = 0; i < 4; i++) { - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - - rdm_rma_check_tcqe(&cqe, ctxs[i], FI_RMA | FI_WRITE, 0, ep[0]); - } - - sz = fi_cntr_set(write_cntr[0], 0); - cr_assert_eq(sz, 0); -} - -/* - * TODO: this test fails periodically - */ -Test(rdm_rma_basic, trigger, .disabled = true) -{ - xfer_for_each_size(do_trigger, 8, BUF_SZ); -} - -Test(rdm_rma_scalable, trigger) -{ - xfer_for_each_size(do_trigger, 8, BUF_SZ); -} - -TestSuite(rdm_rma_rcntr_basic, - .init = rdm_rma_rcntr_setup_basic, - .fini = rdm_rma_teardown, - .disabled = false); - -Test(rdm_rma_rcntr_basic, write_rcntr) -{ - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(rdm_rma_rcntr_basic, read_rcntr) -{ - xfer_for_each_size(do_read, 8, BUF_SZ); -} - -TestSuite(rdm_rma_rcntr_scalable, - .init = rdm_rma_rcntr_setup_scalable, - .fini = rdm_rma_teardown, - .disabled = false); - -Test(rdm_rma_rcntr_scalable, write_rcntr) -{ - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(rdm_rma_rcntr_scalable, read_rcntr) -{ - xfer_for_each_size(do_read, 8, BUF_SZ); -} diff --git a/prov/gni/test/rdm_dgram_stx.c b/prov/gni/test/rdm_dgram_stx.c deleted file mode 100644 index c1bceade365..00000000000 --- a/prov/gni/test/rdm_dgram_stx.c +++ /dev/null @@ -1,2676 +0,0 @@ -/* - * Copyright (c) 2015-2017 Los Alamos 
National Security, LLC. - * All rights reserved. - * Copyright (c) 2015-2018 Cray Inc. All rights reserved. - * Copyright (c) 2019-2020 Triad National Security, LLC. - * All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include - -#include "gnix_vc.h" -#include "gnix_cm_nic.h" -#include "gnix_hashtable.h" -#include "gnix_rma.h" -#include "gnix_util.h" - -#include -#include "gnix_rdma_headers.h" -#include "common.h" - -#if 1 -#define dbg_printf(...) -#else -#define dbg_printf(...) 
\ - do { \ - printf(__VA_ARGS__); \ - fflush(stdout); \ - } while (0) -#endif - -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fid_fabric *fab; -static struct fid_domain *dom[2]; -static struct fi_gni_ops_domain *gni_domain_ops[2]; -static struct fid_ep *ep[2]; -static struct fid_av *av[2]; -static struct fi_info *hints; -static struct fi_info *fi; -static void *ep_name[2]; -static size_t gni_addr[2]; -static struct fid_cq *send_cq[2]; -static struct fid_cq *recv_cq[2]; -static struct fi_cq_attr cq_attr[2]; -static struct fid_stx *stx_ctx[2]; -static struct fid_stx *stx_ctx_too_late; - -#define BUF_SZ (64*1024) -static char *target, *target_base; -static char *source, *source_base; -static char *uc_source; -static struct fid_mr *rem_mr[2], *loc_mr[2]; -static uint64_t mr_key[2]; - -static struct fid_cntr *write_cntr[2], *read_cntr[2]; -static struct fid_cntr *rwrite_cntr; -static struct fid_cntr *rread_cntr; -static struct fi_cntr_attr cntr_attr = { - .events = FI_CNTR_EVENTS_COMP, - .flags = 0 -}; -static uint64_t writes[2] = {0}, reads[2] = {0}, write_errs[2] = {0}, - read_errs[2] = {0}; -#define MLOOPS 1000 -static int dgm_fail; - -static void common_setup_stx(uint32_t version, int mr_mode) -{ - int ret = 0; - struct fi_av_attr attr; - size_t addrlen = 0; - int requested_key[2][2] = {{0, 0}, {0, 0} }; - int i, j; - dgm_fail = 0; - - hints->domain_attr->mr_mode = mr_mode; - hints->domain_attr->cq_data_size = 4; - hints->ep_attr->tx_ctx_cnt = FI_SHARED_CONTEXT; - hints->mode = mode_bits; - hints->caps |= FI_RMA | FI_READ | FI_REMOTE_READ | - FI_WRITE | FI_REMOTE_WRITE; - - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(version, NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - ret = fi_domain(fab, fi, dom, NULL); - cr_assert(!ret, "fi_domain"); - - ret 
= fi_open_ops(&dom[0]->fid, FI_GNI_DOMAIN_OPS_1, - 0, (void **) gni_domain_ops, NULL); - - memset(&attr, 0, sizeof(attr)); - attr.type = FI_AV_MAP; - attr.count = 2; - - ret = fi_av_open(dom[0], &attr, av, NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_endpoint(dom[0], fi, &ep[0], NULL); - cr_assert(!ret, "fi_endpoint"); - - cq_attr[0].format = FI_CQ_FORMAT_TAGGED; - cq_attr[0].size = 1024; - cq_attr[0].wait_obj = 0; - - ret = fi_cq_open(dom[0], cq_attr, send_cq, 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_cq_open(dom[0], cq_attr, recv_cq, 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_stx_context(dom[0], NULL, &stx_ctx[0], 0); - cr_assert(!ret, "fi_stx_context"); - - ret = fi_stx_context(dom[0], NULL, &stx_ctx_too_late, 0); - cr_assert(!ret, "fi_stx_context"); - - ret = fi_domain(fab, fi, dom + 1, NULL); - cr_assert(!ret, "fi_domain"); - - ret = fi_open_ops(&dom[1]->fid, FI_GNI_DOMAIN_OPS_1, - 0, (void **) gni_domain_ops + 1, NULL); - - ret = fi_av_open(dom[1], &attr, av + 1, NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_endpoint(dom[1], fi, &ep[1], NULL); - cr_assert(!ret, "fi_endpoint"); - - ret = fi_stx_context(dom[1], NULL, &stx_ctx[1], 0); - cr_assert(!ret, "fi_stx_context"); - - cq_attr[1].format = FI_CQ_FORMAT_TAGGED; - cq_attr[1].size = 1024; - cq_attr[1].wait_obj = 0; - - ret = fi_cq_open(dom[1], cq_attr + 1, send_cq + 1, 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_cq_open(dom[1], cq_attr + 1, recv_cq + 1, 0); - cr_assert(!ret, "fi_cq_open"); - - /* - * imitate shmem, etc. 
use FI_WRITE for bind - * flag - */ - ret = fi_ep_bind(ep[0], &send_cq[0]->fid, FI_TRANSMIT); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(ep[0], &recv_cq[0]->fid, FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(ep[0], &stx_ctx[0]->fid, 0); - cr_assert(!ret, "fi_ep_bind stx"); - - /* - * this shouldn't work, wrong domain - */ - - ret = fi_ep_bind(ep[0], &stx_ctx[1]->fid, 0); - cr_assert_eq(ret, -FI_EINVAL); - - ret = fi_getname(&ep[0]->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - ep_name[0] = malloc(addrlen); - cr_assert(ep_name[0] != NULL); - - ret = fi_getname(&ep[0]->fid, ep_name[0], &addrlen); - cr_assert(ret == FI_SUCCESS); - - ret = fi_ep_bind(ep[1], &send_cq[1]->fid, FI_TRANSMIT); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(ep[1], &recv_cq[1]->fid, FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(ep[1], &stx_ctx[1]->fid, 0); - cr_assert(!ret, "fi_ep_bind stx"); - - ret = fi_getname(&ep[1]->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - ep_name[1] = malloc(addrlen); - cr_assert(ep_name[1] != NULL); - - ret = fi_getname(&ep[1]->fid, ep_name[1], &addrlen); - cr_assert(ret == FI_SUCCESS); - - ret = fi_av_insert(av[0], ep_name[0], 1, &gni_addr[0], 0, - NULL); - cr_assert(ret == 1); - ret = fi_av_insert(av[0], ep_name[1], 1, &gni_addr[1], 0, - NULL); - cr_assert(ret == 1); - - ret = fi_av_insert(av[1], ep_name[0], 1, &gni_addr[0], 0, - NULL); - cr_assert(ret == 1); - ret = fi_av_insert(av[1], ep_name[1], 1, &gni_addr[1], 0, - NULL); - cr_assert(ret == 1); - - ret = fi_ep_bind(ep[0], &av[0]->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(ep[1], &av[1]->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - target_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(target_base); - target = GNIT_ALIGN_BUFFER(char *, target_base); - - source_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(source_base); - source = GNIT_ALIGN_BUFFER(char *, source_base); - - if (USING_SCALABLE(fi)) { - for (i = 0; i < 2; 
i++) - for (j = 0; j < 2; j++) - requested_key[i][j] = (i * 2) + j; - } - - ret = fi_mr_reg(dom[0], - target, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - requested_key[0][0], - 0, - &rem_mr[0], - &target); - cr_assert_eq(ret, 0); - ret = fi_mr_reg(dom[1], - target, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - requested_key[1][0], - 0, - &rem_mr[1], - &target); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom[0], - source, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - requested_key[0][1], - 0, - &loc_mr[0], - &source); - cr_assert_eq(ret, 0); - ret = fi_mr_reg(dom[1], - source, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - requested_key[1][1], - 0, - &loc_mr[1], - &source); - cr_assert_eq(ret, 0); - - if (USING_SCALABLE(fi)) { - for (i = 0; i < 2; i++) { - MR_ENABLE(rem_mr[i], target, BUF_SZ); - MR_ENABLE(loc_mr[i], source, BUF_SZ); - } - } - - uc_source = malloc(BUF_SZ); - assert(uc_source); - - mr_key[0] = fi_mr_key(rem_mr[0]); - mr_key[1] = fi_mr_key(rem_mr[1]); - - ret = fi_cntr_open(dom[0], &cntr_attr, write_cntr, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[0], &write_cntr[0]->fid, FI_WRITE); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[0], &cntr_attr, read_cntr, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[0], &read_cntr[0]->fid, FI_READ); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[1], &cntr_attr, write_cntr + 1, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[1], &write_cntr[1]->fid, FI_WRITE); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[1], &cntr_attr, read_cntr + 1, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[1], &read_cntr[1]->fid, FI_READ); - cr_assert(!ret, "fi_ep_bind"); - - if (hints->caps & FI_RMA_EVENT) { - ret = fi_cntr_open(dom[1], &cntr_attr, &rwrite_cntr, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[1], &rwrite_cntr->fid, FI_REMOTE_WRITE); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[1], &cntr_attr, &rread_cntr, 0); - cr_assert(!ret, 
"fi_cntr_open"); - - ret = fi_ep_bind(ep[1], &rread_cntr->fid, FI_REMOTE_READ); - cr_assert(!ret, "fi_ep_bind"); - } - - ret = fi_enable(ep[0]); - cr_assert(!ret, "fi_ep_enable"); - - /* - * this should not work - don't allow binding of STX - * after the EP is enabled - */ - ret = fi_ep_bind(ep[0], &stx_ctx_too_late->fid, 0); - cr_assert_eq(ret, -FI_EOPBADSTATE, "fi_ep_bind stx"); - - ret = fi_close(&stx_ctx_too_late->fid); - cr_assert(!ret, "failure in closing stx_ctx_too_late"); - - ret = fi_enable(ep[1]); - cr_assert(!ret, "fi_ep_enable"); - -} - -static void common_setup_stx_1dom(uint32_t version, int mr_mode) -{ - int ret = 0; - struct fi_av_attr attr; - size_t addrlen = 0; - - dgm_fail = 0; - - hints->domain_attr->mr_mode = mr_mode; - hints->domain_attr->cq_data_size = 4; - hints->ep_attr->tx_ctx_cnt = FI_SHARED_CONTEXT; - hints->mode = mode_bits; - hints->caps |= FI_RMA | FI_READ | FI_REMOTE_READ | - FI_WRITE | FI_REMOTE_WRITE; - - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(version, NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - ret = fi_domain(fab, fi, dom, NULL); - cr_assert(!ret, "fi_domain"); - - ret = fi_open_ops(&dom[0]->fid, FI_GNI_DOMAIN_OPS_1, - 0, (void **) gni_domain_ops, NULL); - - memset(&attr, 0, sizeof(attr)); - attr.type = FI_AV_MAP; - attr.count = 2; - - ret = fi_av_open(dom[0], &attr, av, NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_endpoint(dom[0], fi, &ep[0], NULL); - cr_assert(!ret, "fi_endpoint"); - - cq_attr[0].format = FI_CQ_FORMAT_TAGGED; - cq_attr[0].size = 1024; - cq_attr[0].wait_obj = 0; - - ret = fi_cq_open(dom[0], cq_attr, send_cq, 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_cq_open(dom[0], cq_attr, recv_cq, 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_stx_context(dom[0], NULL, &stx_ctx[0], 0); - cr_assert(!ret, "fi_stx_context"); - - ret = fi_stx_context(dom[0], NULL, &stx_ctx_too_late, 
0); - cr_assert(!ret, "fi_stx_context"); - - ret = fi_endpoint(dom[0], fi, &ep[1], NULL); - cr_assert(!ret, "fi_endpoint"); - - cq_attr[1].format = FI_CQ_FORMAT_TAGGED; - cq_attr[1].size = 1024; - cq_attr[1].wait_obj = 0; - - ret = fi_cq_open(dom[0], cq_attr + 1, send_cq + 1, 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_cq_open(dom[0], cq_attr + 1, recv_cq + 1, 0); - cr_assert(!ret, "fi_cq_open"); - - /* - * imitate shmem, etc. use FI_WRITE for bind - * flag - */ - ret = fi_ep_bind(ep[0], &send_cq[0]->fid, FI_TRANSMIT); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(ep[0], &recv_cq[0]->fid, FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(ep[0], &stx_ctx[0]->fid, 0); - cr_assert(!ret, "fi_ep_bind stx"); - - ret = fi_getname(&ep[0]->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - ep_name[0] = malloc(addrlen); - cr_assert(ep_name[0] != NULL); - - ret = fi_getname(&ep[0]->fid, ep_name[0], &addrlen); - cr_assert(ret == FI_SUCCESS); - - ret = fi_ep_bind(ep[1], &send_cq[1]->fid, FI_TRANSMIT); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(ep[1], &recv_cq[1]->fid, FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(ep[1], &stx_ctx[0]->fid, 0); - cr_assert(!ret, "fi_ep_bind stx"); - - ret = fi_getname(&ep[1]->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - ep_name[1] = malloc(addrlen); - cr_assert(ep_name[1] != NULL); - - ret = fi_getname(&ep[1]->fid, ep_name[1], &addrlen); - cr_assert(ret == FI_SUCCESS); - - ret = fi_av_insert(av[0], ep_name[0], 1, &gni_addr[0], 0, - NULL); - cr_assert(ret == 1); - ret = fi_av_insert(av[0], ep_name[1], 1, &gni_addr[1], 0, - NULL); - cr_assert(ret == 1); - - ret = fi_ep_bind(ep[0], &av[0]->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(ep[1], &av[0]->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - target_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(target_base); - target = GNIT_ALIGN_BUFFER(char *, target_base); - - source_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - 
assert(source_base); - source = GNIT_ALIGN_BUFFER(char *, source_base); - - ret = fi_mr_reg(dom[0], - target, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - (USING_SCALABLE(fi) ? 1 : 0), - 0, - &rem_mr[0], - &target); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom[0], - source, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - (USING_SCALABLE(fi) ? 2 : 0), - 0, - &loc_mr[0], - &source); - cr_assert_eq(ret, 0); - - if (USING_SCALABLE(fi)) { - MR_ENABLE(rem_mr[0], target, BUF_SZ); - MR_ENABLE(loc_mr[0], source, BUF_SZ); - } - - uc_source = malloc(BUF_SZ); - assert(uc_source); - - mr_key[0] = fi_mr_key(rem_mr[0]); - mr_key[1] = mr_key[0]; - - ret = fi_cntr_open(dom[0], &cntr_attr, write_cntr, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[0], &write_cntr[0]->fid, FI_WRITE); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[0], &cntr_attr, read_cntr, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[0], &read_cntr[0]->fid, FI_READ); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[0], &cntr_attr, write_cntr + 1, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[1], &write_cntr[1]->fid, FI_WRITE); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[0], &cntr_attr, read_cntr + 1, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[1], &read_cntr[1]->fid, FI_READ); - cr_assert(!ret, "fi_ep_bind"); - - if (hints->caps & FI_RMA_EVENT) { - ret = fi_cntr_open(dom[0], &cntr_attr, &rwrite_cntr, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[1], &rwrite_cntr->fid, FI_REMOTE_WRITE); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[0], &cntr_attr, &rread_cntr, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[1], &rread_cntr->fid, FI_REMOTE_READ); - cr_assert(!ret, "fi_ep_bind"); - } - - ret = fi_enable(ep[0]); - cr_assert(!ret, "fi_ep_enable"); - - ret = fi_enable(ep[1]); - cr_assert(!ret, "fi_ep_enable"); - -} - -static void rdm_rma_basic_setup(void) -{ - hints = fi_allocinfo(); - 
cr_assert(hints, "fi_allocinfo"); - hints->ep_attr->type = FI_EP_RDM; - hints->caps = FI_RMA_EVENT; - common_setup_stx(fi_version(), GNIX_MR_BASIC); -} - -static void dgram_basic_setup(void) -{ - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - hints->ep_attr->type = FI_EP_DGRAM; - hints->caps = FI_RMA_EVENT; - common_setup_stx(fi_version(), GNIX_MR_BASIC); -} - -static void dgram_basic_setup_1dom(void) -{ - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - hints->ep_attr->type = FI_EP_DGRAM; - hints->caps = FI_RMA_EVENT; - common_setup_stx_1dom(fi_version(), GNIX_MR_BASIC); -} - -static void rdm_rma_scalable_setup(void) -{ - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - hints->ep_attr->type = FI_EP_RDM; - hints->caps = FI_RMA_EVENT; - common_setup_stx(fi_version(), GNIX_MR_SCALABLE); -} - -static void dgram_scalable_setup(void) -{ - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - hints->ep_attr->type = FI_EP_DGRAM; - hints->caps = FI_RMA_EVENT; - common_setup_stx(fi_version(), GNIX_MR_SCALABLE); -} - -static void dgram_scalable_setup_1dom(void) -{ - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - hints->ep_attr->type = FI_EP_DGRAM; - hints->caps = FI_RMA_EVENT; - common_setup_stx_1dom(fi_version(), GNIX_MR_SCALABLE); -} - -static void rdm_rma_stx_teardown(void) -{ - int ret = 0; - - if (hints->caps & FI_RMA_EVENT) { - ret = fi_close(&rwrite_cntr->fid); - cr_assert(!ret, "failure in closing dom[1] rwrite counter."); - - ret = fi_close(&rread_cntr->fid); - cr_assert(!ret, "failure in closing dom[1] rread counter."); - } - - ret = fi_close(&read_cntr[0]->fid); - cr_assert(!ret, "failure in closing dom[0] read counter."); - - ret = fi_close(&read_cntr[1]->fid); - cr_assert(!ret, "failure in closing dom[1] read counter."); - - ret = fi_close(&write_cntr[0]->fid); - cr_assert(!ret, "failure in closing dom[0] write counter."); - - ret = fi_close(&write_cntr[1]->fid); - cr_assert(!ret, "failure in 
closing dom[1] write counter."); - - free(uc_source); - - ret = fi_close(&loc_mr[0]->fid); - cr_assert(!ret, "failure in closing dom[0] local mr."); - - if (loc_mr[1] != NULL) { - ret = fi_close(&loc_mr[1]->fid); - cr_assert(!ret, "failure in closing dom[1] local mr."); - } - - ret = fi_close(&rem_mr[0]->fid); - cr_assert(!ret, "failure in closing dom[0] remote mr."); - - if (rem_mr[1] != NULL) { - ret = fi_close(&rem_mr[1]->fid); - cr_assert(!ret, "failure in closing dom[1] remote mr."); - } - - ret = fi_close(&stx_ctx[0]->fid); - cr_assert(!ret, "failure in closing dom[0] stx_ctx."); - - if (stx_ctx[1] != NULL) { - ret = fi_close(&stx_ctx[1]->fid); - cr_assert(!ret, "failure in closing dom[1] stx_ctx."); - } - - free(target_base); - free(source_base); - - ret = fi_close(&ep[0]->fid); - cr_assert(!ret, "failure in closing ep[0]."); - - ret = fi_close(&ep[1]->fid); - cr_assert(!ret, "failure in closing ep[1]."); - - ret = fi_close(&recv_cq[0]->fid); - cr_assert(!ret, "failure in dom[0] recv cq."); - - ret = fi_close(&recv_cq[1]->fid); - cr_assert(!ret, "failure in dom[1] recv cq."); - - ret = fi_close(&send_cq[0]->fid); - cr_assert(!ret, "failure in dom[0] send cq."); - - ret = fi_close(&send_cq[1]->fid); - cr_assert(!ret, "failure in dom[1] send cq."); - - ret = fi_close(&av[0]->fid); - cr_assert(!ret, "failure in closing dom[0] av."); - - if (av[1] != NULL) { - ret = fi_close(&av[1]->fid); - cr_assert(!ret, "failure in closing dom[1] av."); - } - - ret = fi_close(&dom[0]->fid); - cr_assert(!ret, "failure in closing domain dom[0]."); - - if (dom[1] != NULL) { - ret = fi_close(&dom[1]->fid); - cr_assert(!ret, - "failure in closing domain dom[1]."); - } - - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); - - fi_freeinfo(fi); - fi_freeinfo(hints); - hints = NULL; - dgm_fail = 0; - free(ep_name[0]); - free(ep_name[1]); -} - -static void init_data(char *buf, int len, char seed) -{ - int i; - - for (i = 0; i < len; i++) { - buf[i] = seed++; - 
} -} - -static int check_data(char *buf1, char *buf2, int len) -{ - int i; - - for (i = 0; i < len; i++) { - if (buf1[i] != buf2[i]) { - printf("data mismatch, elem: %d, b1: 0x%hhx," - " b2: 0x%hhx, len: %d\n", - i, buf1[i], buf2[i], len); - return 0; - } - } - - return 1; -} - -static void rdm_rma_check_tcqe(struct fi_cq_tagged_entry *tcqe, void *ctx, - uint64_t flags, uint64_t data, - struct fid_ep *fid_ep) -{ - struct gnix_fid_ep *gnix_ep = get_gnix_ep(fid_ep); - - cr_assert(tcqe->op_context == ctx, "CQE Context mismatch"); - cr_assert(tcqe->flags == flags, "CQE flags mismatch"); - - /* TODO: Remove GNIX_ALLOW_FI_REMOTE_CQ_DATA and only check flags for FI_RMA_EVENT */ - if (GNIX_ALLOW_FI_REMOTE_CQ_DATA(flags, gnix_ep->caps)) { - cr_assert(tcqe->data == data, "CQE data invalid"); - } else { - cr_assert(tcqe->data == 0, "CQE data invalid"); - } - - cr_assert(tcqe->len == 0, "CQE length mismatch"); - cr_assert(tcqe->buf == 0, "CQE address mismatch"); - cr_assert(tcqe->tag == 0, "CQE tag invalid"); -} - -static void rdm_rma_check_cntrs(uint64_t w[2], uint64_t r[2], uint64_t w_e[2], - uint64_t r_e[2]) -{ - /* Domain 0 */ - writes[0] += w[0]; - reads[0] += r[0]; - write_errs[0] += w_e[0]; - read_errs[0] += r_e[0]; - /*dbg_printf("%ld, %ld\n", fi_cntr_read(write_cntr[0]), writes[0]);*/ - cr_assert(fi_cntr_read(write_cntr[0]) == writes[0], "Bad write count"); - cr_assert(fi_cntr_read(read_cntr[0]) == reads[0], "Bad read count"); - cr_assert(fi_cntr_readerr(write_cntr[0]) == write_errs[0], - "Bad write err count"); - cr_assert(fi_cntr_readerr(read_cntr[0]) == read_errs[0], - "Bad read err count"); - - /* Domain 1 */ - writes[1] += w[1]; - reads[1] += r[1]; - write_errs[1] += w_e[1]; - read_errs[1] += r_e[1]; - cr_assert(fi_cntr_read(write_cntr[1]) == writes[1], "Bad write count"); - cr_assert(fi_cntr_read(read_cntr[1]) == reads[1], "Bad read count"); - cr_assert(fi_cntr_readerr(write_cntr[1]) == write_errs[1], - "Bad write err count"); - 
cr_assert(fi_cntr_readerr(read_cntr[1]) == read_errs[1], - "Bad read err count"); - - if (hints->caps & FI_RMA_EVENT) { - cr_assert(fi_cntr_read(rwrite_cntr) == writes[0], - "Bad rwrite count"); - cr_assert(fi_cntr_read(rread_cntr) == reads[0], - "Bad rread count"); - cr_assert(fi_cntr_readerr(rwrite_cntr) == 0, - "Bad rwrite err count"); - cr_assert(fi_cntr_readerr(rread_cntr) == 0, - "Bad rread err count"); - } -} - -static void xfer_for_each_size(void (*xfer)(int len), int slen, int elen) -{ - int i; - - for (i = slen; i <= elen; i *= 2) { - xfer(i); - } -} - -static void err_inject_enable(void) -{ - int ret, err_count_val = 1; - - ret = gni_domain_ops[0]->set_val(&dom[0]->fid, GNI_ERR_INJECT_COUNT, - &err_count_val); - cr_assert(!ret, "setval(GNI_ERR_INJECT_COUNT)"); - - if (gni_domain_ops[1] != NULL) { - ret = gni_domain_ops[1]->set_val(&dom[1]->fid, - GNI_ERR_INJECT_COUNT, - &err_count_val); - cr_assert(!ret, "setval(GNI_ERR_INJECT_COUNT)"); - } -} - -/******************************************************************************* - * Test RMA functions - ******************************************************************************/ -TestSuite(dgram_rma_stx_basic, - .init = dgram_basic_setup, - .fini = rdm_rma_stx_teardown, - .disabled = false); - -TestSuite(rdm_rma_stx_basic, - .init = rdm_rma_basic_setup, - .fini = rdm_rma_stx_teardown, - .disabled = false); - -TestSuite(dgram_rma_1dom_stx_basic, - .init = dgram_basic_setup_1dom, - .fini = rdm_rma_stx_teardown, - .disabled = false); - -TestSuite(dgram_rma_stx_scalable, - .init = dgram_scalable_setup, - .fini = rdm_rma_stx_teardown, - .disabled = false); - -TestSuite(rdm_rma_stx_scalable, - .init = rdm_rma_scalable_setup, - .fini = rdm_rma_stx_teardown, - .disabled = false); - -TestSuite(dgram_rma_1dom_stx_scalable, - .init = dgram_scalable_setup_1dom, - .fini = rdm_rma_stx_teardown, - .disabled = false); - -static void do_write(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void 
*) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - init_data(source, len, 0xab); - init_data(target, len, 0); - - sz = fi_write(ep[0], source, len, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - if (dgm_fail) { - cr_assert_eq(ret, -FI_EAVAIL); - return; - } - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]); - - w[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - cr_assert(check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_rma_stx_basic, write) -{ - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(rdm_rma_stx_basic, write_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(dgram_rma_stx_basic, write) -{ - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(dgram_rma_stx_basic, write_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_basic, write) -{ - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_basic, write_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -/* scalable */ - -Test(rdm_rma_stx_scalable, write) -{ - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(rdm_rma_stx_scalable, write_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(dgram_rma_stx_scalable, write) -{ - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(dgram_rma_stx_scalable, write_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_scalable, write) -{ - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_scalable, write_retrans) -{ - dgm_fail = 1; - 
err_inject_enable(); - xfer_for_each_size(do_write, 8, BUF_SZ); -} - -static void do_writev(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct iovec iov; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - iov.iov_base = source; - iov.iov_len = len; - - init_data(source, len, 0x25); - init_data(target, len, 0); - - sz = fi_writev(ep[0], &iov, (void **)loc_mr, 1, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - if (dgm_fail) { - cr_assert_eq(ret, -FI_EAVAIL); - return; - } - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]); - - w[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - cr_assert(check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_rma_stx_basic, writev) -{ - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -Test(rdm_rma_stx_basic, writev_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -Test(dgram_rma_stx_basic, writev) -{ - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -Test(dgram_rma_stx_basic, writev_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_basic, writev) -{ - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_basic, writev_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -/* scalable */ - -Test(rdm_rma_stx_scalable, writev) -{ - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -Test(rdm_rma_stx_scalable, writev_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -Test(dgram_rma_stx_scalable, writev) -{ - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -Test(dgram_rma_stx_scalable, 
writev_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_scalable, writev) -{ - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_scalable, writev_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writev, 8, BUF_SZ); -} - -static void do_writemsg(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct iovec iov; - struct fi_msg_rma msg; - struct fi_rma_iov rma_iov; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - iov.iov_base = source; - iov.iov_len = len; - - rma_iov.addr = _REM_ADDR(fi, target, target); - rma_iov.len = len; - rma_iov.key = mr_key[1]; - - msg.msg_iov = &iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = gni_addr[1]; - msg.rma_iov = &rma_iov; - msg.rma_iov_count = 1; - msg.context = target; - msg.data = (uint64_t)target; - - init_data(source, len, 0xef); - init_data(target, len, 0); - sz = fi_writemsg(ep[0], &msg, 0); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - if (dgm_fail) { - cr_assert_eq(ret, -FI_EAVAIL); - return; - } - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]); - - w[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - cr_assert(check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_rma_stx_basic, writemsg) -{ - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - -Test(rdm_rma_stx_basic, writemsg_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - -Test(dgram_rma_stx_basic, writemsg) -{ - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - -Test(dgram_rma_stx_basic, writemsg_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - 
-Test(dgram_rma_1dom_stx_basic, writemsg) -{ - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_basic, writemsg_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - -/* scalable */ - -Test(rdm_rma_stx_scalable, writemsg) -{ - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - -Test(rdm_rma_stx_scalable, writemsg_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - -Test(dgram_rma_stx_scalable, writemsg) -{ - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - -Test(dgram_rma_stx_scalable, writemsg_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_scalable, writemsg) -{ - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_scalable, writemsg_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writemsg, 8, BUF_SZ); -} - -/* - * write_fence should be validated by inspecting debug. 
- * - * The following sequence of events should be seen: - * - * TX request processed: A - * TX request queue stalled on FI_FENCE request: B - * Added event: A - * TX request processed: B - * - */ - -static void do_write_fence(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct iovec iov; - struct fi_msg_rma msg; - struct fi_rma_iov rma_iov; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - iov.iov_base = source; - iov.iov_len = len; - - rma_iov.addr = _REM_ADDR(fi, target, target); - rma_iov.len = sizeof(target); - rma_iov.key = mr_key[1]; - - msg.msg_iov = &iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = gni_addr[1]; - msg.rma_iov = &rma_iov; - msg.rma_iov_count = 1; - msg.context = target; - msg.data = (uint64_t)target; - - init_data(source, len, 0xef); - init_data(target, len, 0); - - /* write A */ - sz = fi_writemsg(ep[0], &msg, 0); - cr_assert_eq(sz, 0); - - /* write B */ - sz = fi_writemsg(ep[0], &msg, FI_FENCE); - cr_assert_eq(sz, 0); - - /* event A */ - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - if (dgm_fail) { - cr_assert_eq(ret, -FI_EAVAIL); - return; - } - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - - /* event B */ - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]); - - w[0] = 2; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - cr_assert(check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_rma_stx_basic, write_fence) -{ - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -Test(rdm_rma_stx_basic, write_fence_retrans) -{ - 
err_inject_enable(); - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -Test(dgram_rma_stx_basic, write_fence) -{ - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -Test(dgram_rma_stx_basic, write_fence_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_basic, write_fence) -{ - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_basic, write_fence_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -/* scalable */ - -Test(rdm_rma_stx_scalable, write_fence) -{ - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -Test(rdm_rma_stx_scalable, write_fence_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -Test(dgram_rma_stx_scalable, write_fence) -{ - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -Test(dgram_rma_stx_scalable, write_fence_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_scalable, write_fence) -{ - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_scalable, write_fence_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write_fence, 8, BUF_SZ); -} - -#define INJECT_SIZE 64 -static void do_inject_write(int len) -{ - ssize_t sz; - int ret, i, loops = 0; - struct fi_cq_tagged_entry cqe; - - init_data(source, len, 0x23); - init_data(target, len, 0); - sz = fi_inject_write(ep[0], source, len, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1]); - cr_assert_eq(sz, 0); - - for (i = 0; i < len; i++) { - loops = 0; - while (source[i] != target[i]) { - /* for progress */ - ret = fi_cq_read(send_cq[0], &cqe, 1); - cr_assert(ret == -FI_EAGAIN || ret == -FI_EAVAIL, - "Received unexpected event\n"); - - pthread_yield(); - cr_assert(++loops < MLOOPS || dgm_fail, - "Data mismatch"); - if (dgm_fail && loops > MLOOPS) - break; - } 
- } - cr_assert(!dgm_fail || (dgm_fail && loops >= MLOOPS), "Should fail"); -} - -Test(rdm_rma_stx_basic, inject_write) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - -Test(rdm_rma_stx_basic, inject_write_retrans) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - err_inject_enable(); - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - -Test(dgram_rma_stx_basic, inject_write) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - -Test(dgram_rma_stx_basic, inject_write_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - -Test(dgram_rma_1dom_stx_basic, inject_write) -{ - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - -Test(dgram_rma_1dom_stx_basic, inject_write_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - -/* scalable */ - -Test(rdm_rma_stx_scalable, inject_write) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - -Test(rdm_rma_stx_scalable, inject_write_retrans) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - err_inject_enable(); - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - -Test(dgram_rma_stx_scalable, inject_write) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - -Test(dgram_rma_stx_scalable, inject_write_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - -Test(dgram_rma_1dom_stx_scalable, inject_write) -{ - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - 
-Test(dgram_rma_1dom_stx_scalable, inject_write_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_inject_write, 8, INJECT_SIZE); -} - -static void do_writedata(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry dcqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - -#define WRITE_DATA 0x5123da1a145 - init_data(source, len, 0x23); - init_data(target, len, 0); - sz = fi_writedata(ep[0], source, len, loc_mr[0], WRITE_DATA, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - if (dgm_fail) { - cr_assert_eq(ret, -FI_EAVAIL); - return; - } - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]); - - w[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - cr_assert(check_data(source, target, len), "Data mismatch"); - - while ((ret = fi_cq_read(recv_cq[1], &dcqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - cr_assert(ret != FI_SUCCESS, "Missing remote data"); - - rdm_rma_check_tcqe(&dcqe, NULL, - (FI_RMA | FI_REMOTE_WRITE | FI_REMOTE_CQ_DATA), - WRITE_DATA, ep[1]); -} - -Test(rdm_rma_stx_basic, writedata) -{ - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -Test(rdm_rma_stx_basic, writedata_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -Test(dgram_rma_stx_basic, writedata) -{ - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -Test(dgram_rma_stx_basic, writedata_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_basic, writedata) -{ - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - 
-Test(dgram_rma_1dom_stx_basic, writedata_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -/* scalable */ - -Test(rdm_rma_stx_scalable, writedata) -{ - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -Test(rdm_rma_stx_scalable, writedata_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -Test(dgram_rma_stx_scalable, writedata) -{ - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -Test(dgram_rma_stx_scalable, writedata_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_scalable, writedata) -{ - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_scalable, writedata_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_writedata, 8, BUF_SZ); -} - -#define INJECTWRITE_DATA 0xdededadadeadbeaf -static void do_inject_writedata(int len) -{ - ssize_t sz; - int ret, i, loops = 0; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry dcqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - - init_data(source, len, 0x23); - init_data(target, len, 0); - sz = fi_inject_writedata(ep[0], source, len, INJECTWRITE_DATA, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1]); - cr_assert_eq(sz, 0); - - for (i = 0; i < len; i++) { - loops = 0; - while (source[i] != target[i]) { - /* for progress */ - ret = fi_cq_read(send_cq[0], &cqe, 1); - cr_assert(ret == -FI_EAGAIN || ret == -FI_EAVAIL, - "Received unexpected event\n"); - - pthread_yield(); - cr_assert(++loops < MLOOPS || dgm_fail, - "Data mismatch"); - if (dgm_fail && loops > MLOOPS) - break; - } - } - cr_assert(!dgm_fail || (dgm_fail && loops >= MLOOPS), "Should fail"); - if (dgm_fail && loops >= MLOOPS) - return; - - while ((ret = fi_cq_read(recv_cq[1], &dcqe, 1)) == -FI_EAGAIN) { - ret = 
fi_cq_read(send_cq[0], &cqe, 1); /* for progress */ - pthread_yield(); - } - cr_assert(ret != FI_SUCCESS, "Missing remote data"); - - rdm_rma_check_tcqe(&dcqe, NULL, - (FI_RMA | FI_REMOTE_WRITE | FI_REMOTE_CQ_DATA), - INJECTWRITE_DATA, ep[1]); -} - -Test(rdm_rma_stx_basic, inject_writedata) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -Test(rdm_rma_stx_basic, inject_writedata_retrans) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - err_inject_enable(); - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -Test(dgram_rma_stx_basic, inject_writedata) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -Test(dgram_rma_stx_basic, inject_writedata_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -Test(dgram_rma_1dom_stx_basic, inject_writedata) -{ - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -Test(dgram_rma_1dom_stx_basic, inject_writedata_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -/* scalable */ - -Test(rdm_rma_stx_scalable, inject_writedata) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -Test(rdm_rma_stx_scalable, inject_writedata_retrans) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - err_inject_enable(); - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -Test(dgram_rma_stx_scalable, inject_writedata) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -Test(dgram_rma_stx_scalable, 
inject_writedata_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -Test(dgram_rma_1dom_stx_scalable, inject_writedata) -{ - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -Test(dgram_rma_1dom_stx_scalable, inject_writedata_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_inject_writedata, 8, INJECT_SIZE); -} - -static void do_read(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - -#define READ_CTX 0x4e3dda1aULL - init_data(source, len, 0); - init_data(target, len, 0xad); - - /* domain 0 from domain 1 */ - sz = fi_read(ep[0], source, len, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - (void *)READ_CTX); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, (void *)READ_CTX, FI_RMA | FI_READ, 0, ep[0]); - - r[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got read context event!\n"); - - cr_assert(check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_rma_stx_basic, read) -{ - xfer_for_each_size(do_read, 8, BUF_SZ); -} - -Test(rdm_rma_stx_basic, read_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_read, 8, BUF_SZ); -} - -Test(dgram_rma_stx_basic, read) -{ - xfer_for_each_size(do_read, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_basic, read) -{ - xfer_for_each_size(do_read, 8, BUF_SZ); -} - -/* scalable */ - -Test(rdm_rma_stx_scalable, read) -{ - xfer_for_each_size(do_read, 8, BUF_SZ); -} - -Test(rdm_rma_stx_scalable, read_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_read, 8, BUF_SZ); -} - -Test(dgram_rma_stx_scalable, read) -{ - xfer_for_each_size(do_read, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_scalable, read) -{ - 
xfer_for_each_size(do_read, 8, BUF_SZ); -} - -static void do_readv(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct iovec iov; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - iov.iov_base = source; - iov.iov_len = len; - - init_data(target, len, 0x25); - init_data(source, len, 0); - sz = fi_readv(ep[0], &iov, (void **)loc_mr, 1, - gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_READ, 0, ep[0]); - - r[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - cr_assert(check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_rma_stx_basic, readv) -{ - xfer_for_each_size(do_readv, 8, BUF_SZ); -} - -Test(rdm_rma_stx_basic, readv_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_readv, 8, BUF_SZ); -} - -Test(dgram_rma_stx_basic, readv) -{ - xfer_for_each_size(do_readv, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_basic, readv) -{ - xfer_for_each_size(do_readv, 8, BUF_SZ); -} - -/* scalable */ - -Test(rdm_rma_stx_scalable, readv) -{ - xfer_for_each_size(do_readv, 8, BUF_SZ); -} - -Test(rdm_rma_stx_scalable, readv_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_readv, 8, BUF_SZ); -} - -Test(dgram_rma_stx_scalable, readv) -{ - xfer_for_each_size(do_readv, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_scalable, readv) -{ - xfer_for_each_size(do_readv, 8, BUF_SZ); -} - -static void do_readmsg(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct iovec iov; - struct fi_msg_rma msg; - struct fi_rma_iov rma_iov; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - iov.iov_base = 
source; - iov.iov_len = len; - - rma_iov.addr = _REM_ADDR(fi, target, target); - rma_iov.len = len; - rma_iov.key = mr_key[1]; - - msg.msg_iov = &iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = gni_addr[1]; - msg.rma_iov = &rma_iov; - msg.rma_iov_count = 1; - msg.context = target; - msg.data = (uint64_t)target; - - init_data(target, len, 0xef); - init_data(source, len, 0); - sz = fi_readmsg(ep[0], &msg, 0); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_READ, 0, ep[0]); - - r[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - cr_assert(check_data(source, target, len), "Data mismatch"); - - iov.iov_base = source; - iov.iov_len = len; - - rma_iov.addr = (uint64_t)target; - rma_iov.len = len; - rma_iov.key = mr_key[0]; - - msg.msg_iov = &iov; - msg.desc = (void **)(loc_mr + 1); - msg.iov_count = 1; - msg.addr = gni_addr[0]; - msg.rma_iov = &rma_iov; - msg.rma_iov_count = 1; - msg.context = target; - msg.data = (uint64_t)target; -} - -Test(rdm_rma_stx_basic, readmsg) -{ - xfer_for_each_size(do_readmsg, 8, BUF_SZ); -} - -Test(rdm_rma_stx_basic, readmsg_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_readmsg, 8, BUF_SZ); -} - -Test(dgram_rma_stx_basic, readmsg) -{ - xfer_for_each_size(do_readmsg, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_basic, readmsg) -{ - xfer_for_each_size(do_readmsg, 8, BUF_SZ); -} - -/* scalable */ - -Test(rdm_rma_stx_scalable, readmsg) -{ - xfer_for_each_size(do_readmsg, 8, BUF_SZ); -} - -Test(rdm_rma_stx_scalable, readmsg_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_readmsg, 8, BUF_SZ); -} - -Test(dgram_rma_stx_scalable, readmsg) -{ - xfer_for_each_size(do_readmsg, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_scalable, readmsg) -{ - xfer_for_each_size(do_readmsg, 8, BUF_SZ); -} - -static void inject_common(void) -{ - int 
ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct iovec iov; - struct fi_msg_rma msg; - struct fi_rma_iov rma_iov; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - iov.iov_base = source; - iov.iov_len = GNIX_INJECT_SIZE; - - rma_iov.addr = _REM_ADDR(fi, target, target); - rma_iov.len = GNIX_INJECT_SIZE; - rma_iov.key = mr_key[1]; - - msg.msg_iov = &iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = gni_addr[1]; - msg.rma_iov = &rma_iov; - msg.rma_iov_count = 1; - msg.context = target; - msg.data = (uint64_t)target; - - init_data(source, GNIX_INJECT_SIZE, 0xef); - init_data(target, GNIX_INJECT_SIZE, 0); - - sz = fi_writemsg(ep[0], &msg, FI_INJECT); - cr_assert_eq(sz, 0); - - iov.iov_len = GNIX_INJECT_SIZE+1; - sz = fi_writemsg(ep[0], &msg, FI_INJECT); - cr_assert_eq(sz, -FI_EINVAL); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]); - - w[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - cr_assert(check_data(source, target, GNIX_INJECT_SIZE), - "Data mismatch"); -} - -Test(rdm_rma_stx_basic, inject) -{ - inject_common(); -} - -Test(dgram_rma_stx_basic, inject) -{ - inject_common(); -} - -Test(dgram_rma_1dom_stx_basic, inject) -{ - inject_common(); -} - -/* scalable */ - -Test(rdm_rma_stx_scalable, inject) -{ - inject_common(); -} - -Test(dgram_rma_stx_scalable, inject) -{ - inject_common(); -} - -Test(dgram_rma_1dom_stx_scalable, inject) -{ - inject_common(); -} - -static void do_write_autoreg(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - init_data(source, len, 0xab); - init_data(target, len, 0); - sz = 
fi_write(ep[0], source, len, - NULL, gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]); - - w[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - cr_assert(check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_rma_stx_basic, write_autoreg) -{ - xfer_for_each_size(do_write_autoreg, 8, BUF_SZ); -} - -Test(dgram_rma_stx_basic, write_autoreg) -{ - xfer_for_each_size(do_write_autoreg, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_basic, write_autoreg) -{ - xfer_for_each_size(do_write_autoreg, 8, BUF_SZ); -} - -/* scalable */ - -Test(rdm_rma_stx_scalable, write_autoreg) -{ - xfer_for_each_size(do_write_autoreg, 8, BUF_SZ); -} - -Test(dgram_rma_stx_scalable, write_autoreg) -{ - xfer_for_each_size(do_write_autoreg, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_scalable, write_autoreg) -{ - xfer_for_each_size(do_write_autoreg, 8, BUF_SZ); -} - -static void do_write_autoreg_uncached(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - init_data(uc_source, len, 0xab); - init_data(target, len, 0); - sz = fi_write(ep[0], uc_source, len, - NULL, gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, ep[0]); - - w[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - cr_assert(check_data(uc_source, target, len), "Data mismatch"); -} - -Test(rdm_rma_stx_basic, write_autoreg_uncached) -{ - 
xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ); -} - -Test(dgram_rma_stx_basic, write_autoreg_uncached) -{ - xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_basic, write_autoreg_uncached) -{ - xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ); -} - -/* scalable */ - -Test(rdm_rma_stx_scalable, write_autoreg_uncached) -{ - xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ); -} - -Test(dgram_rma_stx_scalable, write_autoreg_uncached) -{ - xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ); -} - -Test(dgram_rma_1dom_stx_scalable, write_autoreg_uncached) -{ - xfer_for_each_size(do_write_autoreg_uncached, 8, BUF_SZ); -} - -static void do_write_error(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe; - struct fi_cq_err_entry err_cqe = {0}; - - err_cqe.err_data_size = 0; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - init_data(source, len, 0xab); - init_data(target, len, 0); - sz = fi_write(ep[0], source, len, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, -FI_EAVAIL); - - ret = fi_cq_readerr(send_cq[0], &err_cqe, 0); - cr_assert_eq(ret, 1); - - cr_assert((uint64_t)err_cqe.op_context == (uint64_t)target, - "Bad error context"); - cr_assert(err_cqe.flags == (FI_RMA | FI_WRITE)); - cr_assert(err_cqe.len == 0, "Bad error len"); - cr_assert(err_cqe.buf == 0, "Bad error buf"); - cr_assert(err_cqe.data == 0, "Bad error data"); - cr_assert(err_cqe.tag == 0, "Bad error tag"); - cr_assert(err_cqe.olen == 0, "Bad error olen"); - cr_assert(err_cqe.err == FI_ECANCELED, "Bad error errno"); - cr_assert(err_cqe.prov_errno == gnixu_to_fi_errno(GNI_RC_TRANSACTION_ERROR), - "Bad prov errno"); - cr_assert(err_cqe.err_data == NULL, "Bad error provider data"); - - w_e[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); -} - 
-static inline void __write_error(void) -{ - int ret, max_retrans_val = 1; - - ret = gni_domain_ops[0]->set_val(&dom[0]->fid, GNI_MAX_RETRANSMITS, - &max_retrans_val); - cr_assert(!ret, "setval(GNI_MAX_RETRANSMITS)"); - - ret = gni_domain_ops[1]->set_val(&dom[1]->fid, GNI_MAX_RETRANSMITS, - &max_retrans_val); - cr_assert(!ret, "setval(GNI_MAX_RETRANSMITS)"); - err_inject_enable(); - - xfer_for_each_size(do_write_error, 8, BUF_SZ); -} - -Test(rdm_rma_stx_basic, write_error) -{ - __write_error(); -} - -Test(rdm_rma_stx_scalable, write_error) -{ - __write_error(); -} - -Test(dgram_rma_stx_basic, write_error) -{ - __write_error(); -} - -Test(dgram_rma_stx_scalable, write_error) -{ - __write_error(); -} - -static void do_read_error(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe; - struct fi_cq_err_entry err_cqe = {0}; - - err_cqe.err_data_size = 0; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - init_data(source, len, 0); - init_data(target, len, 0xad); - sz = fi_read(ep[0], source, len, - loc_mr[0], gni_addr[1], - _REM_ADDR(fi, target, target), mr_key[1], - (void *)READ_CTX); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, -FI_EAVAIL); - - ret = fi_cq_readerr(send_cq[0], &err_cqe, 0); - cr_assert_eq(ret, 1); - - cr_assert((uint64_t)err_cqe.op_context == (uint64_t)READ_CTX, - "Bad error context"); - cr_assert(err_cqe.flags == (FI_RMA | FI_READ)); - cr_assert(err_cqe.len == 0, "Bad error len"); - cr_assert(err_cqe.buf == 0, "Bad error buf"); - cr_assert(err_cqe.data == 0, "Bad error data"); - cr_assert(err_cqe.tag == 0, "Bad error tag"); - cr_assert(err_cqe.olen == 0, "Bad error olen"); - cr_assert(err_cqe.err == FI_ECANCELED, "Bad error errno"); - cr_assert(err_cqe.prov_errno == gnixu_to_fi_errno(GNI_RC_TRANSACTION_ERROR), - "Bad prov errno"); - cr_assert(err_cqe.err_data == NULL, "Bad error provider data"); - - r_e[0] = 1; - 
rdm_rma_check_cntrs(w, r, w_e, r_e); -} - -static inline void __read_error(void) -{ - int ret, max_retrans_val = 1; - - ret = gni_domain_ops[0]->set_val(&dom[0]->fid, GNI_MAX_RETRANSMITS, - &max_retrans_val); - cr_assert(!ret, "setval(GNI_MAX_RETRANSMITS)"); - - ret = gni_domain_ops[1]->set_val(&dom[1]->fid, GNI_MAX_RETRANSMITS, - &max_retrans_val); - cr_assert(!ret, "setval(GNI_MAX_RETRANSMITS)"); - err_inject_enable(); - - xfer_for_each_size(do_read_error, 8, BUF_SZ); -} - -Test(rdm_rma_stx_basic, read_error) -{ - __read_error(); -} - -Test(rdm_rma_stx_scalable, read_error) -{ - __read_error(); -} - -static void do_read_buf(void *s, void *t, int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - -#define READ_CTX 0x4e3dda1aULL - init_data(s, len, 0); - init_data(t, len, 0xad); - sz = fi_read(ep[0], s, len, NULL, gni_addr[1], - _REM_ADDR(fi, target, t), mr_key[1], - (void *)READ_CTX); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, (void *)READ_CTX, FI_RMA | FI_READ, 0, ep[0]); - - r[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got read context event!\n"); - - cr_assert(check_data(s, t, len), "Data mismatch"); -} - -static void do_read_alignment(int len) -{ - int s_off, t_off, l_off; - - for (s_off = 0; s_off < 7; s_off++) { - for (t_off = 0; t_off < 7; t_off++) { - for (l_off = 0; l_off < 7; l_off++) { - do_read_buf(source + s_off, - target + t_off, - len + l_off); - } - } - } -} - -Test(rdm_rma_stx_basic, read_alignment) -{ - xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1)); -} - -Test(rdm_rma_stx_basic, read_alignment_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_stx_basic, read_alignment) -{ - 
xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_1dom_stx_basic, read_alignment) -{ - xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1)); -} - -/* scalable */ - -Test(rdm_rma_stx_scalable, read_alignment) -{ - xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1)); -} - -Test(rdm_rma_stx_scalable, read_alignment_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_stx_scalable, read_alignment) -{ - xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_1dom_stx_scalable, read_alignment) -{ - xfer_for_each_size(do_read_alignment, 1, (BUF_SZ - 1)); -} - -static void do_write_buf(void *s, void *t, int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_err_entry cq_err; - int errors_to_read = (dgm_fail) ? 1 : 0; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - init_data(s, len, 0xab); - init_data(t, len, 0); - sz = fi_write(ep[0], s, len, NULL, gni_addr[1], - _REM_ADDR(fi, target, t), mr_key[1], t); - cr_assert_eq(sz, 0); - - do { - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - if (dgm_fail) { - cr_assert_eq(ret, -FI_EAVAIL); - - ret = fi_cq_readerr(send_cq[0], &cq_err, 0); - cr_assert_eq(ret, 1); - - errors_to_read--; - } - } while (errors_to_read > 0); - - if (dgm_fail) - return; - - cr_assert_eq(ret, 1); - rdm_rma_check_tcqe(&cqe, t, FI_RMA | FI_WRITE, 0, ep[0]); - - w[0] = 1; - rdm_rma_check_cntrs(w, r, w_e, r_e); - - dbg_printf("got write context event!\n"); - - cr_assert(check_data(s, t, len), "Data mismatch"); -} - -static void do_write_alignment(int len) -{ - int s_off, t_off, l_off; - - for (s_off = 0; s_off < 7; s_off++) { - for (t_off = 0; t_off < 7; t_off++) { - for (l_off = 0; l_off < 7; l_off++) { - do_write_buf(source + s_off, - target + t_off, - len + l_off); - } - } - } -} - 
-Test(rdm_rma_stx_basic, write_alignment) -{ - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -Test(rdm_rma_stx_basic, write_alignment_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_stx_basic, write_alignment) -{ - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_stx_basic, write_alignment_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_1dom_stx_basic, write_alignment) -{ - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_1dom_stx_basic, write_alignment_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -/* scalable */ - -Test(rdm_rma_stx_scalable, write_alignment) -{ - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -Test(rdm_rma_stx_scalable, write_alignment_retrans) -{ - err_inject_enable(); - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_stx_scalable, write_alignment) -{ - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_stx_scalable, write_alignment_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_1dom_stx_scalable, write_alignment) -{ - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -Test(dgram_rma_1dom_stx_scalable, write_alignment_retrans) -{ - dgm_fail = 1; - err_inject_enable(); - xfer_for_each_size(do_write_alignment, 1, (BUF_SZ - 1)); -} - -static void do_trigger(int len) -{ - int ret, i; - ssize_t sz; - struct fi_cq_tagged_entry cqe; - struct fi_msg_rma msg[4]; - struct iovec iov; - struct fi_rma_iov rma_iov; - struct fi_triggered_context t_ctx[4]; - void *ctxs[4]; - - iov.iov_base = source; - iov.iov_len = len; - - rma_iov.addr = _REM_ADDR(fi, target, target); - rma_iov.len = len; - rma_iov.key = 
mr_key[1]; - - msg[0].msg_iov = &iov; - msg[0].desc = (void **)loc_mr; - msg[0].iov_count = 1; - msg[0].addr = gni_addr[1]; - msg[0].rma_iov = &rma_iov; - msg[0].rma_iov_count = 1; - msg[0].data = (uint64_t)target; - msg[1] = msg[2] = msg[3] = msg[0]; - - /* XXX: Req 0 is guaranteed to be sent before req 2, but req 2 will - * race req 0 through the network. Fix race if needed. */ - t_ctx[0].trigger.threshold.threshold = 1; - t_ctx[1].trigger.threshold.threshold = 2; - t_ctx[2].trigger.threshold.threshold = 1; - t_ctx[3].trigger.threshold.threshold = 0; - ctxs[0] = &t_ctx[3]; - ctxs[1] = &t_ctx[0]; - ctxs[2] = &t_ctx[2]; - ctxs[3] = &t_ctx[1]; - - for (i = 0; i < 4; i++) { - t_ctx[i].event_type = FI_TRIGGER_THRESHOLD; - t_ctx[i].trigger.threshold.cntr = write_cntr[0]; - msg[i].context = &t_ctx[i]; - - sz = fi_writemsg(ep[0], &msg[i], FI_TRIGGER); - cr_assert_eq(sz, 0); - } - - for (i = 0; i < 4; i++) { - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(send_cq[0], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - - rdm_rma_check_tcqe(&cqe, ctxs[i], FI_RMA | FI_WRITE, 0, ep[0]); - } - - sz = fi_cntr_set(write_cntr[0], 0); - cr_assert_eq(sz, 0); -} - -/* - * TODO: fix this test. fails sporadically - */ -Test(rdm_rma_stx_basic, trigger, .disabled = true) -{ - xfer_for_each_size(do_trigger, 8, BUF_SZ); -} - -Test(rdm_rma_stx_scalable, trigger, .disabled = true) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - xfer_for_each_size(do_trigger, 8, BUF_SZ); -} diff --git a/prov/gni/test/rdm_fi_pcd_trecv_msg.c b/prov/gni/test/rdm_fi_pcd_trecv_msg.c deleted file mode 100644 index e22196eb96a..00000000000 --- a/prov/gni/test/rdm_fi_pcd_trecv_msg.c +++ /dev/null @@ -1,1912 +0,0 @@ -/* - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. 
All rights reserved. - * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include - -#include "gnix_vc.h" -#include "gnix_cm_nic.h" -#include "gnix_hashtable.h" -#include "gnix_rma.h" - -#include -#include "gnix_rdma_headers.h" -#include "common.h" - -/* Both the send and recv paths use independent state machines within - * each test to simulate the behavior you would expect in a client/server - * or independent peer environment - * - * Below are the valid states, but state transitions may differ from test - * to test. 
To simplify the reading of each test, it is recommended that - * the state transitions in the test are written in order. - * - * Ex. - * switch (send_state) { - * case STATE_1: - * case STATE_2: - * case STATE_3: - * .... - * case STATE_DONE: - * default: - * } - * - * Normal switch semantics should be followed and no tag should be reused - * within the same test. Duplicates will trigger an infinite loop. - */ - -#define stringify(x) helper(x) -#define helper(x) #x - -#define STATE_DEF(x) [x] = stringify(x) - -/* sender states */ -enum send_state { - S_STATE_SEND_MSG_1 = 0, /* send the first message */ - S_STATE_SEND_MSG_1_WAIT_CQ, /* wait for the first completion */ - S_STATE_SEND_MSG_2, /* send the second message */ - S_STATE_SEND_MSG_2_WAIT_CQ, /* wait for the second completion */ - S_STATE_DONE, /* sender is done */ - S_STATE_INVALID, /* invalid state used for test init */ -}; - -__attribute__((unused)) -static char *send_state_strings[S_STATE_INVALID + 1] = { - STATE_DEF(S_STATE_SEND_MSG_1), - STATE_DEF(S_STATE_SEND_MSG_1_WAIT_CQ), - STATE_DEF(S_STATE_SEND_MSG_2), - STATE_DEF(S_STATE_SEND_MSG_2_WAIT_CQ), - STATE_DEF(S_STATE_DONE), - STATE_DEF(S_STATE_INVALID), -}; - -/* receiver states */ -enum recv_state { - R_STATE_PEEK = 0, /* peek a message */ - R_STATE_PEEK_WAIT_CQ, /* wait for cqe from peek */ - /* wait for error cq */ - R_STATE_PEEK_WAIT_ERR_CQ, - R_STATE_PEEK_CLAIM, /* peek|claim a message */ - R_STATE_PEEK_CLAIM_WAIT_CQ, /* wait for cqe from peek|claim */ - /* wait for error cq */ - R_STATE_PEEK_CLAIM_WAIT_ERR_CQ, - R_STATE_PEEK_DISCARD, /* peek|discard a message */ - R_STATE_PEEK_DISCARD_WAIT_CQ, /* wait for cqe from peek|discard */ - /* wait for error cq */ - R_STATE_PEEK_DISCARD_WAIT_ERR_CQ, - R_STATE_CLAIM, /* claim a message */ - R_STATE_CLAIM_WAIT_CQ, /* wait for cqe from claim */ - R_STATE_CLAIM_DISCARD, /* claim|discard a message */ - R_STATE_CLAIM_DISCARD_WAIT_CQ, - R_STATE_RECV_MSG_1, /* recv first message */ - R_STATE_RECV_MSG_1_WAIT_CQ, /* 
wait for cqe from first recv message */ - R_STATE_RECV_MSG_2, /* recv second message */ - R_STATE_RECV_MSG_2_WAIT_CQ, /* wait for cqe from second recv message*/ - R_STATE_DONE, /* receiver is done */ - R_STATE_INVALID, /* invalid state used for test init */ -}; - -__attribute__((unused)) -static char *recv_state_strings[R_STATE_INVALID + 1] = { - STATE_DEF(R_STATE_PEEK), - STATE_DEF(R_STATE_PEEK_WAIT_CQ), - STATE_DEF(R_STATE_PEEK_WAIT_ERR_CQ), - STATE_DEF(R_STATE_PEEK_CLAIM), - STATE_DEF(R_STATE_PEEK_CLAIM_WAIT_CQ), - STATE_DEF(R_STATE_PEEK_CLAIM_WAIT_ERR_CQ), - STATE_DEF(R_STATE_PEEK_DISCARD), - STATE_DEF(R_STATE_PEEK_DISCARD_WAIT_CQ), - STATE_DEF(R_STATE_PEEK_DISCARD_WAIT_ERR_CQ), - STATE_DEF(R_STATE_CLAIM), - STATE_DEF(R_STATE_CLAIM_WAIT_CQ), - STATE_DEF(R_STATE_CLAIM_DISCARD), - STATE_DEF(R_STATE_RECV_MSG_1), - STATE_DEF(R_STATE_RECV_MSG_1_WAIT_CQ), - STATE_DEF(R_STATE_RECV_MSG_2), - STATE_DEF(R_STATE_RECV_MSG_2_WAIT_CQ), - STATE_DEF(R_STATE_DONE), - STATE_DEF(R_STATE_INVALID), -}; - -/* poll the SCQ with a dummy CQE if true */ -#define SHOULD_BLIND_POLL_SCQ(state) \ - ((state) == S_STATE_DONE) - -/* poll the RCQ with a dummy CQE if true */ -#define SHOULD_BLIND_POLL_RCQ(state) \ - ((state) != R_STATE_PEEK_WAIT_CQ && \ - state != R_STATE_PEEK_CLAIM_WAIT_CQ && \ - state != R_STATE_PEEK_DISCARD_WAIT_CQ && \ - state != R_STATE_CLAIM_WAIT_CQ && \ - state != R_STATE_CLAIM_DISCARD_WAIT_CQ && \ - state != R_STATE_RECV_MSG_1_WAIT_CQ && \ - state != R_STATE_RECV_MSG_2_WAIT_CQ && \ - state != R_STATE_PEEK_WAIT_ERR_CQ && \ - state != R_STATE_PEEK_CLAIM_WAIT_ERR_CQ && \ - state != R_STATE_PEEK_DISCARD_WAIT_ERR_CQ) - -#define RAISE_UNREACHABLE_STATE cr_assert_eq(0, 1, "unreachable state") -#define ASSERT_SEND_RECV_DONE \ - do { \ - if (!SEND_RECV_DONE) \ - dbg_printf("failed on test size %d\n", len); \ - if (s_state != S_STATE_DONE) \ - dbg_printf("failed to finish send side, " \ - "state=%s\n", \ - send_state_strings[s_state]); \ - if (r_state != R_STATE_DONE) \ - 
dbg_printf("failed to finish recv side, " \ - "state=%s\n", \ - recv_state_strings[r_state]); \ - cr_assert_eq(s_state, S_STATE_DONE); \ - cr_assert_eq(r_state, R_STATE_DONE); \ - } while (0) - -#define PROGRESS_CQS(cqs) \ - do { \ - struct fi_cq_tagged_entry trash; \ - int __events = 0; \ - if (SHOULD_BLIND_POLL_SCQ(s_state)) { \ - __events = fi_cq_read(cqs[0], &trash, 1); \ - if (__events == 1 && rate_limit()) \ - dbg_printf("found unexpected events: scq\n"); \ - } \ - if (SHOULD_BLIND_POLL_RCQ(r_state)) { \ - __events = fi_cq_read(cqs[1], &trash, 1); \ - if (__events == 1 && rate_limit()) \ - dbg_printf("found unexpected events: rcq\n"); \ - } \ - } while (0) - -#define __STATE_TRANSITION(state, next_state, labels) \ - do { \ - if ((state) != (next_state)) \ - dbg_printf(stringify(state) " transition: %s to %s\n", \ - labels[state], labels[next_state]); \ - (state) = (next_state); \ - } while (0) - -#define SEND_STATE_TRANSITION(next_state) \ - __STATE_TRANSITION(s_state, next_state, send_state_strings) - -#define RECV_STATE_TRANSITION(next_state) \ - __STATE_TRANSITION(r_state, next_state, recv_state_strings) - -#define __COND_STATE_TRANSITION(cond, state, true_state, false_state, labels) \ - __STATE_TRANSITION(state, ((cond) ? 
(true_state) : (false_state)), labels) - -#define COND_RECV_STATE_TRANSITION(actual, expected, true_state, false_state) \ - __COND_STATE_TRANSITION(((actual) == (expected)), \ - r_state, true_state, false_state, recv_state_strings) - -#define COND_SEND_STATE_TRANSITION(actual, expected, true_state, false_state) \ - __COND_STATE_TRANSITION(((actual) == (expected)), \ - s_state, true_state, false_state, send_state_strings) - -#define INIT_TEST_STATE(send_state, recv_state) \ - do { \ - SEND_STATE_TRANSITION(send_state); \ - RECV_STATE_TRANSITION(recv_state); \ - } while (0) - -#define SEND_RECV_DONE (r_state == R_STATE_DONE && s_state == S_STATE_DONE) - -#define TEST_TIME_LIMIT_EXPIRED (elapsed >= max_test_time) - -#if 1 -#define dbg_printf(...) do { } while (0) -#else -#define dbg_printf(...) \ - do { \ - fprintf(stderr, __VA_ARGS__); \ - fflush(stderr); \ - } while (0) -#endif - -struct timeval begin, end; -struct timeval loop_start, loop_end; - -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fid_fabric *fab; -static struct fid_domain *dom; -static struct fid_ep *ep[2]; -static struct fid_av *av; -static struct fi_info *hints; -static struct fi_info *fi; -static void *ep_name[2]; -static size_t gni_addr[2]; -static struct fid_cq *msg_cq[2]; -static struct fi_cq_attr cq_attr; - -#define BUF_SZ (64*1024) -static char *target, *target_base; -static char *source, *source_base; -static struct fid_mr *rem_mr, *loc_mr; -static uint64_t mr_key; -static const int max_test_time = 10; - -/* test variables */ -static int elapsed; -enum send_state s_state; -enum recv_state r_state; -struct fi_cq_tagged_entry d_peek_cqe; -struct fi_cq_err_entry cqe_error; -struct fi_msg_tagged peek_msg; -struct iovec peek_iov; -char *peek_buffer; - - -static int elapsed_seconds(struct timeval *s, struct timeval *e) -{ - /* rough estimate... 
I don't care that this is accurate */ - int seconds = e->tv_sec - s->tv_sec; - - if (!seconds) - return seconds; - - if (e->tv_usec <= s->tv_usec) - seconds -= 1; - - return seconds; -} - -static int rate_limit(void) -{ - static struct timeval _rl_begin, _rl_end; - static int init, squelched, events; - int ret = 0; - - if (!init) - gettimeofday(&_rl_begin, NULL); - - gettimeofday(&_rl_end, NULL); - - if (events < ((elapsed_seconds(&_rl_begin, &_rl_end) + 1) * 20)) { - ret = 1; - events++; - } - - if (ret && squelched) { - dbg_printf("squelched %d similar events\n", squelched); - squelched = 0; - } else if (!ret) { - squelched++; - } - - return ret; -} - -static inline void init_test_variables(void) -{ - /* re-initialize all variables necessary here */ - elapsed = 0; - r_state = R_STATE_INVALID; - s_state = S_STATE_INVALID; - - memset(&d_peek_cqe, 0, sizeof(struct fi_cq_tagged_entry)); - memset(&peek_msg, 0, sizeof(struct fi_msg_tagged)); - memset(&peek_iov, 0, sizeof(struct iovec)); -} - -static void rdm_fi_pdc_setup(void) -{ - int ret = 0; - struct fi_av_attr attr; - size_t addrlen = 0; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->domain_attr->cq_data_size = 4; - hints->mode = mode_bits; - - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(fi_version(), NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - ret = fi_domain(fab, fi, &dom, NULL); - cr_assert(!ret, "fi_domain"); - - memset(&attr, 0, sizeof(attr)); - attr.type = FI_AV_MAP; - attr.count = 16; - - ret = fi_av_open(dom, &attr, &av, NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_endpoint(dom, fi, &ep[0], NULL); - cr_assert(!ret, "fi_endpoint"); - - cq_attr.format = FI_CQ_FORMAT_TAGGED; - cq_attr.size = 1024; - cq_attr.wait_obj = 0; - - ret = fi_cq_open(dom, &cq_attr, &msg_cq[0], 0); - cr_assert(!ret, 
"fi_cq_open"); - - ret = fi_cq_open(dom, &cq_attr, &msg_cq[1], 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_ep_bind(ep[0], &msg_cq[0]->fid, FI_SEND | FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_getname(&ep[0]->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - ep_name[0] = malloc(addrlen); - cr_assert(ep_name[0] != NULL); - - ret = fi_getname(&ep[0]->fid, ep_name[0], &addrlen); - cr_assert(ret == FI_SUCCESS); - - ret = fi_endpoint(dom, fi, &ep[1], NULL); - cr_assert(!ret, "fi_endpoint"); - - ret = fi_ep_bind(ep[1], &msg_cq[1]->fid, FI_SEND | FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ep_name[1] = malloc(addrlen); - cr_assert(ep_name[1] != NULL); - - ret = fi_getname(&ep[1]->fid, ep_name[1], &addrlen); - cr_assert(ret == FI_SUCCESS); - - ret = fi_av_insert(av, ep_name[0], 1, &gni_addr[0], 0, - NULL); - cr_assert(ret == 1); - - ret = fi_av_insert(av, ep_name[1], 1, &gni_addr[1], 0, - NULL); - cr_assert(ret == 1); - - ret = fi_ep_bind(ep[0], &av->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(ep[1], &av->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_enable(ep[0]); - cr_assert(!ret, "fi_ep_enable"); - - ret = fi_enable(ep[1]); - cr_assert(!ret, "fi_ep_enable"); - - target_base = malloc(GNIT_ALIGN_LEN(BUF_SZ*2)); - assert(target_base); - target = GNIT_ALIGN_BUFFER(char *, target_base); - - source_base = malloc(GNIT_ALIGN_LEN(BUF_SZ*2)); - assert(source_base); - source = GNIT_ALIGN_BUFFER(char *, source_base); - - peek_buffer = calloc(BUF_SZ, sizeof(char)); - cr_assert(peek_buffer); - - ret = fi_mr_reg(dom, - target, - BUF_SZ*2, - FI_REMOTE_WRITE, - 0, - ((USING_SCALABLE(fi)) ? 1 : 0), - 0, - &rem_mr, - &target); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom, - source, - BUF_SZ*2, - FI_REMOTE_WRITE, - 0, - ((USING_SCALABLE(fi)) ? 
2 : 0), - 0, - &loc_mr, - &source); - cr_assert_eq(ret, 0); - - if (USING_SCALABLE(fi)) { - MR_ENABLE(rem_mr, target, BUF_SZ*2); - MR_ENABLE(loc_mr, source, BUF_SZ*2); - } - - mr_key = fi_mr_key(rem_mr); - - init_test_variables(); -} - -static void rdm_fi_pdc_teardown(void) -{ - int ret = 0; - - fi_close(&loc_mr->fid); - fi_close(&rem_mr->fid); - - free(target_base); - free(source_base); - - ret = fi_close(&ep[0]->fid); - cr_assert(!ret, "failure in closing ep."); - - ret = fi_close(&ep[1]->fid); - cr_assert(!ret, "failure in closing ep."); - - ret = fi_close(&msg_cq[0]->fid); - cr_assert(!ret, "failure in send cq."); - - ret = fi_close(&msg_cq[1]->fid); - cr_assert(!ret, "failure in recv cq."); - - ret = fi_close(&av->fid); - cr_assert(!ret, "failure in closing av."); - - ret = fi_close(&dom->fid); - cr_assert(!ret, "failure in closing domain."); - - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); - - fi_freeinfo(fi); - fi_freeinfo(hints); - free(ep_name[0]); - free(ep_name[1]); - free(peek_buffer); -} - -static void rdm_fi_pdc_init_data_range( - char *buf, - int start, - int len, - char seed) -{ - int i; - - for (i = start; i < start + len; i++) - buf[i] = seed; -} - -static void rdm_fi_pdc_init_data( - char *buf, - int len, - char seed) -{ - rdm_fi_pdc_init_data_range(buf, 0, len, seed); -} - -static int rdm_fi_pdc_check_data_range( - char *src, - char *dst, - int len) -{ - int i; - - for (i = 0; i < len; i++) { - if (src[i] != dst[i]) { - printf("data mismatch, elem: %d, exp: %x, act: %x\n", - i, src[i], dst[i]); - return 0; - } - } - - return 1; -} - -static int rdm_fi_pdc_check_data_pattern( - char *buf, - char pattern, - int len) -{ - int i; - - for (i = 0; i < len; i++) { - if (buf[i] != pattern) { - printf("data mismatch, elem: %d, exp: %x, act: %x\n", - i, pattern, buf[i]); - return 0; - } - } - - return 1; -} - -static int rdm_fi_pdc_check_data( - char *buf1, - char *buf2, - int len) -{ - return 
rdm_fi_pdc_check_data_range(buf1, buf2, len); -} - -static void rdm_fi_pdc_xfer_for_each_size( - void (*xfer)(int len), - int slen, - int elen) -{ - int i; - - for (i = slen; i <= elen; i *= 2) { - dbg_printf("running test on size %d\n", i); - xfer(i); - } -} - -/******************************************************************************* - * Test MSG functions - ******************************************************************************/ - -TestSuite(rdm_fi_pdc, - .init = rdm_fi_pdc_setup, - .fini = rdm_fi_pdc_teardown, - .disabled = false); - -static void build_message( - struct fi_msg_tagged *msg, - struct iovec *iov, - void *t, - int len, - void **rem_mr, - size_t gni_addr, - void *source, - uint64_t tag, - uint64_t ignore) -{ - iov->iov_base = t; - iov->iov_len = len; - - msg->msg_iov = iov; - msg->desc = rem_mr; - msg->iov_count = 1; - msg->addr = gni_addr; - msg->context = source; - msg->data = (uint64_t) source; - msg->tag = tag; - msg->ignore = ignore; -} - -static void build_peek_message( - struct fi_msg_tagged *peek, - struct fi_msg_tagged *msg) -{ - /* copy contents */ - *peek = *msg; - - peek->msg_iov = &peek_iov; - - peek_iov.iov_base = peek_buffer; - peek_iov.iov_len = msg->msg_iov[0].iov_len; -} - -#define TSEND_FLAGS (FI_MSG | FI_SEND | FI_TAGGED) -#define TRECV_FLAGS (FI_MSG | FI_RECV | FI_TAGGED) - -static void validate_cqe_contents( - struct fi_cq_tagged_entry *entry, - uint64_t flags, - void *buf, - size_t len, - uint64_t tag, - void *context) -{ - cr_assert_eq(entry->op_context, context); - cr_assert_eq(entry->flags, flags); - cr_assert_eq(entry->data, 0); - - if (flags & FI_RECV) { - if (!(flags & FI_DISCARD)) { - cr_assert_eq(entry->len, len); - } - else { - cr_assert_eq(entry->len, 0); - cr_assert_eq(entry->buf, NULL); - } - cr_assert_eq(entry->tag, tag); - if (entry->buf != NULL) - cr_assert_eq(entry->buf, buf); - } else { - cr_assert_eq(entry->len, 0); - cr_assert_eq(entry->tag, 0); - cr_assert_eq(entry->buf, 0); - } -} - -static 
void validate_cqe_with_message( - struct fi_cq_tagged_entry *entry, - struct fi_msg_tagged *msg, - uint64_t flags) -{ - validate_cqe_contents(entry, flags, msg->msg_iov[0].iov_base, - msg->msg_iov[0].iov_len, msg->tag, msg->context); -} - - -static inline void map_src_cqes_to_src_context( - struct fi_cq_tagged_entry *source_cqes, - struct fi_cq_tagged_entry **mapping, - void **context) -{ - int i, j; - - /* map src cqes to src parameters */ - for (i = 0; i < 2; i++) - for (j = 0; j < 2; j++) - if (source_cqes[i].op_context == context[j]) - mapping[j] = &source_cqes[i]; -} - -static inline void start_test_timer(void) -{ - gettimeofday(&loop_start, NULL); -} - -static inline void update_test_timer(void) -{ - gettimeofday(&loop_end, NULL); - elapsed = elapsed_seconds(&loop_start, &loop_end); -} - -/* -ssize_t (*recvmsg)(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags); - */ -Test(rdm_fi_pdc, peek_no_event) -{ - /* - * This test should do nothing but peek into EP to ensure that - * no messages are there. This should be a simple test - */ - - int ret; - struct fi_msg_tagged msg; - struct iovec iov; - - build_message(&msg, &iov, target, 128, (void *) &rem_mr, gni_addr[0], - source, 128, 0); - - ret = fi_trecvmsg(ep[1], &msg, FI_PEEK); - cr_assert_eq(ret, FI_SUCCESS); - - ret = fi_cq_readerr(msg_cq[1], &cqe_error, 0); - cr_assert_eq(ret, 1); - - cr_assert_eq(cqe_error.buf, msg.msg_iov[0].iov_base); - cr_assert_eq(cqe_error.len, msg.msg_iov[0].iov_len); - cr_assert_eq(cqe_error.err, FI_ENOMSG); - cr_assert_eq(cqe_error.olen, msg.msg_iov[0].iov_len); - cr_assert_eq(cqe_error.op_context, msg.context); - cr_assert_eq(cqe_error.prov_errno, FI_ENOMSG); - cr_assert_eq(cqe_error.tag, msg.tag); -} - -static void pdc_peek_event_present_buffer_provided(int len) -{ - /* PEEK then RECV with no buffer during peek test - * - * For each message size, - * 1. send a message - * 2. peek| the receiver to find the message - * 3. 
receive the message - * - * Test validation is done at the end of the test by verifying the - * contents of the CQEs after asserting that the send and recv - * sides have finished. Buffer contents should match the - * message contents. - * - * This is the special case where the application provides a buffer - * during the peek for which some of the data can be written. - * - * An application may supply a buffer as part of the - * peek operation. If given, the provider may return a copy - * of the message data. - * - * Ideally, both cases should be tested, where the provider - * returns a NULL pointer indicating that no data was available - * yet even though the peek succeeded, and the case where some - * of the data is copied back. - */ - int ret; - struct fi_msg_tagged msg; - struct iovec iov; - struct fi_cq_tagged_entry s_cqe; - struct fi_cq_tagged_entry d_cqe; - - rdm_fi_pdc_init_data(source, len, 0xab); - rdm_fi_pdc_init_data(target, len, 0); - - build_message(&msg, &iov, target, len, (void *) &rem_mr, gni_addr[0], - source, len, 0); - - INIT_TEST_STATE(S_STATE_SEND_MSG_1, R_STATE_PEEK); - - /* we need to set up a peek buffer to ensure the contents of the peek - * are copied correctly. 
In the event of a discard, the data can be - * fetched with a peek, but the target buffer should remain untouched - */ - build_peek_message(&peek_msg, &msg); - - start_test_timer(); - do { - PROGRESS_CQS(msg_cq); - - switch (s_state) { - case S_STATE_SEND_MSG_1: - ret = fi_tsend(ep[0], source, len, loc_mr, - gni_addr[1], len, target); - cr_assert_eq(ret, FI_SUCCESS); - SEND_STATE_TRANSITION(S_STATE_SEND_MSG_1_WAIT_CQ); - break; - case S_STATE_SEND_MSG_1_WAIT_CQ: - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - COND_SEND_STATE_TRANSITION(ret, 1, - S_STATE_DONE, - S_STATE_SEND_MSG_1_WAIT_CQ); - break; - case S_STATE_DONE: - break; - default: - RAISE_UNREACHABLE_STATE; - break; - } - - switch (r_state) { - case R_STATE_PEEK: - ret = fi_trecvmsg(ep[1], &peek_msg, FI_PEEK); - - COND_RECV_STATE_TRANSITION(ret, FI_SUCCESS, - R_STATE_PEEK_WAIT_CQ, - R_STATE_PEEK); - break; - case R_STATE_PEEK_WAIT_CQ: - ret = fi_cq_read(msg_cq[1], &d_peek_cqe, 1); - if (ret == -FI_EAVAIL) { - ret = fi_cq_readerr(msg_cq[1], &cqe_error, 0); - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_PEEK, - R_STATE_PEEK_WAIT_CQ); - } else { - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_RECV_MSG_1, - R_STATE_PEEK_WAIT_CQ); - } - break; - case R_STATE_RECV_MSG_1: - ret = fi_trecvmsg(ep[1], &msg, 0); - cr_assert_eq(ret, FI_SUCCESS); - RECV_STATE_TRANSITION(R_STATE_RECV_MSG_1_WAIT_CQ); - break; - case R_STATE_RECV_MSG_1_WAIT_CQ: - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_DONE, - R_STATE_RECV_MSG_1_WAIT_CQ); - break; - case R_STATE_DONE: - break; - default: - RAISE_UNREACHABLE_STATE; - break; - } - - update_test_timer(); - } while (!TEST_TIME_LIMIT_EXPIRED && !SEND_RECV_DONE); - - ASSERT_SEND_RECV_DONE; - - /* validate the expected results */ - validate_cqe_contents(&s_cqe, TSEND_FLAGS, source, len, len, target); - validate_cqe_with_message(&d_peek_cqe, &peek_msg, - TRECV_FLAGS | FI_PEEK); - - /* if CQE provided a buffer back, the data was copied. 
- * Check the data */ - if (d_peek_cqe.buf) { - cr_assert(rdm_fi_pdc_check_data_pattern(peek_buffer, 0xab, len), - "Data mismatch"); - } - - validate_cqe_with_message(&d_cqe, &msg, TRECV_FLAGS); - cr_assert(rdm_fi_pdc_check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_fi_pdc, peek_event_present_buff_provided) -{ - rdm_fi_pdc_xfer_for_each_size(pdc_peek_event_present_buffer_provided, - 1, BUF_SZ); -} - -static void pdc_peek_event_present_no_buff_provided(int len) -{ - /* PEEK then RECV with no buffer during peek test - * - * For each message size, - * 1. send a message - * 2. peek| the receiver to find the message - * 3. receive the message - * - * Test validation is done at the end of the test by verifying the - * contents of the CQEs after asserting that the send and recv - * sides have finished. Buffer contents should match the - * message contents. - * - * The CQE for the peek should have a NULL buffer field - */ - - int ret; - struct fi_msg_tagged msg; - struct iovec iov; - struct fi_cq_tagged_entry s_cqe; - struct fi_cq_tagged_entry d_cqe; - - rdm_fi_pdc_init_data(source, len, 0xab); - rdm_fi_pdc_init_data(target, len, 0); - - build_message(&msg, &iov, target, len, (void *) &rem_mr, - gni_addr[0], source, len, 0); - - INIT_TEST_STATE(S_STATE_SEND_MSG_1, R_STATE_PEEK); - - /* we need to set up a peek buffer to ensure the contents of the peek - * are copied correctly. 
In the event of a discard, the data can be - * fetched with a peek, but the target buffer should remain untouched - */ - build_peek_message(&peek_msg, &msg); - - /* send in a null buffer to indicate that we don't want data back */ - peek_iov.iov_base = NULL; - - start_test_timer(); - do { - PROGRESS_CQS(msg_cq); - - switch (s_state) { - case S_STATE_SEND_MSG_1: - ret = fi_tsend(ep[0], source, len, loc_mr, - gni_addr[1], len, target); - cr_assert_eq(ret, FI_SUCCESS); - SEND_STATE_TRANSITION(S_STATE_SEND_MSG_1_WAIT_CQ); - break; - case S_STATE_SEND_MSG_1_WAIT_CQ: - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - COND_SEND_STATE_TRANSITION(ret, 1, - S_STATE_DONE, - S_STATE_SEND_MSG_1_WAIT_CQ); - break; - case S_STATE_DONE: - break; - default: - RAISE_UNREACHABLE_STATE; - break; - } - - switch (r_state) { - case R_STATE_PEEK: - ret = fi_trecvmsg(ep[1], &peek_msg, FI_PEEK); - COND_RECV_STATE_TRANSITION(ret, FI_SUCCESS, - R_STATE_PEEK_WAIT_CQ, - R_STATE_PEEK); - break; - case R_STATE_PEEK_WAIT_CQ: - ret = fi_cq_read(msg_cq[1], &d_peek_cqe, 1); - if (ret == -FI_EAVAIL) { - ret = fi_cq_readerr(msg_cq[1], &cqe_error, 0); - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_PEEK, - R_STATE_PEEK_WAIT_CQ); - } else { - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_RECV_MSG_1, - R_STATE_PEEK_WAIT_CQ); - } - break; - case R_STATE_RECV_MSG_1: - ret = fi_trecvmsg(ep[1], &msg, 0); - cr_assert_eq(ret, FI_SUCCESS); - RECV_STATE_TRANSITION(R_STATE_RECV_MSG_1_WAIT_CQ); - break; - case R_STATE_RECV_MSG_1_WAIT_CQ: - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_DONE, - R_STATE_RECV_MSG_1_WAIT_CQ); - break; - case R_STATE_DONE: - break; - default: - RAISE_UNREACHABLE_STATE; - break; - } - - update_test_timer(); - } while (!TEST_TIME_LIMIT_EXPIRED && !SEND_RECV_DONE); - - ASSERT_SEND_RECV_DONE; - - /* verify test execution correctness */ - validate_cqe_contents(&s_cqe, TSEND_FLAGS, source, len, len, target); - validate_cqe_with_message(&d_cqe, &msg, 
TRECV_FLAGS); - - /* a pointer should never be returned */ - cr_assert_eq(d_peek_cqe.buf, NULL); - validate_cqe_with_message(&d_peek_cqe, &peek_msg, - TRECV_FLAGS | FI_PEEK); - cr_assert(rdm_fi_pdc_check_data(source, target, len), - "Data mismatch"); -} - -Test(rdm_fi_pdc, peek_event_present_no_buff_provided) -{ - rdm_fi_pdc_xfer_for_each_size(pdc_peek_event_present_no_buff_provided, - 1, BUF_SZ); -} - -static void pdc_peek_claim_same_tag(int len) -{ - /* PEEK|CLAIM then CLAIM using the same tags test - * - * For each message size, - * 1. send two messages with identical tags but different - * buffer parameters - * 2. peek|claim the receiver to find and claim the first message - * 3. receive the second message - * 4. claim the first message - * - * Test validation is done at the end of the test by verifying the - * contents of the CQEs after asserting that the send and recv - * sides have finished. Buffer contents should match the - * message contents. - */ - int ret, i; - struct fi_msg_tagged msg[2]; - struct iovec iov[2]; - struct fi_cq_tagged_entry s_cqe[2]; - struct fi_cq_tagged_entry d_cqe[2]; - struct fi_cq_tagged_entry *src_cqe[2] = {NULL, NULL}; - char *src_buf[2] = {source, source + len}; - char *dst_buf[2] = {target, target + len}; - void *src_context[2] = {target, target + len}; - - /* initialize the initial data range on the source buffer to have - * different data values for one message than for the other - */ - rdm_fi_pdc_init_data_range(source, 0, len, 0xa5); - rdm_fi_pdc_init_data_range(source, len, len, 0x5a); - rdm_fi_pdc_init_data(target, len*2, 0); - - /* set up messages */ - for (i = 0; i < 2; i++) { - build_message(&msg[i], &iov[i], dst_buf[i], len, (void *) - &rem_mr, gni_addr[0], src_buf[i], len, 0); - } - - /* we need to set up a peek buffer to ensure the contents of the peek - * are copied correctly. 
In the event of a discard, the data can be - * fetched with a peek, but the target buffer should remain untouched - */ - build_peek_message(&peek_msg, &msg[0]); - - INIT_TEST_STATE(S_STATE_SEND_MSG_1, R_STATE_PEEK_CLAIM); - - start_test_timer(); - do { - PROGRESS_CQS(msg_cq); - - switch (s_state) { - case S_STATE_SEND_MSG_1: - ret = fi_tsend(ep[0], src_buf[0], len, loc_mr, - gni_addr[1], len, dst_buf[0]); - cr_assert_eq(ret, FI_SUCCESS); - SEND_STATE_TRANSITION(S_STATE_SEND_MSG_2); - break; - case S_STATE_SEND_MSG_2: - ret = fi_tsend(ep[0], src_buf[1], len, loc_mr, - gni_addr[1], len, dst_buf[1]); - cr_assert_eq(ret, FI_SUCCESS); - SEND_STATE_TRANSITION(S_STATE_SEND_MSG_1_WAIT_CQ); - break; - case S_STATE_SEND_MSG_1_WAIT_CQ: - ret = fi_cq_read(msg_cq[0], &s_cqe[0], 1); - COND_SEND_STATE_TRANSITION(ret, 1, - S_STATE_SEND_MSG_2_WAIT_CQ, - S_STATE_SEND_MSG_1_WAIT_CQ); - break; - case S_STATE_SEND_MSG_2_WAIT_CQ: - ret = fi_cq_read(msg_cq[0], &s_cqe[1], 1); - COND_SEND_STATE_TRANSITION(ret, 1, - S_STATE_DONE, - S_STATE_SEND_MSG_2_WAIT_CQ); - break; - case S_STATE_DONE: - break; - default: - RAISE_UNREACHABLE_STATE; - break; - } - - switch (r_state) { - case R_STATE_PEEK_CLAIM: - ret = fi_trecvmsg(ep[1], &peek_msg, FI_PEEK | FI_CLAIM); - COND_RECV_STATE_TRANSITION(ret, FI_SUCCESS, - R_STATE_PEEK_CLAIM_WAIT_CQ, - R_STATE_PEEK_CLAIM); - break; - case R_STATE_PEEK_CLAIM_WAIT_CQ: - ret = fi_cq_read(msg_cq[1], &d_peek_cqe, 1); - if (ret == -FI_EAVAIL) { - ret = fi_cq_readerr(msg_cq[1], &cqe_error, 0); - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_PEEK_CLAIM, - R_STATE_PEEK_CLAIM_WAIT_CQ); - } else { - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_RECV_MSG_2, - R_STATE_PEEK_CLAIM_WAIT_CQ); - } - break; - case R_STATE_RECV_MSG_2: - ret = fi_trecvmsg(ep[1], &msg[1], 0); - COND_RECV_STATE_TRANSITION(ret, FI_SUCCESS, - R_STATE_RECV_MSG_2_WAIT_CQ, - R_STATE_RECV_MSG_2); - break; - case R_STATE_RECV_MSG_2_WAIT_CQ: - ret = fi_cq_read(msg_cq[1], &d_cqe[1], 1); - 
COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_CLAIM, - R_STATE_RECV_MSG_2_WAIT_CQ); - break; - case R_STATE_CLAIM: - ret = fi_trecvmsg(ep[1], &msg[0], FI_CLAIM); - COND_RECV_STATE_TRANSITION(ret, FI_SUCCESS, - R_STATE_CLAIM_WAIT_CQ, - R_STATE_CLAIM); - break; - case R_STATE_CLAIM_WAIT_CQ: - ret = fi_cq_read(msg_cq[1], &d_cqe[0], 1); - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_DONE, - R_STATE_CLAIM_WAIT_CQ); - break; - case R_STATE_DONE: - break; - default: - RAISE_UNREACHABLE_STATE; - break; - } - - update_test_timer(); - } while (!TEST_TIME_LIMIT_EXPIRED && !SEND_RECV_DONE); - - ASSERT_SEND_RECV_DONE; - - /* map src cqes to src parameters */ - map_src_cqes_to_src_context(s_cqe, src_cqe, src_context); - - /* verify test execution correctness */ - validate_cqe_contents(src_cqe[0], TSEND_FLAGS, - src_buf[0], len, len, dst_buf[0]); - validate_cqe_contents(src_cqe[1], TSEND_FLAGS, - src_buf[1], len, len, dst_buf[1]); - validate_cqe_with_message(&d_peek_cqe, &peek_msg, - TRECV_FLAGS | FI_PEEK | FI_CLAIM); - validate_cqe_with_message(&d_cqe[1], &msg[1], TRECV_FLAGS); - validate_cqe_with_message(&d_cqe[0], &msg[0], TRECV_FLAGS | FI_CLAIM); - - /* if CQE provided a buffer back, the data was copied. - * Check the data */ - if (d_peek_cqe.buf) { - cr_assert(rdm_fi_pdc_check_data_pattern(peek_buffer, 0xa5, len), - "Data mismatch"); - } - - - cr_assert(rdm_fi_pdc_check_data(src_buf[0], dst_buf[0], len), - "Data mismatch"); - - cr_assert(rdm_fi_pdc_check_data(src_buf[1], dst_buf[1], len), - "Data mismatch"); -} - -Test(rdm_fi_pdc, peek_claim_same_tag) -{ - rdm_fi_pdc_xfer_for_each_size(pdc_peek_claim_same_tag, 1, BUF_SZ); -} - -static void pdc_peek_claim_unique_tag(int len) -{ - /* PEEK|CLAIM then CLAIM using unique tags test - * - * For each message size, - * 1. send two messages with unique tags and parameters - * 2. peek|claim the receiver to find and claim the first message - * 3. receive the second message - * 4. 
claim the first message - * - * Test validation is done at the end of the test by verifying the - * contents of the CQEs after asserting that the send and recv - * sides have finished. Buffer contents should match the - * message contents - */ - - int ret, i; - struct fi_msg_tagged msg[2]; - struct iovec iov[2]; - struct fi_cq_tagged_entry s_cqe[2]; - struct fi_cq_tagged_entry d_cqe[2]; - struct fi_cq_tagged_entry *src_cqe[2] = {NULL, NULL}; - char *src_buf[2] = {source, source + len}; - char *dst_buf[2] = {target, target + len}; - void *src_context[2] = {target, target + len}; - - /* initialize the initial data range on the source buffer to have - * different data vaules for one message than for the other - */ - rdm_fi_pdc_init_data_range(source, 0, len, 0xa5); - rdm_fi_pdc_init_data_range(source, len, len, 0x5a); - rdm_fi_pdc_init_data(target, len*2, 0); - - /* set up messages */ - for (i = 0; i < 2; i++) { - build_message(&msg[i], &iov[i], dst_buf[i], len, - (void *) &rem_mr, gni_addr[0], - src_buf[i], len + i, 0); - } - - /* we need to set up a peek buffer to ensure the contents of the peek - * are copied correctly. 
In the event of a discard, the data can be - * fetched with a peek, but the target buffer should remain untouched - */ - build_peek_message(&peek_msg, &msg[0]); - - INIT_TEST_STATE(S_STATE_SEND_MSG_1, R_STATE_PEEK_CLAIM); - - start_test_timer(); - do { - PROGRESS_CQS(msg_cq); - - switch (s_state) { - case S_STATE_SEND_MSG_1: - ret = fi_tsend(ep[0], src_buf[0], len, loc_mr, - gni_addr[1], len, dst_buf[0]); - cr_assert_eq(ret, FI_SUCCESS); - SEND_STATE_TRANSITION(S_STATE_SEND_MSG_2); - break; - case S_STATE_SEND_MSG_2: - ret = fi_tsend(ep[0], src_buf[1], len, loc_mr, - gni_addr[1], len + 1, dst_buf[1]); - cr_assert_eq(ret, FI_SUCCESS); - SEND_STATE_TRANSITION(S_STATE_SEND_MSG_1_WAIT_CQ); - break; - case S_STATE_SEND_MSG_1_WAIT_CQ: - ret = fi_cq_read(msg_cq[0], &s_cqe[0], 1); - COND_SEND_STATE_TRANSITION(ret, 1, - S_STATE_SEND_MSG_2_WAIT_CQ, - S_STATE_SEND_MSG_1_WAIT_CQ); - break; - case S_STATE_SEND_MSG_2_WAIT_CQ: - ret = fi_cq_read(msg_cq[0], &s_cqe[1], 1); - COND_SEND_STATE_TRANSITION(ret, 1, - S_STATE_DONE, - S_STATE_SEND_MSG_2_WAIT_CQ); - break; - case S_STATE_DONE: - break; - default: - RAISE_UNREACHABLE_STATE; - break; - } - - switch (r_state) { - case R_STATE_PEEK_CLAIM: - ret = fi_trecvmsg(ep[1], &peek_msg, FI_PEEK | FI_CLAIM); - COND_RECV_STATE_TRANSITION(ret, FI_SUCCESS, - R_STATE_PEEK_CLAIM_WAIT_CQ, - R_STATE_PEEK_CLAIM); - break; - case R_STATE_PEEK_CLAIM_WAIT_CQ: - ret = fi_cq_read(msg_cq[1], &d_peek_cqe, 1); - if (ret == -FI_EAVAIL) { - ret = fi_cq_readerr(msg_cq[1], &cqe_error, 0); - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_PEEK_CLAIM, - R_STATE_PEEK_WAIT_CQ); - } else { - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_RECV_MSG_2, - R_STATE_PEEK_CLAIM_WAIT_CQ); - } - break; - case R_STATE_RECV_MSG_2: - ret = fi_trecvmsg(ep[1], &msg[1], 0); - cr_assert_eq(ret, FI_SUCCESS); - COND_RECV_STATE_TRANSITION(ret, FI_SUCCESS, - R_STATE_RECV_MSG_2_WAIT_CQ, - R_STATE_RECV_MSG_2); - break; - case R_STATE_RECV_MSG_2_WAIT_CQ: - ret = fi_cq_read(msg_cq[1], 
&d_cqe[1], 1); - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_CLAIM, - R_STATE_RECV_MSG_2_WAIT_CQ); - break; - case R_STATE_CLAIM: - ret = fi_trecvmsg(ep[1], &msg[0], FI_CLAIM); - COND_RECV_STATE_TRANSITION(ret, FI_SUCCESS, - R_STATE_CLAIM_WAIT_CQ, - R_STATE_CLAIM); - break; - case R_STATE_CLAIM_WAIT_CQ: - ret = fi_cq_read(msg_cq[1], &d_cqe[0], 1); - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_DONE, - R_STATE_CLAIM_WAIT_CQ); - break; - case R_STATE_DONE: - break; - default: - RAISE_UNREACHABLE_STATE; - break; - } - - update_test_timer(); - } while (!TEST_TIME_LIMIT_EXPIRED && !SEND_RECV_DONE); - - ASSERT_SEND_RECV_DONE; - - /* map src cqes to src parameters */ - map_src_cqes_to_src_context(s_cqe, src_cqe, src_context); - - /* verify test execution correctness */ - for (i = 0; i < 2; i++) - validate_cqe_contents(src_cqe[i], TSEND_FLAGS, - src_buf[i], len, len + i, dst_buf[i]); - - validate_cqe_with_message(&d_peek_cqe, &peek_msg, - TRECV_FLAGS | FI_PEEK | FI_CLAIM); - validate_cqe_with_message(&d_cqe[1], &msg[1], TRECV_FLAGS); - validate_cqe_with_message(&d_cqe[0], &msg[0], TRECV_FLAGS | FI_CLAIM); - - /* if CQE provided a buffer back, the data was copied. - * Check the data */ - if (d_peek_cqe.buf) { - cr_assert(rdm_fi_pdc_check_data_pattern(peek_buffer, 0xa5, len), - "Data mismatch"); - } - - - cr_assert(rdm_fi_pdc_check_data(src_buf[0], dst_buf[0], len), - "Data mismatch"); - cr_assert(rdm_fi_pdc_check_data(src_buf[1], dst_buf[1], len), - "Data mismatch"); -} - -Test(rdm_fi_pdc, peek_claim_unique_tag) -{ - rdm_fi_pdc_xfer_for_each_size(pdc_peek_claim_unique_tag, 1, BUF_SZ); -} - -static void pdc_peek_discard(int len) -{ - /* PEEK|DISCARD then PEEK test - * - * For each message size, - * 1. send one messages - * 2. peek|discard the receiver to find and discard message - * 3. 
peek the message and fail to find it - * - * Test validation is done at the end of the test by verifying the - * contents of the CQEs after asserting that the send and recv - * sides have finished. Buffer contents should match the - * message contents - */ - - int ret; - struct fi_msg_tagged msg; - struct iovec iov; - struct fi_cq_tagged_entry s_cqe; - - rdm_fi_pdc_init_data(source, len, 0xab); - rdm_fi_pdc_init_data(target, len, 0); - - build_message(&msg, &iov, target, len, (void *) &rem_mr, - gni_addr[0], source, len, 0); - - INIT_TEST_STATE(S_STATE_SEND_MSG_1, R_STATE_PEEK_DISCARD); - - /* we need to set up a peek buffer to ensure the contents of the peek - * are copied correctly. In the event of a discard, the data can be - * fetched with a peek, but the target buffer should remain untouched - */ - build_peek_message(&peek_msg, &msg); - - start_test_timer(); - do { - PROGRESS_CQS(msg_cq); - - switch (s_state) { - case S_STATE_SEND_MSG_1: - ret = fi_tsend(ep[0], source, len, loc_mr, - gni_addr[1], len, target); - cr_assert_eq(ret, FI_SUCCESS); - SEND_STATE_TRANSITION(S_STATE_SEND_MSG_1_WAIT_CQ); - break; - case S_STATE_SEND_MSG_1_WAIT_CQ: - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - COND_SEND_STATE_TRANSITION(ret, 1, - S_STATE_DONE, - S_STATE_SEND_MSG_1_WAIT_CQ); - break; - case S_STATE_DONE: - break; - default: - RAISE_UNREACHABLE_STATE; - break; - } - - switch (r_state) { - case R_STATE_PEEK_DISCARD: - ret = fi_trecvmsg(ep[1], &peek_msg, FI_PEEK | FI_DISCARD); - COND_RECV_STATE_TRANSITION(ret, FI_SUCCESS, - R_STATE_PEEK_DISCARD_WAIT_CQ, - R_STATE_PEEK_DISCARD); - break; - case R_STATE_PEEK_DISCARD_WAIT_CQ: - ret = fi_cq_read(msg_cq[1], &d_peek_cqe, 1); - if (ret == -FI_EAVAIL) { - ret = fi_cq_readerr(msg_cq[1], &cqe_error, 0); - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_PEEK_DISCARD, - R_STATE_PEEK_DISCARD_WAIT_CQ); - } else { - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_PEEK, - R_STATE_PEEK_DISCARD_WAIT_CQ); - } - break; - case R_STATE_PEEK: - ret = 
fi_trecvmsg(ep[1], &msg, FI_PEEK); - cr_assert_eq(ret, FI_SUCCESS); - RECV_STATE_TRANSITION(R_STATE_PEEK_WAIT_ERR_CQ); - break; - case R_STATE_PEEK_WAIT_ERR_CQ: - ret = fi_cq_readerr(msg_cq[1], &cqe_error, 0); - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_DONE, - R_STATE_PEEK_WAIT_ERR_CQ); - break; - case R_STATE_DONE: - break; - default: - RAISE_UNREACHABLE_STATE; - break; - } - - update_test_timer(); - } while (!TEST_TIME_LIMIT_EXPIRED && !SEND_RECV_DONE); - - ASSERT_SEND_RECV_DONE; - - /* verify test execution correctness */ - validate_cqe_contents(&s_cqe, TSEND_FLAGS, source, len, len, target); - validate_cqe_with_message(&d_peek_cqe, &peek_msg, - TRECV_FLAGS | FI_PEEK | FI_DISCARD); - - cr_assert(rdm_fi_pdc_check_data_pattern(target, 0, len), - "Data matched"); -} - -Test(rdm_fi_pdc, peek_discard) -{ - rdm_fi_pdc_xfer_for_each_size(pdc_peek_discard, 1, BUF_SZ); -} - -static void pdc_peek_discard_unique_tags(int len) -{ - /* PEEK|DISCARD then PEEK using unique tags test - * - * For each message size, - * 1. send two messages with unique tags and parameters - * 2. peek|discard the receiver to find and discard the first message - * 3. receive the second message - * 4. peek the first message and fail to find it - * - * Test validation is done at the end of the test by verifying the - * contents of the CQEs after asserting that the send and recv - * sides have finished. 
Buffer contents should match the - * message contents - */ - int ret, i; - struct fi_msg_tagged msg[2]; - struct iovec iov[2]; - struct fi_cq_tagged_entry s_cqe[2]; - struct fi_cq_tagged_entry d_cqe[2]; - struct fi_cq_tagged_entry *src_cqe[2] = {NULL, NULL}; - char *src_buf[2] = {source, source + len}; - char *dst_buf[2] = {target, target + len}; - void *src_context[2] = {target, target + len}; - - /* initialize the initial data range on the source buffer to have - * different data vaules for one message than for the other - */ - rdm_fi_pdc_init_data_range(source, 0, len, 0xa5); - rdm_fi_pdc_init_data_range(source, len, len, 0x5a); - rdm_fi_pdc_init_data(target, len*2, 0); - - /* set up messages */ - for (i = 0; i < 2; i++) { - build_message(&msg[i], &iov[i], dst_buf[i], len, - (void *) &rem_mr, gni_addr[0], - src_buf[i], len + i, 0); - } - - /* we need to set up a peek buffer to ensure the contents of the peek - * are copied correctly. In the event of a discard, the data can be - * fetched with a peek, but the target buffer should remain untouched - */ - build_peek_message(&peek_msg, &msg[0]); - - INIT_TEST_STATE(S_STATE_SEND_MSG_1, R_STATE_PEEK_DISCARD); - - start_test_timer(); - do { - PROGRESS_CQS(msg_cq); - - switch (s_state) { - case S_STATE_SEND_MSG_1: - ret = fi_tsend(ep[0], src_buf[0], len, loc_mr, - gni_addr[1], len, dst_buf[0]); - cr_assert_eq(ret, FI_SUCCESS); - SEND_STATE_TRANSITION(S_STATE_SEND_MSG_2); - break; - case S_STATE_SEND_MSG_2: - ret = fi_tsend(ep[0], src_buf[1], len, loc_mr, - gni_addr[1], len + 1, dst_buf[1]); - cr_assert_eq(ret, FI_SUCCESS); - SEND_STATE_TRANSITION(S_STATE_SEND_MSG_1_WAIT_CQ); - break; - case S_STATE_SEND_MSG_1_WAIT_CQ: - ret = fi_cq_read(msg_cq[0], &s_cqe[0], 1); - COND_SEND_STATE_TRANSITION(ret, 1, - S_STATE_SEND_MSG_2_WAIT_CQ, - S_STATE_SEND_MSG_1_WAIT_CQ); - break; - case S_STATE_SEND_MSG_2_WAIT_CQ: - ret = fi_cq_read(msg_cq[0], &s_cqe[1], 1); - COND_SEND_STATE_TRANSITION(ret, 1, - S_STATE_DONE, - 
S_STATE_SEND_MSG_2_WAIT_CQ); - break; - case S_STATE_DONE: - break; - default: - RAISE_UNREACHABLE_STATE; - break; - } - - switch (r_state) { - case R_STATE_PEEK_DISCARD: - ret = fi_trecvmsg(ep[1], &peek_msg, FI_PEEK | FI_DISCARD); - COND_RECV_STATE_TRANSITION(ret, FI_SUCCESS, - R_STATE_PEEK_DISCARD_WAIT_CQ, - R_STATE_PEEK_DISCARD); - break; - case R_STATE_PEEK_DISCARD_WAIT_CQ: - ret = fi_cq_read(msg_cq[1], &d_peek_cqe, 1); - if (ret == -FI_EAVAIL) { - ret = fi_cq_readerr(msg_cq[1], &cqe_error, 0); - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_PEEK_DISCARD, - R_STATE_PEEK_DISCARD_WAIT_CQ); - } else { - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_RECV_MSG_2, - R_STATE_PEEK_DISCARD_WAIT_CQ); - } - break; - case R_STATE_RECV_MSG_2: - ret = fi_trecvmsg(ep[1], &msg[1], 0); - COND_RECV_STATE_TRANSITION(ret, FI_SUCCESS, - R_STATE_RECV_MSG_2_WAIT_CQ, - R_STATE_RECV_MSG_2); - break; - case R_STATE_RECV_MSG_2_WAIT_CQ: - ret = fi_cq_read(msg_cq[1], &d_cqe[1], 1); - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_PEEK, - R_STATE_RECV_MSG_2_WAIT_CQ); - break; - case R_STATE_PEEK: - ret = fi_trecvmsg(ep[1], &msg[0], FI_PEEK); - cr_assert_eq(ret, FI_SUCCESS); - RECV_STATE_TRANSITION(R_STATE_PEEK_WAIT_ERR_CQ); - break; - case R_STATE_PEEK_WAIT_ERR_CQ: - ret = fi_cq_readerr(msg_cq[1], &cqe_error, 0); - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_DONE, - R_STATE_PEEK_WAIT_ERR_CQ); - break; - case R_STATE_DONE: - break; - default: - RAISE_UNREACHABLE_STATE; - break; - } - - update_test_timer(); - } while (!TEST_TIME_LIMIT_EXPIRED && !SEND_RECV_DONE); - - ASSERT_SEND_RECV_DONE; - - /* map src cqes to src parameters */ - map_src_cqes_to_src_context(s_cqe, src_cqe, src_context); - - /* verify test execution correctness */ - for (i = 0; i < 2; i++) - validate_cqe_contents(src_cqe[i], TSEND_FLAGS, - src_buf[i], len, len + i, dst_buf[i]); - - validate_cqe_with_message(&d_cqe[1], &msg[1], TRECV_FLAGS); - validate_cqe_with_message(&d_peek_cqe, &peek_msg, - TRECV_FLAGS | FI_PEEK | 
FI_DISCARD); - - cr_assert(rdm_fi_pdc_check_data_pattern(dst_buf[0], 0, len), - "Data mismatch"); - cr_assert(rdm_fi_pdc_check_data(src_buf[1], dst_buf[1], len), - "Data mismatch"); -} - -Test(rdm_fi_pdc, peek_discard_unique_tags) -{ - rdm_fi_pdc_xfer_for_each_size(pdc_peek_discard_unique_tags, - 1, BUF_SZ); -} - -static void pdc_peek_claim_then_claim_discard(int len) -{ - /* PEEK|CLAIM then CLAIM|DISCARD test - * - * For each message size, - * 1. send two messages with unique tags and parameters - * 2. peek|claim the receiver to find and claim the first message - * 3. receive the second message - * 4. claim|discard the first message - * - * From the fi_tagged man page: - * Claimed messages can only be retrieved using a subsequent, - * paired receive operation with the FI_CLAIM flag set. - * - * (FI_DISCARD) flag may also be used in conjunction - * with FI_CLAIM in order to retrieve and discard a message - * previously claimed using an FI_PEEK + FI_CLAIM request. - * - * Test validation is done at the end of the test by verifying the - * contents of the CQEs after asserting that the send and recv - * sides have finished. 
Buffer contents should match the message - * contents - */ - int ret, i; - struct fi_msg_tagged msg[2]; - struct iovec iov[2]; - struct fi_cq_tagged_entry s_cqe[2]; - struct fi_cq_tagged_entry d_cqe[2]; - struct fi_cq_tagged_entry *src_cqe[2] = {NULL, NULL}; - char *src_buf[2] = {source, source + len}; - char *dst_buf[2] = {target, target + len}; - void *src_context[2] = {target, target + len}; - - /* initialize the initial data range on the source buffer to have - * different data values for one message than for the other - */ - rdm_fi_pdc_init_data_range(source, 0, len, 0xa5); - rdm_fi_pdc_init_data_range(source, len, len, 0x5a); - rdm_fi_pdc_init_data(target, len*2, 0); - - /* set up messages */ - for (i = 0; i < 2; i++) { - build_message(&msg[i], &iov[i], dst_buf[i], len, - (void *) &rem_mr, gni_addr[0], - src_buf[i], len + i, 0); - } - - /* we need to set up a peek buffer to ensure the contents of the peek - * are copied correctly. In the event of a discard, the data can be - * fetched with a peek, but the target buffer should remain untouched - */ - build_peek_message(&peek_msg, &msg[0]); - - INIT_TEST_STATE(S_STATE_SEND_MSG_1, R_STATE_PEEK_CLAIM); - - start_test_timer(); - do { - PROGRESS_CQS(msg_cq); - - switch (s_state) { - case S_STATE_SEND_MSG_1: - ret = fi_tsend(ep[0], src_buf[0], len, loc_mr, - gni_addr[1], len, dst_buf[0]); - cr_assert_eq(ret, FI_SUCCESS); - SEND_STATE_TRANSITION(S_STATE_SEND_MSG_2); - break; - case S_STATE_SEND_MSG_2: - ret = fi_tsend(ep[0], src_buf[1], len, loc_mr, - gni_addr[1], len + 1, dst_buf[1]); - cr_assert_eq(ret, FI_SUCCESS); - SEND_STATE_TRANSITION(S_STATE_SEND_MSG_1_WAIT_CQ); - break; - case S_STATE_SEND_MSG_1_WAIT_CQ: - ret = fi_cq_read(msg_cq[0], &s_cqe[0], 1); - COND_SEND_STATE_TRANSITION(ret, 1, - S_STATE_SEND_MSG_2_WAIT_CQ, - S_STATE_SEND_MSG_1_WAIT_CQ); - break; - case S_STATE_SEND_MSG_2_WAIT_CQ: - ret = fi_cq_read(msg_cq[0], &s_cqe[1], 1); - COND_SEND_STATE_TRANSITION(ret, 1, S_STATE_DONE, - 
S_STATE_SEND_MSG_2_WAIT_CQ); - break; - case S_STATE_DONE: - break; - default: - RAISE_UNREACHABLE_STATE; - break; - } - - switch (r_state) { - case R_STATE_PEEK_CLAIM: - ret = fi_trecvmsg(ep[1], &peek_msg, FI_PEEK | FI_CLAIM); - COND_RECV_STATE_TRANSITION(ret, FI_SUCCESS, - R_STATE_PEEK_CLAIM_WAIT_CQ, - R_STATE_PEEK_CLAIM); - break; - case R_STATE_PEEK_CLAIM_WAIT_CQ: - ret = fi_cq_read(msg_cq[1], &d_peek_cqe, 1); - if (ret == -FI_EAVAIL) { - ret = fi_cq_readerr(msg_cq[1], &cqe_error, 0); - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_PEEK_CLAIM, - R_STATE_PEEK_CLAIM_WAIT_CQ); - } else { - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_RECV_MSG_2, - R_STATE_PEEK_CLAIM_WAIT_CQ); - } - break; - case R_STATE_RECV_MSG_2: - ret = fi_trecvmsg(ep[1], &msg[1], 0); - COND_RECV_STATE_TRANSITION(ret, FI_SUCCESS, - R_STATE_RECV_MSG_2_WAIT_CQ, - R_STATE_RECV_MSG_2); - break; - case R_STATE_RECV_MSG_2_WAIT_CQ: - ret = fi_cq_read(msg_cq[1], &d_cqe[1], 1); - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_CLAIM_DISCARD, - R_STATE_RECV_MSG_2_WAIT_CQ); - break; - case R_STATE_CLAIM_DISCARD: - ret = fi_trecvmsg(ep[1], &msg[0], - FI_CLAIM | FI_DISCARD); - cr_assert_eq(ret, FI_SUCCESS); - RECV_STATE_TRANSITION(R_STATE_CLAIM_DISCARD_WAIT_CQ); - break; - case R_STATE_CLAIM_DISCARD_WAIT_CQ: - ret = fi_cq_read(msg_cq[1], &d_cqe[0], 1); - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_DONE, - R_STATE_CLAIM_DISCARD_WAIT_CQ); - break; - case R_STATE_DONE: - break; - default: - RAISE_UNREACHABLE_STATE; - break; - } - - update_test_timer(); - } while (!TEST_TIME_LIMIT_EXPIRED && !SEND_RECV_DONE); - - ASSERT_SEND_RECV_DONE; - - /* map src cqes to src parameters */ - map_src_cqes_to_src_context(s_cqe, src_cqe, src_context); - - /* verify test execution correctness */ - for (i = 0; i < 2; i++) - validate_cqe_contents(src_cqe[i], TSEND_FLAGS, - src_buf[i], len, len + i, dst_buf[i]); - - validate_cqe_with_message(&d_peek_cqe, &peek_msg, - TRECV_FLAGS | FI_PEEK | FI_CLAIM); - - /* if CQE provided a buffer 
back, the data was copied. - * Check the data */ - if (d_peek_cqe.buf) { - cr_assert(rdm_fi_pdc_check_data_pattern(peek_buffer, 0xa5, len), - "Data mismatch"); - } - - validate_cqe_with_message(&d_cqe[0], &msg[0], - TRECV_FLAGS | FI_CLAIM | FI_DISCARD); - validate_cqe_with_message(&d_cqe[1], &msg[1], TRECV_FLAGS); - - cr_assert(rdm_fi_pdc_check_data_pattern(dst_buf[0], 0, len), - "Data mismatch"); - cr_assert(rdm_fi_pdc_check_data(src_buf[1], dst_buf[1], len), - "Data mismatch"); -} - -Test(rdm_fi_pdc, peek_claim_then_claim_discard) -{ - rdm_fi_pdc_xfer_for_each_size(pdc_peek_claim_then_claim_discard, - 1, BUF_SZ); -} - -static void pdc_peek_event_present_small_buffer_provided(int len) -{ - /* Like pdc_peek_event_present_buffer_provided except uses an - * undersized receive buffer with the FI_PEEK request. */ - int ret, i; - struct fi_msg_tagged msg; - struct iovec iov; - struct fi_cq_tagged_entry s_cqe; - struct fi_cq_tagged_entry d_cqe; - size_t cum_recv_len = 0; - - rdm_fi_pdc_init_data(source, len, 0xab); - rdm_fi_pdc_init_data(target, len, 0); - - build_message(&msg, &iov, target, len, (void *) &rem_mr, gni_addr[0], - source, len, 0); - - INIT_TEST_STATE(S_STATE_SEND_MSG_1, R_STATE_PEEK); - - /* we need to set up a peek buffer to ensure the contents of the peek - * are copied correctly. 
In the event of a discard, the data can be - * fetched with a peek, but the target buffer should remain untouched - */ - build_peek_message(&peek_msg, &msg); - - start_test_timer(); - do { - PROGRESS_CQS(msg_cq); - - switch (s_state) { - case S_STATE_SEND_MSG_1: - ret = fi_tsend(ep[0], source, len, loc_mr, - gni_addr[1], len, target); - cr_assert_eq(ret, FI_SUCCESS); - SEND_STATE_TRANSITION(S_STATE_SEND_MSG_1_WAIT_CQ); - break; - case S_STATE_SEND_MSG_1_WAIT_CQ: - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - COND_SEND_STATE_TRANSITION(ret, 1, - S_STATE_DONE, - S_STATE_SEND_MSG_1_WAIT_CQ); - break; - case S_STATE_DONE: - break; - default: - RAISE_UNREACHABLE_STATE; - break; - } - - switch (r_state) { - case R_STATE_PEEK: - ret = fi_trecvmsg(ep[1], &peek_msg, FI_PEEK); - - COND_RECV_STATE_TRANSITION(ret, FI_SUCCESS, - R_STATE_PEEK_WAIT_CQ, - R_STATE_PEEK); - break; - case R_STATE_PEEK_WAIT_CQ: - ret = fi_cq_read(msg_cq[1], &d_peek_cqe, 1); - if (ret == -FI_EAVAIL) { - ret = fi_cq_readerr(msg_cq[1], &cqe_error, 0); - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_PEEK, - R_STATE_PEEK_WAIT_CQ); - } else { - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_RECV_MSG_1, - R_STATE_PEEK_WAIT_CQ); - } - break; - case R_STATE_RECV_MSG_1: - ret = fi_trecvmsg(ep[1], &msg, 0); - cr_assert_eq(ret, FI_SUCCESS); - RECV_STATE_TRANSITION(R_STATE_RECV_MSG_1_WAIT_CQ); - break; - case R_STATE_RECV_MSG_1_WAIT_CQ: - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - COND_RECV_STATE_TRANSITION(ret, 1, - R_STATE_DONE, - R_STATE_RECV_MSG_1_WAIT_CQ); - break; - case R_STATE_DONE: - break; - default: - RAISE_UNREACHABLE_STATE; - break; - } - - update_test_timer(); - } while (!TEST_TIME_LIMIT_EXPIRED && !SEND_RECV_DONE); - - ASSERT_SEND_RECV_DONE; - - for (i = 0; i < peek_msg.iov_count; i++) { - cum_recv_len += peek_msg.msg_iov[i].iov_len; - } - - /* validate the expected results */ - validate_cqe_contents(&s_cqe, TSEND_FLAGS, source, cum_recv_len, len, target); - validate_cqe_contents(&d_peek_cqe, TRECV_FLAGS | 
FI_PEEK, - peek_msg.msg_iov[0].iov_base, cum_recv_len, peek_msg.tag, - peek_msg.context); - - /* if CQE provided a buffer back, the data was copied. - * Check the data */ - if (d_peek_cqe.buf) { - cr_assert(rdm_fi_pdc_check_data_pattern(peek_buffer, 0xab, - peek_iov.iov_len), - "Data mismatch"); - } - - validate_cqe_with_message(&d_cqe, &msg, TRECV_FLAGS); - - cr_assert(rdm_fi_pdc_check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_fi_pdc, peek_event_present_small_buff_provided) -{ - rdm_fi_pdc_xfer_for_each_size( - pdc_peek_event_present_small_buffer_provided, - 1, BUF_SZ); -} diff --git a/prov/gni/test/rdm_multi_recv.c b/prov/gni/test/rdm_multi_recv.c deleted file mode 100644 index 021e479d8b7..00000000000 --- a/prov/gni/test/rdm_multi_recv.c +++ /dev/null @@ -1,955 +0,0 @@ -/* - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2019-2020 Triad National Security, LLC. - * All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gnix_vc.h" -#include "gnix_cm_nic.h" -#include "gnix_hashtable.h" -#include "gnix_rma.h" -#include "gnix_mr.h" -#include "common.h" - -#include -#include "gnix_rdma_headers.h" -#include "common.h" - -#if 1 -#define dbg_printf(...) -#else -#define dbg_printf(...) fprintf(stderr, __VA_ARGS__); fflush(stderr) -#endif - -/* - * The multirecv tests fail when NUMEPS are > 2 (GitHub issue #1116). - * Increase this number when the issues is fixed. 
- */ -#define NUMEPS 4 -#define NUM_MULTIRECVS 5 - -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fid_fabric *fab; -static struct fid_domain *dom[NUMEPS]; -static struct fi_gni_ops_domain *gni_domain_ops[NUMEPS]; -static struct fid_ep *ep[NUMEPS]; -static struct fid_av *av[NUMEPS]; -static void *ep_name[NUMEPS]; -static fi_addr_t gni_addr[NUMEPS]; -static struct fid_cq *msg_cq[NUMEPS]; -static struct fi_info *fi[NUMEPS]; -static struct fi_cq_attr cq_attr; -static struct fi_info *hints; - -#define BUF_SZ (1<<20) -#define BUF_RNDZV (1<<14) -#define IOV_CNT (1<<3) - -static char *target, *target_base; -static char *target2, *target2_base; -static char *source, *source_base; -static char *source2, *source2_base; -struct fid_mr *rem_mr[NUMEPS], *loc_mr[NUMEPS]; - -static struct fid_cntr *send_cntr[NUMEPS], *recv_cntr[NUMEPS]; -static struct fi_cntr_attr cntr_attr = { - .events = FI_CNTR_EVENTS_COMP, - .flags = 0 -}; -static uint64_t sends[NUMEPS] = {0}, recvs[NUMEPS] = {0}, - send_errs[NUMEPS] = {0}, recv_errs[NUMEPS] = {0}; - -static void setup_common_eps(void) -{ - int ret = 0, i = 0, j = 0; - struct fi_av_attr attr; - size_t addrlen = 0; - - memset(&attr, 0, sizeof(attr)); - attr.type = FI_AV_MAP; - attr.count = NUMEPS; - - cq_attr.format = FI_CQ_FORMAT_TAGGED; - cq_attr.size = 1024; - cq_attr.wait_obj = 0; - - target_base = malloc(GNIT_ALIGN_LEN(BUF_SZ * NUM_MULTIRECVS)); - assert(target_base); - target = GNIT_ALIGN_BUFFER(char *, target_base); - - target2_base = malloc(GNIT_ALIGN_LEN(BUF_SZ * NUM_MULTIRECVS)); - assert(target2_base); - target2 = GNIT_ALIGN_BUFFER(char *, target2_base); - - source_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(source_base); - source = GNIT_ALIGN_BUFFER(char *, source_base); - - source2_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(source2_base); - source2 = GNIT_ALIGN_BUFFER(char *, source2_base); - - ret = 
fi_fabric(fi[0]->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - for (i = 0; i < NUMEPS; i++) { - ret = fi_domain(fab, fi[i], dom + i, NULL); - cr_assert(!ret, "fi_domain"); - - ret = fi_open_ops(&dom[i]->fid, FI_GNI_DOMAIN_OPS_1, - 0, (void **) (gni_domain_ops + i), NULL); - - ret = fi_av_open(dom[i], &attr, av + i, NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_endpoint(dom[i], fi[i], ep + i, NULL); - cr_assert(!ret, "fi_endpoint"); - - ret = fi_cq_open(dom[i], &cq_attr, msg_cq + i, 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_ep_bind(ep[i], &msg_cq[i]->fid, FI_SEND | FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_getname(&ep[i]->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - ep_name[i] = malloc(addrlen); - cr_assert(ep_name[i] != NULL); - - ret = fi_getname(&ep[i]->fid, ep_name[i], &addrlen); - cr_assert(ret == FI_SUCCESS); - } - - for (i = 0; i < NUMEPS; i++) { - - for (j = 0; j < NUMEPS; j++) { - ret = fi_av_insert(av[i], ep_name[j], 1, - &gni_addr[j], - 0, NULL); - cr_assert(ret == 1); - } - - ret = fi_ep_bind(ep[i], &av[i]->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[i], &cntr_attr, send_cntr + i, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[i], &send_cntr[i]->fid, FI_SEND); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[i], &cntr_attr, recv_cntr + i, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[i], &recv_cntr[i]->fid, FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_enable(ep[i]); - cr_assert(!ret, "fi_ep_enable"); - - } -} - -static void setup_common(void) -{ - int ret = 0, i = 0, j = 0; - int req_key[4]; - - setup_common_eps(); - - for (i = 0; i < NUMEPS; i++) { - for (j = 0; j < 4; j++) - req_key[j] = (USING_SCALABLE(fi[i])) ? 
(i * 4) + j : 0; - - ret = fi_mr_reg(dom[i], - target, - NUM_MULTIRECVS * BUF_SZ, - FI_REMOTE_WRITE, - 0, - req_key[0], - 0, - rem_mr + i, - &target); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom[i], - source, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - req_key[1], - 0, - loc_mr + i, - &source); - cr_assert_eq(ret, 0); - - if (USING_SCALABLE(fi[i])) { - MR_ENABLE(rem_mr[i], - target, - NUM_MULTIRECVS * BUF_SZ); - MR_ENABLE(loc_mr[i], - source, - BUF_SZ); - } - - } -} - -void rdm_multi_r_setup(void) -{ - int ret = 0, i = 0; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->domain_attr->cq_data_size = NUMEPS * 2; - hints->domain_attr->control_progress = FI_PROGRESS_AUTO; - hints->domain_attr->data_progress = FI_PROGRESS_AUTO; - hints->mode = mode_bits; - hints->caps = FI_SOURCE | FI_MSG; - hints->fabric_attr->prov_name = strdup("gni"); - - /* Get info about fabric services with the provided hints */ - for (; i < NUMEPS; i++) { - ret = fi_getinfo(fi_version(), NULL, 0, 0, hints, &fi[i]); - cr_assert(!ret, "fi_getinfo"); - } - - setup_common(); -} - -void rdm_multi_r_setup_nr(void) -{ - int ret = 0, i = 0; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->domain_attr->cq_data_size = NUMEPS * 2; - hints->domain_attr->control_progress = FI_PROGRESS_AUTO; - hints->domain_attr->data_progress = FI_PROGRESS_AUTO; - hints->mode = mode_bits; - hints->caps = FI_SOURCE | FI_MSG; - hints->fabric_attr->prov_name = strdup("gni"); - - /* Get info about fabric services with the provided hints */ - for (; i < NUMEPS; i++) { - ret = fi_getinfo(fi_version(), NULL, 0, 0, hints, &fi[i]); - cr_assert(!ret, "fi_getinfo"); - } - - setup_common_eps(); - - for (i = 0; i < NUMEPS; i++) { - rem_mr[i] = NULL; - loc_mr[i] = NULL; - } -} - - -static void rdm_multi_r_teardown(void) -{ - int ret = 0, i = 0; - - for (; i < NUMEPS; i++) { - 
fi_close(&recv_cntr[i]->fid); - fi_close(&send_cntr[i]->fid); - - if (loc_mr[i] != NULL) - fi_close(&loc_mr[i]->fid); - if (rem_mr[i] != NULL) - fi_close(&rem_mr[i]->fid); - - ret = fi_close(&ep[i]->fid); - cr_assert(!ret, "failure in closing ep."); - - ret = fi_close(&msg_cq[i]->fid); - cr_assert(!ret, "failure in send cq."); - - ret = fi_close(&av[i]->fid); - cr_assert(!ret, "failure in closing av."); - - ret = fi_close(&dom[i]->fid); - cr_assert(!ret, "failure in closing domain."); - - fi_freeinfo(fi[i]); - - free(ep_name[i]); - } - - free(target_base); - free(source_base); - - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); - - fi_freeinfo(hints); -} - -static void init_data(char *buf, int len, char seed) -{ - int i; - - for (i = 0; i < len; i++) { - buf[i] = seed++; - } -} - -static inline int check_data(char *buf1, char *buf2, int len) -{ - int i; - - for (i = 0; i < len; i++) { - if (buf1[i] != buf2[i]) { - printf("data mismatch, elem: %d, exp: %hhx, act: %hhx\n" - , i, buf1[i], buf2[i]); - return 0; - } - } - - return 1; -} - -static void xfer_for_each_size(void (*xfer)(int len), int slen, int elen) -{ - int i; - - for (i = slen; i <= elen; i *= 2) { - xfer(i); - } -} - -static inline void check_cqe(struct fi_cq_tagged_entry *cqe, void *ctx, - uint64_t flags, void *addr, size_t len, - uint64_t data, bool buf_is_non_null, - struct fid_ep *fid_ep) -{ - struct gnix_fid_ep *gnix_ep = get_gnix_ep(fid_ep); - - cr_assert(cqe->op_context == ctx, "CQE Context mismatch"); - cr_assert(cqe->flags == flags, "CQE flags mismatch"); - - if (flags & FI_RECV) { - cr_assert(cqe->len == len, "CQE length mismatch"); - - if (buf_is_non_null) - cr_assert(cqe->buf == addr, "CQE address mismatch"); - else - cr_assert(cqe->buf == NULL, "CQE address mismatch"); - - /* TODO: Remove GNIX_ALLOW_FI_REMOTE_CQ_DATA and only check flags for FI_RMA_EVENT */ - if (GNIX_ALLOW_FI_REMOTE_CQ_DATA(flags, gnix_ep->caps)) - cr_assert(cqe->data == data, "CQE data 
mismatch"); - } else { - cr_assert(cqe->len == 0, "Invalid CQE length"); - cr_assert(cqe->buf == 0, "Invalid CQE address"); - cr_assert(cqe->data == 0, "Invalid CQE data"); - } - - cr_assert(cqe->tag == 0, "Invalid CQE tag"); -} - -static inline void check_cntrs(uint64_t s[], uint64_t r[], - uint64_t s_e[], uint64_t r_e[], - bool need_to_spin) -{ - int i = 0; - for (; i < NUMEPS; i++) { - sends[i] += s[i]; - recvs[i] += r[i]; - send_errs[i] += s_e[i]; - recv_errs[i] += r_e[i]; - - if (need_to_spin) { - while (fi_cntr_read(send_cntr[i]) != sends[i]) { - pthread_yield(); - } - } - - cr_assert(fi_cntr_read(send_cntr[i]) == sends[i], - "Bad send count"); - - if (need_to_spin) { - while (fi_cntr_read(recv_cntr[i]) != recvs[i]) { - pthread_yield(); - } - } - - cr_assert(fi_cntr_read(recv_cntr[i]) == recvs[i], - "Bad recv count"); - - if (need_to_spin) { - while (fi_cntr_readerr(send_cntr[i]) != send_errs[i]) { - pthread_yield(); - } - } - - cr_assert(fi_cntr_readerr(send_cntr[i]) == send_errs[i], - "Bad send err count"); - - if (need_to_spin) { - while (fi_cntr_readerr(recv_cntr[i]) != recv_errs[i]) { - pthread_yield(); - } - } - cr_assert(fi_cntr_readerr(recv_cntr[i]) == recv_errs[i], - "Bad recv err count"); - } -} - -static void inject_enable(void) -{ - int ret, err_count_val = 1, i = 0; - - for (; i < NUMEPS; i++) { - ret = gni_domain_ops[i]->set_val(&dom[i]->fid, - GNI_ERR_INJECT_COUNT, - &err_count_val); - cr_assert(!ret, "setval(GNI_ERR_INJECT_COUNT)"); - } -} - -/******************************************************************************* - * Test MSG functions - ******************************************************************************/ - -TestSuite(rdm_multi_r, - .init = rdm_multi_r_setup, - .fini = rdm_multi_r_teardown, - .disabled = false); - -TestSuite(rdm_multi_r_nr, - .init = rdm_multi_r_setup_nr, - .fini = rdm_multi_r_teardown, - .disabled = false); - -void do_multirecv(int len) -{ - int i, j, ret; - ssize_t sz; - struct fi_cq_tagged_entry s_cqe, 
d_cqe; - struct iovec iov; - struct fi_msg msg = {0}; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - uint64_t flags; - uint64_t min_multi_recv; - size_t optlen; - const int nrecvs = NUM_MULTIRECVS; - const int dest_ep = NUMEPS-1; - uint64_t *expected_addrs; - bool *addr_recvd, found, got_fi_multi_cqe = false; - int sends_done = 0; - - dbg_printf("do_multirecv_trunc_last() called with len = %d\n", len); - - init_data(source, len, 0xab); - init_data(target, len, 0); - - ret = fi_getopt(&ep[dest_ep]->fid, FI_OPT_ENDPOINT, - FI_OPT_MIN_MULTI_RECV, - (void *)&min_multi_recv, &optlen); - cr_assert(ret == FI_SUCCESS, "fi_getopt"); - - /* Post receives first to force matching in SMSG callback. */ - iov.iov_base = target; - iov.iov_len = len * nrecvs + (min_multi_recv-1); - - msg.msg_iov = &iov; - msg.desc = (void **)rem_mr; - msg.iov_count = 1; - msg.addr = FI_ADDR_UNSPEC; - msg.context = source; - msg.data = (uint64_t)source; - - addr_recvd = calloc(nrecvs, sizeof(bool)); - cr_assert(addr_recvd); - - expected_addrs = calloc(nrecvs, sizeof(uint64_t)); - cr_assert(expected_addrs); - - for (i = 0; i < nrecvs; i++) { - expected_addrs[i] = (uint64_t)target + - (uint64_t) (i * len); - } - - sz = fi_recvmsg(ep[dest_ep], &msg, FI_MULTI_RECV); - cr_assert_eq(sz, 0); - - for (i = nrecvs-1; i >= 0; i--) { - int iep = i%(NUMEPS-1); - - sz = fi_send(ep[iep], source, len, - loc_mr[iep], gni_addr[dest_ep], target); - cr_assert_eq(sz, 0); - } - - /* need to progress both CQs simultaneously for rendezvous */ - do { - for (i = 0; i < nrecvs; i++) { - int iep = i%(NUMEPS-1); - - /* reset cqe */ - s_cqe.op_context = s_cqe.buf = (void *) -1; - s_cqe.flags = s_cqe.len = UINT_MAX; - s_cqe.data = s_cqe.tag = UINT_MAX; - d_cqe.op_context = d_cqe.buf = (void *) -1; - d_cqe.flags = d_cqe.len = UINT_MAX; - d_cqe.data = d_cqe.tag = UINT_MAX; - - ret = fi_cq_read(msg_cq[iep], &s_cqe, 1); - if (ret == 1) { - check_cqe(&s_cqe, target, - (FI_MSG|FI_SEND), 
- 0, 0, 0, false, ep[iep]); - s[iep]++; - sends_done++; - } - } - - ret = fi_cq_read(msg_cq[dest_ep], &d_cqe, 1); - if (ret == 1) { - for (j = 0, found = false; j < nrecvs; j++) { - if (expected_addrs[j] == (uint64_t)d_cqe.buf) { - cr_assert(addr_recvd[j] == false, - "address already received"); - addr_recvd[j] = true; - found = true; - break; - } - } - cr_assert(found == true, "Address not found"); - flags = FI_MSG | FI_RECV; - check_cqe(&d_cqe, source, - flags, - (void *) expected_addrs[j], - len, 0, true, ep[dest_ep]); - cr_assert(check_data(source, d_cqe.buf, len), - "Data mismatch"); - r[dest_ep]++; - } - } while (sends_done < nrecvs || r[dest_ep] < nrecvs); - - /* - * now check for final FI_MULTI_RECV CQE on dest CQ - */ - - do { - ret = fi_cq_read(msg_cq[dest_ep], &d_cqe, 1); - if (d_cqe.flags & FI_MULTI_RECV) { - got_fi_multi_cqe = true; - r[dest_ep]++; - } - } while (got_fi_multi_cqe == false); - - check_cntrs(s, r, s_e, r_e, false); - - free(addr_recvd); - free(expected_addrs); - - dbg_printf("got context events!\n"); -} - -Test(rdm_multi_r, multirecv, .disabled = false) -{ - xfer_for_each_size(do_multirecv, 1, BUF_SZ); -} - -Test(rdm_multi_r, multirecv_retrans, .disabled = false) -{ - inject_enable(); - xfer_for_each_size(do_multirecv, 1, BUF_SZ); -} - -Test(rdm_multi_r_nr, multirecv, .disabled = false) -{ - xfer_for_each_size(do_multirecv, 1, BUF_SZ); -} - -Test(rdm_multi_r_nr, multirecv_retrans, .disabled = false) -{ - inject_enable(); - xfer_for_each_size(do_multirecv, 1, BUF_SZ); -} - -void do_multirecv_send_first(int len) -{ - int i, j, ret; - ssize_t sz; - struct fi_cq_tagged_entry s_cqe, d_cqe; - struct iovec iov; - struct fi_msg msg; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - uint64_t flags; - uint64_t min_multi_recv; - size_t optlen; - const int nrecvs = NUM_MULTIRECVS; - const int dest_ep = NUMEPS-1; - uint64_t *expected_addrs; - bool *addr_recvd, found; - int sends_done = 0; - bool 
got_fi_multi_cqe = false; - - init_data(source, len, 0xab); - init_data(target, len, 0); - - dbg_printf("do_multirecv_send_first() called with len = %d\n", len); - - ret = fi_getopt(&ep[NUMEPS-1]->fid, FI_OPT_ENDPOINT, - FI_OPT_MIN_MULTI_RECV, - (void *)&min_multi_recv, &optlen); - cr_assert(ret == FI_SUCCESS, "fi_getopt"); - - addr_recvd = calloc(nrecvs, sizeof(bool)); - cr_assert(addr_recvd); - - expected_addrs = calloc(nrecvs, sizeof(uint64_t)); - cr_assert(expected_addrs); - - for (i = 0; i < nrecvs; i++) { - expected_addrs[i] = (uint64_t)target + - (uint64_t) (i * len); - } - - /* Post sends first to force matching in the _gnix_recv() path. */ - for (i = nrecvs-1; i >= 0; i--) { - sz = fi_send(ep[i%(NUMEPS-1)], source, len, - loc_mr[i%(NUMEPS-1)], gni_addr[dest_ep], target); - cr_assert_eq(sz, 0); - } - - /* Progress our sends. */ - for (j = 0; j < 10000; j++) { - for (i = 0; i < nrecvs; i++) { - int iep = i%(NUMEPS-1); - - /* reset cqe */ - s_cqe.op_context = s_cqe.buf = (void *) -1; - s_cqe.flags = s_cqe.len = UINT_MAX; - s_cqe.data = s_cqe.tag = UINT_MAX; - d_cqe.op_context = d_cqe.buf = (void *) -1; - d_cqe.flags = d_cqe.len = UINT_MAX; - d_cqe.data = d_cqe.tag = UINT_MAX; - - ret = fi_cq_read(msg_cq[iep], &s_cqe, 1); - if (ret == 1) { - check_cqe(&s_cqe, target, - (FI_MSG|FI_SEND), - 0, 0, 0, false, ep[iep]); - s[iep]++; - sends_done++; - } - - } - ret = fi_cq_read(msg_cq[dest_ep], &d_cqe, 1); - cr_assert_eq(ret, -FI_EAGAIN); - } - - iov.iov_base = target; - iov.iov_len = len * nrecvs + (min_multi_recv-1); - - msg.msg_iov = &iov; - msg.desc = (void **)rem_mr; - msg.iov_count = 1; - msg.addr = FI_ADDR_UNSPEC; - msg.context = source; - msg.data = (uint64_t)source; - - sz = fi_recvmsg(ep[dest_ep], &msg, FI_MULTI_RECV); - cr_assert_eq(sz, 0); - - /* need to progress both CQs simultaneously for rendezvous */ - do { - for (i = 0; i < nrecvs; i++) { - int iep = i%(NUMEPS-1); - - /* reset cqe */ - s_cqe.op_context = s_cqe.buf = (void *) -1; - s_cqe.flags = 
s_cqe.len = UINT_MAX; - s_cqe.data = s_cqe.tag = UINT_MAX; - d_cqe.op_context = d_cqe.buf = (void *) -1; - d_cqe.flags = d_cqe.len = UINT_MAX; - d_cqe.data = d_cqe.tag = UINT_MAX; - - ret = fi_cq_read(msg_cq[iep], &s_cqe, 1); - if (ret == 1) { - check_cqe(&s_cqe, target, - (FI_MSG|FI_SEND), - 0, 0, 0, false, ep[iep]); - s[iep]++; - sends_done++; - } - } - - ret = fi_cq_read(msg_cq[dest_ep], &d_cqe, 1); - if (ret == 1) { - for (j = 0, found = false; j < nrecvs; j++) { - if (expected_addrs[j] == (uint64_t)d_cqe.buf) { - cr_assert(addr_recvd[j] == false, - "address already received"); - addr_recvd[j] = true; - found = true; - break; - } - } - cr_assert(found == true, "Address not found"); - flags = FI_MSG | FI_RECV; - check_cqe(&d_cqe, source, - flags, - (void *)expected_addrs[j], - len, 0, true, ep[dest_ep]); - cr_assert(check_data(source, d_cqe.buf, len), - "Data mismatch"); - r[dest_ep]++; - } - } while (sends_done < nrecvs || r[dest_ep] < nrecvs); - - /* - * now check for final FI_MULTI_RECV CQE on dest CQ - */ - - do { - ret = fi_cq_read(msg_cq[dest_ep], &d_cqe, 1); - if (d_cqe.flags & FI_MULTI_RECV) { - got_fi_multi_cqe = true; - r[dest_ep]++; - } - } while (got_fi_multi_cqe == false); - - check_cntrs(s, r, s_e, r_e, false); - - free(addr_recvd); - free(expected_addrs); - - dbg_printf("got context events!\n"); -} - -Test(rdm_multi_r, multirecv_send_first, .disabled = false) -{ - xfer_for_each_size(do_multirecv_send_first, 1, BUF_SZ); -} - -Test(rdm_multi_r, multirecv_send_first_retrans, .disabled = false) -{ - inject_enable(); - xfer_for_each_size(do_multirecv_send_first, 1, BUF_SZ); -} - -void do_multirecv_trunc_last(int len) -{ - int i, j, ret; - ssize_t sz; - struct fi_cq_tagged_entry s_cqe, d_cqe; - struct fi_cq_err_entry err_cqe = {0}; - struct iovec iov; - struct fi_msg msg = {0}; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - uint64_t flags; - uint64_t min_multi_recv = len-1; - const int nrecvs = 2; /* 
first one will fit, second will overflow */ - const int dest_ep = NUMEPS-1; - uint64_t *expected_addrs; - bool *addr_recvd, found; - - init_data(source, len, 0xab); - init_data(target, len, 0); - - /* set min multirecv length */ - ret = fi_setopt(&ep[dest_ep]->fid, FI_OPT_ENDPOINT, - FI_OPT_MIN_MULTI_RECV, - (void *)&min_multi_recv, sizeof(size_t)); - cr_assert(ret == FI_SUCCESS, "fi_setopt"); - - iov.iov_base = target; - iov.iov_len = len + min_multi_recv; - - msg.msg_iov = &iov; - msg.desc = (void **)rem_mr; - msg.iov_count = 1; - msg.addr = FI_ADDR_UNSPEC; - msg.context = source; - msg.data = (uint64_t)source; - - addr_recvd = calloc(nrecvs, sizeof(bool)); - cr_assert(addr_recvd); - - expected_addrs = calloc(nrecvs, sizeof(uint64_t)); - cr_assert(expected_addrs); - - for (i = 0; i < nrecvs; i++) { - expected_addrs[i] = (uint64_t)target + - (uint64_t) (i * len); - } - - sz = fi_recvmsg(ep[dest_ep], &msg, FI_MULTI_RECV); - cr_assert_eq(sz, 0); - - /* Send first one... */ - sz = fi_send(ep[0], source, len, loc_mr[0], - gni_addr[dest_ep], target); - cr_assert_eq(sz, 0); - - /* need to progress both CQs simultaneously for rendezvous */ - do { - /* reset cqe */ - s_cqe.op_context = s_cqe.buf = (void *) -1; - s_cqe.flags = s_cqe.len = UINT_MAX; - s_cqe.data = s_cqe.tag = UINT_MAX; - d_cqe.op_context = d_cqe.buf = (void *) -1; - d_cqe.flags = d_cqe.len = UINT_MAX; - d_cqe.data = d_cqe.tag = UINT_MAX; - - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - check_cqe(&s_cqe, target, - (FI_MSG|FI_SEND), - 0, 0, 0, false, ep[0]); - s[0]++; - } - - ret = fi_cq_read(msg_cq[dest_ep], &d_cqe, 1); - if (ret == 1) { - for (j = 0, found = false; j < nrecvs; j++) { - if (expected_addrs[j] == (uint64_t)d_cqe.buf) { - cr_assert(addr_recvd[j] == false, - "address already received"); - addr_recvd[j] = true; - found = true; - break; - } - } - cr_assert(found == true, "Address not found"); - flags = FI_MSG | FI_RECV; fflush(stdout); - check_cqe(&d_cqe, source, - flags, - (void *) 
expected_addrs[j], - len, 0, true, ep[dest_ep]); - cr_assert(check_data(source, d_cqe.buf, len), - "Data mismatch"); - r[dest_ep]++; - } - } while (s[0] != 1 || r[dest_ep] != 1); - - /* ...second one will overflow */ - sz = fi_send(ep[0], source, min_multi_recv+1, loc_mr[0], - gni_addr[dest_ep], target); - cr_assert_eq(sz, 0); - - /* need to progress both CQs simultaneously for rendezvous */ - do { - /* reset cqe */ - s_cqe.op_context = s_cqe.buf = (void *) -1; - s_cqe.flags = s_cqe.len = UINT_MAX; - s_cqe.data = s_cqe.tag = UINT_MAX; - d_cqe.op_context = d_cqe.buf = (void *) -1; - d_cqe.flags = d_cqe.len = UINT_MAX; - d_cqe.data = d_cqe.tag = UINT_MAX; - - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - check_cqe(&s_cqe, target, - (FI_MSG|FI_SEND), - 0, 0, 0, false, ep[0]); - s[0]++; - } - - /* Should return -FI_EAVAIL */ - ret = fi_cq_read(msg_cq[dest_ep], &d_cqe, 1); - if (ret == 1) { - r[dest_ep]++; /* we're counting the buffer release as a receive */ - } - - if (ret == -FI_EAVAIL) { - ret = fi_cq_readerr(msg_cq[dest_ep], &err_cqe, 0); - if (ret == 1) { - cr_assert((uint64_t)err_cqe.op_context == - (uint64_t)source, - "Bad error context"); - cr_assert(err_cqe.flags == (FI_MSG | FI_RECV)); - cr_assert(err_cqe.len == min_multi_recv, - "Bad error len"); - cr_assert(err_cqe.buf == (void *) expected_addrs[1], - "Bad error buf"); - cr_assert(err_cqe.olen == 1, "Bad error olen"); - cr_assert(err_cqe.err == FI_ETRUNC, "Bad error errno"); - cr_assert(err_cqe.prov_errno == FI_ETRUNC, "Bad prov errno"); - cr_assert(err_cqe.err_data == NULL, - "Bad error provider data"); - r_e[dest_ep]++; - } - } - - } while (s[0] != 2 || r_e[dest_ep] != 1 || r[dest_ep] != 2); - - check_cntrs(s, r, s_e, r_e, false); - - free(addr_recvd); - free(expected_addrs); - - dbg_printf("got context events!\n"); -} - -/* - * These two tests should be enabled when multirecv generates errors - * for truncated message (GitHub issue #1119). 
Also, the initial - * message size of 1 below might change depending on whether 0 is a - * valid value for FI_OPT_MIN_MULTI_RECV (Github issue #1120) - */ -Test(rdm_multi_r, multirecv_trunc_last, .disabled = false) -{ - xfer_for_each_size(do_multirecv_trunc_last, 2, BUF_SZ); -} - -Test(rdm_multi_r, multirecv_trunc_last_retrans, .disabled = false) -{ - inject_enable(); - xfer_for_each_size(do_multirecv_trunc_last, 2, BUF_SZ); -} diff --git a/prov/gni/test/rdm_rx_overrun.c b/prov/gni/test/rdm_rx_overrun.c deleted file mode 100644 index eb4da67c8f2..00000000000 --- a/prov/gni/test/rdm_rx_overrun.c +++ /dev/null @@ -1,364 +0,0 @@ -/* - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2020 Triad National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include - -#include "gnix_vc.h" -#include "gnix_cm_nic.h" -#include "gnix_hashtable.h" -#include "gnix_rma.h" - -#include -#include "gnix_rdma_headers.h" -#include "common.h" - -#define NUM_EPS 61 -const int num_msgs = 10; - -/* - * Note that even tho we will use a min RX CQ size of 1, ugni seems to - * have an internal minimum that is around 511, so in order for this - * test to exercise the overrun code, it must send at least 511 - * messages (i.e., NUM_EPS*num_msgs > 511) - */ -const int min_rx_cq_size = 1; - -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fid_fabric *fab; -static struct fid_domain *dom[NUM_EPS]; -static struct fid_ep *ep[NUM_EPS]; -static struct fid_av *av[NUM_EPS]; -static struct fi_info *hints; -static struct fi_info *fi; -static void *ep_name[NUM_EPS]; -static fi_addr_t gni_addr[NUM_EPS]; -static struct fid_cq *msg_cq[NUM_EPS]; -static struct fi_cq_attr cq_attr; - -static int target[NUM_EPS]; -static int source[NUM_EPS]; -static struct fid_mr *rem_mr[NUM_EPS], *loc_mr[NUM_EPS]; -static uint64_t mr_key[NUM_EPS]; - -static int max_eps = NUM_EPS; - -static void __setup(uint32_t version, int mr_mode) -{ - int i, j; - int ret = 0; - struct fi_av_attr attr; - size_t addrlen = 0; - struct fi_gni_ops_domain *gni_domain_ops; - uint32_t rx_cq_size; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = mr_mode; - hints->domain_attr->cq_data_size = 4; - hints->domain_attr->data_progress = FI_PROGRESS_MANUAL; - - hints->mode = 
mode_bits; - - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(version, NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - if (USING_SCALABLE(fi)) { - struct fi_gni_ops_fab *ops; - int in; - - /* nic test opens many nics and exhausts reserved keys */ - in = 256; - - ret = fi_open_ops(&fab->fid, - FI_GNI_FAB_OPS_1, 0, (void **) &ops, NULL); - cr_assert_eq(ret, FI_SUCCESS); - cr_assert(ops); - - ret = ops->set_val(&fab->fid, - GNI_DEFAULT_PROV_REGISTRATION_LIMIT, - &in); - cr_assert_eq(ret, FI_SUCCESS); - } - - memset(&attr, 0, sizeof(attr)); - attr.type = FI_AV_TABLE; - attr.count = NUM_EPS; - - cq_attr.format = FI_CQ_FORMAT_CONTEXT; - cq_attr.size = 1024; - cq_attr.wait_obj = 0; - - for (i = 0; i < NUM_EPS; i++) { - ret = fi_domain(fab, fi, &dom[i], NULL); - cr_assert(!ret, "fi_domain (%d)", i); - - ret = fi_open_ops(&dom[i]->fid, FI_GNI_DOMAIN_OPS_1, 0, - (void **) &gni_domain_ops, NULL); - cr_assert(ret == FI_SUCCESS, "fi_open_ops (%d)", i); - - rx_cq_size = min_rx_cq_size; - - ret = gni_domain_ops->set_val(&dom[i]->fid, GNI_RX_CQ_SIZE, - &rx_cq_size); - cr_assert(ret == FI_SUCCESS, "set_val (%d)", i); - - ret = fi_endpoint(dom[i], fi, &ep[i], NULL); - if (ret != FI_SUCCESS) { - /* ran out of resources */ - max_eps = i; - break; - } - cr_assert(ep[i]); - - ret = fi_av_open(dom[i], &attr, &av[i], NULL); - cr_assert(!ret, "fi_av_open (%d)", i); - - ret = fi_cq_open(dom[i], &cq_attr, &msg_cq[i], 0); - cr_assert(!ret, "fi_cq_open (%d)", i); - - ret = fi_ep_bind(ep[i], &msg_cq[i]->fid, FI_SEND | FI_RECV); - cr_assert(!ret, "fi_ep_bind (%d)", i); - } - - ret = fi_getname(&ep[0]->fid, NULL, &addrlen); - cr_assert_eq(ret, -FI_ETOOSMALL); - cr_assert(addrlen > 0); - - for (i = 0; i < max_eps; i++) { - ep_name[i] = malloc(addrlen); - cr_assert(ep_name[i] != NULL); - ret = fi_getname(&ep[i]->fid, ep_name[i], &addrlen); - cr_assert(ret == FI_SUCCESS); - for (j = 
0; j < max_eps; j++) { - ret = fi_av_insert(av[j], ep_name[i], - 1, &gni_addr[i], 0, NULL); - cr_assert(ret == 1); - } - } - - for (i = 0; i < max_eps; i++) { - int requested_rem_key = (USING_SCALABLE(fi)) ? (i * 2) : 0; - int requested_loc_key = (USING_SCALABLE(fi)) ? (i * 2) + 1 : 0; - - ret = fi_ep_bind(ep[i], &av[i]->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - ret = fi_enable(ep[i]); - cr_assert(!ret, "fi_ep_enable"); - - ret = fi_mr_reg(dom[i], - target, - max_eps*sizeof(int), - FI_RECV, - 0, - requested_rem_key, - 0, - &rem_mr[i], - &target); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom[i], - source, - max_eps*sizeof(int), - FI_SEND, - 0, - requested_loc_key, - 0, - &loc_mr[i], - &source); - cr_assert_eq(ret, 0); - - if (USING_SCALABLE(fi)) { - MR_ENABLE(rem_mr[i], target, max_eps*sizeof(int)); - MR_ENABLE(loc_mr[i], source, max_eps*sizeof(int)); - } - - mr_key[i] = fi_mr_key(rem_mr[i]); - } -} - -static void setup_basic(void) -{ - __setup(fi_version(), GNIX_MR_BASIC); -} - -static void setup_scalable(void) -{ - __setup(fi_version(), GNIX_MR_SCALABLE); -} - -static void teardown(void) -{ - int i; - int ret = 0; - - for (i = 0; i < max_eps; i++) { - fi_close(&loc_mr[i]->fid); - fi_close(&rem_mr[i]->fid); - - ret = fi_close(&ep[i]->fid); - cr_assert(!ret, "failure in closing ep %d.", i); - ret = fi_close(&msg_cq[i]->fid); - cr_assert(!ret, "failure in msg cq %d.", i); - free(ep_name[i]); - - ret = fi_close(&av[i]->fid); - cr_assert(!ret, "failure in closing av %d.", i); - - ret = fi_close(&dom[i]->fid); - cr_assert(!ret, "failure in closing domain %d.", i); - } - - if (max_eps != NUM_EPS) { - /* clean up the last domain */ - ret = fi_close(&dom[max_eps]->fid); - cr_assert(!ret, "failure in closing domain %d.", max_eps); - } - - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); - - fi_freeinfo(fi); - fi_freeinfo(hints); -} - -TestSuite(rdm_rx_overrun_basic, - .init = setup_basic, - .fini = teardown, - .disabled = false); 
-TestSuite(rdm_rx_overrun_scalable, - .init = setup_scalable, - .fini = teardown, - .disabled = false); - -static inline void __all_to_one(void) -{ - int i, j; - int source_done = 0, dest_done = 0; - struct fi_cq_entry s_cqe, d_cqe; - ssize_t sz; - int ctx[max_eps]; - - for (i = 0; i < max_eps; i++) { - source[i] = i; - target[i] = -1; - ctx[i] = -1; - } - - for (i = 1; i < max_eps; i++) { - for (j = 0; j < num_msgs; j++) { - sz = fi_send(ep[i], &source[i], sizeof(int), loc_mr, - gni_addr[0], ctx+i); - cr_assert_eq(sz, 0); - } - } - - do { - for (i = 1; i < max_eps; i++) { - for (j = 0; j < num_msgs; j++) { - if (fi_cq_read(msg_cq[i], &s_cqe, 1) == 1) { - cr_assert(((uint64_t) s_cqe.op_context - >= (uint64_t) ctx) && - ((uint64_t) s_cqe.op_context - <= - (uint64_t) (ctx+max_eps-1))); - source_done += 1; - } - } - } - } while (source_done != num_msgs*(max_eps-1)); - - for (i = 1; i < max_eps; i++) { - for (j = 0; j < num_msgs; j++) { - sz = fi_recv(ep[0], &target[i], sizeof(int), rem_mr, - gni_addr[i], ctx+i); - cr_assert_eq(sz, 0); - } - } - - do { - for (i = 1; i < max_eps; i++) { - for (j = 0; j < num_msgs; j++) { - if (fi_cq_read(msg_cq[0], &d_cqe, 1) == 1) { - cr_assert(((uint64_t) d_cqe.op_context - >= (uint64_t) ctx) && - ((uint64_t) d_cqe.op_context - <= - (uint64_t) (ctx+max_eps-1))); - dest_done += 1; - } - } - } - } while (dest_done != num_msgs*(max_eps-1)); - - - /* good enough error checking (only checks the last send) */ - for (i = 1; i < max_eps; i++) { - cr_assert(target[i] < max_eps); - ctx[target[i]] = target[i]; - } - - for (i = 1; i < max_eps; i++) { - cr_assert(ctx[i] == i); - } - -} - -Test(rdm_rx_overrun_basic, all_to_one) -{ - __all_to_one(); -} - -Test(rdm_rx_overrun_scalable, all_to_one) -{ - __all_to_one(); -} diff --git a/prov/gni/test/rdm_sr.c b/prov/gni/test/rdm_sr.c deleted file mode 100644 index a645dc8ed01..00000000000 --- a/prov/gni/test/rdm_sr.c +++ /dev/null @@ -1,2312 +0,0 @@ -/* - * Copyright (c) 2015-2017 Los Alamos National 
Security, LLC. - * All rights reserved. - * Copyright (c) 2015-2018 Cray Inc. All rights reserved. - * Copyright (c) 2019-2020 Triad National Security, LLC. - * All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gnix_vc.h" -#include "gnix_cm_nic.h" -#include "gnix_hashtable.h" -#include "gnix_rma.h" -#include "gnix_mr.h" -#include "gnix_util.h" -#include "common.h" - -#include -#include "gnix_rdma_headers.h" -#include "common.h" - -#if 1 -#define dbg_printf(...) -#else -#define dbg_printf(...) 
fprintf(stderr, __VA_ARGS__); fflush(stderr) -#endif - -/* - * be careful about API-1.1 setup in rdm_sr_setup_common_eps below - * if you increase NUMEPS beyond 2 - */ -#define NUMEPS 2 - -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fid_fabric *fab; -static struct fid_domain *dom[NUMEPS]; -struct fi_gni_ops_domain *gni_domain_ops[NUMEPS]; -static struct fid_ep *ep[NUMEPS]; -static struct fid_av *av[NUMEPS]; -static void *ep_name[NUMEPS]; -static fi_addr_t gni_addr[NUMEPS]; -static struct fid_cq *msg_cq[NUMEPS]; -static struct fi_info *fi[NUMEPS]; -static struct fi_cq_attr cq_attr; -static const char *cdm_id[NUMEPS] = { "5000", "5001" }; -struct fi_info *hints; -static int using_bnd_ep = 0; -static int dgram_should_fail; -static int eager_auto = 0; -static int peer_src_known = 1; - -#define BUF_SZ (1<<20) -#define BUF_RNDZV (1<<14) -#define IOV_CNT (1<<3) - -static char *target, *target_base; -static char *target2, *target2_base; -static char *source, *source_base; -static char *source2, *source2_base; -static struct iovec *src_iov, *dest_iov, *s_iov, *d_iov; -static char *iov_src_buf, *iov_dest_buf, *iov_src_buf_base, *iov_dest_buf_base; -static char *uc_target; -static char *uc_source; -static struct fid_mr *rem_mr[NUMEPS], *loc_mr[NUMEPS]; -static struct fid_mr *iov_dest_buf_mr[NUMEPS], *iov_src_buf_mr[NUMEPS]; -static uint64_t iov_dest_buf_mr_key[NUMEPS]; -static uint64_t mr_key[NUMEPS]; - -static struct fid_cntr *send_cntr[NUMEPS], *recv_cntr[NUMEPS]; -static struct fi_cntr_attr cntr_attr = { - .events = FI_CNTR_EVENTS_COMP, - .flags = 0 -}; -static uint64_t sends[NUMEPS] = {0}, recvs[NUMEPS] = {0}, - send_errs[NUMEPS] = {0}, recv_errs[NUMEPS] = {0}; - -void rdm_sr_setup_common_eps(void) -{ - int ret = 0, i = 0, j = 0; - struct fi_av_attr attr; - size_t addrlen = 0; - bool is_fi_source = !!(hints->caps & FI_SOURCE); - - memset(&attr, 0, sizeof(attr)); - 
attr.type = FI_AV_MAP; - attr.count = NUMEPS; - - cq_attr.format = FI_CQ_FORMAT_TAGGED; - cq_attr.size = 1024; - cq_attr.wait_obj = 0; - - target_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(target_base); - target = GNIT_ALIGN_BUFFER(char *, target_base); - - target2_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(target2_base); - target2 = GNIT_ALIGN_BUFFER(char *, target2_base); - - dest_iov = malloc(sizeof(struct iovec) * IOV_CNT); - assert(dest_iov); - d_iov = malloc(sizeof(struct iovec) * IOV_CNT); - assert(d_iov); - - source_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(source_base); - source = GNIT_ALIGN_BUFFER(char *, source_base); - - source2_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(source2_base); - source2 = GNIT_ALIGN_BUFFER(char *, source2_base); - - src_iov = malloc(sizeof(struct iovec) * IOV_CNT); - assert(src_iov); - s_iov = malloc(sizeof(struct iovec) * IOV_CNT); - assert(s_iov); - - for (i = 0; i < IOV_CNT; i++) { - src_iov[i].iov_base = malloc(BUF_SZ); - assert(src_iov[i].iov_base != NULL); - - dest_iov[i].iov_base = malloc(BUF_SZ * 3); - assert(dest_iov[i].iov_base != NULL); - } - - iov_src_buf_base = malloc(GNIT_ALIGN_LEN(BUF_SZ * IOV_CNT)); - assert(iov_src_buf_base); - iov_src_buf = GNIT_ALIGN_BUFFER(char *, iov_src_buf_base); - - iov_dest_buf_base = malloc(GNIT_ALIGN_LEN(BUF_SZ * IOV_CNT)); - assert(iov_dest_buf_base); - iov_dest_buf = GNIT_ALIGN_BUFFER(char *, iov_dest_buf_base); - - uc_target = malloc(BUF_SZ); - assert(uc_target); - - uc_source = malloc(BUF_SZ); - assert(uc_source); - - ret = fi_fabric(fi[0]->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - for (i = 0; i < NUMEPS; i++) { - ret = fi_domain(fab, fi[i], dom + i, NULL); - cr_assert(!ret, "fi_domain"); - - ret = fi_open_ops(&dom[i]->fid, FI_GNI_DOMAIN_OPS_1, - 0, (void **) (gni_domain_ops + i), NULL); - - if (eager_auto) - ret = gni_domain_ops[i]->set_val(&dom[i]->fid, - GNI_EAGER_AUTO_PROGRESS, - &eager_auto); - - ret = fi_av_open(dom[i], &attr, av + i, 
NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_endpoint(dom[i], fi[i], ep + i, NULL); - cr_assert(!ret, "fi_endpoint"); - - ret = fi_cq_open(dom[i], &cq_attr, msg_cq + i, 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_ep_bind(ep[i], &msg_cq[i]->fid, FI_SEND | FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_getname(&ep[i]->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - ep_name[i] = malloc(addrlen); - cr_assert(ep_name[i] != NULL); - - ret = fi_getname(&ep[i]->fid, ep_name[i], &addrlen); - cr_assert(ret == FI_SUCCESS); - } - - for (i = 0; i < NUMEPS; i++) { - /* - * To test API-1.1: Reporting of unknown source addresses -- - * only insert addresses into the sender's av - */ - if (is_fi_source && !peer_src_known && i < (NUMEPS / 2)) { - for (j = 0; j < NUMEPS; j++) { - dbg_printf("Only does src EP insertions\n"); - ret = fi_av_insert(av[i], ep_name[j], 1, - &gni_addr[j], - 0, NULL); - cr_assert(ret == 1); - } - } else if (peer_src_known) { - for (j = 0; j < NUMEPS; j++) { - ret = fi_av_insert(av[i], ep_name[j], 1, - &gni_addr[j], - 0, NULL); - cr_assert(ret == 1); - } - } - - ret = fi_ep_bind(ep[i], &av[i]->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[i], &cntr_attr, send_cntr + i, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[i], &send_cntr[i]->fid, FI_SEND); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cntr_open(dom[i], &cntr_attr, recv_cntr + i, 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_ep_bind(ep[i], &recv_cntr[i]->fid, FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_enable(ep[i]); - cr_assert(!ret, "fi_ep_enable"); - - } -} - -void rdm_sr_setup_common(void) -{ - int ret = 0, i = 0, j = 0; - - rdm_sr_setup_common_eps(); - int req_key[4]; - - for (i = 0; i < NUMEPS; i++) { - for (j = 0; j < 4; j++) - req_key[j] = (USING_SCALABLE(fi[i])) ? 
(i * 4) + j : 0; - - ret = fi_mr_reg(dom[i], - target, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - req_key[0], - 0, - rem_mr + i, - &target); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom[i], - source, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - req_key[1], - 0, - loc_mr + i, - &source); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom[i], - iov_dest_buf, - IOV_CNT * BUF_SZ, - FI_REMOTE_WRITE, - 0, - req_key[2], - 0, - iov_dest_buf_mr + i, - &iov_dest_buf); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom[i], - iov_src_buf, - IOV_CNT * BUF_SZ, - FI_REMOTE_WRITE, - 0, - req_key[3], - 0, - iov_src_buf_mr + i, - &iov_src_buf); - cr_assert_eq(ret, 0); - - if (USING_SCALABLE(fi[i])) { - MR_ENABLE(rem_mr[i], - target, - BUF_SZ); - MR_ENABLE(loc_mr[i], - source, - BUF_SZ); - MR_ENABLE(iov_dest_buf_mr[i], - iov_dest_buf, - IOV_CNT * BUF_SZ); - MR_ENABLE(iov_src_buf_mr[i], - iov_src_buf, - IOV_CNT * BUF_SZ); - } - - mr_key[i] = fi_mr_key(rem_mr[i]); - iov_dest_buf_mr_key[i] = fi_mr_key(iov_dest_buf_mr[i]); - } -} - -/* Note: default ep type is FI_EP_RDM (used in rdm_sr_setup) */ -void rdm_sr_setup(bool is_noreg, enum fi_progress pm) -{ - int ret = 0, i = 0; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->domain_attr->cq_data_size = NUMEPS * 2; - hints->domain_attr->control_progress = pm; - hints->domain_attr->data_progress = pm; - hints->mode = mode_bits; - hints->caps = is_noreg ? 
hints->caps : FI_SOURCE | FI_MSG; - hints->fabric_attr->prov_name = strdup("gni"); - - /* Get info about fabric services with the provided hints */ - for (; i < NUMEPS; i++) { - ret = fi_getinfo(fi_version(), NULL, 0, 0, hints, &fi[i]); - cr_assert(!ret, "fi_getinfo"); - } - - if (is_noreg) - rdm_sr_setup_common_eps(); - else - rdm_sr_setup_common(); - - dgram_should_fail = 0; -} - -void dgram_sr_setup(uint32_t version, bool is_noreg, enum fi_progress pm) -{ - int ret = 0, i = 0; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->domain_attr->cq_data_size = NUMEPS * 2; - hints->domain_attr->control_progress = pm; - hints->domain_attr->data_progress = pm; - hints->mode = mode_bits; - hints->caps = is_noreg ? hints->caps : FI_SOURCE | FI_MSG; - if (FI_VERSION_GE(version, FI_VERSION(1, 5))) { - hints->caps |= FI_SOURCE_ERR; - } - hints->fabric_attr->prov_name = strdup("gni"); - hints->ep_attr->type = FI_EP_DGRAM; - - /* Get info about fabric services with the provided hints */ - for (; i < NUMEPS; i++) { - ret = fi_getinfo(version, NULL, 0, 0, hints, &fi[i]); - cr_assert(!ret, "fi_getinfo"); - } - - if (is_noreg) - rdm_sr_setup_common_eps(); - else - rdm_sr_setup_common(); -} - -static void rdm_sr_setup_reg_eager_auto(void) -{ - eager_auto = 1; - rdm_sr_setup(false, FI_PROGRESS_AUTO); -} - -static void rdm_sr_setup_reg(void) -{ - eager_auto = 0; - rdm_sr_setup(false, FI_PROGRESS_AUTO); -} - -static void dgram_sr_setup_reg(void) -{ - eager_auto = 0; - dgram_sr_setup(fi_version(), false, FI_PROGRESS_AUTO); -} - -static void dgram_sr_setup_reg_src_unk_api_version_old(void) -{ - eager_auto = 0; - peer_src_known = 0; - dgram_sr_setup(FI_VERSION(1, 0), false, FI_PROGRESS_AUTO); -} - -static void dgram_sr_setup_reg_src_unk_api_version_cur(void) -{ - eager_auto = 0; - peer_src_known = 0; - dgram_sr_setup(fi_version(), false, FI_PROGRESS_AUTO); -} - -static void rdm_sr_setup_noreg(void) { - 
eager_auto = 0; - rdm_sr_setup(true, FI_PROGRESS_AUTO); -} - -void rdm_sr_bnd_ep_setup(void) -{ - int ret = 0, i = 0; - char my_hostname[HOST_NAME_MAX]; - - eager_auto = 0; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->domain_attr->cq_data_size = NUMEPS * 2; - hints->mode = mode_bits; - hints->fabric_attr->prov_name = strdup("gni"); - hints->caps = FI_SOURCE | FI_MSG; - - ret = gethostname(my_hostname, sizeof(my_hostname)); - cr_assert(!ret, "gethostname"); - - for (; i < NUMEPS; i++) { - ret = fi_getinfo(fi_version(), my_hostname, - cdm_id[i], 0, hints, fi + i); - cr_assert(!ret, "fi_getinfo"); - } - - using_bnd_ep = 1; - - rdm_sr_setup_common(); -} - -static void rdm_sr_teardown_common(bool unreg) -{ - int ret = 0, i = 0; - - for (; i < NUMEPS; i++) { - fi_close(&recv_cntr[i]->fid); - fi_close(&send_cntr[i]->fid); - - if (unreg) { - fi_close(&loc_mr[i]->fid); - fi_close(&rem_mr[i]->fid); - fi_close(&iov_dest_buf_mr[i]->fid); - fi_close(&iov_src_buf_mr[i]->fid); - } - - ret = fi_close(&ep[i]->fid); - cr_assert(!ret, "failure in closing ep."); - - ret = fi_close(&msg_cq[i]->fid); - cr_assert(!ret, "failure in send cq."); - - ret = fi_close(&av[i]->fid); - cr_assert(!ret, "failure in closing av."); - - ret = fi_close(&dom[i]->fid); - cr_assert(!ret, "failure in closing domain."); - - fi_freeinfo(fi[i]); - - free(ep_name[i]); - } - - free(uc_source); - free(uc_target); - - free(iov_src_buf_base); - free(iov_dest_buf_base); - free(target_base); - free(source_base); - - for (i = 0; i < IOV_CNT; i++) { - free(src_iov[i].iov_base); - free(dest_iov[i].iov_base); - } - - free(src_iov); - free(dest_iov); - free(s_iov); - free(d_iov); - - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); - - fi_freeinfo(hints); -} - -static void rdm_sr_teardown(void) -{ - rdm_sr_teardown_common(true); -} - -static void rdm_sr_teardown_nounreg(void) -{ - rdm_sr_teardown_common(false); 
-} - -void rdm_sr_init_data(char *buf, int len, char seed) -{ - int i; - - for (i = 0; i < len; i++) { - buf[i] = seed++; - } -} - -static inline int rdm_sr_check_data(char *buf1, char *buf2, int len) -{ - int i; - - for (i = 0; i < len; i++) { - if (buf1[i] != buf2[i]) { - printf("data mismatch, elem: %d, exp: %hhx, act: %hhx\n" - , i, buf1[i], buf2[i]); - return 0; - } - } - - return 1; -} - -static inline int rdm_sr_check_iov_data(struct iovec *iov_buf, char *buf, - size_t cnt, size_t buf_len) -{ - size_t i, j, cum_len = 0, len, iov_idx; - - for (i = 0; i < cnt; i++) { - cum_len += iov_buf[i].iov_len; - } - - len = MIN(cum_len, buf_len); - - cum_len = iov_buf[0].iov_len; - - for (i = j = iov_idx = 0; j < len; j++, iov_idx++) { - - if (j == cum_len) { - i++, iov_idx = 0; - cum_len += iov_buf[i].iov_len; - - if (i >= cnt) - break; - } - - if (((char *)iov_buf[i].iov_base)[iov_idx] != buf[j]) { - printf("data mismatch, iov_index: %lu, elem: %lu, " - "iov_buf_len: %lu, " - " iov_buf: %hhx, buf: %hhx\n", i, j, iov_buf[i].iov_len, - ((char *)iov_buf[i].iov_base)[iov_idx], - buf[j]); - return 0; - } - } - - return 1; -} - -void rdm_sr_xfer_for_each_size(void (*xfer)(int len), int slen, int elen) -{ - int i; - - for (i = slen; i <= elen; i *= 2) { - xfer(i); - } -} - -static inline void rdm_sr_check_err_cqe(struct fi_cq_err_entry *cqe, void *ctx, - uint64_t flags, void *addr, size_t len, - uint64_t data, bool buf_is_non_null) -{ - cr_assert(cqe->op_context == ctx, "error CQE Context mismatch"); - cr_assert(cqe->flags == flags, "error CQE flags mismatch"); - - if (flags & FI_RECV) { - if (cqe->len != len) { - cr_assert(cqe->olen == (len - cqe->len), "error CQE " - "olen mismatch"); - } else { - cr_assert(cqe->olen == 0, "error CQE olen mismatch"); - } - - if (buf_is_non_null) - cr_assert(cqe->buf == addr, "error CQE address " - "mismatch"); - else - cr_assert(cqe->buf == NULL, "error CQE address " - "mismatch"); - - if (flags & FI_REMOTE_CQ_DATA) - cr_assert(cqe->data == 
data, "error CQE data mismatch"); - } else { - cr_assert(cqe->len == 0, "Invalid error CQE length"); - cr_assert(cqe->buf == 0, "Invalid error CQE address"); - cr_assert(cqe->data == 0, "Invalid error CQE data"); - } - - cr_assert(cqe->tag == 0, "Invalid error CQE tag"); - cr_assert(cqe->err > 0, "Invalid error CQE err code"); - - /* - * Note: cqe->prov_errno and cqe->err_data are not necessarily set -- - * see the fi_cq_readerr man page - */ -} - -static inline void rdm_sr_check_cqe(struct fi_cq_tagged_entry *cqe, void *ctx, - uint64_t flags, void *addr, size_t len, - uint64_t data, bool buf_is_non_null, - struct fid_ep *fid_ep) -{ - struct gnix_fid_ep *gnix_ep = get_gnix_ep(fid_ep); - - cr_assert(cqe->op_context == ctx, "CQE Context mismatch"); - cr_assert(cqe->flags == flags, "CQE flags mismatch"); - - if (flags & FI_RECV) { - cr_assert(cqe->len == len, "CQE length mismatch"); - - if (buf_is_non_null) - cr_assert(cqe->buf == addr, "CQE address mismatch"); - else - cr_assert(cqe->buf == NULL, "CQE address mismatch"); - - /* TODO: Remove GNIX_ALLOW_FI_REMOTE_CQ_DATA and only check flags for FI_RMA_EVENT */ - if (GNIX_ALLOW_FI_REMOTE_CQ_DATA(flags, gnix_ep->caps)) - cr_assert(cqe->data == data, "CQE data mismatch"); - } else { - cr_assert(cqe->len == 0, "Invalid CQE length"); - cr_assert(cqe->buf == 0, "Invalid CQE address"); - cr_assert(cqe->data == 0, "Invalid CQE data"); - } - - cr_assert(cqe->tag == 0, "Invalid CQE tag"); -} - -static inline void rdm_sr_check_cntrs(uint64_t s[], uint64_t r[], - uint64_t s_e[], uint64_t r_e[], - bool need_to_spin) -{ - int i = 0; - for (; i < NUMEPS; i++) { - sends[i] += s[i]; - recvs[i] += r[i]; - send_errs[i] += s_e[i]; - recv_errs[i] += r_e[i]; - - if (need_to_spin) { - while (fi_cntr_read(send_cntr[i]) != sends[i]) { - pthread_yield(); - } - } - - cr_assert(fi_cntr_read(send_cntr[i]) == sends[i], - "Bad send count"); - - if (need_to_spin) { - while (fi_cntr_read(recv_cntr[i]) != recvs[i]) { - pthread_yield(); - } - } - - 
cr_assert(fi_cntr_read(recv_cntr[i]) == recvs[i], - "Bad recv count"); - - if (need_to_spin) { - while (fi_cntr_readerr(send_cntr[i]) != send_errs[i]) { - pthread_yield(); - } - } - - cr_assert(fi_cntr_readerr(send_cntr[i]) == send_errs[i], - "Bad send err count"); - - if (need_to_spin) { - while (fi_cntr_readerr(recv_cntr[i]) != recv_errs[i]) { - pthread_yield(); - } - } - cr_assert(fi_cntr_readerr(recv_cntr[i]) == recv_errs[i], - "Bad recv err count"); - } -} - -void rdm_sr_err_inject_enable(void) -{ - int ret, err_count_val = 1, i = 0; - - for (; i < NUMEPS; i++) { - ret = gni_domain_ops[i]->set_val(&dom[i]->fid, - GNI_ERR_INJECT_COUNT, - &err_count_val); - cr_assert(!ret, "setval(GNI_ERR_INJECT_COUNT)"); - } -} - -void rdm_sr_lazy_dereg_disable(void) -{ - int ret, lazy_dereg_val = 0, i = 0; - - for (; i < NUMEPS; i++) { - ret = gni_domain_ops[i]->set_val(&dom[i]->fid, - GNI_MR_CACHE_LAZY_DEREG, - &lazy_dereg_val); - cr_assert(!ret, "setval(GNI_MR_CACHE_LAZY_DEREG)"); - } -} - -static inline struct fi_cq_err_entry rdm_sr_check_canceled(struct fid_cq *cq) -{ - int ret; - struct fi_cq_err_entry ee; - struct gnix_ep_name err_ep_name, ep_name_test; - struct gnix_fid_cq *cq_priv; - size_t name_size; - fi_addr_t fi_addr; - - /*application provided error_data buffer and length*/ - ee.err_data_size = sizeof(struct gnix_ep_name); - ee.err_data = &err_ep_name; - - fi_cq_readerr(cq, &ee, 0); - - /* - * TODO: Check for api version once we figure out how to. - * Note: The address of err_ep_name should be the same as ee.err_data - * when using api version >= 1.5. 
- */ - cq_priv = container_of(cq, struct gnix_fid_cq, cq_fid); - if (FI_VERSION_LT(cq_priv->domain->fabric->fab_fid.api_version, FI_VERSION(1, 5))) { - cr_assert(ee.err_data != &err_ep_name, "Invalid err_data ptr"); - } else { - cr_assert(ee.err_data == &err_ep_name, "Invalid err_data ptr"); - } - - /* To test API-1.1: Reporting of unknown source addresses */ - if ((hints->caps & FI_SOURCE) && ee.err == FI_EADDRNOTAVAIL) { - if (FI_VERSION_GE(cq_priv->domain->fabric->fab_fid.api_version, - FI_VERSION(1, 5))) { - cr_assert(ee.err_data_size == sizeof(struct gnix_ep_name), - "Invalid err_data_size returned"); - ret = fi_av_insert(av[1], &err_ep_name, 1, &fi_addr, - 0, NULL); - cr_assert(ret == 1, "fi_av_insert failed"); - name_size = sizeof(ep_name_test); - ret = fi_av_lookup(av[1], fi_addr, - &ep_name_test, &name_size); - cr_assert(ret == FI_SUCCESS, "fi_av_lookup failed"); - cr_assert(name_size == sizeof(ep_name_test)); - cr_assert(strncmp((char *)&ep_name_test, - (char *)&err_ep_name, - sizeof(ep_name_test)) == 0); - } - } - return ee; -} - -/******************************************************************************* - * Test MSG functions - ******************************************************************************/ - -TestSuite(rdm_sr_eager_auto, - .init = rdm_sr_setup_reg_eager_auto, - .fini = rdm_sr_teardown, - .disabled = false); -TestSuite(rdm_sr, - .init = rdm_sr_setup_reg, - .fini = rdm_sr_teardown, - .disabled = false); - -TestSuite(dgram_sr, - .init = dgram_sr_setup_reg, - .fini = rdm_sr_teardown, - .disabled = false); - -TestSuite(dgram_sr_src_unk_api_version_old, - .init = dgram_sr_setup_reg_src_unk_api_version_old, - .fini = rdm_sr_teardown, .disabled = false); - -TestSuite(dgram_sr_src_unk_api_version_cur, - .init = dgram_sr_setup_reg_src_unk_api_version_cur, - .fini = rdm_sr_teardown, .disabled = false); - -TestSuite(rdm_sr_noreg, - .init = rdm_sr_setup_noreg, - .fini = rdm_sr_teardown_nounreg, - .disabled = false); - -TestSuite(rdm_sr_bnd_ep, 
- .init = rdm_sr_bnd_ep_setup, - .fini = rdm_sr_teardown, - .disabled = false); - -/* This tests cases where the head and tail length is greater or equal to the - * receive buffer length. - */ -TestSuite(rdm_sr_alignment_edge, - .init = rdm_sr_setup_reg, - .fini = rdm_sr_teardown, - .disabled = true); - -/* - * ssize_t fi_send(struct fid_ep *ep, void *buf, size_t len, - * void *desc, fi_addr_t dest_addr, void *context); - * - * ssize_t fi_recv(struct fid_ep *ep, void * buf, size_t len, - * void *desc, fi_addr_t src_addr, void *context); - */ -void do_send(int len) -{ - int ret; - int source_done = 0, dest_done = 0; - int scanceled = 0, dcanceled = 0, daddrnotavail = 0; - struct fi_cq_tagged_entry s_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry d_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_err_entry d_err_cqe; - struct fi_cq_err_entry s_err_cqe; - - ssize_t sz; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - memset(&d_err_cqe, -1, sizeof(struct fi_cq_err_entry)); - memset(&s_err_cqe, -1, sizeof(struct fi_cq_err_entry)); - - rdm_sr_init_data(source, len, 0xab); - rdm_sr_init_data(target, len, 0); - - sz = fi_send(ep[0], source, len, loc_mr[0], gni_addr[1], target); - cr_assert_eq(sz, 0); - - sz = fi_recv(ep[1], target, len, rem_mr[1], gni_addr[0], source); - cr_assert_eq(sz, 0); - - /* need to progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - if (ret == -FI_EAVAIL) { - s_err_cqe = rdm_sr_check_canceled(msg_cq[0]); - if (s_err_cqe.err == FI_ECANCELED) - scanceled = 1; - } - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - if (ret == 1) { - dest_done = 1; - } - if (ret == -FI_EAVAIL) { - d_err_cqe = rdm_sr_check_canceled(msg_cq[1]); - if (d_err_cqe.err == FI_ECANCELED) - dcanceled = 1; - else if (d_err_cqe.err == 
FI_EADDRNOTAVAIL && - !peer_src_known) - daddrnotavail = 1; - } - } while (!((source_done || scanceled) && - (dest_done || dcanceled || daddrnotavail))); - - /* no further checking needed */ - if (dgram_should_fail && (scanceled || dcanceled)) - return; - - if (daddrnotavail || dcanceled) - rdm_sr_check_err_cqe(&d_err_cqe, source, (FI_MSG|FI_RECV), - target, len, 0, false); - else - rdm_sr_check_cqe(&d_cqe, source, (FI_MSG|FI_RECV), target, len, - 0, false, ep[1]); - - if (scanceled) - rdm_sr_check_err_cqe(&s_err_cqe, target, (FI_MSG|FI_SEND), 0, - 0, 0, false); - else - rdm_sr_check_cqe(&s_cqe, target, (FI_MSG|FI_SEND), 0, 0, 0, - false, ep[0]); - - s[0] = 1; r[1] = 1; - rdm_sr_check_cntrs(s, r, s_e, r_e, false); - - dbg_printf("got context events!\n"); - - cr_assert(rdm_sr_check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_sr, send) -{ - rdm_sr_xfer_for_each_size(do_send, 1, BUF_SZ); -} - -Test(rdm_sr, send_retrans) -{ - rdm_sr_err_inject_enable(); - rdm_sr_xfer_for_each_size(do_send, 1, BUF_SZ); -} - -Test(dgram_sr, send) -{ - rdm_sr_xfer_for_each_size(do_send, 1, BUF_SZ); -} - -Test(dgram_sr_src_unk_api_version_old, send) -{ - rdm_sr_xfer_for_each_size(do_send, 1, 1); -} - -Test(dgram_sr_src_unk_api_version_cur, send) -{ - rdm_sr_xfer_for_each_size(do_send, 1, 1); -} - -Test(dgram_sr, send_retrans) -{ - dgram_should_fail = 1; - rdm_sr_err_inject_enable(); - rdm_sr_xfer_for_each_size(do_send, BUF_RNDZV, BUF_SZ); -} - -/* -ssize_t fi_sendv(struct fid_ep *ep, const struct iovec *iov, void **desc, - size_t count, fi_addr_t dest_addr, void *context); -*/ -void do_sendv(int len) -{ - int i, ret, iov_cnt; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry s_cqe, d_cqe; - ssize_t sz; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - sz = fi_sendv(ep[0], src_iov, NULL, 0, gni_addr[1], iov_dest_buf); - cr_assert_eq(sz, -FI_EINVAL); - - for (iov_cnt = 1; iov_cnt <= IOV_CNT; iov_cnt++) { - for (i 
= 0; i < iov_cnt; i++) { - rdm_sr_init_data(src_iov[i].iov_base, len, 0x25); - src_iov[i].iov_len = len; - } - rdm_sr_init_data(iov_dest_buf, len * iov_cnt, 0); - - /* - * TODO: Register src_iov and dest_iov. - * Using NULL descriptor for now so that _gnix_send auto registers - * the buffers for rndzv messages. - */ - sz = fi_sendv(ep[0], src_iov, NULL, iov_cnt, gni_addr[1], iov_dest_buf); - cr_assert_eq(sz, 0); - - sz = fi_recv(ep[1], iov_dest_buf, len * iov_cnt, iov_dest_buf_mr[1], - gni_addr[0], src_iov); - cr_assert_eq(sz, 0); - - /* reset cqe */ - s_cqe.op_context = s_cqe.buf = (void *) -1; - s_cqe.flags = s_cqe.len = s_cqe.data = s_cqe.tag = UINT_MAX; - d_cqe.op_context = d_cqe.buf = (void *) -1; - d_cqe.flags = d_cqe.len = d_cqe.data = d_cqe.tag = UINT_MAX; - - /* need to progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - if (ret == 1) { - dest_done = 1; - } - } while (!(source_done && dest_done)); - - rdm_sr_check_cqe(&s_cqe, iov_dest_buf, (FI_MSG|FI_SEND), 0, 0, 0, - false, ep[0]); - rdm_sr_check_cqe(&d_cqe, src_iov, (FI_MSG|FI_RECV), iov_dest_buf, - len * iov_cnt, 0, false, ep[1]); - - s[0] = 1; r[1] = 1; - rdm_sr_check_cntrs(s, r, s_e, r_e, false); - - dbg_printf("got context events!\n"); - - cr_assert(rdm_sr_check_iov_data(src_iov, iov_dest_buf, iov_cnt, len * iov_cnt), - "Data mismatch"); - source_done = dest_done = 0; - } -} - -Test(rdm_sr, sendv) -{ - rdm_sr_xfer_for_each_size(do_sendv, 1, BUF_SZ); -} - -Test(rdm_sr, sendv_retrans) -{ - rdm_sr_err_inject_enable(); - rdm_sr_xfer_for_each_size(do_sendv, 1, BUF_SZ); -} - -Test(rdm_sr, bug_1390) -{ - ssize_t sz; - int i, iov_cnt; - int len = 4096; - void *mr_descs[IOV_CNT] = {NULL}; - - for (iov_cnt = 1; iov_cnt <= IOV_CNT; iov_cnt++) { - for (i = 0; i < iov_cnt; i++) { - rdm_sr_init_data(src_iov[i].iov_base, len, 0x25); - src_iov[i].iov_len = len; - } - 
rdm_sr_init_data(iov_dest_buf, len * iov_cnt, 0); - } - - sz = fi_sendv(ep[0], src_iov, (void **) &mr_descs, - 4, gni_addr[1], iov_dest_buf); - cr_assert_eq(sz, -FI_EINVAL); -} - -/* -ssize_t fi_sendmsg(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags); -*/ -void do_sendmsg(int len) -{ - int ret; - ssize_t sz; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry s_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry d_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_msg msg; - struct iovec iov; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - iov.iov_base = source; - iov.iov_len = len; - - msg.msg_iov = &iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = gni_addr[1]; - msg.context = target; - msg.data = (uint64_t)target; - - rdm_sr_init_data(source, len, 0xef); - rdm_sr_init_data(target, len, 0); - - sz = fi_sendmsg(ep[0], &msg, 0); - cr_assert_eq(sz, 0); - - sz = fi_recv(ep[1], target, len, rem_mr[0], gni_addr[0], source); - cr_assert_eq(sz, 0); - - /* need to progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - if (ret == 1) { - dest_done = 1; - } - } while (!(source_done && dest_done)); - - rdm_sr_check_cqe(&s_cqe, target, (FI_MSG|FI_SEND), 0, 0, 0, false, - ep[0]); - rdm_sr_check_cqe(&d_cqe, source, (FI_MSG|FI_RECV), target, len, 0, - false, ep[1]); - - s[0] = 1; r[1] = 1; - rdm_sr_check_cntrs(s, r, s_e, r_e, false); - - dbg_printf("got context events!\n"); - - cr_assert(rdm_sr_check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_sr, sendmsg) -{ - rdm_sr_xfer_for_each_size(do_sendmsg, 1, BUF_SZ); -} - -Test(rdm_sr, sendmsg_retrans) -{ - rdm_sr_err_inject_enable(); - rdm_sr_xfer_for_each_size(do_sendmsg, 1, BUF_SZ); -} - -/* -ssize_t 
fi_sendmsg(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags); -*/ - -void do_sendmsgdata(int len) -{ - int ret; - ssize_t sz; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry s_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry d_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_msg msg; - struct iovec iov; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - iov.iov_base = source; - iov.iov_len = len; - - msg.msg_iov = &iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = gni_addr[1]; - msg.context = target; - msg.data = (uint64_t)source; - - rdm_sr_init_data(source, len, 0xef); - rdm_sr_init_data(target, len, 0); - - sz = fi_sendmsg(ep[0], &msg, FI_REMOTE_CQ_DATA); - cr_assert_eq(sz, 0); - - sz = fi_recv(ep[1], target, len, rem_mr[0], gni_addr[0], source); - cr_assert_eq(sz, 0); - - /* need to progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - if (ret == 1) { - dest_done = 1; - } - } while (!(source_done && dest_done)); - - rdm_sr_check_cqe(&s_cqe, target, (FI_MSG|FI_SEND), 0, 0, 0, false, ep[0]); - rdm_sr_check_cqe(&d_cqe, source, - (FI_MSG|FI_RECV|FI_REMOTE_CQ_DATA), - target, len, (uint64_t)source, false, ep[1]); - - s[0] = 1; r[1] = 1; - rdm_sr_check_cntrs(s, r, s_e, r_e, false); - - dbg_printf("got context events!\n"); - - cr_assert(rdm_sr_check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_sr, sendmsgdata) -{ - rdm_sr_xfer_for_each_size(do_sendmsgdata, 1, BUF_SZ); -} - -Test(rdm_sr, sendmsgdata_retrans) -{ - rdm_sr_err_inject_enable(); - rdm_sr_xfer_for_each_size(do_sendmsgdata, 1, BUF_SZ); -} - -/* -ssize_t fi_inject(struct fid_ep *ep, void *buf, size_t len, - fi_addr_t dest_addr); -*/ -#define INJECT_SIZE 64 -void do_inject(int 
len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - static gnix_mr_cache_t *cache; - struct gnix_fid_ep *ep_priv; - int already_registered = 0; - - rdm_sr_init_data(source, len, 0x23); - rdm_sr_init_data(target, len, 0); - - if (!USING_SCALABLE(fi[0])) { - ep_priv = container_of(ep[0], struct gnix_fid_ep, ep_fid); - cache = GET_DOMAIN_RW_CACHE(ep_priv->domain); - cr_assert(cache != NULL); - already_registered = ofi_atomic_get32(&cache->inuse.elements); - } - - sz = fi_inject(ep[0], source, len, gni_addr[1]); - cr_assert_eq(sz, 0); - - if (!USING_SCALABLE(fi[0])) { - /* - * shouldn't have registered the source buffer, - * trust but verify - */ - cr_assert(ofi_atomic_get32(&cache->inuse.elements) - == already_registered); - } - - sz = fi_recv(ep[1], target, len, rem_mr[1], gni_addr[0], source); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(msg_cq[1], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - /* Manually progress connection to domain 1 */ - fi_cq_read(msg_cq[0], &cqe, 1); - } - - cr_assert_eq(ret, 1); - rdm_sr_check_cqe(&cqe, source, (FI_MSG|FI_RECV), - target, len, (uint64_t)source, false, ep[1]); - - dbg_printf("got recv context event!\n"); - - /* do progress until send counter is updated */ - while (fi_cntr_read(send_cntr[0]) < 1) { - pthread_yield(); - } - s[0] = 1; r[1] = 1; - rdm_sr_check_cntrs(s, r, s_e, r_e, true); - - /* make sure inject does not generate a send completion */ - cr_assert_eq(fi_cq_read(msg_cq[0], &cqe, 1), -FI_EAGAIN); - - cr_assert(rdm_sr_check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_sr, inject, .disabled = false) -{ - rdm_sr_xfer_for_each_size(do_inject, 1, INJECT_SIZE); -} - -/* - * this test attempts to demonstrate issue ofi-cray/libfabric-cray#559. - * For domains with control_progress AUTO, this test should not hang. 
- */ -Test(rdm_sr, inject_progress) -{ - int ret, len = 64; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - rdm_sr_init_data(source, len, 0x23); - rdm_sr_init_data(target, len, 0); - - sz = fi_inject(ep[0], source, len, gni_addr[1]); - cr_assert_eq(sz, 0); - - sz = fi_recv(ep[1], target, len, rem_mr[1], gni_addr[0], source); - cr_assert_eq(sz, 0); - - /* - * do progress until send counter is updated. - * This works because we have FI_PROGRESS_AUTO for control progress - */ - while (fi_cntr_read(send_cntr[0]) < 1) { - pthread_yield(); - } - - while ((ret = fi_cq_read(msg_cq[1], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - rdm_sr_check_cqe(&cqe, source, (FI_MSG|FI_RECV), - target, len, (uint64_t)source, false, ep[1]); - - dbg_printf("got recv context event!\n"); - - s[0] = 1; r[1] = 1; - rdm_sr_check_cntrs(s, r, s_e, r_e, true); - - /* make sure inject does not generate a send competion */ - cr_assert_eq(fi_cq_read(msg_cq[0], &cqe, 1), -FI_EAGAIN); - - cr_assert(rdm_sr_check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_sr, inject_retrans) -{ - rdm_sr_err_inject_enable(); - rdm_sr_xfer_for_each_size(do_inject, 1, INJECT_SIZE); -} - -void do_senddata_eager_auto(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry s_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry d_cqe[2]; - struct fi_msg msg; - struct iovec iov; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - rdm_sr_init_data(source, len, 0xab); - rdm_sr_init_data(source2, len, 0xdc); - rdm_sr_init_data(target, len, 0); - rdm_sr_init_data(target2, len, 1); - - iov.iov_base = source2; - iov.iov_len = len; - - msg.msg_iov = &iov; - msg.desc = NULL; - msg.iov_count = 1; - msg.addr = 
gni_addr[1]; - msg.context = target; - msg.data = (uint64_t)target; - - sz = fi_sendmsg(ep[0], &msg, FI_REMOTE_CQ_DATA); - cr_assert_eq(sz, 0); - - iov.iov_base = source; - iov.iov_len = len; - msg.desc = (void **)loc_mr; - - sz = fi_sendmsg(ep[0], &msg, FI_REMOTE_CQ_DATA | FI_FENCE); - cr_assert_eq(sz, 0); - - sz = fi_recv(ep[1], target2, len, NULL, gni_addr[0], source2); - cr_assert_eq(sz, 0); - - sz = fi_recv(ep[1], target, len, rem_mr[0], gni_addr[0], source); - cr_assert_eq(sz, 0); - - /* Wait for auto-progress threads to do all the work. */ - sleep(1); - - /* If progress works, events should be ready right away. */ - ret = fi_cq_read(msg_cq[1], &d_cqe, 2); - cr_assert_eq(ret, 2); - - rdm_sr_check_cqe(&d_cqe[0], source2, (FI_MSG|FI_RECV|FI_REMOTE_CQ_DATA), - target, len, (uint64_t)target, false, ep[1]); - rdm_sr_check_cqe(&d_cqe[1], source, (FI_MSG|FI_RECV|FI_REMOTE_CQ_DATA), - target, len, (uint64_t)target, false, ep[1]); - - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - cr_assert_eq(ret, 1); - - rdm_sr_check_cqe(&s_cqe, target, (FI_MSG|FI_SEND), 0, 0, 0, false, - ep[0]); - - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - cr_assert_eq(ret, 1); - - rdm_sr_check_cqe(&s_cqe, target, (FI_MSG|FI_SEND), 0, 0, 0, false, - ep[0]); - - s[0] = 2; r[1] = 2; - rdm_sr_check_cntrs(s, r, s_e, r_e, false); - - dbg_printf("got context events!\n"); - - cr_assert(rdm_sr_check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_sr_eager_auto, senddata_eager_auto) -{ - /* FIXME intermittent test failures */ - cr_skip_test("intermittent test failures"); - - /* Try eager and rndzv sizes */ - do_senddata_eager_auto(1); - do_senddata_eager_auto(1024); - do_senddata_eager_auto(BUF_SZ); -} - -/* -ssize_t fi_senddata(struct fid_ep *ep, void *buf, size_t len, void *desc, - uint64_t data, fi_addr_t dest_addr, void *context); -*/ -void do_senddata(int len) -{ - int ret; - ssize_t sz; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry s_cqe = { (void *) -1, UINT_MAX, UINT_MAX, 
- (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry d_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - rdm_sr_init_data(source, len, 0xab); - rdm_sr_init_data(target, len, 0); - - sz = fi_senddata(ep[0], source, len, loc_mr[0], (uint64_t)source, - gni_addr[1], target); - cr_assert_eq(sz, 0); - - sz = fi_recv(ep[1], target, len, rem_mr[0], gni_addr[0], source); - cr_assert_eq(sz, 0); - - /* need to progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - if (ret == 1) { - dest_done = 1; - } - } while (!(source_done && dest_done)); - - rdm_sr_check_cqe(&s_cqe, target, (FI_MSG|FI_SEND), 0, 0, 0, false, ep[0]); - rdm_sr_check_cqe(&d_cqe, source, - (FI_MSG|FI_RECV|FI_REMOTE_CQ_DATA), - target, len, (uint64_t)source, false, ep[1]); - - s[0] = 1; r[1] = 1; - rdm_sr_check_cntrs(s, r, s_e, r_e, false); - - dbg_printf("got context events!\n"); - - cr_assert(rdm_sr_check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_sr, senddata) -{ - rdm_sr_xfer_for_each_size(do_senddata, 1, BUF_SZ); -} - -Test(rdm_sr, senddata_retrans) -{ - rdm_sr_err_inject_enable(); - rdm_sr_xfer_for_each_size(do_senddata, 1, BUF_SZ); -} - -/* -ssize_t fi_injectdata(struct fid_ep *ep, const void *buf, size_t len, - uint64_t data, fi_addr_t dest_addr); -*/ -void do_injectdata(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - rdm_sr_init_data(source, len, 0xab); - rdm_sr_init_data(target, len, 0); - - sz = fi_injectdata(ep[0], source, len, (uint64_t)source, gni_addr[1]); - cr_assert_eq(sz, 0); - - sz = fi_recv(ep[1], target, len, rem_mr[0], 
gni_addr[0], source); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(msg_cq[1], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - /* Manually progress connection to domain 1 */ - fi_cq_read(msg_cq[0], &cqe, 1); - } - - rdm_sr_check_cqe(&cqe, source, (FI_MSG|FI_RECV|FI_REMOTE_CQ_DATA), - target, len, (uint64_t)source, false, ep[1]); - - dbg_printf("got recv context event!\n"); - - /* don't progress until send counter is updated */ - while (fi_cntr_read(send_cntr[0]) < 1) { - pthread_yield(); - } - - s[0] = 1; r[1] = 1; - rdm_sr_check_cntrs(s, r, s_e, r_e, true); - - /* make sure inject does not generate a send competion */ - cr_assert_eq(fi_cq_read(msg_cq[0], &cqe, 1), -FI_EAGAIN); - - cr_assert(rdm_sr_check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_sr, injectdata, .disabled = false) -{ - rdm_sr_xfer_for_each_size(do_injectdata, 1, INJECT_SIZE); -} - -Test(rdm_sr, injectdata_retrans, .disabled = false) -{ - rdm_sr_err_inject_enable(); - rdm_sr_xfer_for_each_size(do_injectdata, 1, INJECT_SIZE); -} - -/* -ssize_t (*recvv)(struct fid_ep *ep, const struct iovec *iov, void **desc, - size_t count, fi_addr_t src_addr, void *context); -*/ -void do_recvv(int len) -{ - int i, ret, iov_cnt; - ssize_t sz; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry s_cqe, d_cqe; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - sz = fi_recvv(ep[1], NULL, NULL, IOV_CNT, gni_addr[0], iov_src_buf); - cr_assert_eq(sz, -FI_EINVAL); - - sz = fi_recvv(ep[1], dest_iov, NULL, IOV_CNT + 1, gni_addr[0], iov_src_buf); - cr_assert_eq(sz, -FI_EINVAL); - - for (iov_cnt = 1; iov_cnt <= IOV_CNT; iov_cnt++) { - rdm_sr_init_data(iov_src_buf, len * iov_cnt, 0xab); - - for (i = 0; i < iov_cnt; i++) { - rdm_sr_init_data(dest_iov[i].iov_base, len, 0); - dest_iov[i].iov_len = len; - } - - sz = fi_send(ep[0], iov_src_buf, len * iov_cnt, NULL, gni_addr[1], - dest_iov); - cr_assert_eq(sz, 0); - - sz = fi_recvv(ep[1], dest_iov, 
NULL, iov_cnt, gni_addr[0], iov_src_buf); - cr_assert_eq(sz, 0); - - /* reset cqe */ - s_cqe.op_context = s_cqe.buf = (void *) -1; - s_cqe.flags = s_cqe.len = s_cqe.data = s_cqe.tag = UINT_MAX; - d_cqe.op_context = d_cqe.buf = (void *) -1; - d_cqe.flags = d_cqe.len = d_cqe.data = d_cqe.tag = UINT_MAX; - - /* need to progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - if (ret == 1) { - dest_done = 1; - } - } while (!(source_done && dest_done)); - - rdm_sr_check_cqe(&s_cqe, dest_iov, (FI_MSG|FI_SEND), 0, 0, 0, - false, ep[0]); - rdm_sr_check_cqe(&d_cqe, iov_src_buf, (FI_MSG|FI_RECV), dest_iov, - len * iov_cnt, 0, false, ep[1]); - - s[0] = 1; r[1] = 1; - rdm_sr_check_cntrs(s, r, s_e, r_e, false); - - dbg_printf("got context events!\n"); - - cr_assert(rdm_sr_check_iov_data(dest_iov, iov_src_buf, iov_cnt, len * iov_cnt), - "Data mismatch"); - source_done = dest_done = 0; - } -} - -Test(rdm_sr, recvv) -{ - rdm_sr_xfer_for_each_size(do_recvv, 1, BUF_SZ); -} - -Test(rdm_sr, recvv_retrans) -{ - rdm_sr_err_inject_enable(); - rdm_sr_xfer_for_each_size(do_recvv, 1, BUF_SZ); -} - -/* -ssize_t (*recvmsg)(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags); -*/ -void do_recvmsg(int len) -{ - int ret; - ssize_t sz; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry s_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry d_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct iovec iov; - struct fi_msg msg; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - rdm_sr_init_data(source, len, 0xab); - rdm_sr_init_data(target, len, 0); - - sz = fi_send(ep[0], source, len, loc_mr[0], gni_addr[1], target); - cr_assert_eq(sz, 0); - - iov.iov_base = target; - iov.iov_len = len; - - msg.msg_iov = &iov; - 
msg.desc = (void **)rem_mr; - msg.iov_count = 1; - msg.addr = gni_addr[0]; - msg.context = source; - msg.data = (uint64_t)source; - - sz = fi_recvmsg(ep[1], &msg, 0); - cr_assert_eq(sz, 0); - - /* need to progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - if (ret == 1) { - dest_done = 1; - } - } while (!(source_done && dest_done)); - - rdm_sr_check_cqe(&s_cqe, target, (FI_MSG|FI_SEND), 0, 0, 0, false, - ep[0]); - rdm_sr_check_cqe(&d_cqe, source, (FI_MSG|FI_RECV), target, len, 0, - false, ep[1]); - - s[0] = 1; r[1] = 1; - rdm_sr_check_cntrs(s, r, s_e, r_e, false); - - dbg_printf("got context events!\n"); - - cr_assert(rdm_sr_check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_sr, recvmsg) -{ - rdm_sr_xfer_for_each_size(do_recvmsg, 1, BUF_SZ); -} - -Test(rdm_sr, recvmsg_retrans) -{ - rdm_sr_err_inject_enable(); - rdm_sr_xfer_for_each_size(do_recvmsg, 1, BUF_SZ); -} - -Test(rdm_sr_bnd_ep, recvmsg) -{ - rdm_sr_xfer_for_each_size(do_recvmsg, 1, BUF_SZ); -} - -void do_send_autoreg(int len) -{ - int ret; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry s_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry d_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - ssize_t sz; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - rdm_sr_init_data(source, len, 0xab); - rdm_sr_init_data(target, len, 0); - - sz = fi_send(ep[0], source, len, NULL, gni_addr[1], target); - cr_assert_eq(sz, 0); - - sz = fi_recv(ep[1], target, len, NULL, gni_addr[0], source); - cr_assert_eq(sz, 0); - - /* need to progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - if (ret == 1) { - 
dest_done = 1; - } - } while (!(source_done && dest_done)); - - rdm_sr_check_cqe(&s_cqe, target, (FI_MSG|FI_SEND), 0, 0, 0, false, - ep[0]); - rdm_sr_check_cqe(&d_cqe, source, (FI_MSG|FI_RECV), target, len, 0, - false, ep[1]); - - s[0] = 1; r[1] = 1; - rdm_sr_check_cntrs(s, r, s_e, r_e, false); - - dbg_printf("got context events!\n"); - - cr_assert(rdm_sr_check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_sr, send_autoreg) -{ - rdm_sr_xfer_for_each_size(do_send_autoreg, 1, BUF_SZ); -} - -Test(rdm_sr, send_autoreg_retrans) -{ - rdm_sr_err_inject_enable(); - rdm_sr_xfer_for_each_size(do_send_autoreg, 1, BUF_SZ); -} - -void do_send_autoreg_uncached(int len) -{ - int ret; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry s_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry d_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - ssize_t sz; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - rdm_sr_init_data(uc_source, len, 0xab); - rdm_sr_init_data(uc_target, len, 0); - - sz = fi_send(ep[0], uc_source, len, NULL, gni_addr[1], uc_target); - cr_assert_eq(sz, 0); - - sz = fi_recv(ep[1], uc_target, len, NULL, gni_addr[0], uc_source); - cr_assert_eq(sz, 0); - - /* need to progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - if (ret == 1) { - dest_done = 1; - } - } while (!(source_done && dest_done)); - - rdm_sr_check_cqe(&s_cqe, uc_target, (FI_MSG|FI_SEND), 0, 0, 0, false, - ep[0]); - rdm_sr_check_cqe(&d_cqe, uc_source, (FI_MSG|FI_RECV), - uc_target, len, 0, false, ep[1]); - - s[0] = 1; r[1] = 1; - rdm_sr_check_cntrs(s, r, s_e, r_e, false); - - dbg_printf("got context events!\n"); - - cr_assert(rdm_sr_check_data(uc_source, uc_target, len), - "Data mismatch"); -} - -Test(rdm_sr, 
send_autoreg_uncached) -{ - rdm_sr_xfer_for_each_size(do_send_autoreg_uncached, 1, BUF_SZ); -} - -Test(rdm_sr, send_autoreg_uncached_retrans) -{ - rdm_sr_err_inject_enable(); - rdm_sr_xfer_for_each_size(do_send_autoreg_uncached, 1, BUF_SZ); -} - -void do_send_err(int len) -{ - int ret; - struct fi_cq_tagged_entry s_cqe; - struct fi_cq_err_entry err_cqe = {0}; - ssize_t sz; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - /* Set err_data_size to 0 to have provider allocate buffer if needed */ - err_cqe.err_data_size = 0; - - rdm_sr_init_data(source, len, 0xab); - rdm_sr_init_data(target, len, 0); - - sz = fi_send(ep[0], source, len, loc_mr[0], gni_addr[1], target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(msg_cq[0], &s_cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, -FI_EAVAIL); - - ret = fi_cq_readerr(msg_cq[0], &err_cqe, 0); - cr_assert_eq(ret, 1); - - cr_assert((uint64_t)err_cqe.op_context == (uint64_t)target, - "Bad error context"); - cr_assert(err_cqe.flags == (FI_MSG | FI_SEND)); - cr_assert(err_cqe.len == 0, "Bad error len"); - cr_assert(err_cqe.buf == 0, "Bad error buf"); - cr_assert(err_cqe.data == 0, "Bad error data"); - cr_assert(err_cqe.tag == 0, "Bad error tag"); - cr_assert(err_cqe.olen == 0, "Bad error olen"); - cr_assert(err_cqe.err == FI_ECANCELED, "Bad error errno"); - cr_assert(err_cqe.prov_errno == gnixu_to_fi_errno(GNI_RC_TRANSACTION_ERROR), - "Bad prov errno"); - cr_assert(err_cqe.err_data == NULL, "Bad error provider data"); - - s_e[0] = 1; - rdm_sr_check_cntrs(s, r, s_e, r_e, false); -} - -Test(rdm_sr, send_err) -{ - int ret, max_retrans_val = 0, i = 0; /* 0 to force SMSG failure */ - - for (; i < NUMEPS; i++) { - ret = gni_domain_ops[i]->set_val(&dom[i]->fid, - GNI_MAX_RETRANSMITS, - &max_retrans_val); - cr_assert(!ret, "setval(GNI_MAX_RETRANSMITS)"); - } - rdm_sr_err_inject_enable(); - - rdm_sr_xfer_for_each_size(do_send_err, 1, BUF_SZ); -} - -void 
do_send_autoreg_uncached_nolazydereg(int len) -{ - int ret; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry s_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry d_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - ssize_t sz; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - rdm_sr_init_data(uc_source, len, 0xab); - rdm_sr_init_data(uc_target, len, 0); - - sz = fi_send(ep[0], uc_source, len, NULL, gni_addr[1], uc_target); - cr_assert_eq(sz, 0); - - sz = fi_recv(ep[1], uc_target, len, NULL, gni_addr[0], uc_source); - cr_assert_eq(sz, 0); - - /* need to progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - if (ret == 1) { - dest_done = 1; - } - } while (!(source_done && dest_done)); - - rdm_sr_check_cqe(&s_cqe, uc_target, (FI_MSG|FI_SEND), 0, 0, 0, false, - ep[0]); - rdm_sr_check_cqe(&d_cqe, uc_source, (FI_MSG|FI_RECV), - uc_target, len, 0, false, ep[1]); - - s[0] = 1; r[1] = 1; - rdm_sr_check_cntrs(s, r, s_e, r_e, false); - - dbg_printf("got context events!\n"); - - cr_assert(rdm_sr_check_data(uc_source, uc_target, len), - "Data mismatch"); -} - -Test(rdm_sr_noreg, send_autoreg_uncached_nolazydereg) -{ - rdm_sr_lazy_dereg_disable(); - rdm_sr_xfer_for_each_size(do_send_autoreg_uncached_nolazydereg, - 1, BUF_SZ); -} - -Test(rdm_sr, send_readfrom) -{ - int ret; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry s_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry d_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - ssize_t sz; - fi_addr_t src_addr; - int len = 64; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - rdm_sr_init_data(source, len, 0xab); 
- rdm_sr_init_data(target, len, 0); - - sz = fi_send(ep[0], source, len, loc_mr[0], gni_addr[1], target); - cr_assert_eq(sz, 0); - - sz = fi_recv(ep[1], target, len, rem_mr[0], gni_addr[0], source); - cr_assert_eq(sz, 0); - - /* need to progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - ret = fi_cq_readfrom(msg_cq[1], &d_cqe, 1, &src_addr); - if (ret == 1) { - dest_done = 1; - } - } while (!(source_done && dest_done)); - - rdm_sr_check_cqe(&s_cqe, target, (FI_MSG|FI_SEND), 0, 0, 0, false, ep[0]); - rdm_sr_check_cqe(&d_cqe, source, (FI_MSG|FI_RECV), target, len, 0, false, ep[1]); - - s[0] = 1; r[1] = 1; - rdm_sr_check_cntrs(s, r, s_e, r_e, false); - - cr_assert(src_addr == gni_addr[0], "src_addr mismatch"); - - dbg_printf("got context events!\n"); - - cr_assert(rdm_sr_check_data(source, target, len), "Data mismatch"); -} - -void do_send_buf(void *p, void *t, int len) -{ - int ret; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry s_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry d_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - ssize_t sz; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - rdm_sr_init_data(p, len, 0xab); - rdm_sr_init_data(t, len, 0); - - sz = fi_send(ep[0], p, len, loc_mr[0], gni_addr[1], t); - cr_assert_eq(sz, 0); - - sz = fi_recv(ep[1], t, len, rem_mr[0], gni_addr[0], p); - cr_assert_eq(sz, 0); - - /* need to progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - if (ret == 1) { - dest_done = 1; - } - } while (!(source_done && dest_done)); - - rdm_sr_check_cqe(&s_cqe, t, (FI_MSG|FI_SEND), 0, 0, 0, false, ep[0]); - rdm_sr_check_cqe(&d_cqe, p, (FI_MSG|FI_RECV), t, len, 0, false, 
ep[1]); - - s[0] = 1; r[1] = 1; - rdm_sr_check_cntrs(s, r, s_e, r_e, false); - - dbg_printf("got context events!\n"); - - cr_assert(rdm_sr_check_data(p, t, len), "Data mismatch"); -} - -void do_send_alignment(int len) -{ - int s_off, t_off, l_off; - - for (s_off = 0; s_off < 7; s_off++) { - for (t_off = 0; t_off < 7; t_off++) { - for (l_off = 0; l_off < 7; l_off++) { - do_send_buf(source + s_off, - target + t_off, - len + l_off); - } - } - } -} - -Test(rdm_sr, send_alignment) -{ - rdm_sr_xfer_for_each_size(do_send_alignment, 1, (BUF_SZ-1)); -} - -Test(rdm_sr, send_alignment_retrans) -{ - rdm_sr_err_inject_enable(); - rdm_sr_xfer_for_each_size(do_send_alignment, 1, (BUF_SZ-1)); -} - -void do_sendrecv_buf(void *p, void *t, int send_len, int recv_len) -{ - int ret; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry s_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry d_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - ssize_t sz; - int xfer_len; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* - * this test can't handle truncated messages so skip if recv_len - * isn't big enough to receive message - */ - if (send_len > recv_len) - return; - - rdm_sr_init_data(p, send_len, 0xab); - rdm_sr_init_data(t, recv_len, 0); - - sz = fi_send(ep[0], p, send_len, loc_mr[0], gni_addr[1], t); - cr_assert_eq(sz, 0); - - sz = fi_recv(ep[1], t, recv_len, rem_mr[0], gni_addr[0], p); - cr_assert_eq(sz, 0); - - /* need to progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - if (ret == 1) { - dest_done = 1; - } - } while (!(source_done && dest_done)); - - xfer_len = MIN(send_len, recv_len); - rdm_sr_check_cqe(&s_cqe, t, (FI_MSG|FI_SEND), 0, 0, 0, false, ep[0]); - rdm_sr_check_cqe(&d_cqe, p, (FI_MSG|FI_RECV), t, 
xfer_len, 0, false, ep[1]); - - s[0] = 1; r[1] = 1; - rdm_sr_check_cntrs(s, r, s_e, r_e, false); - - dbg_printf("got context events!\n"); - - cr_assert(rdm_sr_check_data(p, t, xfer_len), "Data mismatch"); -} - -void do_sendrecv_alignment(int len) -{ - int s_off, t_off, sl_off, rl_off; - - for (s_off = 0; s_off < 8; s_off++) { - for (t_off = 0; t_off < 8; t_off++) { - for (sl_off = -7; sl_off < 8; sl_off++) { - for (rl_off = -7; rl_off < 8; rl_off++) { - do_sendrecv_buf(source + s_off, - target + t_off, - len + sl_off, - len + rl_off); - } - } - } - } -} - -void do_sendvrecv_alignment(int slen, int dlen, int offset) -{ - int i, ret, iov_cnt; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry s_cqe, d_cqe; - ssize_t sz; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - uint64_t iov_d_buf = (uint64_t) iov_dest_buf; - - iov_d_buf += offset; - - for (iov_cnt = 1; iov_cnt <= IOV_CNT; iov_cnt++) { - for (i = 0; i < iov_cnt; i++) { - s_iov[i].iov_base = src_iov[i].iov_base; - s_iov[i].iov_base = (void *) ((uint64_t)s_iov[i].iov_base + offset); - rdm_sr_init_data(s_iov[i].iov_base, slen - offset, 0x25); - s_iov[i].iov_len = slen - offset; - } - rdm_sr_init_data((void *) iov_d_buf, (dlen - offset) * iov_cnt, 0); - sz = fi_sendv(ep[0], s_iov, NULL, iov_cnt, gni_addr[1], (void *) iov_d_buf); - cr_assert_eq(sz, 0); - - sz = fi_recv(ep[1], (void *) iov_d_buf, (dlen - offset) * iov_cnt, (void *) iov_dest_buf_mr[1], - gni_addr[0], s_iov); - cr_assert_eq(sz, 0); - - /* reset cqe */ - s_cqe.op_context = s_cqe.buf = (void *) -1; - s_cqe.flags = s_cqe.len = s_cqe.data = s_cqe.tag = UINT_MAX; - d_cqe.op_context = d_cqe.buf = (void *) -1; - d_cqe.flags = d_cqe.len = d_cqe.data = d_cqe.tag = UINT_MAX; - - /* need to progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - if (ret == 1) { - dest_done = 
1; - } - } while (!(source_done && dest_done)); - - rdm_sr_check_cqe(&s_cqe, (void *) iov_d_buf, (FI_MSG|FI_SEND), - 0, 0, 0, false, ep[0]); - rdm_sr_check_cqe(&d_cqe, s_iov, (FI_MSG|FI_RECV), - (void *) iov_d_buf, (dlen - offset) * iov_cnt, - 0, false, ep[1]); - - s[0] = 1; r[1] = 1; - rdm_sr_check_cntrs(s, r, s_e, r_e, false); - - dbg_printf("got context events!\n"); - - cr_assert(rdm_sr_check_iov_data(s_iov, (void *) iov_d_buf, - iov_cnt, (dlen - offset) * iov_cnt), - "Data mismatch"); - source_done = dest_done = 0; - } - -} - -void do_sendrecvv_alignment(int slen, int dlen, int offset) -{ - int i, ret, iov_cnt; - ssize_t sz; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry s_cqe, d_cqe; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - uint64_t iov_s_buf = (uint64_t) iov_src_buf; - - iov_s_buf += offset; - - for (iov_cnt = 1; iov_cnt <= IOV_CNT; iov_cnt++) { - for (i = 0; i < iov_cnt; i++) { - d_iov[i].iov_base = dest_iov[i].iov_base; - d_iov[i].iov_base = (void *) ((uint64_t)d_iov[i].iov_base + offset); - rdm_sr_init_data(d_iov[i].iov_base, dlen - offset, 0); - d_iov[i].iov_len = dlen - offset; - } - - rdm_sr_init_data((void *) iov_s_buf, (slen - offset) * iov_cnt, 0xab); - - sz = fi_send(ep[0], (void *) iov_s_buf, (slen - offset) * iov_cnt, NULL, gni_addr[1], - d_iov); - cr_assert_eq(sz, 0); - - sz = fi_recvv(ep[1], d_iov, NULL, iov_cnt, gni_addr[0], (void *) iov_s_buf); - cr_assert_eq(sz, 0); - - /* reset cqe */ - s_cqe.op_context = s_cqe.buf = (void *) -1; - s_cqe.flags = s_cqe.len = s_cqe.data = s_cqe.tag = UINT_MAX; - d_cqe.op_context = d_cqe.buf = (void *) -1; - d_cqe.flags = d_cqe.len = d_cqe.data = d_cqe.tag = UINT_MAX; - - /* need to progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - if (ret == 1) { - dest_done = 1; - } - } while (!(source_done && 
dest_done)); - - rdm_sr_check_cqe(&s_cqe, d_iov, (FI_MSG|FI_SEND), 0, 0, 0, - false, ep[0]); - rdm_sr_check_cqe(&d_cqe, (void *) iov_s_buf, (FI_MSG|FI_RECV), - d_iov, - MIN((slen - offset) * iov_cnt, (dlen - - offset) * iov_cnt), 0, false, ep[1]); - - s[0] = 1; r[1] = 1; - rdm_sr_check_cntrs(s, r, s_e, r_e, false); - - dbg_printf("got context events!\n"); - - cr_assert(rdm_sr_check_iov_data(d_iov, (void *) iov_s_buf, - iov_cnt, (slen - offset) * iov_cnt), - "Data mismatch"); - source_done = dest_done = 0; - } -} - -void do_sendvrecv_alignment_iter(int len) -{ - int offset; - - /* Check for alignment issues using offsets 1..3 */ - for (offset = 1; offset < GNI_READ_ALIGN; offset++) { - /* lets assume the user passes in valid addresses */ - if (offset < len) { - do_sendvrecv_alignment(len, len, offset); - } - } -} - -void do_sendrecvv_alignment_iter(int len) -{ - int offset; - - /* Check for alignment issues using offsets 1..3 */ - for (offset = 1; offset < GNI_READ_ALIGN; offset++) { - /* lets assume the user passes in valid addresses */ - if (offset < len) { - do_sendrecvv_alignment(len, len, offset); - } - } -} - -void do_iov_alignment_edge(int len) -{ - int offset; - - /* Check for alignment issues using offsets 1..3 */ - for (offset = 1; offset < GNI_READ_ALIGN; offset++) { - /* lets assume the user passes in valid addresses */ - if (offset < len) { - /* These calls trigger rendezvous cases on the sender's - * side but the recv buffer that's posted will be so - * small it will only fit a portion of the sender's data. - * - * The four byte alignment support in the current - * sendv/recvv implementation doesn't support a head/tail - * which meets or exceeds a given recv buffer. 
- */ - - /* large (IOV) 1..8 x slen of 1..BUF_SZ -> dlen of 2..4 */ - do_sendvrecv_alignment(len, offset + 1, offset); - - /* large slen of 1..BUF_SZ -> (IOV) 1..8 x dlen of 2..4 */ - do_sendrecvv_alignment(len, offset + 1, offset); - } - } -} - -Test(rdm_sr, sendrecv_alignment) -{ - rdm_sr_xfer_for_each_size(do_sendrecv_alignment, 8*1024, 16*1024); -} - -Test(rdm_sr, sendrecv_alignment_retrans) -{ - rdm_sr_err_inject_enable(); - rdm_sr_xfer_for_each_size(do_sendrecv_alignment, 8*1024, 32*1024); -} - -Test(rdm_sr, sendvrecv_alignment) -{ - rdm_sr_xfer_for_each_size(do_sendvrecv_alignment_iter, 1, BUF_SZ); -} - -Test(rdm_sr, sendrecvv_alignment) -{ - rdm_sr_xfer_for_each_size(do_sendrecvv_alignment_iter, 1, BUF_SZ); -} - -Test(rdm_sr_alignment_edge, iov_alignment_edge) -{ - rdm_sr_xfer_for_each_size(do_iov_alignment_edge, 1, BUF_SZ); -} - diff --git a/prov/gni/test/rdm_tagged_sr.c b/prov/gni/test/rdm_tagged_sr.c deleted file mode 100644 index a36f97b756f..00000000000 --- a/prov/gni/test/rdm_tagged_sr.c +++ /dev/null @@ -1,961 +0,0 @@ -/* - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2020 Triad National Security, LLC. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include - -#include "gnix_vc.h" -#include "gnix_cm_nic.h" -#include "gnix_hashtable.h" -#include "gnix_rma.h" - -#include -#include "gnix_rdma_headers.h" -#include "common.h" - -#if 1 -#define dbg_printf(...) -#else -#define dbg_printf(...) 
\ - do { \ - fprintf(stderr, __VA_ARGS__); \ - fflush(stderr); \ - } while(0) -#endif - -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fid_fabric *fab; -static struct fid_domain *dom; -static struct fid_ep *ep[2]; -static struct fid_av *av; -static struct fi_info *hints; -static struct fi_info *fi; -static void *ep_name[2]; -static size_t gni_addr[2]; -static struct fid_cq *msg_cq[2]; -static struct fi_cq_attr cq_attr; - -#define BUF_SZ (1<<16) -#define IOV_CNT (1<<3) - -static char *target, *target_base; -static char *source, *source_base; -static struct iovec *src_iov, *dest_iov; -static char *iov_src_buf, *iov_dest_buf; -static struct fid_mr *rem_mr, *loc_mr; -static uint64_t mr_key; - -static void setup_dom(enum fi_progress pm, uint32_t version, int mr_mode) -{ - int ret; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = mr_mode; - hints->domain_attr->data_progress = pm; - hints->domain_attr->cq_data_size = 4; - hints->mode = mode_bits; - - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(version, NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - ret = fi_domain(fab, fi, &dom, NULL); - cr_assert(!ret, "fi_domain"); - -} - -static void setup_ep(void) -{ - int ret; - struct fi_av_attr attr; - size_t addrlen = 0; - - memset(&attr, 0, sizeof(attr)); - attr.type = FI_AV_MAP; - attr.count = 16; - - ret = fi_av_open(dom, &attr, &av, NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_endpoint(dom, fi, &ep[0], NULL); - cr_assert(!ret, "fi_endpoint"); - - cq_attr.format = FI_CQ_FORMAT_TAGGED; - cq_attr.size = 1024; - cq_attr.wait_obj = 0; - - ret = fi_cq_open(dom, &cq_attr, &msg_cq[0], 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_cq_open(dom, &cq_attr, &msg_cq[1], 0); - cr_assert(!ret, "fi_cq_open"); - - ret 
= fi_ep_bind(ep[0], &msg_cq[0]->fid, FI_SEND | FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_getname(&ep[0]->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - ep_name[0] = malloc(addrlen); - cr_assert(ep_name[0] != NULL); - - ret = fi_getname(&ep[0]->fid, ep_name[0], &addrlen); - cr_assert(ret == FI_SUCCESS); - - ret = fi_endpoint(dom, fi, &ep[1], NULL); - cr_assert(!ret, "fi_endpoint"); - - ret = fi_ep_bind(ep[1], &msg_cq[1]->fid, FI_SEND | FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ep_name[1] = malloc(addrlen); - cr_assert(ep_name[1] != NULL); - - ret = fi_getname(&ep[1]->fid, ep_name[1], &addrlen); - cr_assert(ret == FI_SUCCESS); - - ret = fi_av_insert(av, ep_name[0], 1, &gni_addr[0], 0, - NULL); - cr_assert(ret == 1); - - ret = fi_av_insert(av, ep_name[1], 1, &gni_addr[1], 0, - NULL); - cr_assert(ret == 1); - - ret = fi_ep_bind(ep[0], &av->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(ep[1], &av->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_enable(ep[0]); - cr_assert(!ret, "fi_ep_enable"); - - ret = fi_enable(ep[1]); - cr_assert(!ret, "fi_ep_enable"); -} - -static void setup_mr(void) -{ - int i, ret; - - dest_iov = malloc(sizeof(struct iovec) * IOV_CNT); - assert(dest_iov); - - target_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(target_base); - target = GNIT_ALIGN_BUFFER(char *, target_base); - - source_base = malloc(GNIT_ALIGN_LEN(BUF_SZ)); - assert(source_base); - source = GNIT_ALIGN_BUFFER(char *, source_base); - - src_iov = malloc(sizeof(struct iovec) * IOV_CNT); - assert(src_iov); - - for (i = 0; i < IOV_CNT; i++) { - src_iov[i].iov_base = malloc(BUF_SZ); - assert(src_iov[i].iov_base != NULL); - - dest_iov[i].iov_base = malloc(BUF_SZ); - assert(dest_iov[i].iov_base != NULL); - } - - iov_src_buf = malloc(BUF_SZ * IOV_CNT); - assert(iov_src_buf != NULL); - - iov_dest_buf = malloc(BUF_SZ * IOV_CNT); - assert(iov_src_buf != NULL); - - ret = fi_mr_reg(dom, target, BUF_SZ, - FI_SEND | FI_RECV, 0, (USING_SCALABLE(fi) 
? 1 : 0), - 0, &rem_mr, &target); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom, source, BUF_SZ, - FI_SEND | FI_RECV, 0, (USING_SCALABLE(fi) ? 2 : 0), - 0, &loc_mr, &source); - cr_assert_eq(ret, 0); - - if (USING_SCALABLE(fi)) { - MR_ENABLE(rem_mr, target, BUF_SZ); - MR_ENABLE(loc_mr, source, BUF_SZ); - } - - mr_key = fi_mr_key(rem_mr); -} - -static void rdm_tagged_sr_basic_setup(void) -{ - /* Change this to FI_PROGRESS_AUTO when supported */ - setup_dom(FI_PROGRESS_MANUAL, fi_version(), GNIX_MR_BASIC); - setup_ep(); - setup_mr(); -} - -static void rdm_tagged_sr_scalable_setup(void) -{ - /* Change this to FI_PROGRESS_AUTO when supported */ - setup_dom(FI_PROGRESS_MANUAL, fi_version(), GNIX_MR_SCALABLE); - setup_ep(); - setup_mr(); -} - -static void rdm_tagged_sr_teardown(void) -{ - int i, ret = 0; - - fi_close(&loc_mr->fid); - fi_close(&rem_mr->fid); - - free(target_base); - free(source_base); - - for (i = 0; i < IOV_CNT; i++) { - free(src_iov[i].iov_base); - free(dest_iov[i].iov_base); - } - - free(src_iov); - free(dest_iov); - free(iov_src_buf); - free(iov_dest_buf); - - ret = fi_close(&ep[0]->fid); - cr_assert(!ret, "failure in closing ep."); - - ret = fi_close(&ep[1]->fid); - cr_assert(!ret, "failure in closing ep."); - - ret = fi_close(&msg_cq[0]->fid); - cr_assert(!ret, "failure in send cq."); - - ret = fi_close(&msg_cq[1]->fid); - cr_assert(!ret, "failure in recv cq."); - - ret = fi_close(&av->fid); - cr_assert(!ret, "failure in closing av."); - - ret = fi_close(&dom->fid); - cr_assert(!ret, "failure in closing domain."); - - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); - - fi_freeinfo(fi); - fi_freeinfo(hints); - free(ep_name[0]); - free(ep_name[1]); -} - -void rdm_tagged_sr_init_data(char *buf, int len, char seed) -{ - int i; - - for (i = 0; i < len; i++) { - buf[i] = seed++; - } -} - -int rdm_tagged_sr_check_iov_data(struct iovec *iov_buf, char *buf, size_t cnt) -{ - int i, j, cum_len = 0; - - /* - * For these tests we 
assume cumulative length of the vector entries is - * equal to the buf size - */ - for (i = 0; i < cnt; i++) { - for (j = 0; j < iov_buf[i].iov_len; j++, cum_len++) { - if (((char *)iov_buf[i].iov_base)[j] != buf[cum_len]) { - printf("data mismatch, iov_index: %d, elem: %d," - " iov_buf: %hhx, buf: %hhx\n", i, j, - ((char *)iov_buf[i].iov_base)[j], - buf[cum_len]); - return 0; - } - } - } - return 1; -} - -int rdm_tagged_sr_check_data(char *buf1, char *buf2, int len) -{ - int i; - - for (i = 0; i < len; i++) { - if (buf1[i] != buf2[i]) { - printf("data mismatch, elem: %d, exp: %x, act: %x\n", - i, buf1[i], buf2[i]); - return 0; - } - } - - return 1; -} - -void rdm_tagged_sr_xfer_for_each_size(void (*xfer)(int len), int slen, int elen) -{ - int i; - - for (i = slen; i <= elen; i *= 2) { - xfer(i); - } -} - -/******************************************************************************* - * Test MSG functions - ******************************************************************************/ - -TestSuite(rdm_tagged_sr_basic, - .init = rdm_tagged_sr_basic_setup, - .fini = rdm_tagged_sr_teardown, - .disabled = false); - -TestSuite(rdm_tagged_sr_scalable, - .init = rdm_tagged_sr_scalable_setup, - .fini = rdm_tagged_sr_teardown, - .disabled = false); - -/* - * ssize_t fi_tsend(struct fid_ep *ep, void *buf, size_t len, - * void *desc, fi_addr_t dest_addr, uint64_t tag, void *context); - * - * ssize_t fi_trecv(struct fid_ep *ep, void * buf, size_t len, - * void *desc, fi_addr_t src_addr, uint64_t tag, uint64_t ignore, - * void *context); - */ -void do_tsend(int len) -{ - int ret; - ssize_t sz; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry s_cqe, d_cqe; - - rdm_tagged_sr_init_data(source, len, 0xab); - rdm_tagged_sr_init_data(target, len, 0); - - sz = fi_tsend(ep[0], source, len, loc_mr, gni_addr[1], len, target); - cr_assert_eq(sz, 0); - - sz = fi_trecv(ep[1], target, len, rem_mr, gni_addr[0], len, 0, source); - cr_assert_eq(sz, 0); - - /* need to 
progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - if (ret == 1) { - dest_done = 1; - } - } while (!(source_done && dest_done)); - - dbg_printf("got context events!\n"); - - - cr_assert(rdm_tagged_sr_check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_tagged_sr_basic, tsend) -{ - rdm_tagged_sr_xfer_for_each_size(do_tsend, 1, BUF_SZ); -} - -Test(rdm_tagged_sr_scalable, tsend) -{ - rdm_tagged_sr_xfer_for_each_size(do_tsend, 1, BUF_SZ); -} - -/* -ssize_t fi_tsendv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t dest_addr, uint64_t tag, - void *context); - */ -void do_tsendv(int len) -{ - int i, ret, iov_cnt; - ssize_t sz; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry s_cqe, d_cqe; - - sz = fi_tsendv(ep[0], src_iov, NULL, 0, gni_addr[1], - len * IOV_CNT, iov_dest_buf); - cr_assert_eq(sz, -FI_EINVAL); - - for (iov_cnt = 1; iov_cnt <= IOV_CNT; iov_cnt++) { - rdm_tagged_sr_init_data(iov_dest_buf, len * iov_cnt, 0); - - for (i = 0; i < iov_cnt; i++) { - rdm_tagged_sr_init_data(src_iov[i].iov_base, len, 0xab); - src_iov[i].iov_len = len; - } - - sz = fi_tsendv(ep[0], src_iov, NULL, iov_cnt, gni_addr[1], - len * iov_cnt, iov_dest_buf); - cr_assert_eq(sz, 0); - - sz = fi_trecv(ep[1], iov_dest_buf, len * iov_cnt, NULL, gni_addr[0], - len * iov_cnt, 0, src_iov); - cr_assert_eq(sz, 0); - - /* need to progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - if (ret == 1) { - dest_done = 1; - } - } while (!(source_done && dest_done)); - - dbg_printf("got recv context event!\n"); - - cr_assert(rdm_tagged_sr_check_iov_data(src_iov, iov_dest_buf, iov_cnt), - "Data mismatch"); - source_done = dest_done = 0; - } -} - -Test(rdm_tagged_sr_basic, tsendv) -{ - 
rdm_tagged_sr_xfer_for_each_size(do_tsendv, 1, BUF_SZ); -} - -Test(rdm_tagged_sr_scalable, tsendv) -{ - rdm_tagged_sr_xfer_for_each_size(do_tsendv, 1, BUF_SZ); -} - -/* -ssize_t fi_tsendmsg(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags); -*/ -void do_tsendmsg(int len) -{ - int ret; - ssize_t sz; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry s_cqe, d_cqe; - struct fi_msg_tagged msg; - struct iovec iov; - - iov.iov_base = source; - iov.iov_len = len; - - msg.msg_iov = &iov; - msg.desc = (void **)&loc_mr; - msg.iov_count = 1; - msg.addr = gni_addr[1]; - msg.context = target; - msg.data = (uint64_t)target; - msg.tag = len; - msg.ignore = 0; - - rdm_tagged_sr_init_data(source, len, 0xef); - rdm_tagged_sr_init_data(target, len, 0); - - sz = fi_tsendmsg(ep[0], &msg, 0); - cr_assert_eq(sz, 0); - - sz = fi_trecv(ep[1], target, len, rem_mr, gni_addr[0], len, 0, source); - cr_assert_eq(sz, 0); - - /* need to progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - if (ret == 1) { - dest_done = 1; - } - } while (!(source_done && dest_done)); - - dbg_printf("got context events!\n"); - - cr_assert(rdm_tagged_sr_check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_tagged_sr_basic, tsendmsg) -{ - rdm_tagged_sr_xfer_for_each_size(do_tsendmsg, 1, BUF_SZ); -} - -Test(rdm_tagged_sr_scalable, tsendmsg) -{ - rdm_tagged_sr_xfer_for_each_size(do_tsendmsg, 1, BUF_SZ); -} - -/* -ssize_t fi_tinject(struct fid_ep *ep, void *buf, size_t len, - fi_addr_t dest_addr); -*/ -#define INJECT_SIZE 64 -void do_tinject(int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe; - - rdm_tagged_sr_init_data(source, len, 0x23); - rdm_tagged_sr_init_data(target, len, 0); - - sz = fi_tinject(ep[0], source, len, gni_addr[1], len); - cr_assert_eq(sz, 0); - - sz = fi_trecv(ep[1], target, len, rem_mr, gni_addr[0], len, 0, 
source); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(msg_cq[1], &cqe, 1)) == -FI_EAGAIN) { - ret = fi_cq_read(msg_cq[0], &cqe, 1); - pthread_yield(); - } - - cr_assert_eq(ret, 1); - cr_assert_eq((uint64_t)cqe.op_context, (uint64_t)source); - - dbg_printf("got recv context event!\n"); - - cr_assert(rdm_tagged_sr_check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_tagged_sr_basic, tinject, .disabled = false) -{ - rdm_tagged_sr_xfer_for_each_size(do_tinject, 1, INJECT_SIZE); -} - -Test(rdm_tagged_sr_scalable, tinject, .disabled = false) -{ - rdm_tagged_sr_xfer_for_each_size(do_tinject, 1, INJECT_SIZE); -} - - -/* -ssize_t fi_tsenddata(struct fid_ep *ep, void *buf, size_t len, - void *desc, uint64_t data, fi_addr_t dest_addr, void *context); -*/ -void do_tsenddata(int len) -{ - int ret; - ssize_t sz; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry s_cqe, d_cqe; - - rdm_tagged_sr_init_data(source, len, 0xab); - rdm_tagged_sr_init_data(target, len, 0); - - sz = fi_tsenddata(ep[0], source, len, loc_mr, (uint64_t)source, - gni_addr[1], len, target); - cr_assert_eq(sz, 0); - - sz = fi_trecv(ep[1], target, len, rem_mr, gni_addr[0], len, 0, source); - cr_assert_eq(sz, 0); - - /* need to progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - if (ret == 1) { - dest_done = 1; - } - } while (!(source_done && dest_done)); - - dbg_printf("got context events!\n"); - - cr_assert(rdm_tagged_sr_check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_tagged_sr_basic, tsenddata) -{ - rdm_tagged_sr_xfer_for_each_size(do_tsenddata, 1, BUF_SZ); -} - -Test(rdm_tagged_sr_scalable, tsenddata) -{ - rdm_tagged_sr_xfer_for_each_size(do_tsenddata, 1, BUF_SZ); -} - -/* -ssize_t (*recvv)(struct fid_ep *ep, const struct iovec *iov, void **desc, - size_t count, fi_addr_t src_addr, void *context); - */ -void do_trecvv(int 
len) -{ - int i, ret, iov_cnt; - ssize_t sz; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry s_cqe, d_cqe; - - sz = fi_trecvv(ep[1], NULL, NULL, IOV_CNT, gni_addr[0], - len * IOV_CNT, 0, iov_src_buf); - cr_assert_eq(sz, -FI_EINVAL); - - sz = fi_trecvv(ep[1], dest_iov, NULL, IOV_CNT + 1, gni_addr[0], - len * IOV_CNT, 0, iov_src_buf); - cr_assert_eq(sz, -FI_EINVAL); - - for (iov_cnt = 1; iov_cnt <= IOV_CNT; iov_cnt++) { - rdm_tagged_sr_init_data(iov_src_buf, len * iov_cnt, 0xab); - - for (i = 0; i < iov_cnt; i++) { - rdm_tagged_sr_init_data(dest_iov[i].iov_base, len, 0); - dest_iov[i].iov_len = len; - } - - sz = fi_tsend(ep[0], iov_src_buf, len * iov_cnt, NULL, gni_addr[1], - len * iov_cnt, dest_iov); - cr_assert_eq(sz, 0); - - sz = fi_trecvv(ep[1], dest_iov, NULL, iov_cnt, gni_addr[0], - len * iov_cnt, 0, iov_src_buf); - cr_assert_eq(sz, 0); - - /* need to progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - if (ret == 1) { - dest_done = 1; - } - } while (!(source_done && dest_done)); - - dbg_printf("got context events!\n"); - - cr_assert(rdm_tagged_sr_check_iov_data(dest_iov, iov_src_buf, iov_cnt), - "Data mismatch"); - source_done = dest_done = 0; - } -} - -Test(rdm_tagged_sr_basic, trecvv) -{ - rdm_tagged_sr_xfer_for_each_size(do_trecvv, 1, BUF_SZ); -} - -Test(rdm_tagged_sr_scalable, trecvv) -{ - rdm_tagged_sr_xfer_for_each_size(do_trecvv, 1, BUF_SZ); -} - -/* -ssize_t (*recvmsg)(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags); - */ -void do_trecvmsg(int len) -{ - int ret; - ssize_t sz; - int source_done = 0, dest_done = 0; - struct fi_cq_tagged_entry s_cqe, d_cqe; - struct fi_msg_tagged msg; - struct iovec iov; - - rdm_tagged_sr_init_data(source, len, 0xab); - rdm_tagged_sr_init_data(target, len, 0); - - sz = fi_tsend(ep[0], source, len, loc_mr, gni_addr[1], len, target); - cr_assert_eq(sz, 0); - - 
iov.iov_base = target; - iov.iov_len = len; - - msg.msg_iov = &iov; - msg.desc = (void **)&rem_mr; - msg.iov_count = 1; - msg.addr = gni_addr[0]; - msg.context = source; - msg.data = (uint64_t)source; - msg.tag = len; - msg.ignore = 0; - - sz = fi_trecvmsg(ep[1], &msg, 0); - cr_assert_eq(sz, 0); - - /* need to progress both CQs simultaneously for rendezvous */ - do { - ret = fi_cq_read(msg_cq[0], &s_cqe, 1); - if (ret == 1) { - source_done = 1; - } - ret = fi_cq_read(msg_cq[1], &d_cqe, 1); - if (ret == 1) { - dest_done = 1; - } - } while (!(source_done && dest_done)); - - dbg_printf("got context events!\n"); - - cr_assert(rdm_tagged_sr_check_data(source, target, len), "Data mismatch"); -} - -Test(rdm_tagged_sr_basic, trecvmsg) -{ - rdm_tagged_sr_xfer_for_each_size(do_trecvmsg, 1, BUF_SZ); -} - -Test(rdm_tagged_sr_scalable, trecvmsg) -{ - rdm_tagged_sr_xfer_for_each_size(do_trecvmsg, 1, BUF_SZ); -} - -static inline void __multi_tsend_trecv(void) -{ - int i, it, ridx, ret; - const int iters = 37; - const int num_msgs = 17; - const int slen = 256; - uint64_t tags[num_msgs]; - uint64_t rtag = 0x01000000; - uint64_t ignore = 0xf0ffffff; - char msg[num_msgs][slen]; - struct fi_cq_tagged_entry cqe; - - srand(time(NULL)); - - for (it = 0; it < iters; it++) { - for (i = 0; i < num_msgs; i++) { - tags[i] = 0x01010abc + it*iters + i; - - sprintf(msg[i], "%d\n", i); - ret = fi_tsend(ep[1], msg[i], strlen(msg[i]), - NULL, gni_addr[0], tags[i], NULL); - cr_assert(ret == FI_SUCCESS); - - do { - ret = fi_cq_read(msg_cq[1], &cqe, 1); - cr_assert((ret == 1) || (ret == -FI_EAGAIN)); - } while (ret == -FI_EAGAIN); - - cr_assert(cqe.len == 0); - cr_assert(cqe.tag == 0); - } - - for (i = 0; i < num_msgs; i++) { - memset(target, 0, BUF_SZ); - ret = fi_trecv(ep[0], target, BUF_SZ, - fi_mr_desc(loc_mr), - gni_addr[1], rtag, ignore, NULL); - cr_assert(ret == FI_SUCCESS); - - do { - ret = fi_cq_read(msg_cq[0], &cqe, 1); - cr_assert((ret == 1) || (ret == -FI_EAGAIN)); - } while (ret == 
-FI_EAGAIN); - - cr_assert(rtag != cqe.tag); - - ret = sscanf(target, "%d", &ridx); - cr_assert(ret == 1); - cr_assert(cqe.len == strlen(msg[ridx])); - - /* zero out the tag for error checking below */ - tags[ridx] = 0; - } - - /* Make sure we got everything */ - for (i = 0; i < num_msgs; i++) - cr_assert(tags[i] == 0); - } - -} - -Test(rdm_tagged_sr_basic, multi_tsend_trecv) -{ - __multi_tsend_trecv(); -} - -Test(rdm_tagged_sr_scalable, multi_tsend_trecv) -{ - __multi_tsend_trecv(); -} - -static void do_tagged_sr_pipelined(void) -{ - int i, it, s, ret; - const int iters = 37; - const int num_msgs = 61; - const int msgs_per_stage = 17; - const int num_stages = num_msgs/msgs_per_stage + - (num_msgs%msgs_per_stage != 0); - const int slen = 256; - uint64_t tags[num_msgs]; - uint64_t rtag = 0x01000000; - uint64_t ignore = 0xf0ffffff; - char msg[num_msgs][slen]; - struct fi_cq_tagged_entry cqe; - - srand(time(NULL)); - - for (it = 0; it < iters; it++) { - dbg_printf("iter %d\n", it); - for (s = 0; s < num_stages; s++) { - dbg_printf("\tsending stage %d\n", s); - for (i = s*msgs_per_stage; - i < (s+1)*msgs_per_stage && i < num_msgs; - i++) { - tags[i] = 0x01010abc + it*iters + i; - - sprintf(msg[i], "%d\n", i%10); - ret = fi_tsend(ep[1], msg[i], strlen(msg[i]), - NULL, gni_addr[0], tags[i], - NULL); - cr_assert(ret == FI_SUCCESS); - } - - for (i = s*msgs_per_stage; - i < (s+1)*msgs_per_stage && i < num_msgs; - i++) { - do { - ret = fi_cq_read(msg_cq[1], &cqe, 1); - cr_assert((ret == 1) || - (ret == -FI_EAGAIN)); - } while (ret == -FI_EAGAIN); - - cr_assert(cqe.tag == 0); - } - cr_assert(cqe.len == 0); - } - - for (s = 0; s < num_stages; s++) { - dbg_printf("\treceiving stage %d\n", s); - for (i = s*msgs_per_stage; - i < (s+1)*msgs_per_stage && i < num_msgs; - i++) { - ret = fi_trecv(ep[0], &target[i], BUF_SZ, - fi_mr_desc(loc_mr), - gni_addr[1], rtag, ignore, - NULL); - cr_assert(ret == FI_SUCCESS); - } - - for (i = s*msgs_per_stage; - i < (s+1)*msgs_per_stage && i < 
num_msgs; - i++) { - do { - ret = fi_cq_read(msg_cq[0], &cqe, 1); - cr_assert((ret == 1) || - (ret == -FI_EAGAIN)); - } while (ret == -FI_EAGAIN); - - cr_assert(rtag != cqe.tag); - - cr_assert(ret == 1); - cr_assert(cqe.len == 2); - - /* zero out the tag for error checking below */ - tags[cqe.tag - (0x01010abc + it*iters)] = 0; - } - } - - /* Make sure we got everything */ - for (i = 0; i < num_msgs; i++) { - cr_assert(tags[i] == 0); - } - } - -} - -/* Add this test when FI_PROGRESS_AUTO is implemented */ -Test(rdm_tagged_sr_basic, multi_tsend_trecv_pipelined, .disabled = true) { - do_tagged_sr_pipelined(); -} - -Test(rdm_tagged_sr_scalable, multi_tsend_trecv_pipelined, .disabled = true) { - do_tagged_sr_pipelined(); -} - -/* Call fi_gni_domain_ops->set_val() with op and opval */ -static void progress_manual_dom_ops_setup(const dom_ops_val_t op, - const uint32_t opval) -{ - int ret; - uint32_t val = opval; - struct fi_gni_ops_domain *gni_domain_ops; - - setup_dom(FI_PROGRESS_MANUAL, fi_version(), GNIX_MR_BASIC); - ret = fi_open_ops(&dom->fid, FI_GNI_DOMAIN_OPS_1, - 0, (void **) &gni_domain_ops, NULL); - gni_domain_ops->set_val(&dom->fid, op, &val); - cr_assert(ret == FI_SUCCESS, "fi_open_ops"); - - setup_ep(); - setup_mr(); -} - -static void mbox_max_credit_setup(void) -{ - /* Use this with manual progress */ - progress_manual_dom_ops_setup(GNI_MBOX_MAX_CREDIT, - 122-1 /* 2*num_msgs-1 above */); -} - -/* Suite of tests that should work with manual progress */ -TestSuite(rdm_tagged_sr_progress_manual, - .init = mbox_max_credit_setup, - .fini = rdm_tagged_sr_teardown, - .disabled = false); - -Test(rdm_tagged_sr_progress_manual, multi_tsend_trecv_pipelined) { - do_tagged_sr_pipelined(); -} diff --git a/prov/gni/test/run_gnitest b/prov/gni/test/run_gnitest deleted file mode 100755 index cda104005cb..00000000000 --- a/prov/gni/test/run_gnitest +++ /dev/null @@ -1,85 +0,0 @@ -#!/bin/bash -# -# Copyright (c) 2015 Cray Inc. All rights reserved. 
-# Copyright (c) 2019 Triad National Security, LLC. -# All rights reserved. -# -# This software is available to you under a choice of one of two -# licenses. You may choose to be licensed under the terms of the GNU -# General Public License (GPL) Version 2, available from the file -# COPYING in the main directory of this source tree, or the -# BSD license below: -# -# Redistribution and use in source and binary forms, with or -# without modification, are permitted provided that the following -# conditions are met: -# -# - Redistributions of source code must retain the above -# copyright notice, this list of conditions and the following -# disclaimer. -# -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials -# provided with the distribution. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AWV -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -# - -# -# disable use of xpmem bypass for criterion tests -# -export GNIX_DISABLE_XPMEM=1 - -# -# disable fma sharing for criterion tests -# -export UGNI_FMA_SHARED=0 - -# -# Check for srun or aprun -# -srun=`command -v srun` -if [ $? == 0 ]; then - launcher="srun" -else - aprun=`command -v aprun` - if [ $? == 0 ]; then - launcher="aprun" - else - echo "Cannot find a supported job launcher (srun, aprun). Please load the appropriate module" - exit -1 - fi -fi - -gnitest_bin="$(cd "$(dirname "$0")" && pwd)/gnitest" -if [ ! 
-f "$gnitest_bin" ]; then - echo "Could not find gnitest executable" - exit -1 -fi - -# -# for srun need to have multiple cores per task to insure we get enough -# GNI resources assiged for tests to pass -# -if [ $launcher = "srun" ]; then - args="-N1 --exclusive --cpu_bind=none -t00:20:00 --ntasks=1 --cpus-per-task=12" -else - args="-n1 -N1 -j0 -cc none -t1200" -fi - -# As of Criterion 2.3, it seems that the PRE_ALL hook is being run -# more than once. These two environment variables insure we never -# fork and never initialize more than once. -export PMI_NO_PREINITIALIZE=1 -export PMI_NO_FORK=1 - -# pass all command line args to gnitest -$launcher $args $gnitest_bin -j1 "$@" diff --git a/prov/gni/test/sep.c b/prov/gni/test/sep.c deleted file mode 100644 index 2fb6ab3b0a8..00000000000 --- a/prov/gni/test/sep.c +++ /dev/null @@ -1,2689 +0,0 @@ -/* - * Copyright (c) 2015-2018 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2019-2020 Triad National Security, LLC. - * All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include "gnix_rdma_headers.h" -#include "common.h" -#include "fi_ext_gni.h" -#include "gnix.h" - -#define NUMCONTEXTS 4 -#define NUMEPS 2 -#define EXTRAEPS 2 -#define TOTALEPS (NUMEPS+EXTRAEPS) -#define BUF_SZ (1<<20) -#define IOV_CNT (4) - -static struct fid_fabric *fab; -static struct fid_domain *dom[NUMEPS]; -static struct fid_av *av[NUMEPS]; -static struct fid_av *t_av; -static void *ep_name[TOTALEPS]; -static fi_addr_t gni_addr[NUMEPS]; -static struct fi_cq_attr cq_attr; -static struct fi_info *hints; -static struct fi_info *fi[NUMEPS]; -static struct fid_ep *sep[TOTALEPS]; - -static char *target, *target_base; -static char *source, *source_base; -static struct iovec *src_iov, *dest_iov; -static char *iov_src_buf, *iov_dest_buf, *iov_src_buf_base, *iov_dest_buf_base; -static struct fid_mr *rem_mr[NUMEPS], *loc_mr[NUMEPS]; -static struct fid_mr *iov_dest_buf_mr[NUMEPS], *iov_src_buf_mr[NUMEPS]; -static uint64_t mr_key[NUMEPS]; - -static int ctx_cnt = NUMCONTEXTS; -static int rx_ctx_bits; -static struct fid_ep **tx_ep[NUMEPS], **rx_ep[NUMEPS]; -static struct fid_cq **tx_cq[NUMEPS]; -static struct fid_cq **rx_cq[NUMEPS]; -static fi_addr_t *rx_addr; -static struct fid_cntr *send_cntr[NUMEPS], *recv_cntr[NUMEPS]; -static struct fi_cntr_attr cntr_attr = { - .events = FI_CNTR_EVENTS_COMP, - .flags = 0 -}; -static struct 
fi_tx_attr tx_attr; -static struct fi_rx_attr rx_attr; - -static uint64_t sends[NUMEPS] = {0}, recvs[NUMEPS] = {0}, - send_errs[NUMEPS] = {0}, recv_errs[NUMEPS] = {0}; - -void sep_setup_common(int av_type, uint32_t version, int mr_mode) -{ - int ret, i, j; - struct fi_av_attr av_attr = {0}; - size_t addrlen = 0; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - hints->ep_attr->type = FI_EP_RDM; - hints->caps = FI_RMA_EVENT | FI_ATOMIC | FI_RMA | FI_MSG | - FI_NAMED_RX_CTX | FI_TAGGED; - hints->mode = FI_LOCAL_MR; - hints->domain_attr->cq_data_size = NUMEPS * 2; - hints->domain_attr->data_progress = FI_PROGRESS_AUTO; - hints->domain_attr->mr_mode = mr_mode; - hints->fabric_attr->prov_name = strdup("gni"); - hints->ep_attr->tx_ctx_cnt = ctx_cnt; - hints->ep_attr->rx_ctx_cnt = ctx_cnt; - - for (i = 0; i < NUMEPS; i++) { - ret = fi_getinfo(version, NULL, 0, 0, hints, &fi[i]); - cr_assert(!ret, "fi_getinfo"); - - tx_cq[i] = calloc(ctx_cnt, sizeof(*tx_cq)); - rx_cq[i] = calloc(ctx_cnt, sizeof(*rx_cq)); - tx_ep[i] = calloc(ctx_cnt, sizeof(*tx_ep)); - rx_ep[i] = calloc(ctx_cnt, sizeof(*rx_ep)); - if (!tx_cq[i] || !tx_cq[i] || - !tx_ep[i] || !rx_ep[i]) { - cr_assert(0, "calloc"); - } - } - - ctx_cnt = MIN(ctx_cnt, fi[0]->domain_attr->rx_ctx_cnt); - ctx_cnt = MIN(ctx_cnt, fi[0]->domain_attr->tx_ctx_cnt); - cr_assert(ctx_cnt, "ctx_cnt is 0"); - - ret = fi_fabric(fi[0]->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - rx_ctx_bits = 0; - while (ctx_cnt >> ++rx_ctx_bits); - av_attr.rx_ctx_bits = rx_ctx_bits; - av_attr.type = av_type; - av_attr.count = NUMEPS; - - cq_attr.format = FI_CQ_FORMAT_TAGGED; - cq_attr.size = 1024; - cq_attr.wait_obj = FI_WAIT_NONE; - - rx_addr = calloc(ctx_cnt, sizeof(*rx_addr)); - - target_base = calloc(GNIT_ALIGN_LEN(BUF_SZ), 1); - source_base = calloc(GNIT_ALIGN_LEN(BUF_SZ), 1); - - iov_src_buf_base = malloc(GNIT_ALIGN_LEN(BUF_SZ) * IOV_CNT); - iov_dest_buf_base = malloc(GNIT_ALIGN_LEN(BUF_SZ) * IOV_CNT); - src_iov = 
malloc(sizeof(struct iovec) * IOV_CNT); - dest_iov = malloc(sizeof(struct iovec) * IOV_CNT); - - if (!rx_addr || !target_base || !source_base || - !iov_src_buf_base || !iov_dest_buf_base || - !src_iov || !dest_iov) { - cr_assert(0, "allocation"); - } - - target = GNIT_ALIGN_BUFFER(char *, target_base); - source = GNIT_ALIGN_BUFFER(char *, source_base); - iov_src_buf = GNIT_ALIGN_BUFFER(char *, iov_src_buf_base); - iov_dest_buf = GNIT_ALIGN_BUFFER(char *, iov_dest_buf_base); - - for (i = 0; i < IOV_CNT; i++) { - src_iov[i].iov_base = malloc(BUF_SZ); - assert(src_iov[i].iov_base != NULL); - - dest_iov[i].iov_base = malloc(BUF_SZ * 3); - assert(dest_iov[i].iov_base != NULL); - } - - for (i = 0; i < NUMEPS; i++) { - fi[i]->ep_attr->tx_ctx_cnt = ctx_cnt; - fi[i]->ep_attr->rx_ctx_cnt = ctx_cnt; - - ret = fi_domain(fab, fi[i], &dom[i], NULL); - cr_assert(!ret, "fi_domain"); - - ret = fi_scalable_ep(dom[i], fi[i], &sep[i], NULL); - cr_assert(!ret, "fi_scalable_ep"); - - ret = fi_av_open(dom[i], &av_attr, &av[i], NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_cntr_open(dom[i], &cntr_attr, &send_cntr[i], 0); - cr_assert(!ret, "fi_cntr_open"); - - ret = fi_cntr_open(dom[i], &cntr_attr, &recv_cntr[i], 0); - cr_assert(!ret, "fi_cntr_open"); - - for (j = 0; j < ctx_cnt; j++) { - ret = fi_tx_context(sep[i], j, NULL, &tx_ep[i][j], - NULL); - cr_assert(!ret, "fi_tx_context"); - - ret = fi_cq_open(dom[i], &cq_attr, &tx_cq[i][j], - NULL); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_rx_context(sep[i], j, NULL, &rx_ep[i][j], - NULL); - cr_assert(!ret, "fi_rx_context"); - - ret = fi_cq_open(dom[i], &cq_attr, &rx_cq[i][j], - NULL); - cr_assert(!ret, "fi_cq_open"); - } - - ret = fi_scalable_ep_bind(sep[i], &av[i]->fid, 0); - cr_assert(!ret, "fi_scalable_ep_bind"); - - for (j = 0; j < ctx_cnt; j++) { - ret = fi_ep_bind(tx_ep[i][j], &tx_cq[i][j]->fid, - FI_TRANSMIT); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(tx_ep[i][j], &send_cntr[i]->fid, - FI_SEND | FI_WRITE); - 
cr_assert(!ret, "fi_ep_bind"); - - ret = fi_enable(tx_ep[i][j]); - cr_assert(!ret, "fi_enable"); - - ret = fi_ep_bind(rx_ep[i][j], &rx_cq[i][j]->fid, - FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(rx_ep[i][j], &recv_cntr[i]->fid, - FI_RECV | FI_READ); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_enable(rx_ep[i][j]); - cr_assert(!ret, "fi_enable"); - - } - } - - for (i = 0; i < NUMEPS; i++) { - ret = fi_enable(sep[i]); - cr_assert(!ret, "fi_enable"); - - ret = fi_getname(&sep[i]->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - ep_name[i] = malloc(addrlen); - cr_assert(ep_name[i] != NULL); - - ret = fi_getname(&sep[i]->fid, ep_name[i], &addrlen); - cr_assert(ret == FI_SUCCESS); - - ret = fi_mr_reg(dom[i], - target, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - (USING_SCALABLE(fi[i]) ? (i * 4) : 0), - 0, - &rem_mr[i], - &target); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom[i], - source, - BUF_SZ, - FI_REMOTE_WRITE, - 0, - (USING_SCALABLE(fi[i]) ? (i * 4) + 1 : 0), - 0, - &loc_mr[i], - &source); - cr_assert_eq(ret, 0); - - if (USING_SCALABLE(fi[i])) { - MR_ENABLE(rem_mr[i], target, BUF_SZ); - MR_ENABLE(loc_mr[i], source, BUF_SZ); - } - - mr_key[i] = fi_mr_key(rem_mr[i]); - - ret = fi_mr_reg(dom[i], - iov_dest_buf, - IOV_CNT * BUF_SZ, - FI_REMOTE_WRITE, - 0, - (USING_SCALABLE(fi[i]) ? (i * 4) + 2 : 0), - 0, - iov_dest_buf_mr + i, - &iov_dest_buf); - cr_assert_eq(ret, 0); - - ret = fi_mr_reg(dom[i], - iov_src_buf, - IOV_CNT * BUF_SZ, - FI_REMOTE_WRITE, - 0, - (USING_SCALABLE(fi[i]) ? 
(i * 4) + 3 : 0), - 0, - iov_src_buf_mr + i, - &iov_src_buf); - cr_assert_eq(ret, 0); - - if (USING_SCALABLE(fi[i])) { - MR_ENABLE(iov_dest_buf_mr[i], - iov_dest_buf, - IOV_CNT * BUF_SZ); - MR_ENABLE(iov_src_buf_mr[i], - iov_src_buf, - IOV_CNT * BUF_SZ); - } - } - - for (i = 0; i < NUMEPS; i++) { - for (j = 0; j < NUMEPS; j++) { - ret = fi_av_insert(av[i], ep_name[j], 1, &gni_addr[j], - 0, NULL); - cr_assert(ret == 1); - } - } - - for (i = 0; i < ctx_cnt; i++) { - rx_addr[i] = fi_rx_addr(gni_addr[1], i, rx_ctx_bits); - } -} - -void sep_basic_setup_map(void) -{ - sep_setup_common(FI_AV_MAP, fi_version(), GNIX_MR_BASIC); -} - -void sep_scalable_setup_map(void) -{ - sep_setup_common(FI_AV_MAP, fi_version(), GNIX_MR_SCALABLE); -} - -void sep_default_setup_map(void) -{ - sep_setup_common(FI_AV_MAP, fi_version(), GNIX_DEFAULT_MR_MODE); -} - -void sep_basic_setup_table(void) -{ - sep_setup_common(FI_AV_TABLE, fi_version(), GNIX_MR_BASIC); -} - -void sep_scalable_setup_table(void) -{ - sep_setup_common(FI_AV_TABLE, fi_version(), GNIX_MR_SCALABLE); -} - -void sep_default_setup_table(void) -{ - sep_setup_common(FI_AV_TABLE, fi_version(), GNIX_DEFAULT_MR_MODE); -} - -static void sep_teardown(void) -{ - int ret, i, j; - - for (i = 0; i < NUMEPS; i++) { - fi_close(&recv_cntr[i]->fid); - fi_close(&send_cntr[i]->fid); - - for (j = 0; j < ctx_cnt; j++) { - ret = fi_close(&tx_ep[i][j]->fid); - cr_assert(!ret, "failure closing tx_ep."); - - ret = fi_close(&rx_ep[i][j]->fid); - cr_assert(!ret, "failure closing rx_ep."); - - ret = fi_close(&tx_cq[i][j]->fid); - cr_assert(!ret, "failure closing tx cq."); - - ret = fi_close(&rx_cq[i][j]->fid); - cr_assert(!ret, "failure closing rx cq."); - } - - ret = fi_close(&sep[i]->fid); - cr_assert(!ret, "failure in closing ep."); - - ret = fi_close(&av[i]->fid); - cr_assert(!ret, "failure in closing av."); - - fi_close(&loc_mr[i]->fid); - fi_close(&rem_mr[i]->fid); - - ret = fi_close(&dom[i]->fid); - cr_assert(!ret, "failure in closing domain."); 
- - free(tx_ep[i]); - free(rx_ep[i]); - free(ep_name[i]); - fi_freeinfo(fi[i]); - } - - for (i = 0; i < IOV_CNT; i++) { - free(src_iov[i].iov_base); - free(dest_iov[i].iov_base); - } - - free(src_iov); - free(dest_iov); - free(iov_src_buf_base); - free(iov_dest_buf_base); - - fi_freeinfo(hints); - free(target_base); - free(source_base); - - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); -} - -void sep_setup_context(void) -{ - int ret; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - hints->ep_attr->type = FI_EP_RDM; - hints->caps = FI_ATOMIC | FI_RMA | FI_MSG | FI_NAMED_RX_CTX | FI_TAGGED; - hints->mode = FI_LOCAL_MR; - hints->domain_attr->cq_data_size = NUMEPS * 2; - hints->domain_attr->data_progress = FI_PROGRESS_AUTO; - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->fabric_attr->prov_name = strdup("gni"); - - hints->ep_attr->tx_ctx_cnt = 0; - hints->ep_attr->rx_ctx_cnt = 0; - - ret = fi_getinfo(fi_version(), NULL, 0, 0, hints, &fi[0]); - cr_assert(!ret, "fi_getinfo"); - cr_assert_eq(fi[0]->ep_attr->tx_ctx_cnt, 1, "incorrect tx_ctx_cnt"); - cr_assert_eq(fi[0]->ep_attr->rx_ctx_cnt, 1, "incorrect rx_ctx_cnt"); - - hints->ep_attr->tx_ctx_cnt = ctx_cnt; - hints->ep_attr->rx_ctx_cnt = ctx_cnt; - - ret = fi_getinfo(fi_version(), NULL, 0, 0, hints, &fi[0]); - cr_assert(!ret, "fi_getinfo"); - - tx_ep[0] = calloc(ctx_cnt, sizeof(*tx_ep)); - rx_ep[0] = calloc(ctx_cnt, sizeof(*rx_ep)); - if (!tx_ep[0] || !rx_ep[0]) { - cr_assert(0, "calloc"); - } - - ctx_cnt = MIN(ctx_cnt, fi[0]->domain_attr->rx_ctx_cnt); - ctx_cnt = MIN(ctx_cnt, fi[0]->domain_attr->tx_ctx_cnt); - cr_assert(ctx_cnt, "ctx_cnt is 0"); - - ret = fi_fabric(fi[0]->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - fi[0]->ep_attr->tx_ctx_cnt = ctx_cnt; - fi[0]->ep_attr->rx_ctx_cnt = ctx_cnt; - - ret = fi_domain(fab, fi[0], &dom[0], NULL); - cr_assert(!ret, "fi_domain"); - - ret = fi_scalable_ep(dom[0], fi[0], &sep[0], NULL); - cr_assert(!ret, 
"fi_scalable_ep"); - - /* add bits to check failure path */ - tx_attr.mode = FI_RESTRICTED_COMP; - ret = fi_tx_context(sep[0], 0, &tx_attr, &tx_ep[0][0], - NULL); - tx_attr.mode = 0; - tx_attr.caps = FI_MULTICAST; - ret = fi_tx_context(sep[0], 0, &tx_attr, &tx_ep[0][0], - NULL); - cr_assert(-FI_EINVAL, "fi_tx_context"); - - rx_attr.caps = FI_MULTICAST; - ret = fi_rx_context(sep[0], 0, &rx_attr, &rx_ep[0][0], - NULL); - cr_assert(-FI_EINVAL, "fi_rx_context"); - - rx_attr.caps = 0; - rx_attr.mode = FI_RESTRICTED_COMP; - ret = fi_rx_context(sep[0], 0, &rx_attr, &rx_ep[0][0], - NULL); - cr_assert(-FI_EINVAL, "fi_rx_context"); -} - -static void sep_teardown_context(void) -{ - int ret; - - ret = fi_close(&sep[0]->fid); - cr_assert(!ret, "failure in closing ep."); - - ret = fi_close(&dom[0]->fid); - cr_assert(!ret, "failure in closing domain."); - - free(tx_ep[0]); - free(rx_ep[0]); - free(ep_name[0]); - fi_freeinfo(fi[0]); - - fi_freeinfo(hints); - - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); -} - -static void -sep_init_data(char *buf, int len, char seed) -{ - int i; - - for (i = 0; i < len; i++) - buf[i] = seed++; -} - -static int -sep_check_data(char *buf1, char *buf2, int len) -{ - int i; - - for (i = 0; i < len; i++) { - if (buf1[i] != buf2[i]) { - printf("data mismatch, elem: %d, exp: %hhx, act: %hhx\n" - , i, buf1[i], buf2[i]); - return 0; - } - } - - return 1; -} - -static void -wait_for_cqs(struct fid_cq *scq, struct fid_cq *dcq, - struct fi_cq_tagged_entry *scqe, - struct fi_cq_tagged_entry *dcqe) -{ - int ret; - int s_done = 0, d_done = 0; - - do { - ret = fi_cq_read(scq, scqe, 1); - if (ret == 1) { - s_done = 1; - } - - ret = fi_cq_read(dcq, dcqe, 1); - if (ret == 1) { - d_done = 1; - } - } while (!(s_done && d_done)); -} - -static void -xfer_each_size(void (*xfer)(int index, int len), int index, int slen, int elen) -{ - int i; - - for (i = slen; i <= elen; i *= 2) { - xfer(index, i); - } -} - -static void -sep_check_cqe(struct 
fi_cq_tagged_entry *cqe, void *ctx, - uint64_t flags, void *addr, size_t len, - uint64_t data, bool buf_is_non_null, uint64_t tag, - struct fid_ep *fid_ep) -{ - struct gnix_fid_ep *gnix_ep = get_gnix_ep(fid_ep); - cr_assert(cqe->op_context == ctx, "CQE Context mismatch"); - cr_assert(cqe->flags == flags, - "CQE flags mismatch cqe flags:0x%lx, flags:0x%lx", cqe->flags, - flags); - - if (flags & FI_RECV) { - cr_assert(cqe->len == len, "CQE length mismatch"); - - if (buf_is_non_null) - cr_assert(cqe->buf == addr, "CQE address mismatch"); - else - cr_assert(cqe->buf == NULL, "CQE address mismatch"); - - - /* TODO: Remove GNIX_ALLOW_FI_REMOTE_CQ_DATA and only check flags for FI_RMA_EVENT */ - if (GNIX_ALLOW_FI_REMOTE_CQ_DATA(flags, gnix_ep->caps)) - cr_assert(cqe->data == data, "CQE data mismatch"); - } else { - cr_assert(cqe->len == 0, "Invalid CQE length"); - cr_assert(cqe->buf == 0, "Invalid CQE address"); - cr_assert(cqe->data == 0, "Invalid CQE data"); - } - - cr_assert(cqe->tag == tag, "Invalid CQE tag:0x%lx, tag:0x%lx", - cqe->tag, tag); -} - -static void -sep_check_tcqe(struct fi_cq_tagged_entry *tcqe, void *ctx, - uint64_t flags, uint64_t data, struct fid_ep *fid_ep) -{ - struct gnix_fid_ep *gnix_ep = get_gnix_ep(fid_ep); - - cr_assert(tcqe->op_context == ctx, "CQE Context mismatch"); - cr_assert(tcqe->flags == flags, "CQE flags mismatch"); - - /* TODO: Remove GNIX_ALLOW_FI_REMOTE_CQ_DATA and only check flags for FI_RMA_EVENT */ - if (GNIX_ALLOW_FI_REMOTE_CQ_DATA(flags, gnix_ep->caps)) { - cr_assert(tcqe->data == data, "CQE data invalid"); - } else { - cr_assert(tcqe->data == 0, "CQE data invalid"); - } - - cr_assert(tcqe->len == 0, "CQE length mismatch"); - cr_assert(tcqe->buf == 0, "CQE address mismatch"); - cr_assert(tcqe->tag == 0, "CQE tag invalid"); -} - -static void -sep_check_cntrs(uint64_t s[], uint64_t r[], uint64_t s_e[], - uint64_t r_e[], bool need_to_spin) -{ - int i = 0; - - for (; i < NUMEPS; i++) { - sends[i] += s[i]; - recvs[i] += r[i]; - 
send_errs[i] += s_e[i]; - recv_errs[i] += r_e[i]; - - if (need_to_spin) { - while (fi_cntr_read(send_cntr[i]) != sends[i]) { - pthread_yield(); - } - } - - cr_assert(fi_cntr_read(send_cntr[i]) == sends[i], - "Bad send count i:%d send_cntr:%ld sends:%ld", - i, fi_cntr_read(send_cntr[i]), sends[i]); - - if (need_to_spin) { - while (fi_cntr_read(recv_cntr[i]) != recvs[i]) { - pthread_yield(); - } - } - - cr_assert(fi_cntr_read(recv_cntr[i]) == recvs[i], - "Bad recv count"); - - if (need_to_spin) { - while (fi_cntr_readerr(send_cntr[i]) != send_errs[i]) { - pthread_yield(); - } - } - - cr_assert(fi_cntr_readerr(send_cntr[i]) == send_errs[i], - "Bad send err count"); - - if (need_to_spin) { - while (fi_cntr_readerr(recv_cntr[i]) != recv_errs[i]) { - pthread_yield(); - } - } - - cr_assert(fi_cntr_readerr(recv_cntr[i]) == recv_errs[i], - "Bad recv err count"); - } -} - -static int -sep_check_iov_data(struct iovec *iov_buf, char *buf, size_t cnt, size_t buf_len) -{ - size_t i, j, cum_len = 0, len, iov_idx; - - for (i = 0; i < cnt; i++) { - cum_len += iov_buf[i].iov_len; - } - - len = MIN(cum_len, buf_len); - - cum_len = iov_buf[0].iov_len; - - for (i = j = iov_idx = 0; j < len; j++, iov_idx++) { - - if (j == cum_len) { - i++, iov_idx = 0; - cum_len += iov_buf[i].iov_len; - - if (i >= cnt) - break; - } - - if (((char *)iov_buf[i].iov_base)[iov_idx] != buf[j]) { - printf("data mismatch, iov_index: %lu, elem: %lu, " - "iov_buf_len: %lu, " - " iov_buf: %hhx, buf: %hhx\n", i, j, - iov_buf[i].iov_len, - ((char *)iov_buf[i].iov_base)[iov_idx], - buf[j]); - return 0; - } - } - - return 1; -} - -static int -check_iov_data(struct iovec *ib, struct iovec *ob, size_t cnt) -{ - size_t i; - - for (i = 0; i < cnt; i++) { - if (memcmp(ib[i].iov_base, ob[i].iov_base, ib[i].iov_len)) { - printf("data mismatch, ib:%x ob:%x\n", - *(char *)ib[i].iov_base, - *(char *)ob[i].iov_base); - return 0; - } - } - - return 1; -} - 
-/******************************************************************************* - * Test MSG functions - ******************************************************************************/ - -static void sep_send_recv(int index, int len) -{ - ssize_t ret; - struct fi_cq_tagged_entry cqe; - - sep_init_data(source, len, 0xab + index); - sep_init_data(target, len, 0); - - ret = fi_send(tx_ep[0][index], source, len, loc_mr[0], - rx_addr[index], target); - cr_assert(ret == 0, "fi_send failed err:%ld", ret); - - ret = fi_recv(rx_ep[1][index], target, len, rem_mr[0], - FI_ADDR_UNSPEC, source); - cr_assert(ret == 0, "fi_recv failed err:%ld", ret); - - wait_for_cqs(tx_cq[0][index], rx_cq[1][index], &cqe, &cqe); - - ret = sep_check_data(source, target, 8); - cr_assert(ret == 1, "Data check failed"); -} - -static void sep_tsend(int index, int len) -{ - ssize_t ret; - struct fi_cq_tagged_entry cqe; - - sep_init_data(source, len, 0xab + index); - sep_init_data(target, len, 0); - - ret = fi_tsend(tx_ep[0][index], source, len, loc_mr[0], - rx_addr[index], len, target); - cr_assert(ret == 0, "fi_tsend failed err:%ld", ret); - - ret = fi_trecv(rx_ep[1][index], target, len, rem_mr[0], - FI_ADDR_UNSPEC, len, 0, source); - cr_assert(ret == 0, "fi_trecv failed err:%ld", ret); - - wait_for_cqs(tx_cq[0][index], rx_cq[1][index], &cqe, &cqe); - - ret = sep_check_data(source, target, 8); - cr_assert(ret == 1, "Data check failed"); -} - -static void sep_recvmsg(int index, int len) -{ - ssize_t ret; - struct iovec iov; - struct fi_msg msg; - struct fi_cq_tagged_entry cqe; - - sep_init_data(source, len, 0xab + index); - sep_init_data(target, len, 0); - - ret = fi_send(tx_ep[0][index], source, len, loc_mr[0], - rx_addr[index], target); - cr_assert(ret == 0, "fi_send failed err:%ld", ret); - - iov.iov_base = target; - iov.iov_len = len; - - msg.msg_iov = &iov; - msg.desc = (void **)rem_mr; - msg.iov_count = 1; - msg.addr = FI_ADDR_UNSPEC; - msg.context = source; - msg.data = (uint64_t)source; - - 
ret = fi_recvmsg(rx_ep[1][index], &msg, 0); - cr_assert(ret == 0, "fi_recvmsg failed err:%ld", ret); - - wait_for_cqs(tx_cq[0][index], rx_cq[1][index], &cqe, &cqe); - - ret = sep_check_data(source, target, 8); - cr_assert(ret == 1, "Data check failed"); -} - -static void sep_trecvmsg(int index, int len) -{ - ssize_t ret; - struct iovec iov; - struct fi_msg_tagged tmsg; - struct fi_cq_tagged_entry cqe; - - sep_init_data(source, len, 0xab + index); - sep_init_data(target, len, 0); - - ret = fi_tsend(tx_ep[0][index], source, len, loc_mr[0], - rx_addr[index], len, target); - cr_assert(ret == 0, "fi_send failed err:%ld", ret); - - iov.iov_base = target; - iov.iov_len = len; - - tmsg.msg_iov = &iov; - tmsg.desc = (void **)rem_mr; - tmsg.iov_count = 1; - tmsg.addr = FI_ADDR_UNSPEC; - tmsg.context = source; - tmsg.data = (uint64_t)source; - tmsg.tag = (uint64_t)len; - tmsg.ignore = 0; - - ret = fi_trecvmsg(rx_ep[1][index], &tmsg, 0); - cr_assert(ret == 0, "fi_recvmsg failed err:%ld", ret); - - wait_for_cqs(tx_cq[0][index], rx_cq[1][index], &cqe, &cqe); - - ret = sep_check_data(source, target, 8); - cr_assert(ret == 1, "Data check failed"); -} - -static void sep_sendv(int index, int len) -{ - int i, iov_cnt; - struct fi_cq_tagged_entry s_cqe, d_cqe; - ssize_t sz; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - for (iov_cnt = 1; iov_cnt <= IOV_CNT; iov_cnt++) { - for (i = 0; i < iov_cnt; i++) { - sep_init_data(src_iov[i].iov_base, len, 0x25); - src_iov[i].iov_len = len; - } - sep_init_data(iov_dest_buf, len * iov_cnt, 0); - - sz = fi_sendv(tx_ep[0][index], src_iov, NULL, iov_cnt, - rx_addr[index], iov_dest_buf); - cr_assert_eq(sz, 0); - - sz = fi_recv(rx_ep[1][index], iov_dest_buf, len * iov_cnt, - iov_dest_buf_mr[0], FI_ADDR_UNSPEC, src_iov); - cr_assert_eq(sz, 0); - - /* reset cqe */ - s_cqe.op_context = s_cqe.buf = (void *) -1; - s_cqe.flags = s_cqe.len = s_cqe.data = s_cqe.tag = UINT_MAX; - d_cqe.op_context = d_cqe.buf = 
(void *) -1; - d_cqe.flags = d_cqe.len = d_cqe.data = d_cqe.tag = UINT_MAX; - - wait_for_cqs(tx_cq[0][index], rx_cq[1][index], &s_cqe, &d_cqe); - sep_check_cqe(&s_cqe, iov_dest_buf, (FI_MSG|FI_SEND), - 0, 0, 0, false, 0, tx_ep[0][index]); - sep_check_cqe(&d_cqe, src_iov, (FI_MSG|FI_RECV), iov_dest_buf, - len * iov_cnt, 0, false, 0, rx_ep[1][index]); - - s[0] = 1; r[1] = 1; - sep_check_cntrs(s, r, s_e, r_e, false); - cr_assert(sep_check_iov_data(src_iov, iov_dest_buf, iov_cnt, - len * iov_cnt), "Data mismatch"); - } -} - -static void sep_tsendv(int index, int len) -{ - int i, iov_cnt; - struct fi_cq_tagged_entry s_cqe, d_cqe; - ssize_t sz; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - for (iov_cnt = 1; iov_cnt <= IOV_CNT; iov_cnt++) { - for (i = 0; i < iov_cnt; i++) { - sep_init_data(src_iov[i].iov_base, len, 0x25); - src_iov[i].iov_len = len; - } - sep_init_data(iov_dest_buf, len * iov_cnt, 0); - - sz = fi_tsendv(tx_ep[0][index], src_iov, NULL, iov_cnt, - rx_addr[index], len * iov_cnt, iov_dest_buf); - cr_assert_eq(sz, 0); - - sz = fi_trecv(rx_ep[1][index], iov_dest_buf, len * iov_cnt, - iov_dest_buf_mr[0], FI_ADDR_UNSPEC, len * iov_cnt, - 0, src_iov); - cr_assert_eq(sz, 0); - - /* reset cqe */ - s_cqe.op_context = s_cqe.buf = (void *) -1; - s_cqe.flags = s_cqe.len = s_cqe.data = s_cqe.tag = UINT_MAX; - d_cqe.op_context = d_cqe.buf = (void *) -1; - d_cqe.flags = d_cqe.len = d_cqe.data = d_cqe.tag = UINT_MAX; - - wait_for_cqs(tx_cq[0][index], rx_cq[1][index], &s_cqe, &d_cqe); - sep_check_cqe(&s_cqe, iov_dest_buf, (FI_MSG|FI_SEND|FI_TAGGED), - 0, 0, 0, false, 0, tx_ep[0][index]); - sep_check_cqe(&d_cqe, src_iov, (FI_MSG|FI_RECV|FI_TAGGED), - iov_dest_buf, len * iov_cnt, 0, false, - len * iov_cnt, rx_ep[1][index]); - - s[0] = 1; r[1] = 1; - sep_check_cntrs(s, r, s_e, r_e, false); - cr_assert(sep_check_iov_data(src_iov, iov_dest_buf, iov_cnt, - len * iov_cnt), "Data mismatch"); - } -} - -static void sep_recvv(int 
index, int len) -{ - int i, iov_cnt; - ssize_t sz; - struct fi_cq_tagged_entry s_cqe, d_cqe; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - for (iov_cnt = 1; iov_cnt <= IOV_CNT; iov_cnt++) { - for (i = 0; i < iov_cnt; i++) { - sep_init_data(src_iov[i].iov_base, len, 0x25 + index); - src_iov[i].iov_len = len; - } - - for (i = 0; i < iov_cnt; i++) { - sep_init_data(dest_iov[i].iov_base, len, 0); - dest_iov[i].iov_len = len; - } - - sz = fi_sendv(tx_ep[0][index], src_iov, NULL, iov_cnt, - rx_addr[index], iov_dest_buf); - cr_assert_eq(sz, 0); - - sz = fi_recvv(rx_ep[1][index], dest_iov, NULL, iov_cnt, - FI_ADDR_UNSPEC, iov_src_buf); - cr_assert_eq(sz, 0); - - /* reset cqe */ - s_cqe.op_context = s_cqe.buf = (void *) -1; - s_cqe.flags = s_cqe.len = s_cqe.data = s_cqe.tag = UINT_MAX; - d_cqe.op_context = d_cqe.buf = (void *) -1; - d_cqe.flags = d_cqe.len = d_cqe.data = d_cqe.tag = UINT_MAX; - - wait_for_cqs(tx_cq[0][index], rx_cq[1][index], &s_cqe, &d_cqe); - sep_check_cqe(&s_cqe, iov_dest_buf, (FI_MSG|FI_SEND), - 0, 0, 0, false, 0, tx_ep[0][index]); - sep_check_cqe(&d_cqe, iov_src_buf, (FI_MSG|FI_RECV), - iov_dest_buf, len * iov_cnt, 0, false, 0, - rx_ep[1][index]); - - s[0] = 1; r[1] = 1; - sep_check_cntrs(s, r, s_e, r_e, false); - cr_assert(check_iov_data(src_iov, dest_iov, iov_cnt), - "Data mismatch"); - } -} - -static void _sendmsg(int index, int len, bool tagged) -{ - ssize_t sz; - struct fi_cq_tagged_entry s_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry d_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_msg msg; - struct fi_msg_tagged tmsg; - struct iovec iov; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - uint64_t sflags = FI_MSG|FI_SEND|(tagged ? FI_TAGGED : 0); - uint64_t dflags = FI_MSG|FI_RECV|(tagged ? 
FI_TAGGED : 0); - - iov.iov_base = source; - iov.iov_len = len; - - msg.msg_iov = &iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = rx_addr[index]; - msg.context = target; - msg.data = (uint64_t)target; - - tmsg.msg_iov = &iov; - tmsg.desc = (void **)loc_mr; - tmsg.iov_count = 1; - tmsg.addr = rx_addr[index]; - tmsg.context = target; - tmsg.data = (uint64_t)target; - tmsg.tag = (uint64_t)len; - tmsg.ignore = 0; - - sep_init_data(source, len, 0xd0 + index); - sep_init_data(target, len, 0); - - if (tagged) { - sz = fi_tsendmsg(tx_ep[0][index], &tmsg, 0); - } else { - sz = fi_sendmsg(tx_ep[0][index], &msg, 0); - } - cr_assert_eq(sz, 0); - - if (tagged) { - sz = fi_trecv(rx_ep[1][index], target, len, rem_mr[0], - FI_ADDR_UNSPEC, len, 0, source); - } else { - sz = fi_recv(rx_ep[1][index], target, len, rem_mr[0], - FI_ADDR_UNSPEC, source); - } - cr_assert_eq(sz, 0); - - wait_for_cqs(tx_cq[0][index], rx_cq[1][index], &s_cqe, &d_cqe); - sep_check_cqe(&s_cqe, target, sflags, 0, 0, 0, false, 0, - tx_ep[0][index]); - sep_check_cqe(&d_cqe, source, dflags, target, len, 0, - false, tagged ? 
len : 0, rx_ep[1][index]); - - s[0] = 1; r[1] = 1; - sep_check_cntrs(s, r, s_e, r_e, false); - cr_assert(sep_check_data(source, target, len), "Data mismatch"); -} - -static void sep_sendmsg(int index, int len) -{ - _sendmsg(index, len, false); -} - -static void sep_tsendmsg(int index, int len) -{ - _sendmsg(index, len, true); -} - -void sep_clear_counters(void) -{ - int i, ret; - - for (i = 0; i < NUMEPS; i++) { - ret = fi_cntr_set(send_cntr[i], 0); - cr_assert(!ret, "fi_cntr_set"); - ret = fi_cntr_set(recv_cntr[i], 0); - cr_assert(!ret, "fi_cntr_set"); - sends[i] = 0; - recvs[i] = 0; - send_errs[i] = 0; - recv_errs[i] = 0; - } -} - -void sep_sendmsgdata(int index, int len) -{ - ssize_t sz; - struct fi_cq_tagged_entry s_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry d_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_msg msg; - struct iovec iov; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - iov.iov_base = source; - iov.iov_len = len; - - msg.msg_iov = &iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = rx_addr[index]; - msg.context = target; - msg.data = (uint64_t)source; - - sep_init_data(source, len, 0xe0 + index); - sep_init_data(target, len, 0); - - sz = fi_sendmsg(tx_ep[0][index], &msg, FI_REMOTE_CQ_DATA); - cr_assert_eq(sz, 0); - - sz = fi_recv(rx_ep[1][index], target, len, rem_mr[0], - FI_ADDR_UNSPEC, source); - cr_assert_eq(sz, 0); - - wait_for_cqs(tx_cq[0][index], rx_cq[1][index], &s_cqe, &d_cqe); - sep_check_cqe(&s_cqe, target, (FI_MSG|FI_SEND), 0, 0, 0, false, 0, - tx_ep[0][index]); - sep_check_cqe(&d_cqe, source, (FI_MSG|FI_RECV|FI_REMOTE_CQ_DATA), - target, len, (uint64_t)source, false, 0, rx_ep[1][index]); - - s[0] = 1; r[1] = 1; - sep_check_cntrs(s, r, s_e, r_e, false); - cr_assert(sep_check_data(source, target, len), "Data mismatch"); -} - -#define INJECT_SIZE 64 -void sep_inject(int 
index, int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - sep_init_data(source, len, 0x13 + index); - sep_init_data(target, len, 0); - - sz = fi_inject(tx_ep[0][index], source, len, rx_addr[index]); - cr_assert_eq(sz, 0); - - sz = fi_recv(rx_ep[1][index], target, len, rem_mr[1], - FI_ADDR_UNSPEC, source); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(rx_cq[1][index], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - /* Manually progress connection to domain 1 */ - fi_cq_read(tx_cq[0][index], &cqe, 1); - } - - cr_assert_eq(ret, 1); - sep_check_cqe(&cqe, source, (FI_MSG|FI_RECV), - target, len, (uint64_t)source, false, 0, - rx_ep[1][index]); - - /* do progress until send counter is updated */ - while (fi_cntr_read(send_cntr[0]) < 1) { - pthread_yield(); - } - s[0] = 1; r[1] = 1; - sep_check_cntrs(s, r, s_e, r_e, true); - - /* make sure inject does not generate a send competion */ - cr_assert_eq(fi_cq_read(tx_cq[0][index], &cqe, 1), -FI_EAGAIN); - cr_assert(sep_check_data(source, target, len), "Data mismatch"); -} - -void sep_tinject(int index, int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - sep_init_data(source, len, 0x13 + index); - sep_init_data(target, len, 0); - - sz = fi_tinject(tx_ep[0][index], source, len, rx_addr[index], len); - cr_assert_eq(sz, 0); - - sz = fi_trecv(rx_ep[1][index], target, len, rem_mr[1], - FI_ADDR_UNSPEC, len, 0, source); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(rx_cq[1][index], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - /* Manually progress connection to domain 1 */ - fi_cq_read(tx_cq[0][index], &cqe, 1); - } - - cr_assert_eq(ret, 1); - 
sep_check_cqe(&cqe, source, (FI_MSG|FI_RECV|FI_TAGGED), - target, len, (uint64_t)source, false, len, - rx_ep[1][index]); - - /* do progress until send counter is updated */ - while (fi_cntr_read(send_cntr[0]) < 1) { - pthread_yield(); - } - s[0] = 1; r[1] = 1; - sep_check_cntrs(s, r, s_e, r_e, true); - - /* make sure inject does not generate a send competion */ - cr_assert_eq(fi_cq_read(tx_cq[0][index], &cqe, 1), -FI_EAGAIN); - cr_assert(sep_check_data(source, target, len), "Data mismatch"); -} - -void sep_senddata(int index, int len) -{ - ssize_t sz; - struct fi_cq_tagged_entry s_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry d_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - sep_init_data(source, len, 0xab + index); - sep_init_data(target, len, 0); - - sz = fi_senddata(tx_ep[0][index], source, len, loc_mr[0], - (uint64_t)source, rx_addr[index], target); - cr_assert_eq(sz, 0); - - sz = fi_recv(rx_ep[1][index], target, len, rem_mr[0], - FI_ADDR_UNSPEC, source); - cr_assert_eq(sz, 0); - - wait_for_cqs(tx_cq[0][index], rx_cq[1][index], &s_cqe, &d_cqe); - sep_check_cqe(&s_cqe, target, (FI_MSG|FI_SEND), 0, 0, 0, false, 0, - tx_ep[0][index]); - sep_check_cqe(&d_cqe, source, (FI_MSG|FI_RECV|FI_REMOTE_CQ_DATA), - target, len, (uint64_t)source, false, 0, - rx_ep[1][index]); - - s[0] = 1; r[1] = 1; - sep_check_cntrs(s, r, s_e, r_e, false); - cr_assert(sep_check_data(source, target, len), "Data mismatch"); -} - -void sep_tsenddata(int index, int len) -{ - ssize_t sz; - struct fi_cq_tagged_entry s_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry d_cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - 
sep_init_data(source, len, 0xab + index); - sep_init_data(target, len, 0); - - sz = fi_tsenddata(tx_ep[0][index], source, len, loc_mr[0], - (uint64_t)source, rx_addr[index], len, target); - cr_assert_eq(sz, 0); - - sz = fi_trecv(rx_ep[1][index], target, len, rem_mr[0], - FI_ADDR_UNSPEC, len, 0, source); - cr_assert_eq(sz, 0); - - wait_for_cqs(tx_cq[0][index], rx_cq[1][index], &s_cqe, &d_cqe); - sep_check_cqe(&s_cqe, target, (FI_MSG|FI_SEND|FI_TAGGED), 0, 0, 0, - false, 0, tx_ep[0][index]); - sep_check_cqe(&d_cqe, source, - (FI_MSG|FI_RECV|FI_REMOTE_CQ_DATA|FI_TAGGED), - target, len, (uint64_t)source, false, len, - rx_ep[1][index]); - - s[0] = 1; r[1] = 1; - sep_check_cntrs(s, r, s_e, r_e, false); - cr_assert(sep_check_data(source, target, len), "Data mismatch"); -} - -void sep_injectdata(int index, int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - sep_init_data(source, len, 0x9b + index); - sep_init_data(target, len, 0); - - sz = fi_injectdata(tx_ep[0][index], source, len, (uint64_t)source, - rx_addr[index]); - cr_assert_eq(sz, 0); - - sz = fi_recv(rx_ep[1][index], target, len, rem_mr[0], - FI_ADDR_UNSPEC, source); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(rx_cq[1][index], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - /* Manually progress connection to domain 1 */ - fi_cq_read(tx_cq[0][index], &cqe, 1); - } - sep_check_cqe(&cqe, source, (FI_MSG|FI_RECV|FI_REMOTE_CQ_DATA), - target, len, (uint64_t)source, false, 0, - rx_ep[1][index]); - - /* don't progress until send counter is updated */ - while (fi_cntr_read(send_cntr[0]) < 1) { - pthread_yield(); - } - - s[0] = 1; r[1] = 1; - sep_check_cntrs(s, r, s_e, r_e, false); - - /* make sure inject does not generate a send competion */ - cr_assert_eq(fi_cq_read(tx_cq[0][index], &cqe, 1), -FI_EAGAIN); - 
cr_assert(sep_check_data(source, target, len), "Data mismatch"); -} - -void sep_tinjectdata(int index, int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - sep_init_data(source, len, 0x9b + index); - sep_init_data(target, len, 0); - - sz = fi_tinjectdata(tx_ep[0][index], source, len, (uint64_t)source, - rx_addr[index], len); - cr_assert_eq(sz, 0); - - sz = fi_trecv(rx_ep[1][index], target, len, rem_mr[0], - FI_ADDR_UNSPEC, len, 0, source); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(rx_cq[1][index], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - /* Manually progress connection to domain 1 */ - fi_cq_read(tx_cq[0][index], &cqe, 1); - } - sep_check_cqe(&cqe, source, - (FI_MSG|FI_RECV|FI_REMOTE_CQ_DATA|FI_TAGGED), - target, len, (uint64_t)source, false, len, - rx_ep[1][index]); - - /* don't progress until send counter is updated */ - while (fi_cntr_read(send_cntr[0]) < 1) { - pthread_yield(); - } - - s[0] = 1; r[1] = 1; - sep_check_cntrs(s, r, s_e, r_e, true); - - /* make sure inject does not generate a send competion */ - cr_assert_eq(fi_cq_read(tx_cq[0][index], &cqe, 1), -FI_EAGAIN); - cr_assert(sep_check_data(source, target, len), "Data mismatch"); -} - -void sep_read(int index, int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - -#define READ_CTX 0x4e3dda1aULL - sep_init_data(source, len, 0); - sep_init_data(target, len, 0xad); - - sz = fi_read(tx_ep[0][index], source, len, - loc_mr[0], rx_addr[index], - _REM_ADDR(fi[0], target, target), - mr_key[1], (void *)READ_CTX); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(tx_cq[0][index], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - 
sep_check_tcqe(&cqe, (void *)READ_CTX, FI_RMA | FI_READ, 0, - tx_ep[0][index]); - - r[0] = 1; - sep_check_cntrs(w, r, w_e, r_e, false); - cr_assert(sep_check_data(source, target, len), "Data mismatch"); -} - -void sep_readv(int index, int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct iovec iov; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - iov.iov_base = source; - iov.iov_len = len; - - sep_init_data(target, len, 0x25); - sep_init_data(source, len, 0); - sz = fi_readv(tx_ep[0][index], &iov, (void **)loc_mr, 1, - rx_addr[index], - _REM_ADDR(fi[0], target, target), mr_key[1], - target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(tx_cq[0][index], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - sep_check_tcqe(&cqe, target, FI_RMA | FI_READ, 0, tx_ep[0][index]); - - r[0] = 1; - sep_check_cntrs(w, r, w_e, r_e, false); - cr_assert(sep_check_data(source, target, len), "Data mismatch"); -} - - -void sep_readmsg(int index, int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct iovec iov; - struct fi_msg_rma msg; - struct fi_rma_iov rma_iov; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - iov.iov_base = source; - iov.iov_len = len; - - rma_iov.addr = _REM_ADDR(fi[0], target, target); - rma_iov.len = len; - rma_iov.key = mr_key[1]; - - msg.msg_iov = &iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = rx_addr[index]; - msg.rma_iov = &rma_iov; - msg.rma_iov_count = 1; - msg.context = target; - msg.data = (uint64_t)target; - - sep_init_data(target, len, 0xe0 + index); - sep_init_data(source, len, 0); - sz = fi_readmsg(tx_ep[0][index], &msg, 0); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(tx_cq[0][index], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - 
sep_check_tcqe(&cqe, target, FI_RMA | FI_READ, 0, tx_ep[0][index]); - - r[0] = 1; - sep_check_cntrs(w, r, w_e, r_e, false); - cr_assert(sep_check_data(source, target, len), "Data mismatch"); -} - -void sep_write(int index, int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - sep_init_data(source, len, 0xab); - sep_init_data(target, len, 0); - - sz = fi_write(tx_ep[0][index], source, len, loc_mr[0], rx_addr[index], - _REM_ADDR(fi[0], target, target), mr_key[1], target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(tx_cq[0][index], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - sep_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, tx_ep[0][index]); - - w[0] = 1; - sep_check_cntrs(w, r, w_e, r_e, false); - cr_assert(sep_check_data(source, target, len), "Data mismatch"); -} - -void sep_writev(int index, int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct iovec iov; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - iov.iov_base = source; - iov.iov_len = len; - - sep_init_data(source, len, 0x25 + index); - sep_init_data(target, len, 0); - - sz = fi_writev(tx_ep[0][index], &iov, (void **)loc_mr, 1, - gni_addr[1], - _REM_ADDR(fi[0], target, target), mr_key[1], - target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(tx_cq[0][index], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - sep_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, tx_ep[0][index]); - - w[0] = 1; - sep_check_cntrs(w, r, w_e, r_e, false); - cr_assert(sep_check_data(source, target, len), "Data mismatch"); -} - -void sep_writemsg(int index, int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, 
UINT_MAX }; - struct iovec iov; - struct fi_msg_rma msg; - struct fi_rma_iov rma_iov; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - iov.iov_base = source; - iov.iov_len = len; - - rma_iov.addr = _REM_ADDR(fi[0], target, target); - rma_iov.len = len; - rma_iov.key = mr_key[1]; - - msg.msg_iov = &iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = gni_addr[1]; - msg.rma_iov = &rma_iov; - msg.rma_iov_count = 1; - msg.context = target; - msg.data = (uint64_t)target; - - sep_init_data(source, len, 0xe4 + index); - sep_init_data(target, len, 0); - sz = fi_writemsg(tx_ep[0][index], &msg, 0); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(tx_cq[0][index], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - sep_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, tx_ep[0][index]); - - w[0] = 1; - sep_check_cntrs(w, r, w_e, r_e, false); - cr_assert(sep_check_data(source, target, len), "Data mismatch"); -} - -void sep_inject_write(int index, int len) -{ - ssize_t sz; - int ret, i; - struct fi_cq_tagged_entry cqe; - - sep_init_data(source, len, 0x33); - sep_init_data(target, len, 0); - sz = fi_inject_write(tx_ep[0][index], source, len, - rx_addr[index], - _REM_ADDR(fi[0], target, target), mr_key[1]); - cr_assert_eq(sz, 0, "fi_inject_write returned %ld (%s)", sz, - fi_strerror(-sz)); - - for (i = 0; i < len; i++) { - while (source[i] != target[i]) { - /* for progress */ - ret = fi_cq_read(tx_cq[0][index], &cqe, 1); - cr_assert(ret == -FI_EAGAIN || ret == -FI_EAVAIL, - "Received unexpected event\n"); - - pthread_yield(); - } - } -} - -void sep_writedata(int index, int len) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry dcqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t w[2] = {0}, r[2] = {0}, w_e[2] = {0}, r_e[2] = {0}; - - -#define WRITE_DATA 0x5123da1a145 - 
sep_init_data(source, len, 0x43 + index); - sep_init_data(target, len, 0); - sz = fi_writedata(tx_ep[0][index], source, len, loc_mr[0], WRITE_DATA, - rx_addr[index], - _REM_ADDR(fi[0], target, target), mr_key[1], - target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(tx_cq[0][index], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - sep_check_tcqe(&cqe, target, FI_RMA | FI_WRITE, 0, tx_ep[0][index]); - - w[0] = 1; - sep_check_cntrs(w, r, w_e, r_e, false); - cr_assert(sep_check_data(source, target, len), "Data mismatch"); - - while ((ret = fi_cq_read(rx_cq[1][index], &dcqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - cr_assert(ret != FI_SUCCESS, "Missing remote data"); - - sep_check_tcqe(&dcqe, NULL, - (FI_RMA | FI_REMOTE_WRITE | FI_REMOTE_CQ_DATA), - WRITE_DATA, rx_ep[1][index]); -} - -#define INJECTWRITE_DATA 0xdededadadeadbeaf -void sep_inject_writedata(int index, int len) -{ - ssize_t sz; - int ret, i; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - struct fi_cq_tagged_entry dcqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - - sep_init_data(source, len, 0x53 + index); - sep_init_data(target, len, 0); - sz = fi_inject_writedata(tx_ep[0][index], source, len, INJECTWRITE_DATA, - rx_addr[index], - _REM_ADDR(fi[0], target, target), mr_key[1]); - cr_assert_eq(sz, 0); - - for (i = 0; i < len; i++) { - while (source[i] != target[i]) { - /* for progress */ - ret = fi_cq_read(tx_cq[0][index], &cqe, 1); - cr_assert(ret == -FI_EAGAIN || ret == -FI_EAVAIL, - "Received unexpected event\n"); - - pthread_yield(); - } - } - - while ((ret = fi_cq_read(rx_cq[1][index], &dcqe, 1)) == -FI_EAGAIN) { - ret = fi_cq_read(tx_cq[0][index], &cqe, 1); /* for progress */ - pthread_yield(); - } - cr_assert(ret != FI_SUCCESS, "Missing remote data"); - - sep_check_tcqe(&dcqe, NULL, - (FI_RMA | FI_REMOTE_WRITE | FI_REMOTE_CQ_DATA), - INJECTWRITE_DATA, rx_ep[1][index]); 
-} - -#define SOURCE_DATA 0xBBBB0000CCCCULL -#define TARGET_DATA 0xAAAA0000DDDDULL -void sep_atomic(int index) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* u64 */ - *((uint64_t *)source) = SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_atomic(tx_ep[0][index], source, 1, - loc_mr[0], rx_addr[index], - _REM_ADDR(fi[0], target, target), - mr_key[1], FI_UINT64, FI_ATOMIC_WRITE, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(tx_cq[0][index], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - sep_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0, tx_ep[0][index]); - - w[0] = 1; - sep_check_cntrs(w, r, w_e, r_e, false); - ret = *((uint64_t *)target) == SOURCE_DATA; - cr_assert(ret, "Data mismatch"); - -} - -void sep_atomic_v(int index) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t min; - struct fi_ioc iov; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - iov.addr = source; - iov.count = 1; - - /* i64 */ - *((int64_t *)source) = SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - sz = fi_atomicv(tx_ep[0][index], &iov, (void **)loc_mr, 1, - rx_addr[index], - _REM_ADDR(fi[0], target, target), mr_key[1], - FI_INT64, FI_MIN, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(tx_cq[0][index], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - sep_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0, tx_ep[0][index]); - - w[0] = 1; - sep_check_cntrs(w, r, w_e, r_e, false); - min = ((int64_t)SOURCE_DATA < (int64_t)TARGET_DATA) ? 
- SOURCE_DATA : TARGET_DATA; - ret = *((int64_t *)target) == min; - cr_assert(ret, "Data mismatch"); -} - -#define U32_MASK 0xFFFFFFFFULL -void sep_atomic_msg(int index) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t min; - struct fi_msg_atomic msg; - struct fi_ioc msg_iov; - struct fi_rma_ioc rma_iov; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - msg_iov.addr = source; - msg_iov.count = 1; - msg.msg_iov = &msg_iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = gni_addr[1]; - rma_iov.addr = _REM_ADDR(fi[0], target, target); - rma_iov.count = 1; - rma_iov.key = mr_key[1]; - msg.rma_iov = &rma_iov; - msg.context = target; - msg.op = FI_MIN; - - /* i32 */ - *((int64_t *)source) = SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - msg.datatype = FI_INT32; - sz = fi_atomicmsg(tx_ep[0][index], &msg, 0); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(tx_cq[0][index], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - sep_check_tcqe(&cqe, target, FI_ATOMIC | FI_WRITE, 0, tx_ep[0][index]); - - w[0] = 1; - sep_check_cntrs(w, r, w_e, r_e, false); - min = ((int32_t)SOURCE_DATA < (int32_t)TARGET_DATA) ? 
- SOURCE_DATA : TARGET_DATA; - min = (min & U32_MASK) | (TARGET_DATA & (U32_MASK << 32)); - ret = *((int64_t *)target) == min; - cr_assert(ret, "Data mismatch"); -} - -void sep_atomic_inject(int index) -{ - int ret, loops; - ssize_t sz; - struct fi_cq_tagged_entry cqe; - uint64_t min; - - /* i64 */ - *((int64_t *)source) = SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - sz = fi_inject_atomic(tx_ep[0][index], source, 1, - gni_addr[1], - _REM_ADDR(fi[0], target, target), mr_key[1], - FI_INT64, FI_MIN); - cr_assert_eq(sz, 0); - - min = ((int64_t)SOURCE_DATA < (int64_t)TARGET_DATA) ? - SOURCE_DATA : TARGET_DATA; - loops = 0; - while (*((int64_t *)target) != min) { - ret = fi_cq_read(tx_cq[0][index], &cqe, 1); /* for progress */ - cr_assert(ret == -FI_EAGAIN, - "Received unexpected event\n"); - - pthread_yield(); - cr_assert(++loops < 10000, "Data mismatch"); - } -} - -#define FETCH_SOURCE_DATA 0xACEDACEDULL -void sep_atomic_rw(int index) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t operand = SOURCE_DATA; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* u64 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_fetch_atomic(tx_ep[0][index], &operand, 1, NULL, source, - loc_mr[0], rx_addr[index], - _REM_ADDR(fi[0], target, target), - mr_key[1], FI_UINT64, FI_SUM, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(tx_cq[0][index], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - sep_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0, tx_ep[0][index]); - - r[0] = 1; - sep_check_cntrs(w, r, w_e, r_e, false); - ret = *((uint64_t *)target) == (SOURCE_DATA + TARGET_DATA); - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); -} - -void sep_atomic_rwv(int index) -{ - int ret; - 
ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t min; - uint64_t operand = SOURCE_DATA; - struct fi_ioc iov, r_iov; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - iov.count = 1; - r_iov.count = 1; - - /* i64 */ - *((int64_t *)source) = FETCH_SOURCE_DATA; - *((int64_t *)target) = TARGET_DATA; - iov.addr = &operand; - r_iov.addr = source; - sz = fi_fetch_atomicv(tx_ep[0][index], &iov, NULL, 1, - &r_iov, (void **)loc_mr, 1, - gni_addr[1], - _REM_ADDR(fi[0], target, target), mr_key[1], - FI_INT64, FI_MIN, target); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(tx_cq[0][index], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - sep_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0, tx_ep[0][index]); - - r[0] = 1; - sep_check_cntrs(w, r, w_e, r_e, false); - min = ((int64_t)SOURCE_DATA < (int64_t)TARGET_DATA) ? - SOURCE_DATA : TARGET_DATA; - ret = *((int64_t *)target) == min; - cr_assert(ret, "Data mismatch"); - ret = *((int64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); -} - -void sep_atomic_rwmsg(int index) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t min; - uint64_t operand = SOURCE_DATA; - struct fi_msg_atomic msg; - struct fi_ioc msg_iov, res_iov; - struct fi_rma_ioc rma_iov; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - msg_iov.count = 1; - msg.msg_iov = &msg_iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = gni_addr[1]; - rma_iov.addr = _REM_ADDR(fi[0], target, target); - rma_iov.count = 1; - rma_iov.key = mr_key[1]; - msg.rma_iov = &rma_iov; - msg.context = target; - msg.op = FI_MIN; - - res_iov.addr = source; - res_iov.count = 1; - - /* i64 */ - *((int64_t *)source) = FETCH_SOURCE_DATA; - *((int64_t *)target) = 
TARGET_DATA; - msg_iov.addr = &operand; - msg.datatype = FI_INT64; - sz = fi_fetch_atomicmsg(tx_ep[0][index], &msg, &res_iov, - (void **)loc_mr, 1, 0); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(tx_cq[0][index], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - sep_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0, tx_ep[0][index]); - - r[0] = 1; - sep_check_cntrs(w, r, w_e, r_e, false); - min = ((int64_t)SOURCE_DATA < (int64_t)TARGET_DATA) ? - SOURCE_DATA : TARGET_DATA; - ret = *((int64_t *)target) == min; - cr_assert(ret, "Data mismatch"); - ret = *((int64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); -} - -void sep_atomic_compwrite(int index) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t operand = SOURCE_DATA, op2 = TARGET_DATA; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - /* u64 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - sz = fi_compare_atomic(tx_ep[0][index], &operand, 1, NULL, &op2, NULL, - source, loc_mr[0], rx_addr[index], - _REM_ADDR(fi[0], target, target), - mr_key[1], FI_UINT64, - FI_CSWAP, target); - cr_assert_eq(sz, 0, "fi_compare_atomic returned %ld (%s)", sz, - fi_strerror(-sz)); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(tx_cq[0][index], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - sep_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0, tx_ep[0][index]); - - r[0] = 1; - sep_check_cntrs(w, r, w_e, r_e, false); - ret = *((uint64_t *)target) == SOURCE_DATA; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); -} - -#define SOURCE_DATA_FP 0.83203125 -#define TARGET_DATA_FP 0.83984375 -void 
sep_atomic_compwritev(int index) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - double operand_dp, op2_dp; - struct fi_ioc iov, r_iov, c_iov; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - iov.count = 1; - r_iov.count = 1; - c_iov.count = 1; - - /* double */ - *((double *)&operand_dp) = SOURCE_DATA_FP; - *((double *)&op2_dp) = TARGET_DATA_FP; - *((double *)source) = FETCH_SOURCE_DATA; - *((double *)target) = TARGET_DATA_FP; - iov.addr = &operand_dp; - r_iov.addr = source; - c_iov.addr = &op2_dp; - sz = fi_compare_atomicv(tx_ep[0][index], - &iov, NULL, 1, - &c_iov, NULL, 1, - &r_iov, (void **)loc_mr, 1, - gni_addr[1], - _REM_ADDR(fi[0], target, target), mr_key[1], - FI_DOUBLE, FI_CSWAP, target); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - - while ((ret = fi_cq_read(tx_cq[0][index], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - sep_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0, tx_ep[0][index]); - r[0] = 1; - sep_check_cntrs(w, r, w_e, r_e, false); - ret = *((double *)target) == (double)SOURCE_DATA_FP; - cr_assert(ret, "Data mismatch"); - ret = *((double *)source) == (double)TARGET_DATA_FP; - cr_assert(ret, "Fetch data mismatch"); -} - -void sep_atomic_compwritemsg(int index) -{ - int ret; - ssize_t sz; - struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, - (void *) -1, UINT_MAX, UINT_MAX }; - uint64_t operand = SOURCE_DATA, op2 = TARGET_DATA; - struct fi_msg_atomic msg; - struct fi_ioc msg_iov, res_iov, cmp_iov; - struct fi_rma_ioc rma_iov; - uint64_t w[NUMEPS] = {0}, r[NUMEPS] = {0}, w_e[NUMEPS] = {0}; - uint64_t r_e[NUMEPS] = {0}; - - msg_iov.count = 1; - msg.msg_iov = &msg_iov; - msg.desc = (void **)loc_mr; - msg.iov_count = 1; - msg.addr = gni_addr[1]; - rma_iov.addr = 
_REM_ADDR(fi[0], target, target); - rma_iov.count = 1; - rma_iov.key = mr_key[1]; - msg.rma_iov = &rma_iov; - msg.context = target; - msg.op = FI_CSWAP; - - res_iov.count = 1; - cmp_iov.count = 1; - - /* i64 */ - *((uint64_t *)source) = FETCH_SOURCE_DATA; - *((uint64_t *)target) = TARGET_DATA; - msg_iov.addr = &operand; - msg.datatype = FI_INT64; - res_iov.addr = source; - cmp_iov.addr = &op2; - sz = fi_compare_atomicmsg(tx_ep[0][index], &msg, &cmp_iov, NULL, 1, - &res_iov, (void **)loc_mr, 1, 0); - cr_assert_eq(sz, 0); - - /* reset cqe */ - cqe.op_context = cqe.buf = (void *) -1; - cqe.flags = cqe.len = cqe.data = cqe.tag = UINT_MAX; - while ((ret = fi_cq_read(tx_cq[0][index], &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - sep_check_tcqe(&cqe, target, FI_ATOMIC | FI_READ, 0, tx_ep[0][index]); - r[0] = 1; - sep_check_cntrs(w, r, w_e, r_e, false); - ret = *((uint64_t *)target) == SOURCE_DATA; - cr_assert(ret, "Data mismatch"); - ret = *((uint64_t *)source) == TARGET_DATA; - cr_assert(ret, "Fetch data mismatch"); -} - -void sep_invalid_compare_atomic(enum fi_datatype dt, enum fi_op op) -{ - ssize_t sz; - size_t count; - uint64_t operand, op2; - - if (!supported_compare_atomic_ops[op][dt]) { - sz = fi_compare_atomic(tx_ep[0][0], &operand, 1, NULL, - &op2, NULL, source, loc_mr, - rx_addr[0], (uint64_t)target, mr_key[1], - dt, op, target); - cr_assert(sz == -FI_EOPNOTSUPP); - - sz = fi_compare_atomicvalid(tx_ep[0][0], dt, op, &count); - cr_assert(sz == -FI_EOPNOTSUPP, "fi_atomicvalid() succeeded\n"); - } else { - sz = fi_compare_atomicvalid(tx_ep[0][0], dt, op, &count); - cr_assert(!sz, "fi_atomicvalid() failed\n"); - cr_assert(count == 1, "fi_atomicvalid(): bad count\n"); - } -} - -void sep_invalid_fetch_atomic(enum fi_datatype dt, enum fi_op op) -{ - ssize_t sz; - size_t count; - uint64_t operand; - - if (!supported_fetch_atomic_ops[op][dt]) { - sz = fi_fetch_atomic(tx_ep[0][0], - &operand, - 1, - NULL, - source, loc_mr[0], - rx_addr[0], 
- _REM_ADDR(fi[0], target, target), - mr_key[1], - dt, op, target); - cr_assert(sz == -FI_EOPNOTSUPP); - - sz = fi_fetch_atomicvalid(tx_ep[0][0], dt, op, &count); - cr_assert(sz == -FI_EOPNOTSUPP, "fi_atomicvalid() succeeded\n"); - } else { - sz = fi_fetch_atomicvalid(tx_ep[0][0], dt, op, &count); - cr_assert(!sz, "fi_atomicvalid() failed\n"); - cr_assert(count == 1, "fi_atomicvalid(): bad count\n"); - } -} - -static void cancel_sep_send_sep(int index) -{ - ssize_t ret, len = 16; - struct fi_cq_err_entry buf; - - sep_init_data(source, len, 0xab + index); - sep_init_data(target, len, 0); - - ret = fi_send(tx_ep[0][index], source, len, loc_mr[0], - rx_addr[index], target); - cr_assert(ret == 0, "fi_send failed err:%ld", ret); - - ret = fi_cancel(&tx_ep[0][index]->fid, target); - fprintf(stderr, "ret = %ld %s\n", ret, fi_strerror(-ret)); - cr_assert(ret == FI_SUCCESS, "fi_cancel failed"); - - /* check for event */ - ret = fi_cq_readerr(tx_cq[0][index], &buf, FI_SEND); - cr_assert(ret == 1, "did not find one error event"); - - cr_assert(buf.buf == (void *) source, "buffer mismatch"); - cr_assert(buf.err == FI_ECANCELED, "error code mismatch"); - cr_assert(buf.prov_errno == FI_ECANCELED, "prov error code mismatch"); - cr_assert(buf.len == len, "length mismatch"); -} - -static void cancel_sep_recv_sep(int index) -{ - ssize_t ret, len = 16; - struct fi_cq_err_entry buf; - - sep_init_data(source, len, 0xab + index); - sep_init_data(target, len, 0); - - ret = fi_recv(rx_ep[1][index], target, len, rem_mr[0], - FI_ADDR_UNSPEC, source); - cr_assert(ret == 0, "fi_recv failed err:%ld", ret); - - ret = fi_cancel(&rx_ep[1][index]->fid, source); - cr_assert(ret == FI_SUCCESS, "fi_cancel failed"); - - /* check for event */ - ret = fi_cq_readerr(rx_cq[1][index], &buf, FI_RECV); - cr_assert(ret == 1, "did not find one error event"); - - cr_assert(buf.buf == (void *) target, "buffer mismatch"); - cr_assert(buf.err == FI_ECANCELED, "error code mismatch"); - cr_assert(buf.prov_errno == 
FI_ECANCELED, "prov error code mismatch"); - cr_assert(buf.len == len, "length mismatch"); -} - -static void cancel_sep_no_event(int index) -{ - ssize_t ret; - - ret = fi_cancel(&tx_ep[0][index]->fid, NULL); - cr_assert(ret == -FI_ENOENT, "fi_cancel failed"); - - ret = fi_cancel(&rx_ep[0][index]->fid, NULL); - cr_assert(ret == -FI_ENOENT, "fi_cancel failed"); -} - -void run_tests(void) -{ - int i, j; - - cr_log_info("sep_send_recv\n"); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_send_recv, i, 1, BUF_SZ); - } - - cr_log_info("sep_tsend\n"); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_tsend, i, 1, BUF_SZ); - } - - cr_log_info("sep_recvmsg\n"); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_recvmsg, i, 1, BUF_SZ); - } - - cr_log_info("sep_trecvmsg\n"); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_trecvmsg, i, 1, BUF_SZ); - } - - cr_log_info("sep_sendv\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_sendv, i, 1, BUF_SZ); - } - - cr_log_info("sep_tsendv\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_tsendv, i, 1, BUF_SZ); - } - - cr_log_info("sep_recvv\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_recvv, i, 1, BUF_SZ); - } - - cr_log_info("sep_sendmsg\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_sendmsg, i, 1, BUF_SZ); - } - - cr_log_info("sep_tsendmsg\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_tsendmsg, i, 1, BUF_SZ); - } - - cr_log_info("sep_sendmsgdata\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_sendmsgdata, i, 1, BUF_SZ); - } - - cr_log_info("sep_inject\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_inject, i, 1, INJECT_SIZE); - } - - cr_log_info("sep_tinject\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_tinject, i, 1, 
INJECT_SIZE); - } - - cr_log_info("sep_senddata\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_senddata, i, 1, INJECT_SIZE); - } - - cr_log_info("sep_tsenddata\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_tsenddata, i, 1, INJECT_SIZE); - } - - cr_log_info("sep_injectdata\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_injectdata, i, 1, INJECT_SIZE); - } - - cr_log_info("sep_tinjectdata\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_tinjectdata, i, 1, INJECT_SIZE); - } - - cr_log_info("sep_read\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_read, i, 8, BUF_SZ); - } - - cr_log_info("sep_readv\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_readv, i, 8, BUF_SZ); - } - - cr_log_info("sep_readmsg\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_readmsg, i, 8, BUF_SZ); - } - - cr_log_info("sep_write\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_write, i, 8, BUF_SZ); - } - - cr_log_info("sep_writev\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_writev, i, 8, BUF_SZ); - } - - cr_log_info("sep_writemsg\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_writemsg, i, 8, BUF_SZ); - } - - cr_log_info("sep_writedata\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_writedata, i, 8, BUF_SZ); - } - - cr_log_info("sep_atomic\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - sep_atomic(i); - } - - cr_log_info("sep_atomic_v\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - sep_atomic_v(i); - } - - cr_log_info("sep_atomic_msg\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - sep_atomic_msg(i); - } - - cr_log_info("sep_atomic_rw\n"); - 
sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - sep_atomic_rw(i); - } - - cr_log_info("sep_atomic_rwmsg\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - sep_atomic_rwmsg(i); - } - - cr_log_info("sep_atomic_compwrite\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - sep_atomic_compwrite(i); - } - - cr_log_info("sep_atomic_compwritev\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - sep_atomic_compwritev(i); - } - - cr_log_info("sep_atomic_compwritemsg\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - sep_atomic_compwritemsg(i); - } - - cr_log_info("sep_atomic_inject\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - sep_atomic_inject(i); - } - - cr_log_info("sep_inject_write\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_inject_write, i, 8, INJECT_SIZE); - } - - cr_log_info("sep_inject_writedata\n"); - sep_clear_counters(); - for (i = 0; i < ctx_cnt; i++) { - xfer_each_size(sep_inject_writedata, i, 8, INJECT_SIZE); - } - - cr_log_info("sep_invalid_compare_atomic\n"); - for (i = 0; i < FI_ATOMIC_OP_LAST; i++) { - for (j = 0; j < FI_DATATYPE_LAST; j++) { - sep_invalid_compare_atomic(j, i); - } - } - - cr_log_info("sep_invalid_fetch_atomic\n"); - for (i = 0; i < FI_ATOMIC_OP_LAST; i++) { - for (j = 0; j < FI_DATATYPE_LAST; j++) { - sep_invalid_fetch_atomic(j, i); - } - } - -} - -void run_cancel_tests(void) -{ - int i; - - cr_log_info("cancel send test\n"); - for (i = 0; i < ctx_cnt; i++) { - cancel_sep_send_sep(i); - } - - cr_log_info("cancel recv test\n"); - for (i = 0; i < ctx_cnt; i++) { - cancel_sep_recv_sep(i); - } - - cr_log_info("cancel no event test\n"); - for (i = 0; i < ctx_cnt; i++) { - cancel_sep_no_event(i); - } -} - -TestSuite(scalablea, - .init = sep_setup_context, - .fini = sep_teardown_context); - -TestSuite(scalablem_default, - .init = sep_default_setup_map, - .fini = sep_teardown); - -TestSuite(scalablet_default, - .init = 
sep_default_setup_table, - .fini = sep_teardown); - -TestSuite(scalablem_basic, - .init = sep_basic_setup_map, - .fini = sep_teardown); - -TestSuite(scalablet_basic, - .init = sep_basic_setup_table, - .fini = sep_teardown); - -TestSuite(scalablem_scalable, - .init = sep_scalable_setup_map, - .fini = sep_teardown); - -TestSuite(scalablet_scalable, - .init = sep_scalable_setup_table, - .fini = sep_teardown); - -Test(scalablea, misc) -{ -} - -Test(scalablem_default, misc) -{ - int ret; - struct fi_av_attr av_attr = {0}; - - /* test if bind fails */ - ret = fi_ep_bind(tx_ep[0][0], &tx_cq[0][0]->fid, - FI_TRANSMIT); - cr_assert(ret, "fi_ep_bind should fail"); - - ret = fi_ep_bind(rx_ep[0][0], &rx_cq[0][0]->fid, - FI_TRANSMIT); - cr_assert(ret, "fi_ep_bind should fail"); - - /* test for inserting an ep_name that doesn't fit in the AV */ - av_attr.type = FI_AV_MAP; - av_attr.count = NUMEPS; - av_attr.rx_ctx_bits = 1; - - ret = fi_av_open(dom[0], &av_attr, &t_av, NULL); - cr_assert(!ret, "fi_av_open"); - ret = fi_av_insert(t_av, ep_name[0], 1, &gni_addr[0], 0, NULL); - cr_assert(ret == -FI_EINVAL); - ret = fi_close(&t_av->fid); - cr_assert(!ret, "failure in closing av."); - - /* test closing a scalable endpoint with open contexts */ - ret = fi_close(&sep[0]->fid); - cr_assert_eq(ret, -FI_EBUSY, "close should have failed."); -} - -Test(scalablem_basic, all) -{ - cr_log_info(BLUE "sep:basic:FI_AV_MAP tests:\n" COLOR_RESET); - run_tests(); -} - -Test(scalablem_basic, cancel) -{ - cr_log_info(BLUE "sep:basic:FI_AV_MAP cancel tests:\n" COLOR_RESET); - run_cancel_tests(); -} - -Test(scalablet_basic, all) -{ - cr_log_info(BLUE "sep:basic:FI_AV_TABLE tests:\n" COLOR_RESET); - run_tests(); -} - -Test(scalablet_basic, cancel) -{ - cr_log_info(BLUE "sep:basic:FI_AV_TABLE cancel tests:\n" COLOR_RESET); - run_cancel_tests(); -} - -Test(scalablem_scalable, all) -{ - cr_log_info(BLUE "sep:scalable:FI_AV_MAP tests:\n" COLOR_RESET); - run_tests(); -} - -Test(scalablem_scalable, cancel) -{ 
- cr_log_info(BLUE "sep:scalable:FI_AV_MAP cancel tests:\n" COLOR_RESET); - run_cancel_tests(); -} - -Test(scalablet_scalable, all) -{ - cr_log_info(BLUE "sep:scalable:FI_AV_TABLE tests:\n" COLOR_RESET); - run_tests(); -} - -Test(scalablet_scalable, cancel) -{ - cr_log_info(BLUE "sep:scalable:FI_AV_TABLE cancel tests:\n" COLOR_RESET); - run_cancel_tests(); -} - -#define INSERT_ADDR_COUNT (NUMCONTEXTS + 6) - -/* test for inserting an ep_name that doesn't fit in the AV */ -Test(scalable, av_insert) -{ - int ret, i; - size_t addrlen = sizeof(struct gnix_ep_name); - struct fi_av_attr av_attr = {0}; - int err[INSERT_ADDR_COUNT] = {0}; - fi_addr_t addresses[INSERT_ADDR_COUNT]; - struct gnix_ep_name epname[TOTALEPS]; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - hints->ep_attr->type = FI_EP_RDM; - hints->caps = FI_ATOMIC | FI_RMA | FI_MSG | FI_NAMED_RX_CTX | FI_TAGGED; - hints->mode = FI_LOCAL_MR; - hints->domain_attr->cq_data_size = NUMEPS * 2; - hints->domain_attr->data_progress = FI_PROGRESS_AUTO; - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->fabric_attr->prov_name = strdup("gni"); - hints->ep_attr->tx_ctx_cnt = NUMCONTEXTS; - hints->ep_attr->rx_ctx_cnt = NUMCONTEXTS; - - ret = fi_getinfo(fi_version(), NULL, 0, 0, hints, &fi[0]); - cr_assert(!ret, "fi_getinfo"); - - ret = fi_fabric(fi[0]->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - fi[0]->ep_attr->tx_ctx_cnt = NUMCONTEXTS; - fi[0]->ep_attr->rx_ctx_cnt = NUMCONTEXTS; - - ret = fi_domain(fab, fi[0], &dom[0], NULL); - cr_assert(!ret, "fi_domain"); - - for (i = 0; i < NUMEPS; i++) { - ret = fi_scalable_ep(dom[0], fi[0], &sep[i], NULL); - cr_assert(!ret, "fi_scalable_ep"); - - ret = fi_enable(sep[i]); - cr_assert(!ret, "fi_enable"); - - ret = fi_getname(&sep[i]->fid, &epname[i], &addrlen); - cr_assert(ret == FI_SUCCESS); - } - - fi[0]->ep_attr->rx_ctx_cnt = INSERT_ADDR_COUNT; - - for (i = NUMEPS; i < TOTALEPS; i++) { - ret = fi_scalable_ep(dom[0], fi[0], &sep[i], NULL); 
- cr_assert(!ret, "fi_scalable_ep"); - - ret = fi_enable(sep[i]); - cr_assert(!ret, "fi_enable"); - - ret = fi_getname(&sep[i]->fid, &epname[i], &addrlen); - cr_assert(ret == FI_SUCCESS); - } - - rx_ctx_bits = 0; - ctx_cnt = NUMCONTEXTS; - while (ctx_cnt >> ++rx_ctx_bits); - av_attr.type = FI_AV_TABLE; - av_attr.count = NUMCONTEXTS; - av_attr.rx_ctx_bits = rx_ctx_bits; - - cr_log_info("test av table error path\n"); - ret = fi_av_open(dom[0], &av_attr, &t_av, NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_av_insert(t_av, epname, TOTALEPS, - addresses, FI_SYNC_ERR, err); - cr_assert((ret != TOTALEPS), "fi_av_insert should have failed but did not."); - - cr_log_info("check for errors\n"); - for (i = 0; i < NUMEPS; i++) { - cr_assert_eq(err[i], 0, "err[%d]:%d", i, err[i]); - cr_assert_neq(addresses[i], FI_ADDR_NOTAVAIL, - "addresses[%d]:%lx", i, addresses[i]); - } - for (; i < TOTALEPS; i++) { - cr_assert_eq(err[i], -FI_EINVAL, "err[%d]:%d", i, err[i]); - cr_assert_eq(addresses[i], FI_ADDR_NOTAVAIL, - "addresses[%d]:%lx", i, addresses[i]); - } - - ret = fi_close(&t_av->fid); - cr_assert(!ret, "failure in closing av."); - - cr_log_info("test av map error path\n"); - av_attr.type = FI_AV_MAP; - - ret = fi_av_open(dom[0], &av_attr, &t_av, NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_av_insert(t_av, epname, TOTALEPS, - addresses, FI_SYNC_ERR, err); - cr_assert((ret != TOTALEPS), "fi_av_insert should have failed but did not."); - - cr_log_info("check for errors\n"); - - for (i = 0; i < NUMEPS; i++) { - cr_assert_eq(err[i], 0, "err[%d]:%d", i, err[i]); - cr_assert_neq(addresses[i], FI_ADDR_NOTAVAIL, - "addresses[%d]:%lx", i, addresses[i]); - } - - for (; i < TOTALEPS; i++) { - cr_assert_eq(err[i], -FI_EINVAL, "err[%d]:%d", i, err[i]); - cr_assert_eq(addresses[i], FI_ADDR_NOTAVAIL, - "addresses[%d]:%lx", i, addresses[i]); - } - - ret = fi_close(&t_av->fid); - cr_assert(!ret, "failure in closing av."); - - ret = fi_close(&sep[0]->fid); - cr_assert(!ret, "failure 
in closing ep."); - - ret = fi_close(&dom[0]->fid); - cr_assert(!ret, "failure in closing domain."); - fi_freeinfo(hints); - -} diff --git a/prov/gni/test/shmem.c b/prov/gni/test/shmem.c deleted file mode 100644 index 7f38480b371..00000000000 --- a/prov/gni/test/shmem.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -/* - * Author: jswaro - * Created-on: 6/19/2017 - */ -#include -#include -#include -#include -#include -#include - -#include -#include "common.h" - -#include -#include "gnix_rdma_headers.h" - -struct test_structure { - char name[128]; - int elements; - int data[0]; -}; - -static struct fi_info *info; - -static void __gnix_shmem_test_setup(void) -{ - int ret; - - ret = fi_getinfo(fi_version(), NULL, 0, 0, NULL, &info); - cr_assert(ret == FI_SUCCESS); /* do this to initialize debug output */ -} - -static void __gnix_shmem_test_teardown(void) -{ - fi_freeinfo(info); -} - -static int init_test_structure_function(const char *path, - uint32_t size, void *region) -{ - struct test_structure *tmp = (struct test_structure *) region; - int i; - - tmp->elements = (size - sizeof(struct test_structure)) / sizeof(int); - for (i = 0; i < tmp->elements; i++) - tmp->data[i] = 0; - - return 0; -} - -TestSuite(gnix_shmem, - .init = __gnix_shmem_test_setup, - .fini = __gnix_shmem_test_teardown); - -static inline void basic_test(void) -{ - int size, ret, elements, i; - struct test_structure *region; - struct gnix_shared_memory _gnix_shmem_region; - - elements = 256; - size = sizeof(struct test_structure) + (sizeof(int) * elements); - - ret = _gnix_shmem_create("/tmp/libfabric_key_region", - size, init_test_structure_function, - &_gnix_shmem_region); - cr_assert(ret == FI_SUCCESS, "ret=%d\n", ret); - - region = (struct test_structure *) _gnix_shmem_region.addr; - - cr_assert(region); - cr_assert(region->elements == elements); - for (i = 0; i < elements; i++) - cr_assert(region->data[i] == 0); - - for (i = 0; i < elements; i++) - region->data[i] = i; - - for (i = 0; i < elements; i++) - cr_assert(region->data[i] == i); - - ret = _gnix_shmem_destroy(&_gnix_shmem_region); - cr_assert(ret == FI_SUCCESS); -} - -/* test is redundant to ensure that the shared memory file is removed between - * application runs */ -Test(gnix_shmem, basic_use_2) -{ - basic_test(); -} - -/* test is 
redundant to ensure that the shared memory file is removed between - * application runs */ -Test(gnix_shmem, basic_use_1) -{ - basic_test(); -} - - diff --git a/prov/gni/test/smrn.c b/prov/gni/test/smrn.c deleted file mode 100644 index 07b31ce8d2c..00000000000 --- a/prov/gni/test/smrn.c +++ /dev/null @@ -1,260 +0,0 @@ -/* - * Copyright (c) 2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - - -#include -#include -#include -#include "gnix_smrn.h" - -#include -#include -#include "common.h" - -#define GNIX_DEFAULT_RQ_CNT 4 - -#if HAVE_KDREG -# define KDREG_CHECK false -#else -# define KDREG_CHECK true -#endif - -static struct gnix_smrn *smrn; -static struct gnix_smrn_rq *rqs[GNIX_DEFAULT_RQ_CNT]; -static void **memory_regions; - -static void smrn_setup(void) -{ - int ret; - int i; - struct fi_info *info; - - ret = fi_getinfo(FI_VERSION(1, 5), NULL, NULL, 0, NULL, &info); - cr_assert_eq(ret, FI_SUCCESS); - - fi_freeinfo(info); - - ret = _gnix_smrn_init(); - cr_assert_eq(ret, 0, "_gnix_smrn_init failed, ret=%d\n", ret); - - ret = _gnix_smrn_open(&smrn); - cr_assert(ret == 0, "_gnix_smrn_open failed"); - - for (i = 0; i < GNIX_DEFAULT_RQ_CNT; i++) { - rqs[i] = calloc(1, sizeof(*rqs[i])); - cr_assert_neq(rqs[i], NULL); - - ofi_spin_init(&rqs[i]->lock); - dlist_init(&rqs[i]->list); - dlist_init(&rqs[i]->entry); - } -} - -static void smrn_teardown(void) -{ - int ret; - int i; - - ret = _gnix_smrn_close(smrn); - cr_assert(ret == 0, "_gnix_smrn_close failed"); - - for (i = 0; i < GNIX_DEFAULT_RQ_CNT; i++) { - free(rqs[i]); - rqs[i] = NULL; - } -} - -TestSuite(smrn, - .init = smrn_setup, - .fini = smrn_teardown, - .disabled = KDREG_CHECK); - -#define RQ_ENTRIES 21 -#define REGIONS (GNIX_DEFAULT_RQ_CNT * RQ_ENTRIES) -struct test_structure { - struct gnix_smrn_context context; - int pending; -}; - -Test(smrn, simple) -{ - const int regions = REGIONS; - void *addresses[REGIONS]; - int i; - int len = 8129; - int ret; - struct gnix_smrn_rq *rq; - struct test_structure contexts[REGIONS] = {0}; - struct gnix_smrn_context *current; - struct test_structure *iter; - int expected_events; - - for (i = 0; i < regions; i++) - dlist_init(&contexts[i].context.entry); - - for (i = 0; i < regions; i++) { - addresses[i] = mmap(NULL, len, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_SHARED, -1, 0); - cr_assert_neq(addresses[i], MAP_FAILED); - } - - for (i = 0; i < regions; 
i++) { - rq = rqs[i / (regions >> 2)]; - - ret = _gnix_smrn_monitor(smrn, rq, - addresses[i], len, (uint64_t) &contexts[i], - &contexts[i].context); - cr_assert_eq(ret, FI_SUCCESS); - } - - - for (i = 0; i < regions; i++) { - ret = munmap(addresses[i], len); - cr_assert_eq(ret, 0); - - contexts[i].pending = 1; - } - - expected_events = regions; - while (expected_events > 0) { - for (i = 0; i < GNIX_DEFAULT_RQ_CNT; i++) { - rq = rqs[i]; - - ret = _gnix_smrn_get_event(smrn, rq, ¤t); - if (ret == -FI_EAGAIN) - continue; - - cr_assert_neq(ret, -FI_EINVAL); - - iter = container_of(current, - struct test_structure, context); - - cr_assert_eq(iter->pending, 1); - iter->pending = 0; - - expected_events -= 1; - } - } -} - - -static void *thread_func(void *context) -{ - const int regions = RQ_ENTRIES; - void **addresses; - int i; - int len = 8129; - int ret; - struct gnix_smrn_rq *rq; - struct test_structure contexts[RQ_ENTRIES] = {0}; - struct gnix_smrn_context *current; - struct test_structure *iter; - int expected_events; - int id = *(int *) context; - - addresses = &memory_regions[id * RQ_ENTRIES]; - rq = rqs[id]; - - for (i = 0; i < regions; i++) - dlist_init(&contexts[i].context.entry); - - for (i = 0; i < regions; i++) { - ret = _gnix_smrn_monitor(smrn, rq, - addresses[i], len, (uint64_t) &contexts[i], - &contexts[i].context); - cr_assert_eq(ret, FI_SUCCESS); - } - - - for (i = 0; i < regions; i++) { - ret = munmap(addresses[i], len); - cr_assert_eq(ret, 0); - - contexts[i].pending = 1; - } - - expected_events = regions; - while (expected_events > 0) { - ret = _gnix_smrn_get_event(smrn, rq, ¤t); - if (ret == -FI_EAGAIN) - continue; - cr_assert_neq(ret, -FI_EINVAL); - - iter = container_of(current, struct test_structure, context); - - cr_assert_eq(iter->pending, 1); - iter->pending = 0; - - expected_events -= 1; - } - - pthread_exit(NULL); -} - - -Test(smrn, threaded) -{ - const int regions = REGIONS; - void *addresses[REGIONS]; - int i; - int len = 8129; - int ret; - 
pthread_t threads[GNIX_DEFAULT_RQ_CNT]; - int thread_ids[GNIX_DEFAULT_RQ_CNT]; - - memory_regions = (void **) &addresses; - - for (i = 0; i < GNIX_DEFAULT_RQ_CNT; i++) { - thread_ids[i] = i; - } - - for (i = 0; i < regions; i++) { - addresses[i] = mmap(NULL, len, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_SHARED, -1, 0); - cr_assert_neq(addresses[i], MAP_FAILED); - } - - for (i = 0; i < GNIX_DEFAULT_RQ_CNT; i++) { - ret = pthread_create(&threads[i], NULL, thread_func, - (void *) &thread_ids[i]); - cr_assert_eq(ret, 0); - } - - for (i = 0; i < GNIX_DEFAULT_RQ_CNT; i++) { - pthread_join(threads[i], NULL); - } - - memory_regions = NULL; -} - - - diff --git a/prov/gni/test/tags.c b/prov/gni/test/tags.c deleted file mode 100644 index b80c593c535..00000000000 --- a/prov/gni/test/tags.c +++ /dev/null @@ -1,1925 +0,0 @@ -/* - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - - -/* - * Created on: July 17, 2015 - * Author: jswaro - */ -#include -#include -#include -#include -#include - -#include -#include - -#include -#include "gnix_rdma_headers.h" -#include -#include "common.h" - -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fi_info *hints; -static struct fi_info *fi; - -#define DEFAULT_FORMAT 0x00000000FFFFFFFF -#define SINGLE_FORMAT 0xFFFFFFFFFFFFFFFF -#define DOUBLE_FORMAT 0xFFFFFFFF00000000 -#define TRIPLE_FORMAT 0xFFFF0000FFFFFFFF -#define MULTI_FORMAT 0xFF00FFFF0000FFFF -#define BAD_FORMAT 0xFFFFFFFFFFFFFF00 -#define SIMPLE_FORMAT 0x0000000000FF00FF - -#define TEST_TAG 0x00000000DEADBEEF - -enum { - TEST_ORDER_INORDER = 0, - TEST_ORDER_RANDOM, - TEST_ORDER_REVERSE, -}; - -enum { - TEST_OVERLAY_DEF = 0, - TEST_OVERLAY_SINGLE, - TEST_OVERLAY_DOUBLE, - TEST_OVERLAY_TRIPLE, - TEST_OVERLAY_MULTI, - TEST_OVERLAY_BAD, - TEST_OVERLAY_SIMPLE, - TEST_OVERLAY_MAX, -}; - -struct __test_mask { - uint64_t mask; - uint64_t format; - int type; - int fields; - int field_width[5]; - int reserved_bits; -}; - -struct gnix_fr_element { - struct gnix_fab_req req; - int claimed; - uint64_t ignore; - uint64_t addr_ignore; - uint64_t peek_flags; - uint64_t remove_flags; - void *context; -}; - -struct __test_mask test_masks[TEST_OVERLAY_MAX] = { - { - .mask = DEFAULT_FORMAT, - .format = DEFAULT_FORMAT, - .type = TEST_OVERLAY_DEF, - .fields = 1, - .field_width = {32, 0, 0, 0, 0}, - .reserved_bits = 32, - }, - { - .mask = ~0, - .format = SINGLE_FORMAT, - .type = TEST_OVERLAY_SINGLE, - .fields = 1, - .field_width = {64, 0, 0, 0, 0}, - .reserved_bits = 0, - }, - { - .mask = ~0, - 
.format = DOUBLE_FORMAT, - .type = TEST_OVERLAY_DOUBLE, - .fields = 2, - .field_width = {32, 32, 0, 0, 0}, - .reserved_bits = 0, - }, - { - .mask = ~0, - .format = TRIPLE_FORMAT, - .type = TEST_OVERLAY_TRIPLE, - .fields = 3, - .field_width = {16, 16, 32, 0, 0}, - .reserved_bits = 0, - }, - { - .mask = ~0, - .format = MULTI_FORMAT, - .type = TEST_OVERLAY_MULTI, - .fields = 5, - .field_width = {8, 8, 16, 16, 16}, - .reserved_bits = 0, - }, - { - .mask = ~0, - .format = BAD_FORMAT, - .type = TEST_OVERLAY_BAD, - .fields = 5, - .field_width = {16, 16, 16, 8, 8}, - .reserved_bits = 0, - }, - { - .mask = 0x0000000000ffffff, - .format = SIMPLE_FORMAT, - .type = TEST_OVERLAY_SIMPLE, - .fields = 3, - .field_width = {8, 8, 8, 0, 0}, - .reserved_bits = 40, - } -}; - -static struct gnix_tag_storage_attr default_list_attr = { - .type = GNIX_TAG_LIST, -}; - -static struct gnix_tag_storage_attr default_hlist_attr = { - .type = GNIX_TAG_HLIST, -}; - -static struct gnix_tag_storage_attr default_kdtree_attr = { - .type = GNIX_TAG_KDTREE, -}; - -static struct gnix_tag_storage_attr default_auto_attr = { - .type = GNIX_TAG_AUTOSELECT, -}; - -static struct gnix_fr_element default_reqs[8] = { - { - .req = { - .msg = { - .tag = 0x00005555, - .ignore = 0x11111111 - }, - }, - .peek_flags = FI_PEEK, - .remove_flags = 0, - .context = NULL, - - }, - { - .req = { - .msg = { - .tag = 0x0000AAAA, - .ignore = 0x11111111 - }, - }, - .peek_flags = FI_PEEK, - .remove_flags = 0, - .context = NULL, - }, - { - .req = { - .msg = { - .tag = 0xAAAA5555, - .ignore = 0x11111111 - }, - }, - .peek_flags = FI_PEEK, - .remove_flags = 0, - .context = NULL, - }, - { - .req = { - .msg = { - .tag = 0x5555AAAA, - .ignore = 0x11111111 - }, - }, - .peek_flags = FI_PEEK, - .remove_flags = 0, - .context = NULL, - }, - { - .req = { - .msg = { - .tag = 0xAAAA5555, - .ignore = 0x11111111 - }, - }, - .peek_flags = FI_PEEK, - .remove_flags = 0, - .context = NULL, - }, - { - .req = { - .msg = { - .tag = 0x00005555, - .ignore = 
0x11111111 - }, - }, - .peek_flags = FI_PEEK, - .remove_flags = 0, - .context = NULL, - }, - { - .req = { - .msg = { - .tag = 0x00005555, - .ignore = 0x11111111 - }, - }, - .peek_flags = FI_PEEK, - .remove_flags = 0, - .context = NULL, - }, - { - .req = { - .msg = { - .tag = 0x0000AAAA, - .ignore = 0x11111111 - }, - }, - .peek_flags = FI_PEEK, - .remove_flags = 0, - .context = NULL, - }, -}; - -static struct gnix_fr_element *make_evenly_distributed_tags( - int requests, - struct __test_mask *mask); - -static struct gnix_fr_element *make_random_tags( - int requests, - struct __test_mask *mask); - -struct ipr_test_params { - int elements; - struct gnix_fr_element *(*make_requests)( - int, - struct __test_mask *); -}; - -static struct ipr_test_params ipr_params[6] = { - { - .elements = 16, - .make_requests = make_random_tags, - }, - { - .elements = 128, - .make_requests = make_random_tags, - }, - { - .elements = 1024, - .make_requests = make_random_tags, - }, - { - .elements = 16, - .make_requests = make_evenly_distributed_tags, - }, - { - .elements = 128, - .make_requests = make_evenly_distributed_tags, - }, - { - .elements = 1024, - .make_requests = make_evenly_distributed_tags, - }, -}; - -static struct gnix_tag_storage *test_tag_storage; -static int call_destruct; -static int (*match_func)( - struct dlist_entry *entry, - const void *arg) = _gnix_match_posted_tag; - - -static inline void reset_test_fr_metadata(struct gnix_fr_element *reqs, - int requests) -{ - int i; - - for (i = 0; i < requests; i++) { - reqs[i].claimed = 0; - reqs[i].req.msg.tle.context = NULL; - } -} -static inline void reset_test_tag_storage( - struct gnix_tag_storage *ts, - struct gnix_tag_storage_attr *attr) -{ - int ret; - - ret = _gnix_tag_storage_destroy(ts); - cr_assert(ret == FI_SUCCESS, - "failed to destroy tag storage on reset"); - - ret = _gnix_tag_storage_init(ts, attr, match_func); - cr_assert(ret == FI_SUCCESS, - "failed to initialize tag storage on reset"); -} - -static void 
__gnix_tags_bare_test_setup(void) -{ - int ret = 0; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->domain_attr->cq_data_size = 4; - hints->mode = mode_bits; - - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(fi_version(), NULL, 0, 0, hints, &fi); - cr_assert_eq(ret, FI_SUCCESS, "fi_getinfo"); - - cr_assert(test_tag_storage == NULL, - "test_tag_storage was not freed prior to setup"); - test_tag_storage = calloc(1, sizeof(*test_tag_storage)); - cr_assert(test_tag_storage != NULL, - "could not allocate test_tag_storage"); - - call_destruct = 0; - - srand(0xDEADBEEF); -} - -static void __gnix_tags_bare_test_teardown(void) -{ - fi_freeinfo(fi); - fi_freeinfo(hints); - - cr_assert(test_tag_storage != NULL, - "test_tag_storage pointer " - "deallocated or overwritten during test"); - free(test_tag_storage); - test_tag_storage = NULL; -} - -static void __gnix_tags_basic_test_setup(void) -{ - __gnix_tags_bare_test_setup(); -} - -static void __gnix_tags_basic_test_teardown(void) -{ - int ret; - - ret = _gnix_tag_storage_destroy(test_tag_storage); - cr_assert(ret == FI_SUCCESS, - "failed to destroy tag storage " - "during basic teardown"); - - __gnix_tags_bare_test_teardown(); -} - -static void __gnix_tags_basic_list_test_setup(void) -{ - int ret; - - __gnix_tags_basic_test_setup(); - - ret = _gnix_tag_storage_init(test_tag_storage, &default_list_attr, - match_func); - cr_assert(ret == FI_SUCCESS, "failed to initialize tag storage " - "during basic list setup"); -} - -static void __gnix_tags_basic_hlist_test_setup(void) -{ - int ret; - - __gnix_tags_basic_test_setup(); - - ret = _gnix_tag_storage_init(test_tag_storage, &default_hlist_attr, - match_func); - cr_assert(ret == FI_SUCCESS, "failed to initialize tag storage " - "during basic hlist setup"); -} - -static void __gnix_tags_basic_kdtree_test_setup(void) -{ - int ret; - - __gnix_tags_basic_test_setup(); - - ret = 
_gnix_tag_storage_init(test_tag_storage, &default_kdtree_attr, - match_func); - cr_assert(ret == FI_SUCCESS, "failed to initialize tag storage " - "during basic kdtree setup"); -} - -static void __gnix_tags_basic_posted_list_test_setup(void) -{ - match_func = _gnix_match_posted_tag; - __gnix_tags_basic_list_test_setup(); -} - -static void __gnix_tags_basic_posted_hlist_test_setup(void) -{ - match_func = _gnix_match_posted_tag; - __gnix_tags_basic_hlist_test_setup(); -} - -static void __gnix_tags_basic_posted_kdtree_test_setup(void) -{ - match_func = _gnix_match_posted_tag; - __gnix_tags_basic_kdtree_test_setup(); -} - -static void __gnix_tags_basic_unexpected_list_test_setup(void) -{ - match_func = _gnix_match_unexpected_tag; - __gnix_tags_basic_list_test_setup(); -} - -static void __gnix_tags_basic_unexpected_hlist_test_setup(void) -{ - match_func = _gnix_match_unexpected_tag; - __gnix_tags_basic_hlist_test_setup(); -} - -static void __gnix_tags_basic_unexpected_kdtree_test_setup(void) -{ - match_func = _gnix_match_unexpected_tag; - __gnix_tags_basic_kdtree_test_setup(); -} - -__attribute__((unused)) -static void __gnix_tags_basic_auto_test_setup(void) -{ - int ret; - - __gnix_tags_basic_test_setup(); - - ret = _gnix_tag_storage_init(test_tag_storage, &default_auto_attr, - match_func); - cr_assert(ret == FI_SUCCESS, - "failed to initialize tag storage during basic auto setup"); -} - -/* multi-mode tests */ - -static inline uint64_t make_test_tag(struct __test_mask *mask, uint64_t val) -{ - return val & mask->mask; -} - - -static struct gnix_fr_element *make_evenly_distributed_tags( - int requests, - struct __test_mask *mask) -{ - int i, j; - uint64_t offset, field_width, tmp; - struct gnix_fr_element *reqs; - - reqs = calloc(requests, sizeof(*reqs)); - cr_assert(reqs); - - offset = 64 - mask->reserved_bits; - - for (i = 0; i < mask->fields; i++) { - field_width = 1ull << mask->field_width[i]; - if (mask->field_width[i] == 64) - field_width = ~0ull; - - offset -= 
mask->field_width[i]; - for (j = 0; j < requests; j++) { - tmp = (j % field_width) << offset; - reqs[j].req.msg.tag |= tmp; - } - } - - for (i = 0; i < requests; i++) { - tmp = make_test_tag(mask, reqs[i].req.msg.tag); - reqs[i].req.msg.ignore = 0; - reqs[i].req.msg.tag = tmp; - reqs[i].peek_flags = FI_PEEK; - } - - return reqs; -} - -static struct gnix_fr_element *make_random_tags( - int requests, - struct __test_mask *mask) -{ - int i; - uint64_t tag; - struct gnix_fr_element *reqs; - - reqs = calloc(requests, sizeof(*reqs)); - cr_assert(reqs, "failed to allocate requests for random tag creation"); - - for (i = 0; i < requests; i++) { - tag = rand(); - tag <<= 32; - tag += rand(); - reqs[i].req.msg.tag = make_test_tag(mask, tag); - reqs[i].req.msg.ignore = 0; - reqs[i].peek_flags = FI_PEEK; - } - - return reqs; -} - -static void print_request_lists( - struct gnix_fr_element *reqs, - int requests, - int *correct_order, - int *removal_order, - int ordering_type) -{ - int i; - char *type; - struct gnix_fr_element *current; - - if (ordering_type == TEST_ORDER_INORDER) - type = "INORDER"; - else if (ordering_type == TEST_ORDER_RANDOM) - type = "RANDOM"; - else if (ordering_type == TEST_ORDER_REVERSE) - type = "REVERSE"; - else - type = "UNKNOWN"; - - - fprintf(stderr, "FAILED %s\n", type); - fprintf(stderr, "insertion order:\n"); - for (i = 0; i < requests; i++) { - current = &reqs[i]; - fprintf(stderr, " req=%p req.msg.tag=0x%.16llx index=%i\n", - ¤t->req, - (unsigned long long int) current->req.msg.tag, - i); - } - - fprintf(stderr, "removal order:\n"); - for (i = 0; i < requests; i++) { - current = &reqs[removal_order[i]]; - fprintf(stderr, - " req=%p req.msg.tag=0x%.16llx ignore=0x%.16llx index=%i\n", - ¤t->req, - (unsigned long long int) current->req.msg.tag, - (unsigned long long int) current->ignore, - removal_order[i]); - } - - fprintf(stderr, "correct order:\n"); - for (i = 0; i < requests; i++) { - current = &reqs[correct_order[i]]; - fprintf(stderr, " 
req=%p req.msg.tag=0x%.16llx index=%i\n", - ¤t->req, - (unsigned long long int) current->req.msg.tag, - correct_order[i]); - } -} - -static void multiple_insert_peek_remove_by_order( - struct gnix_tag_storage *ts, - struct __test_mask *mask, - int requests, - struct gnix_fr_element *reqs, - int *correct_order, - int *removal_order, - int ordering_type) -{ - int ret; - int i, j, ignore_bits; - struct gnix_fr_element *to_remove, *current; - struct gnix_fab_req *correct, *found; - int is_posted = ts->match_func == _gnix_match_posted_tag; - - /* clear claimed flags */ - for (i = 0; i < requests; i++) - reqs[i].claimed = 0; - - /* reset the dlist state as appropriate for a new request */ - for (i = 0; i < requests; i++) - dlist_init(&reqs[i].req.msg.tle.free); - - /* establish correct removal order based on passed in removal order */ - for (i = 0; i < requests; i++) { - to_remove = &reqs[removal_order[i]]; - - for (j = 0; j < requests; j++) { - current = &reqs[j]; - - if (current->claimed) - continue; - - /* when the tag store is a posted tag store, - * always use the ignore bits from the stored request - * - * otherwise, we are attempting to remove from - * a unexpected tag store and we should use the - * provided ignore bits - */ - if (is_posted) - ignore_bits = current->req.msg.ignore; - else - ignore_bits = to_remove->ignore; - - /* applying peek flags - This assumes the appropriate - * peek was performed prior to removal - */ - if ((to_remove->peek_flags & FI_PEEK) && - (to_remove->peek_flags & FI_CLAIM)) { - to_remove->req.msg.tle.context = - to_remove->context; - } - - if (_gnix_req_matches_params(&to_remove->req, - current->req.msg.tag, - ignore_bits, - to_remove->remove_flags, - to_remove->context, - test_tag_storage->attr.use_src_addr_matching, - &to_remove->req.addr, is_posted)) - break; - - to_remove->req.msg.tle.context = NULL; - } - - cr_assert(j != requests, - "failed to find a match for every entry"); - correct_order[i] = j; - current->claimed = 1; - } - 
- /* clear alterations to the req structure - * during creation of correct list - */ - for (i = 0; i < requests; i++) - reqs[i].req.msg.tle.context = NULL; - - /* clear claimed flags */ - for (i = 0; i < requests; i++) - reqs[i].claimed = 0; - - for (i = 0; i < requests; i++) { - ret = _gnix_insert_tag(ts, reqs[i].req.msg.tag, &reqs[i].req, - reqs[i].ignore); - if (ret) { - print_request_lists(reqs, requests, correct_order, - removal_order, ordering_type); - } - cr_assert(ret == FI_SUCCESS, - "failed to insert tag into storage"); - } - - for (i = 0; i < requests; i++) { - to_remove = &reqs[removal_order[i]]; - correct = &reqs[correct_order[i]].req; - - found = _gnix_match_tag(ts, - to_remove->req.msg.tag, to_remove->ignore, - to_remove->peek_flags | FI_PEEK, to_remove->context, - NULL); - if (found != correct) { - print_request_lists(reqs, requests, correct_order, - removal_order, ordering_type); - fprintf(stderr, - "failed to find request, to_remove=%p " - "correct=%p found=%p", - &to_remove->req, - found, - correct); - } - cr_assert(found == correct, - "failed to find tag in storage"); - - - found = _gnix_match_tag(ts, - to_remove->req.msg.tag, to_remove->ignore, - to_remove->remove_flags, to_remove->context, - NULL); - if (found != correct) { - print_request_lists(reqs, requests, correct_order, - removal_order, ordering_type); - } - cr_assert(found == correct, - "failed to remove tag from storage"); - } -} - -static void multiple_insert_peek_remove_inorder( - struct gnix_tag_storage *ts, - struct __test_mask *mask, - int requests, - struct gnix_fr_element *reqs, - int *correct_order, - int *removal_order) -{ - int i; - - for (i = 0; i < requests; i++) - removal_order[i] = i; - - multiple_insert_peek_remove_by_order( - ts, mask, requests, reqs, - correct_order, removal_order, - TEST_ORDER_INORDER); -} - -static void multiple_insert_peek_remove_reverse( - struct gnix_tag_storage *ts, - struct __test_mask *mask, - int requests, - struct gnix_fr_element *reqs, - int 
*correct_order, - int *removal_order) -{ - int i; - - for (i = requests - 1; i >= 0; i--) - removal_order[requests - 1 - i] = i; - - multiple_insert_peek_remove_by_order(ts, - mask, requests, reqs, - correct_order, removal_order, - TEST_ORDER_REVERSE); -} - -static void multiple_insert_peek_remove_random( - struct gnix_tag_storage *ts, - struct __test_mask *mask, - int requests, - struct gnix_fr_element *reqs, - int *correct_order, - int *removal_order) -{ - int i, j, rand_index, rand_j; - - for (i = 0; i < requests; i++) { - rand_index = rand() % requests; - for (j = 0; j < requests; j++) { - rand_j = (rand_index + j) % requests; - if (!reqs[rand_j].claimed) - break; - } - - removal_order[i] = rand_j; - reqs[rand_j].claimed = 1; - } - - /* clear claimed flags */ - for (i = 0; i < requests; i++) - reqs[i].claimed = 0; - - multiple_insert_peek_remove_by_order(ts, - mask, requests, reqs, - correct_order, removal_order, - TEST_ORDER_RANDOM); -} - -static inline void __test_multiple_ipr_reqs_by_function( - int requests, - struct gnix_tag_storage_attr *test_attr, - void (*order_func)( - struct gnix_tag_storage*, - struct __test_mask*, - int, - struct gnix_fr_element*, - int*, - int*), - struct gnix_fr_element *(make_requests)( - int, - struct __test_mask *), - int *correct_order, - int *removal_order) -{ - int i; - struct gnix_fr_element *reqs; - struct gnix_tag_storage_attr attr; - - memcpy(&attr, test_attr, sizeof(struct gnix_tag_storage_attr)); - - for (i = 0; i < TEST_OVERLAY_MAX; i++) { - reqs = make_requests(requests, &test_masks[i]); - cr_assert(reqs != NULL, "failed to make random tags"); - - order_func(test_tag_storage, &test_masks[i], requests, reqs, - correct_order, removal_order); - - /* make necessary alterations to the structure - * for the next test mask - */ - reset_test_tag_storage(test_tag_storage, &attr); - - free(reqs); - } -} - -static inline void __test_multiple_inorder_ipr_reqs( - int requests, - struct gnix_tag_storage_attr *test_attr, - struct 
gnix_fr_element *(make_requests)( - int, - struct __test_mask *)) - -{ - int *correct_order, *removal_order; - - correct_order = calloc(requests, sizeof(*correct_order)); - cr_assert(correct_order != NULL); - - removal_order = calloc(requests, sizeof(*removal_order)); - cr_assert(removal_order != NULL); - - __test_multiple_ipr_reqs_by_function(requests, test_attr, - multiple_insert_peek_remove_inorder, - make_requests, correct_order, removal_order); - - free(correct_order); - free(removal_order); -} - -static inline void __test_multiple_reverse_ipr_reqs( - int requests, - struct gnix_tag_storage_attr *test_attr, - struct gnix_fr_element *(make_requests)( - int, - struct __test_mask *)) - -{ - int *correct_order, *removal_order; - - correct_order = calloc(requests, sizeof(*correct_order)); - cr_assert(correct_order != NULL); - - removal_order = calloc(requests, sizeof(*removal_order)); - cr_assert(removal_order != NULL); - - __test_multiple_ipr_reqs_by_function(requests, test_attr, - multiple_insert_peek_remove_reverse, - make_requests, correct_order, removal_order); - - free(correct_order); - free(removal_order); -} - -static inline void __test_multiple_random_ipr_reqs( - int requests, - struct gnix_tag_storage_attr *test_attr, - struct gnix_fr_element *(make_requests)( - int, - struct __test_mask *)) - -{ - int *correct_order, *removal_order; - - correct_order = calloc(requests, sizeof(*correct_order)); - cr_assert(correct_order != NULL); - - removal_order = calloc(requests, sizeof(*removal_order)); - cr_assert(removal_order != NULL); - - __test_multiple_ipr_reqs_by_function(requests, test_attr, - multiple_insert_peek_remove_random, - make_requests, correct_order, removal_order); - - free(correct_order); - free(removal_order); -} - -static inline void __test_multiple_type_ipr_reqs( - int requests, - struct gnix_tag_storage_attr *test_attr, - struct gnix_fr_element *(make_requests)( - int, - struct __test_mask *)) -{ - __test_multiple_inorder_ipr_reqs(requests, 
test_attr, make_requests); - - reset_test_tag_storage(test_tag_storage, test_attr); - - __test_multiple_reverse_ipr_reqs(requests, test_attr, make_requests); - - reset_test_tag_storage(test_tag_storage, test_attr); - - __test_multiple_random_ipr_reqs(requests, test_attr, make_requests); - - reset_test_tag_storage(test_tag_storage, test_attr); -} - -static inline void __test_multiple_8_duplicate_tags( - struct gnix_tag_storage_attr *test_attr) -{ - int i; - struct gnix_fr_element reqs[8]; - int requests = 8; - int *correct_order, *removal_order; - - correct_order = calloc(8, sizeof(*correct_order)); - cr_assert(correct_order != NULL); - - removal_order = calloc(8, sizeof(*removal_order)); - cr_assert(removal_order != NULL); - - memcpy(reqs, default_reqs, sizeof(struct gnix_fr_element) * requests); - - for (i = 0; i < TEST_OVERLAY_MAX; i++) { - multiple_insert_peek_remove_inorder( - test_tag_storage, &test_masks[i], - requests, reqs, correct_order, - removal_order); - - reset_test_tag_storage(test_tag_storage, test_attr); - reset_test_fr_metadata(reqs, requests); - - multiple_insert_peek_remove_reverse( - test_tag_storage, &test_masks[i], - requests, reqs, correct_order, removal_order); - - reset_test_tag_storage(test_tag_storage, test_attr); - reset_test_fr_metadata(reqs, requests); - - multiple_insert_peek_remove_random( - test_tag_storage, &test_masks[i], - requests, reqs, correct_order, removal_order); - - reset_test_tag_storage(test_tag_storage, test_attr); - reset_test_fr_metadata(reqs, requests); - } - - free(correct_order); - free(removal_order); -} - -static inline void __test_not_found_empty(void) -{ - struct gnix_fab_req *found; - - found = _gnix_match_tag( - test_tag_storage, 0xCAFEBABE, 0, FI_PEEK, NULL, NULL); - cr_assert(found == NULL, - "something in this storage should not exist"); -} - -static inline void __test_not_found_non_empty(void) -{ - int ret; - int i, requests = 8; - struct gnix_fab_req *correct, *found; - struct gnix_fr_element *to_remove; 
- struct gnix_fr_element reqs[8]; - - memcpy(reqs, default_reqs, sizeof(struct gnix_fr_element) * 8); - - /* reset the dlist state as appropriate for a new request */ - for (i = 0; i < requests; i++) - dlist_init(&reqs[i].req.msg.tle.free); - - for (i = 0; i < requests; i++) { - ret = _gnix_insert_tag(test_tag_storage, - reqs[i].req.msg.tag, &reqs[i].req, - reqs[i].ignore); - cr_assert(ret == FI_SUCCESS, - "failed to insert tag into storage"); - } - - found = _gnix_match_tag(test_tag_storage, 0xCAFEBABE, 0, FI_PEEK, - NULL, NULL); - cr_assert(found == NULL, - "something in this storage should not exist"); - - /* only doing in-order removal */ - for (i = 0; i < requests; i++) { - to_remove = &reqs[i]; - correct = &reqs[i].req; - - found = _gnix_match_tag( - test_tag_storage, to_remove->req.msg.tag, - to_remove->ignore, to_remove->peek_flags, - to_remove->context, NULL); - cr_assert(found == correct, - "failed to find tag in storage"); - - found = _gnix_match_tag(test_tag_storage, - to_remove->req.msg.tag, to_remove->ignore, - to_remove->remove_flags, to_remove->context, - NULL); - cr_assert(found == correct, - "failed to remove tag from storage"); - } -} - -static inline void __test_ignore_mask_set( - struct gnix_tag_storage_attr *test_attr) -{ - int i; - struct gnix_fr_element reqs[8]; - int requests = 8; - int correct_order[8], removal_order[8]; - - memcpy(reqs, default_reqs, sizeof(struct gnix_fr_element) * requests); - - for (i = requests - 1; i >= 0 ; i--) - removal_order[i] = (requests - 1) - i; - - reqs[7].ignore = 0xffff; - reqs[6].ignore = 0xffff0000; - reqs[5].ignore = 0xffff0000; - reqs[4].ignore = 0xffff0000; - reqs[3].ignore = 0xffff0000; - reqs[2].ignore = 0xffff0000; - reqs[1].ignore = 0x0; - reqs[0].ignore = 0xffffffff; - - for (i = 0; i < requests; i++) - reqs[i].req.msg.ignore = reqs[i].ignore; - - multiple_insert_peek_remove_by_order(test_tag_storage, - &test_masks[TEST_OVERLAY_DEF], requests, reqs, - correct_order, removal_order, 
TEST_ORDER_REVERSE); -} - -static inline void __test_claim_pass( - struct gnix_tag_storage_attr *test_attr) -{ - int ret; - struct gnix_fr_element request = { - .req = { - .msg = { - .tag = 0xA5A5A5A5, - .ignore = 0xFFFFFFFF - }, - }, - .peek_flags = FI_PEEK | FI_CLAIM, - .remove_flags = FI_CLAIM, - .context = (void *) 0xDEADBEEF, - }; - struct gnix_fab_req *found; - - dlist_init(&request.req.msg.tle.free); - - ret = _gnix_insert_tag( - test_tag_storage, request.req.msg.tag, - &request.req, request.ignore); - cr_assert(ret == FI_SUCCESS, "failed to insert tag into storage"); - - found = _gnix_match_tag( - test_tag_storage, request.req.msg.tag, request.ignore, - request.peek_flags, request.context, NULL); - cr_assert(found == &request.req, "failed to find tag in storage"); - - found = _gnix_match_tag( - test_tag_storage, request.req.msg.tag, request.ignore, - request.remove_flags, request.context, NULL); - cr_assert(found == &request.req, "failed to remove tag from storage"); -} - -/* tests to ensure you cannot remove an unclaimed messge when FI_CLAIM is set */ -static inline void __test_fail_no_claimed_tags( - struct gnix_tag_storage_attr *test_attr) -{ - int ret; - struct gnix_fab_req *found; - struct gnix_fr_element request = { - .req = { - .msg = { - .tag = 0xA5A5A5A5, - .ignore = 0xFFFFFFFF - }, - }, - .peek_flags = FI_PEEK, - .remove_flags = FI_CLAIM, - .context = (void *) 0xDEADBEEF, - }; - - dlist_init(&request.req.msg.tle.free); - - ret = _gnix_insert_tag( - test_tag_storage, request.req.msg.tag, - &request.req, request.ignore); - cr_assert(ret == FI_SUCCESS, "failed to insert tag into storage"); - - found = _gnix_match_tag( - test_tag_storage, request.req.msg.tag, request.ignore, - request.peek_flags, request.context, NULL); - cr_assert(found == &request.req, "failed to find tag in storage"); - - found = _gnix_match_tag( - test_tag_storage, request.req.msg.tag, request.ignore, - request.remove_flags, request.context, NULL); - cr_assert(found == NULL, 
"found an unexpected tag in remove"); - - /* use the peek tags this time to remove the entry */ - found = _gnix_match_tag( - test_tag_storage, request.req.msg.tag, request.ignore, - 0, request.context, NULL); - cr_assert(found == &request.req, "failed to find tag in storage"); -} - -/* test to ensure you cannot remove a message that has been claimed */ -static inline void __test_fail_all_claimed_tags( - struct gnix_tag_storage_attr *test_attr) -{ - int ret; - struct gnix_fr_element request = { - .req = { - .msg = { - .tag = 0xA5A5A5A5, - .ignore = 0xFFFFFFFF - }, - }, - .peek_flags = FI_PEEK | FI_CLAIM, - .remove_flags = 0, - .context = (void *) 0xDEADBEEF, - }; - struct gnix_fab_req *found; - - /* reset the dlist state as appropriate for a new request */ - dlist_init(&request.req.msg.tle.free); - - - ret = _gnix_insert_tag( - test_tag_storage, request.req.msg.tag, - &request.req, request.ignore); - cr_assert(ret == FI_SUCCESS, "failed to insert tag into storage"); - - found = _gnix_match_tag( - test_tag_storage, request.req.msg.tag, request.ignore, - request.peek_flags, request.context, NULL); - cr_assert(found == &request.req, "failed to find tag in storage"); - - found = _gnix_match_tag( - test_tag_storage, request.req.msg.tag, request.ignore, - request.remove_flags, request.context, NULL); - cr_assert(found == NULL, "found an unexpected tag during remove"); - - /* use the peek tags this time to remove the entry */ - found = _gnix_match_tag( - test_tag_storage, request.req.msg.tag, request.ignore, - FI_CLAIM, request.context, NULL); - cr_assert(found == &request.req, "failed to find tag in storage"); -} - -static inline void __test_fail_peek_all_claimed( - struct gnix_tag_storage_attr *test_attr) -{ - int ret; - struct gnix_fr_element request = { - .req = { - .msg = { - .tag = 0xA5A5A5A5, - .ignore = 0xFFFFFFFF - }, - }, - .peek_flags = FI_PEEK, - .remove_flags = FI_CLAIM, - .context = (void *) 0xDEADBEEF, - }; - struct gnix_fab_req *found; - - 
dlist_init(&request.req.msg.tle.free); - - ret = _gnix_insert_tag( - test_tag_storage, request.req.msg.tag, - &request.req, request.ignore); - cr_assert(ret == FI_SUCCESS, "failed to insert tag into storage"); - - found = _gnix_match_tag( - test_tag_storage, request.req.msg.tag, request.ignore, - request.peek_flags | FI_CLAIM, request.context, NULL); - cr_assert(found == &request.req, "fail to claim tag"); - - found = _gnix_match_tag( - test_tag_storage, request.req.msg.tag, request.ignore, - request.peek_flags, request.context, NULL); - cr_assert(found == NULL, "unexpectedly found a tag"); - - found = _gnix_match_tag( - test_tag_storage, request.req.msg.tag, request.ignore, - request.remove_flags, request.context, NULL); - cr_assert(found == &request.req, "failed to remove tag from storage"); -} - -static inline void __test_src_addr_match( - struct gnix_tag_storage_attr *test_attr) -{ - int ret; - struct gnix_fr_element request = { - .req = { - .msg = { - .tag = 0xA5A5A5A5, - }, - .addr = { - .cdm_id = 1, - .device_addr = 1, - }, - }, - .peek_flags = FI_PEEK, - }; - struct gnix_fab_req *found; - - dlist_init(&request.req.msg.tle.free); - - ret = _gnix_insert_tag( - test_tag_storage, request.req.msg.tag, - &request.req, request.ignore); - cr_assert(ret == FI_SUCCESS, "failed to insert tag into storage"); - - found = _gnix_match_tag( - test_tag_storage, request.req.msg.tag, - request.ignore, request.peek_flags, - request.context, &request.req.addr); - cr_assert(found == &request.req, "failed to find tag in storage"); - - found = _gnix_match_tag( - test_tag_storage, request.req.msg.tag, - request.ignore, request.remove_flags, - request.context, &request.req.addr); - cr_assert(found == &request.req, "failed to remove tag from storage"); -} - -/* tests to ensure you cannot remove an unclaimed messge when FI_CLAIM is set */ -static inline void __test_src_addr_fail_wrong_src_addr( - struct gnix_tag_storage_attr *test_attr) -{ - int ret; - struct gnix_fab_req *found; - 
struct gnix_fr_element request = { - .req = { - .msg = { - .tag = 0xA5A5A5A5, - }, - .addr = { - .cdm_id = 1, - .device_addr = 1, - }, - }, - .ignore = 0, - .addr_ignore = 0, - .peek_flags = FI_PEEK, - }; - struct gnix_address addr_to_find = { - .cdm_id = 1, - .device_addr = 2, - }; - - dlist_init(&request.req.msg.tle.free); - - /* hack, don't actually do this */ - test_tag_storage->attr.use_src_addr_matching = 1; - - ret = _gnix_insert_tag( - test_tag_storage, request.req.msg.tag, - &request.req, request.ignore); - cr_assert(ret == FI_SUCCESS, "failed to insert tag into storage"); - - found = _gnix_match_tag( - test_tag_storage, request.req.msg.tag, - request.ignore, request.peek_flags, - request.context, &addr_to_find); - cr_assert(found == NULL, "found unexpected tag"); - - found = _gnix_match_tag( - test_tag_storage, request.req.msg.tag, - request.ignore, request.peek_flags, - request.context, &request.req.addr); - cr_assert(found == &request.req, "failed to find tag in storage"); - - found = _gnix_match_tag( - test_tag_storage, request.req.msg.tag, - request.ignore, request.remove_flags, - request.context, &request.req.addr); - cr_assert(found == &request.req, "failed to find tag in storage"); -} - -/* test to ensure you cannot remove a message that has been claimed */ -static inline void __test_src_addr_match_unspec( - struct gnix_tag_storage_attr *test_attr) -{ - int ret; - struct gnix_fr_element request = { - .req = { - .msg = { - .tag = 0xA5A5A5A5, - }, - .addr = { - .cdm_id = 1, - .device_addr = 1, - }, - }, - .peek_flags = FI_PEEK, - .ignore = 0, - .addr_ignore = 0, - }; - struct gnix_fab_req *found; - struct gnix_address to_find = { - .cdm_id = -1, - .device_addr = -1, - }; - - dlist_init(&request.req.msg.tle.free); - - if (test_tag_storage->match_func == _gnix_match_posted_tag) { - /* swap addresses because posted tag receives check - * the address in the request for unspec rather than - * looking at the address in the parameters - */ - 
request.req.addr.cdm_id = -1; - request.req.addr.device_addr = -1; - - to_find.cdm_id = 1; - to_find.device_addr = 1; - } - - - - /* hack, don't actually do this */ - test_tag_storage->attr.use_src_addr_matching = 1; - - ret = _gnix_insert_tag( - test_tag_storage, request.req.msg.tag, - &request.req, request.ignore); - cr_assert(ret == FI_SUCCESS, "failed to insert tag into storage"); - - - - found = _gnix_match_tag( - test_tag_storage, request.req.msg.tag, - request.ignore, request.peek_flags, - request.context, &to_find); - cr_assert(found == &request.req, "failed to find tag in storage"); - - found = _gnix_match_tag( - test_tag_storage, request.req.msg.tag, - request.ignore, request.remove_flags, - request.context, &to_find); - cr_assert(found == &request.req, "failed to find tag in storage"); -} - -static void single_insert_peek_remove( - struct gnix_tag_storage *ts, - struct __test_mask *mask, - struct gnix_fr_element *reqs) -{ - int correct_order, removal_order; - - multiple_insert_peek_remove_inorder(ts, mask, 1, reqs, &correct_order, - &removal_order); -} - -/* - * Basic functionality tests for the gnix_bitmap_t object - */ - -TestSuite(gnix_tags_bare, - .init = __gnix_tags_bare_test_setup, - .fini = __gnix_tags_bare_test_teardown); - -TestSuite(gnix_tags_basic_posted_list, - .init = __gnix_tags_basic_posted_list_test_setup, - .fini = __gnix_tags_basic_test_teardown); - -TestSuite(gnix_tags_basic_posted_hlist, - .init = __gnix_tags_basic_posted_hlist_test_setup, - .fini = __gnix_tags_basic_test_teardown); - -TestSuite(gnix_tags_basic_posted_kdtree, - .init = __gnix_tags_basic_posted_kdtree_test_setup, - .fini = __gnix_tags_basic_test_teardown, - .disabled = true); - -TestSuite(gnix_tags_basic_unexpected_list, - .init = __gnix_tags_basic_unexpected_list_test_setup, - .fini = __gnix_tags_basic_test_teardown); - -TestSuite(gnix_tags_basic_unexpected_hlist, - .init = __gnix_tags_basic_unexpected_hlist_test_setup, - .fini = __gnix_tags_basic_test_teardown); - 
-TestSuite(gnix_tags_basic_unexpected_kdtree, - .init = __gnix_tags_basic_unexpected_kdtree_test_setup, - .fini = __gnix_tags_basic_test_teardown, - .disabled = true); - -Test(gnix_tags_bare, uninitialized) -{ - cr_assert(test_tag_storage->gen == 0); - cr_assert(test_tag_storage->attr.type == GNIX_TAG_AUTOSELECT); - cr_assert(test_tag_storage->state == GNIX_TS_STATE_UNINITIALIZED); -} - -Test(gnix_tags_bare, initialize_list) -{ - int ret; - - ret = _gnix_tag_storage_init(test_tag_storage, &default_list_attr, - match_func); - cr_assert(ret == FI_SUCCESS); - - ret = _gnix_tag_storage_destroy(test_tag_storage); - cr_assert(ret == FI_SUCCESS); -} - -Test(gnix_tags_bare, simple) -{ - int ret; - struct gnix_fab_req req, *tmp; - dlist_init(&req.msg.tle.free); - - ret = _gnix_tag_storage_init(test_tag_storage, &default_list_attr, - match_func); - cr_assert(ret == FI_SUCCESS); - - ret = _gnix_insert_tag(test_tag_storage, 0xdeadbeef, &req, 0); - cr_assert(ret == FI_SUCCESS); - - ret = _gnix_insert_tag(test_tag_storage, 0xdeadbeef, &req, 0); - cr_assert(ret == -FI_EALREADY); - - tmp = _gnix_match_tag(test_tag_storage, 0xdeadbeef, 0, 0, 0, 0); - cr_assert(&req == tmp); - - ret = _gnix_tag_storage_destroy(test_tag_storage); - cr_assert(ret == FI_SUCCESS); -} - - - -Test(gnix_tags_bare, initialize_hlist) -{ - int ret; - - ret = _gnix_tag_storage_init(test_tag_storage, &default_hlist_attr, - match_func); - cr_assert(ret == FI_SUCCESS); - - ret = _gnix_tag_storage_destroy(test_tag_storage); - cr_assert(ret == FI_SUCCESS); -} - -Test(gnix_tags_bare, initialize_kdtree) -{ - int ret; - - ret = _gnix_tag_storage_init(test_tag_storage, &default_kdtree_attr, - match_func); - cr_assert(ret == -FI_ENOSYS); - - //ret = _gnix_tag_storage_destroy(test_tag_storage); - //cr_assert(ret == FI_SUCCESS); -} - - -Test(gnix_tags_bare, initialize_auto) -{ - int ret; - - ret = _gnix_tag_storage_init(test_tag_storage, &default_auto_attr, - match_func); - cr_assert(ret == FI_SUCCESS); - - ret = 
_gnix_tag_storage_destroy(test_tag_storage); - cr_assert(ret == FI_SUCCESS); -} - -Test(gnix_tags_bare, already_initialized) -{ - int ret; - - ret = _gnix_tag_storage_init(test_tag_storage, &default_list_attr, - match_func); - cr_assert(ret == FI_SUCCESS); - - ret = _gnix_tag_storage_init(test_tag_storage, &default_list_attr, - match_func); - cr_assert(ret == -FI_EINVAL); - - ret = _gnix_tag_storage_destroy(test_tag_storage); - cr_assert(ret == FI_SUCCESS); -} - -Test(gnix_tags_basic_posted_list, single_insert_remove) -{ - int i; - struct gnix_fr_element request = { - .req = { - .msg = { - .tag = 0xA5A5A5A5, - .ignore = 0xFFFFFFFF - }, - }, - .peek_flags = FI_PEEK, - }; - struct gnix_tag_storage_attr attr; - - memcpy(&attr, &default_list_attr, sizeof(struct gnix_tag_storage_attr)); - - for (i = 0; i < TEST_OVERLAY_MAX; i++) { - - single_insert_peek_remove(test_tag_storage, - &test_masks[i], &request); - - /* make necessary alterations to the structure - * for the next test mask - */ - reset_test_tag_storage(test_tag_storage, &attr); - } -} - -ParameterizedTestParameters(gnix_tags_basic_posted_list, multiple_ipr_tags) -{ - size_t nb_params = sizeof (ipr_params) / sizeof (struct ipr_test_params); - return cr_make_param_array(struct ipr_test_params, ipr_params, nb_params); -} - -ParameterizedTest(struct ipr_test_params *params, - gnix_tags_basic_posted_list, multiple_ipr_tags) -{ - __test_multiple_type_ipr_reqs(params->elements, &default_list_attr, - params->make_requests); -} - - -Test(gnix_tags_basic_posted_list, multiple_8_duplicate_tags) -{ - __test_multiple_8_duplicate_tags(&default_list_attr); -} - -Test(gnix_tags_basic_posted_list, not_found_non_empty) -{ - __test_not_found_non_empty(); -} - -Test(gnix_tags_basic_posted_list, not_found_empty) -{ - __test_not_found_empty(); -} - -Test(gnix_tags_basic_posted_list, ignore_mask_set) -{ - __test_ignore_mask_set(&default_list_attr); -} - -Test(gnix_tags_basic_posted_list, fi_claim_pass) -{ - 
__test_claim_pass(&default_list_attr); -} - -Test(gnix_tags_basic_posted_list, fi_claim_fail_no_claimed_tags) -{ - __test_fail_no_claimed_tags(&default_list_attr); -} - -Test(gnix_tags_basic_posted_list, fi_claim_fail_all_claimed_tags) -{ - __test_fail_all_claimed_tags(&default_list_attr); -} - -Test(gnix_tags_basic_posted_list, fi_claim_fail_peek_all_claimed) -{ - __test_fail_peek_all_claimed(&default_list_attr); -} - -/* unexpected list src address matching tests */ -Test(gnix_tags_basic_posted_list, src_addr_match_success) -{ - __test_src_addr_match(&default_list_attr); -} - -Test(gnix_tags_basic_posted_list, src_addr_no_match_wrong_addr) -{ - __test_src_addr_fail_wrong_src_addr(&default_list_attr); -} - -Test(gnix_tags_basic_posted_list, src_addr_match_unspec) -{ - __test_src_addr_match_unspec(&default_list_attr); -} - - -/* - * unexpected list tests - */ - -Test(gnix_tags_basic_unexpected_list, single_insert_remove) -{ - int i; - struct gnix_fr_element request = { - .req = { - .msg = { - .tag = 0xA5A5A5A5, - .ignore = 0xFFFFFFFF - }, - }, - .peek_flags = FI_PEEK, - }; - struct gnix_tag_storage_attr attr; - - memcpy(&attr, &default_list_attr, sizeof(struct gnix_tag_storage_attr)); - - for (i = 0; i < TEST_OVERLAY_MAX; i++) { - - single_insert_peek_remove(test_tag_storage, - &test_masks[i], &request); - - /* make necessary alterations to the structure - * for the next test mask - */ - reset_test_tag_storage(test_tag_storage, &attr); - } -} - -ParameterizedTestParameters(gnix_tags_basic_unexpected_list, multiple_ipr_tags) -{ - size_t nb_params = sizeof (ipr_params) / sizeof (struct ipr_test_params); - return cr_make_param_array(struct ipr_test_params, ipr_params, nb_params); -} - -ParameterizedTest(struct ipr_test_params *params, - gnix_tags_basic_unexpected_list, multiple_ipr_tags) -{ - __test_multiple_type_ipr_reqs(params->elements, &default_list_attr, - params->make_requests); -} - -Test(gnix_tags_basic_unexpected_list, multiple_8_duplicate_tags) -{ - 
__test_multiple_8_duplicate_tags(&default_list_attr); -} - -Test(gnix_tags_basic_unexpected_list, not_found_non_empty) -{ - __test_not_found_non_empty(); -} - -Test(gnix_tags_basic_unexpected_list, not_found_empty) -{ - __test_not_found_empty(); -} - -Test(gnix_tags_basic_unexpected_list, ignore_mask_set) -{ - __test_ignore_mask_set(&default_list_attr); -} - -Test(gnix_tags_basic_unexpected_list, fi_claim_pass) -{ - __test_claim_pass(&default_list_attr); -} - -Test(gnix_tags_basic_unexpected_list, fi_claim_fail_no_claimed_tags) -{ - __test_fail_no_claimed_tags(&default_list_attr); -} - -Test(gnix_tags_basic_unexpected_list, fi_claim_fail_all_claimed_tags) -{ - __test_fail_all_claimed_tags(&default_list_attr); -} - -Test(gnix_tags_basic_unexpected_list, fi_claim_fail_peek_all_claimed) -{ - __test_fail_peek_all_claimed(&default_list_attr); -} - -/* unexpected list src address matching tests */ -Test(gnix_tags_basic_unexpected_list, src_addr_match_success) -{ - __test_src_addr_match(&default_list_attr); -} - -Test(gnix_tags_basic_unexpected_list, src_addr_no_match_wrong_addr) -{ - __test_src_addr_fail_wrong_src_addr(&default_list_attr); -} - -Test(gnix_tags_basic_unexpected_list, src_addr_match_unspec) -{ - __test_src_addr_match_unspec(&default_list_attr); -} - -/* - * hlist tests - */ - -Test(gnix_tags_basic_posted_hlist, single_insert_remove) -{ - int i; - struct gnix_fr_element request = { - .req = { - .msg = { - .tag = 0xA5A5A5A5, - .ignore = 0xFFFFFFFF - }, - }, - .peek_flags = FI_PEEK, - }; - struct gnix_tag_storage_attr attr; - - memcpy(&attr, &default_hlist_attr, sizeof(struct gnix_tag_storage_attr)); - - for (i = 0; i < TEST_OVERLAY_MAX; i++) { - - single_insert_peek_remove(test_tag_storage, - &test_masks[i], &request); - - /* make necessary alterations to the structure - * for the next test mask - */ - reset_test_tag_storage(test_tag_storage, &attr); - } -} - -ParameterizedTestParameters(gnix_tags_basic_posted_hlist, multiple_ipr_tags) -{ - size_t nb_params 
= sizeof (ipr_params) / sizeof (struct ipr_test_params); - return cr_make_param_array(struct ipr_test_params, ipr_params, nb_params); -} - -ParameterizedTest(struct ipr_test_params *params, - gnix_tags_basic_posted_hlist, multiple_ipr_tags) -{ - __test_multiple_type_ipr_reqs(params->elements, &default_hlist_attr, - params->make_requests); -} - - -Test(gnix_tags_basic_posted_hlist, multiple_8_duplicate_tags) -{ - __test_multiple_8_duplicate_tags(&default_hlist_attr); -} - -Test(gnix_tags_basic_posted_hlist, not_found_non_empty) -{ - __test_not_found_non_empty(); -} - -Test(gnix_tags_basic_posted_hlist, not_found_empty) -{ - __test_not_found_empty(); -} - -Test(gnix_tags_basic_posted_hlist, ignore_mask_set) -{ - __test_ignore_mask_set(&default_hlist_attr); -} - -Test(gnix_tags_basic_posted_hlist, fi_claim_pass) -{ - __test_claim_pass(&default_hlist_attr); -} - -Test(gnix_tags_basic_posted_hlist, fi_claim_fail_no_claimed_tags) -{ - __test_fail_no_claimed_tags(&default_hlist_attr); -} - -Test(gnix_tags_basic_posted_hlist, fi_claim_fail_all_claimed_tags) -{ - __test_fail_all_claimed_tags(&default_hlist_attr); -} - -Test(gnix_tags_basic_posted_hlist, fi_claim_fail_peek_all_claimed) -{ - __test_fail_peek_all_claimed(&default_hlist_attr); -} - -/* unexpected list src address matching tests */ -Test(gnix_tags_basic_posted_hlist, src_addr_match_success) -{ - __test_src_addr_match(&default_hlist_attr); -} - -Test(gnix_tags_basic_posted_hlist, src_addr_no_match_wrong_addr) -{ - __test_src_addr_fail_wrong_src_addr(&default_hlist_attr); -} - -Test(gnix_tags_basic_posted_hlist, src_addr_match_unspec) -{ - __test_src_addr_match_unspec(&default_hlist_attr); -} - - -/* - * unexpected hlist tests - */ - -Test(gnix_tags_basic_unexpected_hlist, single_insert_remove) -{ - int i; - struct gnix_fr_element request = { - .req = { - .msg = { - .tag = 0xA5A5A5A5, - .ignore = 0xFFFFFFFF - }, - }, - .peek_flags = FI_PEEK, - }; - struct gnix_tag_storage_attr attr; - - memcpy(&attr, 
&default_hlist_attr, sizeof(struct gnix_tag_storage_attr)); - - for (i = 0; i < TEST_OVERLAY_MAX; i++) { - - single_insert_peek_remove(test_tag_storage, - &test_masks[i], &request); - - /* make necessary alterations to the structure - * for the next test mask - */ - reset_test_tag_storage(test_tag_storage, &attr); - } -} - -ParameterizedTestParameters(gnix_tags_basic_unexpected_hlist, multiple_ipr_tags) -{ - size_t nb_params = sizeof (ipr_params) / sizeof (struct ipr_test_params); - return cr_make_param_array(struct ipr_test_params, ipr_params, nb_params); -} - -ParameterizedTest(struct ipr_test_params *params, - gnix_tags_basic_unexpected_hlist, multiple_ipr_tags) -{ - __test_multiple_type_ipr_reqs(params->elements, &default_hlist_attr, - params->make_requests); -} - -Test(gnix_tags_basic_unexpected_hlist, multiple_8_duplicate_tags) -{ - __test_multiple_8_duplicate_tags(&default_hlist_attr); -} - -Test(gnix_tags_basic_unexpected_hlist, not_found_non_empty) -{ - __test_not_found_non_empty(); -} - -Test(gnix_tags_basic_unexpected_hlist, not_found_empty) -{ - __test_not_found_empty(); -} - -Test(gnix_tags_basic_unexpected_hlist, ignore_mask_set) -{ - __test_ignore_mask_set(&default_hlist_attr); -} - -Test(gnix_tags_basic_unexpected_hlist, fi_claim_pass) -{ - __test_claim_pass(&default_hlist_attr); -} - -Test(gnix_tags_basic_unexpected_hlist, fi_claim_fail_no_claimed_tags) -{ - __test_fail_no_claimed_tags(&default_hlist_attr); -} - -Test(gnix_tags_basic_unexpected_hlist, fi_claim_fail_all_claimed_tags) -{ - __test_fail_all_claimed_tags(&default_hlist_attr); -} - -Test(gnix_tags_basic_unexpected_hlist, fi_claim_fail_peek_all_claimed) -{ - __test_fail_peek_all_claimed(&default_hlist_attr); -} - -/* unexpected list src address matching tests */ -Test(gnix_tags_basic_unexpected_hlist, src_addr_match_success) -{ - __test_src_addr_match(&default_hlist_attr); -} - -Test(gnix_tags_basic_unexpected_hlist, src_addr_no_match_wrong_addr) -{ - 
__test_src_addr_fail_wrong_src_addr(&default_hlist_attr); -} - -Test(gnix_tags_basic_unexpected_hlist, src_addr_match_unspec) -{ - __test_src_addr_match_unspec(&default_hlist_attr); -} - diff --git a/prov/gni/test/utils.c b/prov/gni/test/utils.c deleted file mode 100644 index e39709bee6b..00000000000 --- a/prov/gni/test/utils.c +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include - -#include -#include "gnix_rdma_headers.h" -#include "gnix_util.h" -#include "gnix.h" - -struct gnix_reference_tester { - struct gnix_reference ref_cnt; - int destructed; -}; - -Test(utils, proc) -{ - int rc; - - rc = _gnix_task_is_not_app(); - cr_expect(rc == 0); - - rc = _gnix_job_enable_unassigned_cpus(); - cr_expect(rc == 0); - - rc = _gnix_job_disable_unassigned_cpus(); - cr_expect(rc == 0); - - rc = _gnix_job_enable_affinity_apply(); - cr_expect(rc == 0); - - rc = _gnix_job_disable_affinity_apply(); - cr_expect(rc == 0); - -} - -Test(utils, alps) -{ - int rc; - uint8_t ptag; - uint32_t cookie, fmas, cqs, npes, npr; - void *addr = NULL; - char *cptr = NULL; - int lrank, trank; - - _gnix_app_cleanup(); - - rc = gnixu_get_rdma_credentials(addr, &ptag, &cookie); - cr_expect(!rc); - - rc = _gnix_job_fma_limit(0, ptag, &fmas); - cr_expect(!rc); - - rc = _gnix_job_cq_limit(0, ptag, &cqs); - cr_expect(!rc); - - rc = _gnix_pes_on_node(&npes); - cr_expect(!rc); - - rc = _gnix_nics_per_rank(&npr); - cr_expect(!rc); - - /* - * TODO: this will need more work for CCM, - * where the env. variables checked below - * aren't defined - */ - rc = _gnix_pe_node_rank(&lrank); - if (rc != -FI_EADDRNOTAVAIL) { - cr_expect(!rc); - - cptr = getenv("PMI_FORK_RANK"); - if (cptr == NULL) - cptr = getenv("ALPS_APP_PE"); - if (cptr != NULL) { - trank = atoi(cptr); - trank -= gnix_first_pe_on_node; - cr_expect(trank == lrank); - } else - cr_expect(0); - } - - - cqs /= GNIX_CQS_PER_EP; - cr_expect(((fmas > cqs ? 
cqs : fmas) / npes) == npr); - - _gnix_app_cleanup(); -} - -static void test_destruct(void *obj) -{ - struct gnix_reference_tester *t = (struct gnix_reference_tester *) obj; - - t->destructed = 1; -} - -Test(utils, references) -{ - int refs; - struct gnix_reference_tester test; - - /* initialize test structure */ - _gnix_ref_init(&test.ref_cnt, 1, test_destruct); - test.destructed = 0; - - /* check for validity */ - cr_assert(ofi_atomic_get32(&test.ref_cnt.references) == 1); - cr_assert(test.destructed == 0); - - /* increment refs and check */ - refs = _gnix_ref_get(&test); - cr_assert(refs == 2); - cr_assert(ofi_atomic_get32(&test.ref_cnt.references) == 2); - cr_assert(test.destructed == 0); - - /* decrement refs and check */ - refs = _gnix_ref_put(&test); - cr_assert(refs == 1); - cr_assert(ofi_atomic_get32(&test.ref_cnt.references) == 1); - cr_assert(test.destructed == 0); - - /* decrement and destruct, check for validity */ - refs = _gnix_ref_put(&test); - cr_assert(refs == 0); - cr_assert(ofi_atomic_get32(&test.ref_cnt.references) == 0); - cr_assert(test.destructed == 1); -} - diff --git a/prov/gni/test/vc.c b/prov/gni/test/vc.c deleted file mode 100644 index fa61cbf7a05..00000000000 --- a/prov/gni/test/vc.c +++ /dev/null @@ -1,965 +0,0 @@ -/* - * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights reserved - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * Copyright (c) 2020 Triad National Security, LLC. All rights reserved. - * - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "gnix_vc.h" -#include "gnix_nic.h" -#include "gnix_cm_nic.h" -#include "gnix_hashtable.h" -#include "gnix_av.h" - -#include -#include "gnix_rdma_headers.h" -#include "common.h" - -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fid_fabric *fab; -static struct fid_domain *dom; -static struct fid_ep *ep[2]; -static struct fid_av *av; -static struct fi_info *hints; -static struct fi_info *fi; -static struct fid_cq *cq[2]; -static struct fi_cq_attr cq_attr; -static void *ep_name[2]; -static fi_addr_t gni_addr[2]; -static struct gnix_av_addr_entry gnix_addr[2]; - -/* Third EP with unqiue domain is used to test inter-CM connect. */ -static struct fid_domain *dom3; -static struct fid_ep *ep3; -static struct fid_av *av3; -static struct fid_cq *cq3; -static void *ep_name3; - -/* Register a target buffer with both domains for pings. 
*/ -static void *target_buf, *target_buf_base; -static int target_len = 64; -static struct fid_mr *rem_mr, *rem_mr3; -static uint64_t mr_key, mr_key3; - -static void vc_setup_common(uint32_t version, int mr_mode); - -static inline void __vc_setup(uint32_t version, - int mr_mode, - int control_progress) -{ - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->cq_data_size = 4; - hints->domain_attr->control_progress = control_progress; - hints->mode = mode_bits; - - vc_setup_common(version, mr_mode); -} - -static void vc_setup_manual_basic(void) -{ - __vc_setup(fi_version(), GNIX_MR_BASIC, FI_PROGRESS_MANUAL); -} - -static void vc_setup_manual_scalable(void) -{ - __vc_setup(fi_version(), GNIX_MR_SCALABLE, FI_PROGRESS_MANUAL); -} - -static void vc_setup_auto_basic(void) -{ - __vc_setup(fi_version(), GNIX_MR_BASIC, FI_PROGRESS_AUTO); -} - -static void vc_setup_auto_scalable(void) -{ - __vc_setup(fi_version(), GNIX_MR_SCALABLE, FI_PROGRESS_AUTO); -} - -static void vc_setup_common(uint32_t version, int mr_mode) -{ - int ret = 0; - struct fi_av_attr attr = {0}; - size_t addrlen = 0; - struct gnix_fid_av *gnix_av; - - hints->domain_attr->mr_mode = mr_mode; - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(version, NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - ret = fi_domain(fab, fi, &dom, NULL); - cr_assert(!ret, "fi_domain"); - - memset(&attr, 0, sizeof(attr)); - attr.type = FI_AV_MAP; - attr.count = 16; - - ret = fi_av_open(dom, &attr, &av, NULL); - cr_assert(!ret, "fi_av_open"); - - gnix_av = container_of(av, struct gnix_fid_av, av_fid); - - ret = fi_endpoint(dom, fi, &ep[0], NULL); - cr_assert(!ret, "fi_endpoint"); - - ret = fi_getname(&ep[0]->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - ep_name[0] = malloc(addrlen); - cr_assert(ep_name[0] != NULL); - - ep_name[1] = malloc(addrlen); - cr_assert(ep_name[1] != 
NULL); - - ret = fi_getname(&ep[0]->fid, ep_name[0], &addrlen); - cr_assert(ret == FI_SUCCESS); - - ret = fi_endpoint(dom, fi, &ep[1], NULL); - cr_assert(!ret, "fi_endpoint"); - - ret = fi_getname(&ep[1]->fid, ep_name[1], &addrlen); - cr_assert(ret == FI_SUCCESS); - - ret = fi_av_insert(av, ep_name[0], 1, &gni_addr[0], 0, - NULL); - cr_assert(ret == 1); - - ret = _gnix_av_lookup(gnix_av, gni_addr[0], &gnix_addr[0]); - cr_assert(ret == FI_SUCCESS); - - ret = fi_av_insert(av, ep_name[1], 1, &gni_addr[1], 0, - NULL); - cr_assert(ret == 1); - - ret = _gnix_av_lookup(gnix_av, gni_addr[1], &gnix_addr[1]); - cr_assert(ret == FI_SUCCESS); - - ret = fi_ep_bind(ep[0], &av->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(ep[1], &av->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - cq_attr.format = FI_CQ_FORMAT_TAGGED; - cq_attr.size = 1024; - cq_attr.wait_obj = 0; - - ret = fi_cq_open(dom, &cq_attr, &cq[0], 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_ep_bind(ep[0], &cq[0]->fid, FI_SEND | FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_cq_open(dom, &cq_attr, &cq[1], 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_ep_bind(ep[1], &cq[1]->fid, FI_SEND | FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_enable(ep[0]); - cr_assert(!ret, "fi_enable"); - - ret = fi_enable(ep[1]); - cr_assert(!ret, "fi_ep_enable"); -} - -void vc_teardown(void) -{ - int ret = 0; - - ret = fi_close(&ep[0]->fid); - cr_assert(!ret, "failure in closing ep."); - - ret = fi_close(&ep[1]->fid); - cr_assert(!ret, "failure in closing ep."); - - ret = fi_close(&cq[0]->fid); - cr_assert(!ret, "failure in closing cq."); - - ret = fi_close(&cq[1]->fid); - cr_assert(!ret, "failure in closing cq."); - - ret = fi_close(&av->fid); - cr_assert(!ret, "failure in closing av."); - - ret = fi_close(&dom->fid); - cr_assert(!ret, "failure in closing domain."); - - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); - - fi_freeinfo(fi); - fi_freeinfo(hints); - 
free(ep_name[0]); - free(ep_name[1]); -} - -static void vc_conn_ping_setup(uint32_t version, int mr_mode) -{ - int ret = 0; - struct fi_av_attr attr = {0}; - size_t addrlen = 0; - - hints->domain_attr->mr_mode = mr_mode; - hints->fabric_attr->name = strdup("gni"); - - ret = fi_getinfo(version, NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - ret = fi_domain(fab, fi, &dom, NULL); - cr_assert(!ret, "fi_domain"); - - attr.type = FI_AV_TABLE; - attr.count = 16; - - ret = fi_av_open(dom, &attr, &av, NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_endpoint(dom, fi, &ep[0], NULL); - cr_assert(!ret, "fi_endpoint"); - - ret = fi_getname(&ep[0]->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - ep_name[0] = malloc(addrlen); - cr_assert(ep_name[0] != NULL); - - ep_name[1] = malloc(addrlen); - cr_assert(ep_name[1] != NULL); - - ret = fi_getname(&ep[0]->fid, ep_name[0], &addrlen); - cr_assert(ret == FI_SUCCESS); - - ret = fi_endpoint(dom, fi, &ep[1], NULL); - cr_assert(!ret, "fi_endpoint"); - - ret = fi_getname(&ep[1]->fid, ep_name[1], &addrlen); - cr_assert(ret == FI_SUCCESS); - - ret = fi_av_insert(av, ep_name[0], 1, NULL, 0, NULL); - cr_assert(ret == 1); - - ret = fi_av_insert(av, ep_name[1], 1, NULL, 0, NULL); - cr_assert(ret == 1); - - ret = fi_ep_bind(ep[0], &av->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_ep_bind(ep[1], &av->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - cq_attr.format = FI_CQ_FORMAT_TAGGED; - cq_attr.size = 1024; - cq_attr.wait_obj = 0; - - ret = fi_cq_open(dom, &cq_attr, &cq[0], 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_ep_bind(ep[0], &cq[0]->fid, FI_SEND | FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_enable(ep[0]); - cr_assert(!ret, "fi_enable"); - - ret = fi_cq_open(dom, &cq_attr, &cq[1], 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_ep_bind(ep[1], &cq[1]->fid, FI_SEND | FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - 
ret = fi_enable(ep[1]); - cr_assert(!ret, "fi_ep_enable"); - - /* Setup third EP with separate domain to test inter-CM NIC connect. */ - ret = fi_domain(fab, fi, &dom3, NULL); - cr_assert(!ret, "fi_domain"); - - attr.type = FI_AV_TABLE; - attr.count = 16; - - ret = fi_av_open(dom3, &attr, &av3, NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_endpoint(dom3, fi, &ep3, NULL); - cr_assert(!ret, "fi_endpoint"); - - ret = fi_getname(&ep3->fid, NULL, &addrlen); - cr_assert(addrlen > 0); - - ep_name3 = malloc(addrlen); - cr_assert(ep_name3 != NULL); - - ret = fi_getname(&ep3->fid, ep_name3, &addrlen); - cr_assert(ret == FI_SUCCESS); - - ret = fi_av_insert(av3, ep_name[0], 1, NULL, 0, NULL); - cr_assert(ret == 1); - - ret = fi_av_insert(av3, ep_name[1], 1, NULL, 0, NULL); - cr_assert(ret == 1); - - ret = fi_av_insert(av3, ep_name3, 1, NULL, 0, NULL); - cr_assert(ret == 1); - - ret = fi_ep_bind(ep3, &av3->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - cq_attr.format = FI_CQ_FORMAT_TAGGED; - cq_attr.size = 1024; - cq_attr.wait_obj = 0; - - ret = fi_cq_open(dom3, &cq_attr, &cq3, 0); - cr_assert(!ret, "fi_cq_open"); - - ret = fi_ep_bind(ep3, &cq3->fid, FI_SEND | FI_RECV); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_enable(ep3); - cr_assert(!ret, "fi_enable"); - - /* Register target buffer for pings. */ - target_buf_base = malloc(GNIT_ALIGN_LEN(target_len)); - assert(target_buf_base); - target_buf = GNIT_ALIGN_BUFFER(void *, target_buf_base); - - ret = fi_mr_reg(dom, target_buf, sizeof(target_buf), - FI_REMOTE_WRITE, 0, (USING_SCALABLE(fi) ? 1 : 0), - 0, &rem_mr, &target_buf); - cr_assert_eq(ret, 0); - - if (USING_SCALABLE(fi)) - MR_ENABLE(rem_mr, target_buf, sizeof(target_buf)); - - mr_key = fi_mr_key(rem_mr); - - ret = fi_mr_reg(dom3, target_buf, sizeof(target_buf), - FI_REMOTE_WRITE, 0, (USING_SCALABLE(fi) ? 
2 : 0), - 0, &rem_mr3, &target_buf); - cr_assert_eq(ret, 0); - - if (USING_SCALABLE(fi)) - MR_ENABLE(rem_mr3, target_buf, sizeof(target_buf)); - - mr_key3 = fi_mr_key(rem_mr3); - - ret = fi_av_insert(av, ep_name3, 1, NULL, 0, NULL); - cr_assert(ret == 1); -} - -static void __vc_conn_ping_setup(uint32_t version, - int mr_mode, - int control_progress) -{ - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->domain_attr->cq_data_size = 4; - hints->domain_attr->control_progress = control_progress; - hints->mode = mode_bits; - - vc_conn_ping_setup(version, mr_mode); -} - -static void vc_conn_ping_setup_manual_basic(void) -{ - __vc_conn_ping_setup(fi_version(), GNIX_MR_BASIC, FI_PROGRESS_MANUAL); -} - -static void vc_conn_ping_setup_manual_scalable(void) -{ - __vc_conn_ping_setup(fi_version(), GNIX_MR_SCALABLE, - FI_PROGRESS_MANUAL); -} - -static void vc_conn_ping_setup_auto_basic(void) -{ - __vc_conn_ping_setup(fi_version(), GNIX_MR_BASIC, FI_PROGRESS_AUTO); -} - -static void vc_conn_ping_setup_auto_scalable(void) -{ - __vc_conn_ping_setup(fi_version(), GNIX_MR_SCALABLE, FI_PROGRESS_AUTO); -} - -void vc_conn_ping_teardown(void) -{ - int ret = 0; - - ret = fi_close(&rem_mr3->fid); - cr_assert(!ret, "failure in closing mr3."); - - ret = fi_close(&rem_mr->fid); - cr_assert(!ret, "failure in closing mr."); - - free(target_buf_base); - - ret = fi_close(&cq3->fid); - cr_assert(!ret, "failure in closing cq3."); - - ret = fi_close(&ep3->fid); - cr_assert(!ret, "failure in closing ep3."); - - ret = fi_close(&av3->fid); - cr_assert(!ret, "failure in closing av3."); - - ret = fi_close(&dom3->fid); - cr_assert(!ret, "failure in closing domain3."); - - ret = fi_close(&ep[0]->fid); - cr_assert(!ret, "failure in closing ep."); - - ret = fi_close(&ep[1]->fid); - cr_assert(!ret, "failure in closing ep."); - - ret = fi_close(&cq[0]->fid); - cr_assert(!ret, "failure in closing cq."); - - ret = fi_close(&cq[1]->fid); - cr_assert(!ret, "failure in closing cq."); - - ret 
= fi_close(&av->fid); - cr_assert(!ret, "failure in closing av."); - - ret = fi_close(&dom->fid); - cr_assert(!ret, "failure in closing domain."); - - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); - - fi_freeinfo(fi); - fi_freeinfo(hints); - free(ep_name[0]); - free(ep_name[1]); -} - -/******************************************************************************* - * Test vc functions. - ******************************************************************************/ - -TestSuite(vc_management_auto_basic, - .init = vc_setup_auto_basic, - .fini = vc_teardown, - .disabled = false); - -TestSuite(vc_management_manual_basic, - .init = vc_setup_manual_basic, - .fini = vc_teardown, - .disabled = false); - -TestSuite(vc_management_auto_scalable, - .init = vc_setup_auto_scalable, - .fini = vc_teardown, - .disabled = false); - -TestSuite(vc_management_manual_scalable, - .init = vc_setup_manual_scalable, - .fini = vc_teardown, - .disabled = false); - - -static inline void __vc_alloc_simple(void) -{ - int ret; - struct gnix_vc *vc[2]; - struct gnix_fid_ep *ep_priv; - - ep_priv = container_of(ep[0], struct gnix_fid_ep, ep_fid); - - ret = _gnix_vc_alloc(ep_priv, &gnix_addr[0], &vc[0]); - cr_assert_eq(ret, FI_SUCCESS); - - ret = _gnix_vc_alloc(ep_priv, &gnix_addr[1], &vc[1]); - cr_assert_eq(ret, FI_SUCCESS); - - /* - * vc_id's have to be different since the - * vc's were allocated using the same ep. 
- */ - cr_assert_neq(vc[0]->vc_id, vc[1]->vc_id); - - ret = _gnix_vc_destroy(vc[0]); - cr_assert_eq(ret, FI_SUCCESS); - - ret = _gnix_vc_destroy(vc[1]); - cr_assert_eq(ret, FI_SUCCESS); -} - -Test(vc_management_auto_basic, vc_alloc_simple) -{ - __vc_alloc_simple(); -} - -Test(vc_management_auto_scalable, vc_alloc_simple) -{ - __vc_alloc_simple(); -} - -static inline void __vc_lookup_by_id(void) -{ - int ret; - struct gnix_vc *vc[2], *vc_chk; - struct gnix_fid_ep *ep_priv; - - ep_priv = container_of(ep[0], struct gnix_fid_ep, ep_fid); - - ret = _gnix_vc_alloc(ep_priv, &gnix_addr[0], &vc[0]); - cr_assert_eq(ret, FI_SUCCESS); - - ret = _gnix_vc_alloc(ep_priv, &gnix_addr[1], &vc[1]); - cr_assert_eq(ret, FI_SUCCESS); - - vc_chk = __gnix_nic_elem_by_rem_id(ep_priv->nic, vc[0]->vc_id); - cr_assert_eq(vc_chk, vc[0]); - - vc_chk = __gnix_nic_elem_by_rem_id(ep_priv->nic, vc[1]->vc_id); - cr_assert_eq(vc_chk, vc[1]); - - ret = _gnix_vc_destroy(vc[0]); - cr_assert_eq(ret, FI_SUCCESS); - - ret = _gnix_vc_destroy(vc[1]); - cr_assert_eq(ret, FI_SUCCESS); -} - -Test(vc_management_auto_basic, vc_lookup_by_id) -{ - __vc_lookup_by_id(); -} - -Test(vc_management_auto_scalable, vc_lookup_by_id) -{ - __vc_lookup_by_id(); -} - -static inline void __vc_connect(void) -{ - int ret; - struct gnix_vc *vc_conn; - struct gnix_fid_ep *ep_priv[2]; - gnix_ht_key_t key; - enum gnix_vc_conn_state state; - - ep_priv[0] = container_of(ep[0], struct gnix_fid_ep, ep_fid); - - ep_priv[1] = container_of(ep[1], struct gnix_fid_ep, ep_fid); - - ret = _gnix_vc_alloc(ep_priv[0], &gnix_addr[1], &vc_conn); - cr_assert_eq(ret, FI_SUCCESS); - - memcpy(&key, &gni_addr[1], - sizeof(gnix_ht_key_t)); - - ret = _gnix_ht_insert(ep_priv[0]->vc_ht, key, vc_conn); - cr_assert_eq(ret, FI_SUCCESS); - vc_conn->modes |= GNIX_VC_MODE_IN_HT; - - ret = _gnix_vc_connect(vc_conn); - cr_assert_eq(ret, FI_SUCCESS); - - /* - * since we asked for FI_PROGRESS_AUTO for control - * we can just spin here. 
add a yield in case the - * test is only being run on one cpu. - */ - - /* We need to run CM NIC progress for an intra CM NIC connection. */ - ret = _gnix_cm_nic_progress(ep_priv[0]->domain->cm_nic); - cr_assert_eq(ret, FI_SUCCESS); - - state = GNIX_VC_CONN_NONE; - while (state != GNIX_VC_CONNECTED) { - pthread_yield(); - state = _gnix_vc_state(vc_conn); - } - - /* VC is destroyed by the EP */ -} - -Test(vc_management_auto_basic, vc_connect) -{ - __vc_connect(); -} - -Test(vc_management_auto_scalable, vc_connect) -{ - __vc_connect(); -} - -static inline void __vc_connect2(void) -{ - int ret; - struct gnix_vc *vc_conn0, *vc_conn1; - struct gnix_fid_ep *ep_priv[2]; - gnix_ht_key_t key; - enum gnix_vc_conn_state state; - - ep_priv[0] = container_of(ep[0], struct gnix_fid_ep, ep_fid); - ep_priv[1] = container_of(ep[1], struct gnix_fid_ep, ep_fid); - - ret = _gnix_vc_alloc(ep_priv[0], &gnix_addr[1], &vc_conn0); - cr_assert_eq(ret, FI_SUCCESS); - - memcpy(&key, &gni_addr[1], - sizeof(gnix_ht_key_t)); - - ret = _gnix_ht_insert(ep_priv[0]->vc_ht, key, vc_conn0); - cr_assert_eq(ret, FI_SUCCESS); - - vc_conn0->modes |= GNIX_VC_MODE_IN_HT; - - ret = _gnix_vc_alloc(ep_priv[1], &gnix_addr[0], &vc_conn1); - cr_assert_eq(ret, FI_SUCCESS); - - memcpy(&key, &gni_addr[0], - sizeof(gnix_ht_key_t)); - - ret = _gnix_ht_insert(ep_priv[1]->vc_ht, key, vc_conn1); - cr_assert_eq(ret, FI_SUCCESS); - - vc_conn1->modes |= GNIX_VC_MODE_IN_HT; - - ret = _gnix_vc_connect(vc_conn0); - cr_assert_eq(ret, FI_SUCCESS); - - ret = _gnix_vc_connect(vc_conn1); - cr_assert_eq(ret, FI_SUCCESS); - - /* - * since we asked for FI_PROGRESS_AUTO for control - * we can just spin here. add a yield in case the - * test is only being run on one cpu. - */ - - /* We need to run CM NIC progress for an intra CM NIC connection. 
*/ - ret = _gnix_cm_nic_progress(ep_priv[0]->domain->cm_nic); - cr_assert_eq(ret, FI_SUCCESS); - - state = GNIX_VC_CONN_NONE; - while (state != GNIX_VC_CONNECTED) { - pthread_yield(); - state = _gnix_vc_state(vc_conn0); - } - - /* We need to run CM NIC progress for an intra CM NIC connection. */ - ret = _gnix_cm_nic_progress(ep_priv[1]->domain->cm_nic); - cr_assert_eq(ret, FI_SUCCESS); - - state = GNIX_VC_CONN_NONE; - while (state != GNIX_VC_CONNECTED) { - pthread_yield(); - state = _gnix_vc_state(vc_conn1); - } - - /* VC is destroyed by the EP */ -} - -Test(vc_management_auto_basic, vc_connect2) -{ - __vc_connect2(); -} - -Test(vc_management_auto_scalable, vc_connect2) -{ - __vc_connect2(); -} - -static void vc_conn_ping(struct fid_ep *send_ep, struct fid_cq *send_cq, - fi_addr_t target_pe, void *target_addr, - size_t target_len, uint64_t target_key) -{ - ssize_t sz; - int ret; - struct fi_cq_tagged_entry cqe; - void *context = (void *)0x65468; - - sz = fi_write(send_ep, target_buf, target_len, - NULL, target_pe, _REM_ADDR(fi, target_buf, target_addr), - target_key, context); - cr_assert_eq(sz, 0); - - while ((ret = fi_cq_read(send_cq, &cqe, 1)) == -FI_EAGAIN) { - pthread_yield(); - } - - cr_assert_eq(ret, 1); - cr_assert(cqe.op_context == context, "CQE Context mismatch"); -} - -static void vc_conn_proc_cqes(struct fid_cq *cq0, void *ctx0, - struct fid_cq *cq1, void *ctx1) -{ - int cqe0 = 0, cqe1 = 0, ret; - struct fi_cq_tagged_entry cqe; - - do { - if (cq0 == cq1) { - if ((ret = fi_cq_read(cq0, &cqe, 1)) != -FI_EAGAIN) { - cr_assert_eq(ret, 1); - if (cqe.op_context == ctx0) { - cr_assert(!cqe0); - cqe0++; - } else if (cqe.op_context == ctx1) { - cr_assert(!cqe1); - cqe1++; - } else { - cr_assert(cqe.op_context == ctx0 || - cqe.op_context == ctx1, - "CQE Context mismatch"); - } - } - } else { - if ((ret = fi_cq_read(cq0, &cqe, 1)) != -FI_EAGAIN) { - cr_assert(!cqe0); - cr_assert_eq(ret, 1); - cr_assert(cqe.op_context == ctx0, - "CQE Context mismatch"); - cqe0++; - 
} - - if ((ret = fi_cq_read(cq1, &cqe, 1)) != -FI_EAGAIN) { - cr_assert(!cqe1); - cr_assert_eq(ret, 1); - cr_assert(cqe.op_context == ctx1, - "CQE Context mismatch"); - cqe1++; - } - } - - pthread_yield(); - } while (!cqe0 || !cqe1); -} - -static void vc_conn_pingpong(struct fid_ep *ep0, struct fid_cq *cq0, - fi_addr_t a0, - struct fid_ep *ep1, struct fid_cq *cq1, - fi_addr_t a1) -{ - int i; - ssize_t sz; -#define DATA_LEN 64 - char b0[DATA_LEN], b1[DATA_LEN]; - - for (i = 0; i < DATA_LEN; i++) { - b0[i] = i; - } - - sz = fi_send(ep0, b0, DATA_LEN, NULL, a0, b1); - cr_assert_eq(sz, 0); - - sz = fi_recv(ep1, b1, DATA_LEN, NULL, a1, b0); - cr_assert_eq(sz, 0); - - vc_conn_proc_cqes(cq0, b1, cq1, b0); - - for (i = 0; i < DATA_LEN; i++) { - cr_assert(b1[i] == i); - b1[i] = DATA_LEN + i; - } - - sz = fi_send(ep1, b1, DATA_LEN, NULL, a1, b0); - cr_assert_eq(sz, 0); - - sz = fi_recv(ep0, b0, DATA_LEN, NULL, a0, b1); - cr_assert_eq(sz, 0); - - vc_conn_proc_cqes(cq1, b0, cq0, b1); - - for (i = 0; i < DATA_LEN; i++) { - cr_assert(b0[i] == DATA_LEN + i); - } -} - -TestSuite(vc_conn_ping_auto_basic, - .init = vc_conn_ping_setup_auto_basic, - .fini = vc_conn_ping_teardown, - .disabled = false); - -TestSuite(vc_conn_ping_manual_basic, - .init = vc_conn_ping_setup_manual_basic, - .fini = vc_conn_ping_teardown, - .disabled = false); - -TestSuite(vc_conn_ping_auto_scalable, - .init = vc_conn_ping_setup_auto_scalable, - .fini = vc_conn_ping_teardown, - .disabled = false); - -TestSuite(vc_conn_ping_manual_scalable, - .init = vc_conn_ping_setup_manual_scalable, - .fini = vc_conn_ping_teardown, - .disabled = false); - -/* Connect EP to itself. 
*/ -Test(vc_conn_ping_manual_basic, ep_connect_self) -{ - vc_conn_ping(ep[0], cq[0], 0, target_buf, sizeof(target_buf), mr_key); -} - -Test(vc_conn_ping_auto_basic, ep_connect_self) -{ - vc_conn_ping(ep[0], cq[0], 0, target_buf, sizeof(target_buf), mr_key); -} - -Test(vc_conn_ping_manual_basic, ep_connect_self_pp) -{ - vc_conn_pingpong(ep[0], cq[0], 0, ep[0], cq[0], 0); -} - -Test(vc_conn_ping_auto_basic, ep_connect_self_pp) -{ - vc_conn_pingpong(ep[0], cq[0], 0, ep[0], cq[0], 0); -} - -Test(vc_conn_ping_manual_scalable, ep_connect_self) -{ - vc_conn_ping(ep[0], cq[0], 0, target_buf, sizeof(target_buf), mr_key); -} - -Test(vc_conn_ping_auto_scalable, ep_connect_self) -{ - vc_conn_ping(ep[0], cq[0], 0, target_buf, sizeof(target_buf), mr_key); -} - -Test(vc_conn_ping_manual_scalable, ep_connect_self_pp) -{ - vc_conn_pingpong(ep[0], cq[0], 0, ep[0], cq[0], 0); -} - -Test(vc_conn_ping_auto_scalable, ep_connect_self_pp) -{ - vc_conn_pingpong(ep[0], cq[0], 0, ep[0], cq[0], 0); -} - -/* Do intra-CM EP connect. 
*/ -Test(vc_conn_ping_manual_basic, ep_connect_intra_cm) -{ - vc_conn_ping(ep[0], cq[0], 1, target_buf, sizeof(target_buf), mr_key); -} - -Test(vc_conn_ping_auto_basic, ep_connect_intra_cm) -{ - vc_conn_ping(ep[0], cq[0], 1, target_buf, sizeof(target_buf), mr_key); -} - -Test(vc_conn_ping_manual_basic, ep_connect_intra_cm_pp) -{ - vc_conn_pingpong(ep[0], cq[0], 1, ep[1], cq[1], 0); -} - -Test(vc_conn_ping_auto_basic, ep_connect_intra_cm_pp) -{ - vc_conn_pingpong(ep[0], cq[0], 1, ep[1], cq[1], 0); -} - -Test(vc_conn_ping_manual_scalable, ep_connect_intra_cm) -{ - vc_conn_ping(ep[0], cq[0], 1, target_buf, sizeof(target_buf), mr_key); -} - -Test(vc_conn_ping_auto_scalable, ep_connect_intra_cm) -{ - vc_conn_ping(ep[0], cq[0], 1, target_buf, sizeof(target_buf), mr_key); -} - -Test(vc_conn_ping_manual_scalable, ep_connect_intra_cm_pp) -{ - vc_conn_pingpong(ep[0], cq[0], 1, ep[1], cq[1], 0); -} - -Test(vc_conn_ping_auto_scalable, ep_connect_intra_cm_pp) -{ - vc_conn_pingpong(ep[0], cq[0], 1, ep[1], cq[1], 0); -} - -/* Do inter-CM EP connect. 
*/ -#if 0 -Test(vc_conn_ping_manual, ep_connect_inter_cm) -{ - vc_conn_ping(ep[0], cq[0], 2, target_buf, sizeof(target_buf), mr_key3); -} -#endif - -Test(vc_conn_ping_auto_basic, ep_connect_inter_cm) -{ - vc_conn_ping(ep[0], cq[0], 2, target_buf, sizeof(target_buf), mr_key3); -} - -Test(vc_conn_ping_manual_basic, ep_connect_inter_cm_pp) -{ - vc_conn_pingpong(ep[0], cq[0], 2, ep3, cq3, 0); -} - -Test(vc_conn_ping_auto_basic, ep_connect_inter_cm_pp) -{ - vc_conn_pingpong(ep[0], cq[0], 2, ep3, cq3, 0); -} - -Test(vc_conn_ping_auto_scalable, ep_connect_inter_cm) -{ - vc_conn_ping(ep[0], cq[0], 2, target_buf, sizeof(target_buf), mr_key3); -} - -Test(vc_conn_ping_manual_scalable, ep_connect_inter_cm_pp) -{ - vc_conn_pingpong(ep[0], cq[0], 2, ep3, cq3, 0); -} - -Test(vc_conn_ping_auto_scalable, ep_connect_inter_cm_pp) -{ - vc_conn_pingpong(ep[0], cq[0], 2, ep3, cq3, 0); -} - - diff --git a/prov/gni/test/vc_lookup.c b/prov/gni/test/vc_lookup.c deleted file mode 100644 index ebd95a2d39c..00000000000 --- a/prov/gni/test/vc_lookup.c +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Copyright (c) 2016-2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "gnix_ep.h" -#include "gnix_vc.h" -#include "common.h" - -#include - -static struct fi_info *hints; -static struct fi_info *fi; -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fid_fabric *fab; -static struct fid_domain *dom; -static struct fid_ep *ep; -static struct gnix_fid_ep *gnix_ep; -static struct gnix_ep_name ep_name; -static size_t ep_name_len; -static struct fid_av *av; -static struct fi_info *hints; -static struct fi_info *fi; - -void vc_lookup_setup(int av_type, int av_size) -{ - int ret = 0; - struct fi_av_attr attr; - - hints = fi_allocinfo(); - - hints->mode = mode_bits; - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->fabric_attr->prov_name = strdup("gni"); - - /* Create endpoint */ - ret = fi_getinfo(fi_version(), NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); - - ret = fi_domain(fab, fi, &dom, NULL); - cr_assert(!ret, "fi_domain"); - - attr.type = av_type; - attr.count = av_size; - - ret = fi_av_open(dom, &attr, &av, NULL); - cr_assert(!ret, "fi_av_open"); - - ret = fi_endpoint(dom, fi, &ep, NULL); - cr_assert(!ret, "fi_endpoint"); - - gnix_ep = container_of(ep, struct gnix_fid_ep, ep_fid); - - ret = fi_getname(&ep->fid, NULL, &ep_name_len); - - ret = fi_getname(&ep->fid, 
&ep_name, &ep_name_len); - cr_assert(ret == FI_SUCCESS); - - ret = fi_ep_bind(ep, &av->fid, 0); - cr_assert(!ret, "fi_ep_bind"); - - ret = fi_enable(ep); - cr_assert(!ret, "fi_ep_enable"); - - fi_freeinfo(hints); -} - -void vc_lookup_teardown(void) -{ - int ret = 0; - - ret = fi_close(&ep->fid); - cr_assert(!ret, "failure in closing ep[0]."); - - ret = fi_close(&av->fid); - cr_assert(!ret, "failure in closing dom[0] av."); - - ret = fi_close(&dom->fid); - cr_assert(!ret, "failure in closing domain dom[0]."); - - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); - - fi_freeinfo(fi); -} - -int _do_vc_lookup_perf(int av_type, int niters, int npeers, int naddrs) -{ - struct gnix_ep_name *addrs; - fi_addr_t *fi_addrs; - uint32_t i, ret, inc = npeers / naddrs; - struct timeval s1, s2; - int sec, usec; - double usec_p_lookup; - struct gnix_vc *vc; - struct gnix_ep_name tmp_name = ep_name; - - addrs = malloc(ep_name_len * npeers); - cr_assert(addrs, "failed to malloc addresses"); - - fi_addrs = (fi_addr_t *)malloc(sizeof(fi_addr_t) * npeers); - cr_assert(fi_addrs, "failed to malloc FI addresses"); - - for (i = 0; i < npeers; i++) { - /* insert fake addresses into AV */ - tmp_name.gnix_addr.cdm_id++; - tmp_name.cm_nic_cdm_id++; - addrs[i] = tmp_name; - } - - ret = fi_av_insert(av, (void *)addrs, npeers, - (void *)fi_addrs, 0, NULL); - cr_assert(ret == npeers); - - for (i = 0; i < npeers; i++) { - /* do warump */ - ret = _gnix_vc_ep_get_vc(gnix_ep, fi_addrs[i], - &vc); - cr_assert(ret == FI_SUCCESS); - } - - gettimeofday(&s1, 0); - for (i = 0; i < niters; i++) { - /* do lookups */ - ret = _gnix_vc_ep_get_vc(gnix_ep, fi_addrs[(i * inc)%npeers], - &vc); - /* cr_assert has ridiculous overhead */ - /* cr_assert(ret == FI_SUCCESS); */ - } - gettimeofday(&s2, 0); - - calculate_time_difference(&s1, &s2, &sec, &usec); - usec += sec * 1e6; - usec_p_lookup = (double)usec; - usec_p_lookup /= niters; - - fprintf(stderr, "type: %s\tnpeers: %7d naddrs: %7d ns: 
%f\n", - av_type == 1 ? "MAP" : "TABLE", npeers, naddrs, - usec_p_lookup*1000); - - free(fi_addrs); - free(addrs); - - return 0; -} - -int do_vc_lookup_perf(int av_type, int niters, int naddrs, int npeers) -{ - vc_lookup_setup(av_type, npeers); - _do_vc_lookup_perf(av_type, niters, naddrs, npeers); - vc_lookup_teardown(); - - return 0; -} - -#define NITERS 10000 -#define EXP_INC 4 -#define TESTS 5 -Test(vc_lookup, perf, .disabled = true) -{ - int i, j; - - for (i = 0; i < TESTS; i++) { - for (j = 0; j <= i; j++) { - do_vc_lookup_perf(FI_AV_MAP, NITERS, - 2<<(i*EXP_INC), 2<<(j*EXP_INC)); - } - } - - for (i = 0; i < TESTS; i++) { - for (j = 0; j <= i; j++) { - do_vc_lookup_perf(FI_AV_TABLE, NITERS, - 2<<(i*EXP_INC), 2<<(j*EXP_INC)); - } - } -} - diff --git a/prov/gni/test/vector.c b/prov/gni/test/vector.c deleted file mode 100644 index ee6415177f6..00000000000 --- a/prov/gni/test/vector.c +++ /dev/null @@ -1,384 +0,0 @@ -/* - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All - * rights reserved. - * Copyright (c) 2015-2016 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "gnix_vector.h" -#include -#include -#include "gnix_rdma_headers.h" -#include - -gnix_vector_t vec; -gnix_vec_attr_t attr; - -#define VEC_MAX (1024) -#define VEC_INIT 128 - -void vector_setup_lockless(void) -{ - int ret; - - attr.vec_increase_step = 2; - attr.vec_increase_type = GNIX_VEC_INCREASE_MULT; - attr.vec_initial_size = VEC_INIT; - attr.vec_internal_locking = GNIX_VEC_UNLOCKED; - attr.vec_maximum_size = VEC_MAX; - - ret = _gnix_vec_init(&vec, &attr); - cr_assert(!ret, "_gnix_vec_init"); -} - -void vector_setup_locked() -{ - int ret; - - attr.vec_increase_step = 2; - attr.vec_increase_type = GNIX_VEC_INCREASE_ADD; - attr.vec_initial_size = VEC_INIT; - attr.vec_internal_locking = GNIX_VEC_LOCKED; - attr.vec_maximum_size = VEC_MAX; - - ret = _gnix_vec_init(&vec, &attr); - cr_assert(!ret, "_gnix_vec_init"); -} - -void do_invalid_ops_params() -{ - int ret; - void *tmp; - - ret = _gnix_vec_insert_first(NULL, NULL); - cr_assert_eq(ret, -FI_EINVAL); - - ret = _gnix_vec_remove_first(NULL); - cr_assert_eq(ret, -FI_EINVAL); - - ret = _gnix_vec_first(NULL, &tmp); - cr_assert_eq(ret, -FI_EINVAL); -} - -void vector_teardown(void) -{ - int ret; - - ret = _gnix_vec_close(&vec); - cr_assert(!ret, "_gnix_vec_close"); -} - -void vector_teardown_error(void) -{ - int ret; - - ret = _gnix_vec_close(NULL); - cr_assert_eq(ret, -FI_EINVAL); -} - -/* Test invalid parameters for setup */ -void vector_setup_error(void) -{ - int ret; - - attr.vec_increase_step = 2; - 
attr.vec_increase_type = GNIX_VEC_INCREASE_MULT; - attr.vec_initial_size = VEC_INIT; - attr.vec_internal_locking = GNIX_VEC_UNLOCKED; - attr.vec_maximum_size = VEC_MAX; - - ret = _gnix_vec_init(NULL, NULL); - cr_assert_eq(ret, -FI_EINVAL); - - ret = _gnix_vec_init(NULL, &attr); - cr_assert_eq(ret, -FI_EINVAL); - - ret = _gnix_vec_init(&vec, NULL); - cr_assert_eq(ret, -FI_EINVAL); - - attr.vec_initial_size = 256; - attr.vec_maximum_size = 128; - - ret = _gnix_vec_init(&vec, &attr); - cr_assert_eq(ret, -FI_EINVAL); -} - -void do_insert_first() -{ - int ret; - void *tmp = malloc(sizeof(gnix_vec_entry_t)); - cr_assert(tmp, "do_insert_first"); - - ret = _gnix_vec_insert_first(&vec, tmp); - cr_assert(!ret, "_gnix_vec_insert_first"); - - ret = _gnix_vec_insert_first(&vec, tmp); - cr_assert_eq(ret, -FI_ECANCELED); -} - -void do_insert_last() -{ - int ret; - void *tmp = malloc(sizeof(gnix_vec_entry_t)); - cr_assert(tmp, "do_insert_last"); - - ret = _gnix_vec_insert_last(&vec, tmp); - cr_assert(!ret, "_gnix_vec_insert_last"); - - ret = _gnix_vec_insert_last(&vec, tmp); - cr_assert_eq(ret, -FI_ECANCELED); -} - -void do_fill_insert_at() -{ - int i, ret; - void *tmp; - - for (i = 0; i < vec.attr.cur_size; i++) { - tmp = malloc(sizeof(gnix_vec_entry_t)); - cr_assert(tmp, "do_insert_at"); - - ret = _gnix_vec_insert_at(&vec, tmp, i); - cr_assert(!ret, "_gnix_vec_insert_at"); - } - - /* Test grow. 
*/ - tmp = malloc(sizeof(gnix_vec_entry_t)); - cr_assert(tmp, "do_insert_at"); - - ret = _gnix_vec_insert_at(&vec, tmp, VEC_MAX-1); - cr_assert(!ret, "_gnix_vec_insert_at"); - cr_assert_eq(vec.attr.cur_size, VEC_MAX); - - ret = _gnix_vec_insert_at(&vec, tmp, VEC_MAX-1); - cr_assert_eq(ret, -FI_ECANCELED); - - for (; i < vec.attr.cur_size - 1; i++) { - tmp = malloc(sizeof(gnix_vec_entry_t)); - cr_assert(tmp, "do_insert_at"); - - ret = _gnix_vec_insert_at(&vec, tmp, i); - cr_assert(!ret, "_gnix_vec_insert_at"); - } - - ret = _gnix_vec_insert_at(&vec, tmp, VEC_MAX); - cr_assert_eq(ret, -FI_EINVAL); -} - -void do_remove_first() -{ - int ret; - void *tmp; - - ret = _gnix_vec_first(&vec, &tmp); - cr_assert(!ret, "_gnix_vec_first"); - free(tmp); - - ret = _gnix_vec_remove_first(&vec); - cr_assert(!ret, "_gnix_vec_remove_first"); - - ret = _gnix_vec_remove_first(&vec); - cr_assert_eq(ret, -FI_ECANCELED); -} - -void do_remove_last() -{ - int ret; - void *tmp; - - ret = _gnix_vec_last(&vec, &tmp); - cr_assert(!ret, "_gnix_vec_last"); - free(tmp); - - ret = _gnix_vec_remove_last(&vec); - cr_assert(!ret, "_gnix_vec_remove_last"); - - ret = _gnix_vec_remove_last(&vec); - cr_assert_eq(ret, -FI_ECANCELED); -} - -void do_unfill_remove_at() -{ - int i, ret; - void *tmp; - - for (i = 0; i < vec.attr.cur_size; i++) { - ret = _gnix_vec_at(&vec, &tmp, i); - cr_assert(!ret, "_gnix_vec_at"); - free(tmp); - - ret = _gnix_vec_remove_at(&vec, i); - cr_assert(!ret, "_gnix_vec_remove_at"); - } - - ret = _gnix_vec_remove_at(&vec, vec.attr.cur_size / 2); - cr_assert_eq(ret, -FI_ECANCELED); -} - -void do_first() -{ - int ret; - void *tmp; - - ret = _gnix_vec_first(&vec, &tmp); - cr_assert(tmp, "_gnix_vec_first"); - cr_assert(!ret, "_gnix_vec_first"); - - tmp = NULL; - - do_remove_first(); - - ret = _gnix_vec_first(&vec, &tmp); - cr_assert_eq(tmp, NULL); - cr_assert_eq(ret, -FI_ECANCELED); -} - -void do_last() -{ - int ret; - void *tmp; - - ret = _gnix_vec_last(&vec, &tmp); - cr_assert(tmp, 
"_gnix_vec_last"); - cr_assert(!ret, "_gnix_vec_last"); - - tmp = NULL; - - do_remove_last(); - - ret = _gnix_vec_last(&vec, &tmp); - cr_assert_eq(tmp, NULL); - cr_assert_eq(ret, -FI_ECANCELED); -} - -void do_at() -{ - int i ,ret; - void *tmp; - - for (i = 0; i < vec.attr.cur_size; i++) { - ret = _gnix_vec_at(&vec, &tmp, i); - cr_assert(!ret, "_gnix_vec_at"); - - cr_assert(!!tmp, "_gnix_vec_at"); - tmp = NULL; - } - - ret = _gnix_vec_at(&vec, &tmp, i); - cr_assert(!tmp, "_gnix_vec_at"); - cr_assert(ret == -FI_EINVAL, "_gnix_vec_at"); -} - -void do_iterator_next() -{ - int ret; - void *tmp1, *tmp2; - GNIX_VECTOR_ITERATOR(&vec, iter); - - do_fill_insert_at(); - - while (GNIX_VECTOR_ITERATOR_IDX(iter) + 1 < vec.attr.cur_size) { - tmp1 = _gnix_vec_iterator_next(&iter); - - ret = _gnix_vec_at(&vec, &tmp2, GNIX_VECTOR_ITERATOR_IDX(iter)); - cr_assert(!ret, "_gnix_vec_at"); - - cr_assert_eq(tmp1, tmp2); - } - - tmp1 = _gnix_vec_iterator_next(&iter); - cr_assert(tmp1 == NULL, "_gnix_vec_iterator_next"); - cr_assert_eq(GNIX_VECTOR_ITERATOR_IDX(iter) + 1, vec.attr.cur_size); - - do_unfill_remove_at(); -} - -TestSuite(vector_lockless, .init = vector_setup_lockless, - .fini = vector_teardown, .disabled = false); - -Test(vector_lockless, do_first) -{ - do_insert_first(); - do_first(); -} - -Test(vector_lockless, do_last) -{ - do_insert_last(); - do_last(); -} - -Test(vector_lockless, do_at) -{ - do_fill_insert_at(); - do_at(); - do_unfill_remove_at(); -} - -Test(vector_lockless, do_iterator_next) -{ - do_iterator_next(); -} - -TestSuite(vector_locked, .init = vector_setup_locked, - .fini = vector_teardown, .disabled = false); - -Test(vector_locked, do_first) -{ - do_insert_first(); - do_first(); -} - -Test(vector_locked, do_last) -{ - do_insert_last(); - do_last(); -} - -Test(vector_locked, do_at) -{ - do_fill_insert_at(); - do_at(); - do_unfill_remove_at(); -} - -Test(vector_locked, do_iterator_next) -{ - /* TODO: Multithreaded test */ - do_iterator_next(); -} - 
-TestSuite(vector_error_lockless, .init = vector_setup_error, - .fini = vector_teardown_error, .disabled = false); - -Test(vector_error_lockless, setup_teardown_error) -{ - do_invalid_ops_params(); -} diff --git a/prov/gni/test/wait.c b/prov/gni/test/wait.c deleted file mode 100644 index 3d4cbc77332..00000000000 --- a/prov/gni/test/wait.c +++ /dev/null @@ -1,376 +0,0 @@ -/* - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2015-2017 Cray Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "gnix.h" -#include "gnix_wait.h" -#include -#include - -#include -#include "gnix_rdma_headers.h" -#include "common.h" - -/* Note: Set to ~FI_NOTIFY_FLAGS_ONLY since this was written before api 1.5 */ -static uint64_t mode_bits = ~FI_NOTIFY_FLAGS_ONLY; -static struct fid_fabric *fab; -static struct fi_info *hints; -static struct fi_info *fi; -static struct gnix_fid_wait *wait_priv; -static struct fi_wait_attr wait_attr; -static struct fid_wait *wait_set; - -void wait_setup(void) -{ - int ret = 0; - - hints = fi_allocinfo(); - cr_assert(hints, "fi_allocinfo"); - - hints->mode = mode_bits; - hints->domain_attr->mr_mode = GNIX_DEFAULT_MR_MODE; - hints->fabric_attr->prov_name = strdup("gni"); - - ret = fi_getinfo(fi_version(), NULL, 0, 0, hints, &fi); - cr_assert(!ret, "fi_getinfo"); - - ret = fi_fabric(fi->fabric_attr, &fab, NULL); - cr_assert(!ret, "fi_fabric"); -} - -void wait_teardown(void) -{ - int ret = 0; - - ret = fi_close(&wait_set->fid); - cr_assert(!ret, "failure in closing wait set."); - - ret = fi_close(&fab->fid); - cr_assert(!ret, "failure in closing fabric."); - - fi_freeinfo(fi); - fi_freeinfo(hints); -} - -void setup_wait_type(enum fi_wait_obj wait_obj) -{ - int ret; - - wait_setup(); - wait_attr.wait_obj = wait_obj; - - ret = fi_wait_open(fab, &wait_attr, &wait_set); - cr_assert(!ret, "fi_wait_open"); - - wait_priv = container_of(wait_set, struct gnix_fid_wait, wait); -} - -void unspec_setup(void) -{ - setup_wait_type(FI_WAIT_UNSPEC); -} - -void fd_setup(void) -{ - setup_wait_type(FI_WAIT_FD); -} - -void mutex_cond_setup(void) -{ - setup_wait_type(FI_WAIT_MUTEX_COND); -} - -Test(wait_creation, unspec, .init = unspec_setup, .fini = wait_teardown) -{ - cr_expect_eq(wait_priv->type, FI_WAIT_UNSPEC); - cr_expect_eq(wait_priv->type, wait_attr.wait_obj); - cr_expect_eq(&wait_priv->fabric->fab_fid, fab); - cr_expect_eq(wait_priv->cond_type, FI_CQ_COND_NONE); -} - -Test(wait_creation, fd, .init = fd_setup, .fini = wait_teardown, - .disabled = 
true) -{ - cr_expect_eq(wait_priv->type, FI_WAIT_FD); - cr_expect_eq(wait_priv->type, wait_attr.wait_obj); - cr_expect_eq(&wait_priv->fabric->fab_fid, fab); - cr_expect_eq(wait_priv->cond_type, FI_CQ_COND_NONE); -} - -Test(wait_creation, mutex_cond, .init = mutex_cond_setup, .fini = wait_teardown, - .disabled = true) -{ - cr_expect_eq(wait_priv->type, FI_WAIT_MUTEX_COND); - cr_expect_eq(wait_priv->type, wait_attr.wait_obj); - cr_expect_eq(&wait_priv->fabric->fab_fid, fab); - cr_expect_eq(wait_priv->cond_type, FI_CQ_COND_NONE); -} - -Test(wait_control, unspec, .init = unspec_setup, .fini = wait_teardown, - .disabled = true) -{ - int fd; - int ret; - - ret = fi_control(&wait_priv->wait.fid, FI_GETWAIT, &fd); - cr_expect_eq(FI_SUCCESS, ret, "fi_control failed."); - - cr_expect_eq(wait_priv->fd[WAIT_READ], fd); -} - -Test(wait_control, fd, .init = fd_setup, .fini = wait_teardown, - .disabled = true) -{ - int fd; - int ret; - - ret = fi_control(&wait_priv->wait.fid, FI_GETWAIT, &fd); - cr_expect_eq(FI_SUCCESS, ret, "fi_control failed."); - - cr_expect_eq(wait_priv->fd[WAIT_READ], fd); -} - -Test(wait_control, mutex_cond, .init = mutex_cond_setup, .fini = wait_teardown, - .disabled = true) -{ - int ret; - struct fi_mutex_cond mutex_cond; - - ret = fi_control(&wait_priv->wait.fid, FI_GETWAIT, &mutex_cond); - cr_expect_eq(FI_SUCCESS, ret, "fi_control failed."); - - ret = memcmp(&wait_priv->mutex, mutex_cond.mutex, - sizeof(*mutex_cond.mutex)); - cr_expect_eq(0, ret, "mutex compare failed."); - - ret = memcmp(&wait_priv->cond, mutex_cond.cond, - sizeof(*mutex_cond.cond)); - cr_expect_eq(0, ret, "cond compare failed."); -} - -Test(wait_set, signal_multi, .init = unspec_setup) -{ - int ret; - struct gnix_wait_entry *entry; - - struct fid temp_wait = { - .fclass = FI_CLASS_CNTR - }; - - struct fid temp_wait2 = { - .fclass = FI_CLASS_CQ - }; - - cr_expect(slist_empty(&wait_priv->set), - "wait set is not initially empty."); - ret = _gnix_wait_set_add(&wait_priv->wait, 
&temp_wait); - - cr_expect_eq(FI_SUCCESS, ret, "gnix_wait_set_add failed."); - - ret = _gnix_wait_set_add(&wait_priv->wait, &temp_wait2); - cr_expect_eq(FI_SUCCESS, ret, "gnix_wait_set_add failed."); - - cr_expect(!slist_empty(&wait_priv->set), - "wait set is empty after add."); - - entry = container_of(wait_priv->set.head, struct gnix_wait_entry, - entry); - - ret = memcmp(entry->wait_obj, &temp_wait, sizeof(temp_wait)); - cr_expect_eq(0, ret, "wait objects are not equal."); - - ret = fi_close(&wait_set->fid); - cr_expect_eq(-FI_EBUSY, ret); - - ret = _gnix_wait_set_remove(&wait_priv->wait, &temp_wait); - - cr_expect_eq(FI_SUCCESS, ret, "gnix_wait_set_remove failed."); - - ret = _gnix_wait_set_remove(&wait_priv->wait, &temp_wait2); - - cr_expect_eq(FI_SUCCESS, ret, "gnix_wait_set_remove failed."); - - ret = fi_close(&wait_set->fid); - cr_expect_eq(FI_SUCCESS, ret, "fi_close on wait set failed."); - - ret = fi_close(&fab->fid); - cr_expect_eq(FI_SUCCESS, ret, "failure in closing fabric."); - - fi_freeinfo(fi); - fi_freeinfo(hints); -} - -Test(wait_set, add, .init = unspec_setup) -{ - int ret; - struct gnix_wait_entry *entry; - - struct fid temp_wait = { - .fclass = FI_CLASS_CQ - }; - - cr_expect(slist_empty(&wait_priv->set), - "wait set is not initially empty."); - ret = _gnix_wait_set_add(&wait_priv->wait, &temp_wait); - - cr_expect_eq(FI_SUCCESS, ret, "gnix_wait_set_add failed."); - - cr_expect(!slist_empty(&wait_priv->set), - "wait set is empty after add."); - - entry = container_of(wait_priv->set.head, struct gnix_wait_entry, - entry); - - ret = memcmp(entry->wait_obj, &temp_wait, sizeof(temp_wait)); - cr_expect_eq(0, ret, "wait objects are not equal."); - - ret = fi_close(&wait_set->fid); - cr_expect_eq(-FI_EBUSY, ret); - - ret = _gnix_wait_set_remove(&wait_priv->wait, &temp_wait); - - cr_expect_eq(FI_SUCCESS, ret, "gnix_wait_set_remove failed."); - - ret = fi_close(&wait_set->fid); - cr_expect_eq(FI_SUCCESS, ret, "fi_close on wait set failed."); - - ret = 
fi_close(&fab->fid); - cr_expect_eq(FI_SUCCESS, ret, "failure in closing fabric."); - - fi_freeinfo(fi); - fi_freeinfo(hints); -} - -Test(wait_set, empty_remove, .init = unspec_setup) -{ - int ret; - - struct fid temp_wait = { - .fclass = FI_CLASS_CQ - }; - - cr_expect(slist_empty(&wait_priv->set)); - ret = _gnix_wait_set_remove(&wait_priv->wait, &temp_wait); - cr_expect_eq(-FI_EINVAL, ret); - cr_expect(slist_empty(&wait_priv->set)); - - ret = fi_close(&wait_set->fid); - cr_expect_eq(FI_SUCCESS, ret, "fi_close on wait set failed."); - - ret = fi_close(&fab->fid); - cr_expect_eq(FI_SUCCESS, ret, "fi_close on fabric failed."); - - fi_freeinfo(fi); - fi_freeinfo(hints); -} - -Test(wait_verify, invalid_type, .init = wait_setup) -{ - int ret; - - wait_attr.wait_obj = FI_WAIT_SET; - - ret = fi_wait_open(fab, &wait_attr, &wait_set); - cr_expect_eq(-FI_EINVAL, ret, - "Requesting incorrect type FI_WAIT_SET succeeded."); - - ret = fi_wait_open(fab, NULL, &wait_set); - cr_expect_eq(-FI_EINVAL, ret, - "Requesting verification with NULL attr succeeded."); - - wait_attr.flags = 1; - ret = fi_wait_open(fab, &wait_attr, &wait_set); - cr_expect_eq(-FI_EINVAL, ret, - "Requesting verifications with flags set succeeded."); -} - -Test(wait_signal, has_data, .init = unspec_setup) -{ - int ret; - - struct fid temp_wait = { - .fclass = FI_CLASS_CQ - }; - - cr_expect(slist_empty(&wait_priv->set), - "error"); - ret = _gnix_wait_set_add(&wait_priv->wait, &temp_wait); - - cr_expect_eq(FI_SUCCESS, ret, "gnix_wait_set_add failed."); - - cr_expect(!slist_empty(&wait_priv->set), - "wait set is empty after add."); - - _gnix_signal_wait_obj(&wait_priv->wait); - - ret = fi_wait(&wait_priv->wait, 60); - cr_expect_eq(FI_SUCCESS, ret, "fi_wait test failed. 
%d", ret); - - ret = _gnix_wait_set_remove(&wait_priv->wait, &temp_wait); - - ret = fi_close(&wait_set->fid); - cr_expect_eq(FI_SUCCESS, ret, "fi_close on wait set failed."); - - ret = fi_close(&fab->fid); - cr_expect_eq(FI_SUCCESS, ret, "fi_close on fabric failed."); - - fi_freeinfo(fi); - fi_freeinfo(hints); -} - -Test(wait_spin_adjust, set_val, .init = unspec_setup) -{ - int ret; - int op = GNI_WAIT_THREAD_SLEEP; - struct fi_gni_ops_fab *gni_fabric_ops; - int32_t get_val, val; - - ret = fi_open_ops(&fab->fid, FI_GNI_FAB_OPS_1, - 0, (void **) &gni_fabric_ops, NULL); - - cr_assert(ret == FI_SUCCESS, "fi_open_ops"); - - ret = gni_fabric_ops->get_val(&fab->fid, op, &get_val); - cr_assert(ret == FI_SUCCESS, "get_val"); - - cr_expect_eq(20, get_val, "Value returned does not match default"); - - val = 300; - ret = gni_fabric_ops->set_val(&fab->fid, op, &val); - - cr_assert(ret == FI_SUCCESS, "set val"); - - ret = gni_fabric_ops->get_val(&fab->fid, op, &get_val); - cr_assert(val == get_val, "get val"); - - -} diff --git a/prov/psm3/configure.ac b/prov/psm3/configure.ac index 9ff226d8d28..1dfc2dfc012 100644 --- a/prov/psm3/configure.ac +++ b/prov/psm3/configure.ac @@ -866,8 +866,6 @@ dnl Provider-specific checks dnl FI_PROVIDER_INIT AC_DEFINE([HAVE_EFA], 0, [Ignore HAVE_EFA]) AC_DEFINE([HAVE_EFA_DL], 0, [Ignore HAVE_EFA_DL]) -AC_DEFINE([HAVE_GNI], 0, [Ignore HAVE_GNI]) -AC_DEFINE([HAVE_GNI_DL], 0, [Ignore HAVE_GNI_DL]) AC_DEFINE([HAVE_MRAIL], 0, [Ignore HAVE_MRAIL]) AC_DEFINE([HAVE_MRAIL_DL], 0, [Ignore HAVE_MRAIL_DL]) AC_DEFINE([HAVE_NET], 0, [Ignore HAVE_NET]) diff --git a/src/common.c b/src/common.c index 88df4e465c1..8c6db788afd 100644 --- a/src/common.c +++ b/src/common.c @@ -425,10 +425,6 @@ const char *ofi_straddr(char *buf, size_t *len, *(uint64_t *)addr, *((uint64_t *)addr + 1), *((uint64_t *)addr + 2), *((uint64_t *)addr + 3)); break; - case FI_ADDR_GNI: - size = snprintf(buf, *len, "fi_addr_gni://%" PRIx64, - *(uint64_t *)addr); - break; case FI_ADDR_OPX: size 
= snprintf(buf, *len, "fi_addr_opx://%016lx", *(uint64_t *)addr); break; @@ -489,8 +485,6 @@ uint32_t ofi_addr_format(const char *str) return FI_ADDR_PSMX2; else if (!strcasecmp(fmt, "fi_addr_psmx3")) return FI_ADDR_PSMX3; - else if (!strcasecmp(fmt, "fi_addr_gni")) - return FI_ADDR_GNI; else if (!strcasecmp(fmt, "fi_addr_opx")) return FI_ADDR_OPX; else if (!strcasecmp(fmt, "fi_addr_efa")) @@ -899,7 +893,6 @@ int ofi_str_toaddr(const char *str, uint32_t *addr_format, return ofi_str_to_efa(str, addr, len); case FI_SOCKADDR_IB: return ofi_str_to_sib(str, addr, len); - case FI_ADDR_GNI: case FI_ADDR_MLX: case FI_ADDR_UCX: default: diff --git a/src/fabric.c b/src/fabric.c index 46ba69e65b0..1706aa5b037 100644 --- a/src/fabric.c +++ b/src/fabric.c @@ -445,7 +445,7 @@ static struct fi_provider *ofi_get_hook(const char *name) static void ofi_ordered_provs_init(void) { char *ordered_prov_names[] = { - "efa", "psm2", "opx", "gni", "verbs", + "efa", "psm2", "opx", "verbs", "netdir", "psm3", "ucx", "ofi_rxm", "ofi_rxd", "shm", /* Initialize the socket based providers last of the @@ -889,7 +889,6 @@ void fi_ini(void) ofi_register_provider(PSM3_INIT, NULL); ofi_register_provider(PSM2_INIT, NULL); - ofi_register_provider(GNI_INIT, NULL); ofi_register_provider(NETDIR_INIT, NULL); ofi_register_provider(SHM_INIT, NULL); ofi_register_provider(SM2_INIT, NULL); diff --git a/src/fi_tostr.c b/src/fi_tostr.c index d578e092e44..b4d99bc830b 100644 --- a/src/fi_tostr.c +++ b/src/fi_tostr.c @@ -117,7 +117,6 @@ static void ofi_tostr_addr_format(char *buf, size_t len, uint32_t addr_format) CASEENUMSTRN(FI_SOCKADDR_IN6, len); CASEENUMSTRN(FI_SOCKADDR_IB, len); CASEENUMSTRN(FI_ADDR_PSMX2, len); - CASEENUMSTRN(FI_ADDR_GNI, len); CASEENUMSTRN(FI_ADDR_MLX, len); CASEENUMSTRN(FI_ADDR_UCX, len); CASEENUMSTRN(FI_ADDR_STR, len); @@ -262,7 +261,6 @@ static void ofi_tostr_protocol(char *buf, size_t len, uint32_t protocol) CASEENUMSTRN(FI_PROTO_SOCK_TCP, len); CASEENUMSTRN(FI_PROTO_IB_RDM, len); 
CASEENUMSTRN(FI_PROTO_IWARP_RDM, len); - CASEENUMSTRN(FI_PROTO_GNI, len); CASEENUMSTRN(FI_PROTO_RXM, len); CASEENUMSTRN(FI_PROTO_RXD, len); CASEENUMSTRN(FI_PROTO_MLX, len); diff --git a/util/info.c b/util/info.c index 80bae9fd273..6dc7789f2d1 100644 --- a/util/info.c +++ b/util/info.c @@ -192,7 +192,6 @@ static int str2addr_format(char *inputstr, uint32_t *value) ORCASE(FI_SOCKADDR_IN); ORCASE(FI_SOCKADDR_IN6); ORCASE(FI_SOCKADDR_IB); - ORCASE(FI_ADDR_GNI); ORCASE(FI_ADDR_MLX); ORCASE(FI_ADDR_STR); ORCASE(FI_ADDR_PSMX2); From ac4400ad77b87b0d61aa08be4731b07eeabf2102 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 20 Sep 2023 13:42:57 -0700 Subject: [PATCH 05/34] prov/netdir: Remove provider NetworkDirect support is supported by verbs provider. Signed-off-by: Sean Hefty --- .appveyor.ps1 | 2 +- README.md | 9 +- libfabric.vcxproj | 54 +- libfabric.vcxproj.filters | 93 +- prov/netdir/NetDirect/README.NetworkDirect | 12 - prov/netdir/src/netdir.h | 197 --- prov/netdir/src/netdir_buf.h | 255 ---- prov/netdir/src/netdir_cntr.c | 208 ---- prov/netdir/src/netdir_cq.c | 1067 ----------------- prov/netdir/src/netdir_cq.h | 210 ---- prov/netdir/src/netdir_domain.c | 545 --------- prov/netdir/src/netdir_ep.c | 880 -------------- prov/netdir/src/netdir_ep_msg.c | 666 ---------- prov/netdir/src/netdir_ep_rma.c | 710 ----------- prov/netdir/src/netdir_ep_srx.c | 337 ------ prov/netdir/src/netdir_eq.c | 411 ------- prov/netdir/src/netdir_fabric.c | 121 -- prov/netdir/src/netdir_iface.h | 273 ----- prov/netdir/src/netdir_init.c | 205 ---- prov/netdir/src/netdir_log.h | 195 --- prov/netdir/src/netdir_mr.c | 355 ------ prov/netdir/src/netdir_ov.c | 159 --- prov/netdir/src/netdir_ov.h | 249 ---- prov/netdir/src/netdir_pep.c | 481 -------- prov/netdir/src/netdir_queue.h | 187 --- prov/netdir/src/netdir_unexp.c | 532 -------- prov/netdir/src/netdir_unexp.h | 84 -- prov/netdir/src/netdir_util.h | 176 --- prov/verbs/include/windows/verbs_nd.h | 6 + .../src/windows/verbs_nd_addr.c} | 
6 - .../src/windows/verbs_nd_fs.c} | 7 +- prov/verbs/src/windows/verbs_nd_init.c | 12 +- .../src/windows/verbs_nd_ndinit.c} | 5 - src/fabric.c | 1 - 34 files changed, 41 insertions(+), 8669 deletions(-) delete mode 100644 prov/netdir/NetDirect/README.NetworkDirect delete mode 100644 prov/netdir/src/netdir.h delete mode 100644 prov/netdir/src/netdir_buf.h delete mode 100644 prov/netdir/src/netdir_cntr.c delete mode 100644 prov/netdir/src/netdir_cq.c delete mode 100644 prov/netdir/src/netdir_cq.h delete mode 100644 prov/netdir/src/netdir_domain.c delete mode 100644 prov/netdir/src/netdir_ep.c delete mode 100644 prov/netdir/src/netdir_ep_msg.c delete mode 100644 prov/netdir/src/netdir_ep_rma.c delete mode 100644 prov/netdir/src/netdir_ep_srx.c delete mode 100644 prov/netdir/src/netdir_eq.c delete mode 100644 prov/netdir/src/netdir_fabric.c delete mode 100644 prov/netdir/src/netdir_iface.h delete mode 100644 prov/netdir/src/netdir_init.c delete mode 100644 prov/netdir/src/netdir_log.h delete mode 100644 prov/netdir/src/netdir_mr.c delete mode 100644 prov/netdir/src/netdir_ov.c delete mode 100644 prov/netdir/src/netdir_ov.h delete mode 100644 prov/netdir/src/netdir_pep.c delete mode 100644 prov/netdir/src/netdir_queue.h delete mode 100644 prov/netdir/src/netdir_unexp.c delete mode 100644 prov/netdir/src/netdir_unexp.h delete mode 100644 prov/netdir/src/netdir_util.h rename prov/{netdir/src/netdir_addr.c => verbs/src/windows/verbs_nd_addr.c} (97%) rename prov/{netdir/src/netdir_fs.c => verbs/src/windows/verbs_nd_fs.c} (98%) rename prov/{netdir/src/netdir_ndinit.c => verbs/src/windows/verbs_nd_ndinit.c} (99%) diff --git a/.appveyor.ps1 b/.appveyor.ps1 index 77282cdac99..a44a887c9fd 100644 --- a/.appveyor.ps1 +++ b/.appveyor.ps1 @@ -12,7 +12,7 @@ $wd=$PWD.Path; & { Add-Type -A "System.IO.Compression.FileSystem"; [IO.Compressi Write-Verbose "done" Write-Verbose "moving NetworkDirect headers.." 
-move NetDirect\include\* prov\netdir\NetDirect +move NetDirect\include\* include\windows Write-Verbose "done" $efaWinVersion="1.0.0" diff --git a/README.md b/README.md index c4ec349fb31..829ae944df6 100644 --- a/README.md +++ b/README.md @@ -282,9 +282,8 @@ See the `fi_netdir(7)` man page for more details. - The Network Direct provider requires Network Direct SPI. If you are compiling libfabric from source and want to enable Network Direct support, you will also need the matching header files for the Network Direct SPI. - If the libraries and header files are not in default paths (the default path is - root of provier directory, i.e. \prov\netdir\NetDirect, where NetDirect contains - the header files), specify them in the configuration properties of the VS project. + If the libraries and header files are not in default paths, specify them in the + configuration properties of the VS project. ### shm @@ -326,8 +325,8 @@ It is possible to compile and link libfabric with windows applications. on page press Download button and select NetworkDirect_DDK.zip. Extract header files from downloaded - NetworkDirect_DDK.zip:`\NetDirect\include\` file into `\prov\netdir\NetDirect\`, - or add path to NetDirect headers into VS include paths + NetworkDirect_DDK.zip:`\NetDirect\include\` into `include\windows`, or + add the path to NetDirect headers into VS include paths - 2. 
compiling: libfabric has 6 Visual Studio solution configurations: diff --git a/libfabric.vcxproj b/libfabric.vcxproj index 7b7b2592104..cffccc1383f 100644 --- a/libfabric.vcxproj +++ b/libfabric.vcxproj @@ -192,7 +192,7 @@ NotUsing Level4 - $(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\netdir\NetDirect;$(ProjectDir)prov\verbs\include;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)packages\NetworkDirect.2.0.1\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\include;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\efa_verbs;$(ProjectDir)prov\efa\src\efa_verbs\plat + $(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\verbs\include;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)packages\NetworkDirect.2.0.1\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\include;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\efa_verbs;$(ProjectDir)prov\efa\src\efa_verbs\plat true CompileAsC false @@ -209,7 +209,7 @@ Disabled WIN32;_WINSOCKAPI_=;_CRT_SECURE_NO_WARNINGS;_WINSOCK_DEPRECATED_NO_WARNINGS;_WINDOWS;_USRDLL;LIBFABRIC_EXPORTS;HAVE_CONFIG_H;ENABLE_DEBUG;%(PreprocessorDefinitions) 4127;4200;4204;4221;4115;4201;4100 - $(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\netdir\NetDirect;$(ProjectDir)prov\verbs\include;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)packages\NetworkDirect.2.0.1\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\include;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\efa_verbs;$(ProjectDir)prov\efa\src\efa_verbs\plat + 
$(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\verbs\include;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)packages\NetworkDirect.2.0.1\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\include;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\efa_verbs;$(ProjectDir)prov\efa\src\efa_verbs\plat true MultiThreadedDebug false @@ -223,7 +223,7 @@ Disabled WIN32;_WINSOCKAPI_=;_CRT_SECURE_NO_WARNINGS;_WINSOCK_DEPRECATED_NO_WARNINGS;_WINDOWS;_USRDLL;LIBFABRIC_EXPORTS;HAVE_CONFIG_H;ENABLE_DEBUG;%(PreprocessorDefinitions) true - $(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\netdir\NetDirect;$(ProjectDir)prov\verbs\include;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)packages\NetworkDirect.2.0.1\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\include;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\efa_verbs;$(ProjectDir)prov\efa\src\efa_verbs\plat + $(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\verbs\include;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)packages\NetworkDirect.2.0.1\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\include;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\efa_verbs;$(ProjectDir)prov\efa\src\efa_verbs\plat CompileAsC 4127;4200;4204;4221;4115;4201;4100 true @@ -238,7 +238,7 @@ Disabled WIN32;_WINSOCKAPI_=;_CRT_SECURE_NO_WARNINGS;_WINSOCK_DEPRECATED_NO_WARNINGS;_WINDOWS;_USRDLL;LIBFABRIC_EXPORTS;HAVE_CONFIG_H;ENABLE_DEBUG;%(PreprocessorDefinitions) 4127;4200;4204;4221;4115;4201;4100 - 
$(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\netdir\NetDirect;$(ProjectDir)prov\verbs\include;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)packages\NetworkDirect.2.0.1\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\include;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\efa_verbs;$(ProjectDir)prov\efa\src\efa_verbs\plat + $(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\verbs\include;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)packages\NetworkDirect.2.0.1\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\include;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\efa_verbs;$(ProjectDir)prov\efa\src\efa_verbs\plat true MultiThreadedDebug false @@ -250,7 +250,7 @@ Disabled WIN32;_WINSOCKAPI_=;_CRT_SECURE_NO_WARNINGS;_WINSOCK_DEPRECATED_NO_WARNINGS;_WINDOWS;_USRDLL;LIBFABRIC_EXPORTS;HAVE_CONFIG_H;ENABLE_DEBUG;%(PreprocessorDefinitions) 4127;4200;4204;4221;4115;4201;4100 - $(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\sockets\include;$(ProjectDir)prov\netdir\NetDirect;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\windows; + $(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\sockets\include;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\windows; true MultiThreadedDebug false @@ -265,7 +265,7 @@ Disabled WIN32;_WINSOCKAPI_=;_CRT_SECURE_NO_WARNINGS;_WINSOCK_DEPRECATED_NO_WARNINGS;_WINDOWS;_USRDLL;LIBFABRIC_EXPORTS;HAVE_CONFIG_H;ENABLE_DEBUG;%(PreprocessorDefinitions) true - 
$(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\netdir\NetDirect;$(ProjectDir)prov\verbs\include;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)packages\NetworkDirect.2.0.1\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\include;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\efa_verbs;$(ProjectDir)prov\efa\src\efa_verbs\plat + $(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\verbs\include;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)packages\NetworkDirect.2.0.1\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\include;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\efa_verbs;$(ProjectDir)prov\efa\src\efa_verbs\plat CompileAsC 4127;4200;94;4204;4221;869 true @@ -289,7 +289,7 @@ true WIN32;_WINSOCKAPI_=;_CRT_SECURE_NO_WARNINGS;_WINSOCK_DEPRECATED_NO_WARNINGS;_WINDOWS;_USRDLL;LIBFABRIC_EXPORTS;HAVE_CONFIG_H;%(PreprocessorDefinitions) true - $(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\netdir\NetDirect;$(ProjectDir)prov\verbs\include;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)packages\NetworkDirect.2.0.1\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\include;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\efa_verbs;$(ProjectDir)prov\efa\src\efa_verbs\plat + $(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\verbs\include;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)packages\NetworkDirect.2.0.1\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\include;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\efa_verbs;$(ProjectDir)prov\efa\src\efa_verbs\plat 4127;4200;4204;4221;4115;4201;4100 true false @@ -315,7 +315,7 @@ true 
WIN32;_WINSOCKAPI_=;_CRT_SECURE_NO_WARNINGS;_WINSOCK_DEPRECATED_NO_WARNINGS;_WINDOWS;_USRDLL;LIBFABRIC_EXPORTS;HAVE_CONFIG_H;%(PreprocessorDefinitions) true - $(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\netdir\NetDirect;$(ProjectDir)prov\verbs\include;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)packages\NetworkDirect.2.0.1\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\include;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\efa_verbs;$(ProjectDir)prov\efa\src\efa_verbs\plat + $(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\verbs\include;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)packages\NetworkDirect.2.0.1\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\include;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\efa_verbs;$(ProjectDir)prov\efa\src\efa_verbs\plat 4127;4200;4204;4221;4115;4201;4100 true false @@ -335,7 +335,7 @@ true true WIN32;_WINSOCKAPI_=;_CRT_SECURE_NO_WARNINGS;_WINSOCK_DEPRECATED_NO_WARNINGS;_WINDOWS;_USRDLL;LIBFABRIC_EXPORTS;HAVE_CONFIG_H;%(PreprocessorDefinitions) - $(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\netdir\NetDirect;$(ProjectDir)prov\verbs\include;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)packages\NetworkDirect.2.0.1\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\include;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\efa_verbs;$(ProjectDir)prov\efa\src\efa_verbs\plat + 
$(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\verbs\include;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)packages\NetworkDirect.2.0.1\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\include;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\efa_verbs;$(ProjectDir)prov\efa\src\efa_verbs\plat 4127;4200;4204;4221;4115;4201;4100 true true @@ -356,7 +356,7 @@ true true WIN32;_WINSOCKAPI_=;_CRT_SECURE_NO_WARNINGS;_WINSOCK_DEPRECATED_NO_WARNINGS;_WINDOWS;_USRDLL;LIBFABRIC_EXPORTS;HAVE_CONFIG_H;%(PreprocessorDefinitions) - $(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\sockets\include;$(ProjectDir)prov\netdir\NetDirect;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\windows; + $(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\sockets\include;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\windows; 4127;4200;4204;4221;4115;4201;4100 true true @@ -378,7 +378,7 @@ true WIN32;_WINSOCKAPI_=;_CRT_SECURE_NO_WARNINGS;_WINSOCK_DEPRECATED_NO_WARNINGS;_WINDOWS;_USRDLL;LIBFABRIC_EXPORTS;HAVE_CONFIG_H;%(PreprocessorDefinitions) true - $(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\netdir\NetDirect;$(ProjectDir)prov\verbs\include;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)packages\NetworkDirect.2.0.1\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\include;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\efa_verbs;$(ProjectDir)prov\efa\src\efa_verbs\plat + 
$(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\verbs\include;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)packages\NetworkDirect.2.0.1\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\include;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\efa_verbs;$(ProjectDir)prov\efa\src\efa_verbs\plat 4127;4200;94;4204;4221;869 true false @@ -406,12 +406,6 @@ - - - - - - @@ -471,17 +465,6 @@ - - - - - - - - - - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) @@ -858,6 +841,12 @@ $(ProjectDir)prov\verbs\src;$(ProjectDir)prov\verbs\include;$(ProjectDir)prov\verbs\include\windows;%(AdditionalIncludeDirectories) + + $(ProjectDir)prov\verbs\src;$(ProjectDir)prov\verbs\include;$(ProjectDir)prov\verbs\include\windows;%(AdditionalIncludeDirectories) + + + $(ProjectDir)prov\verbs\src;$(ProjectDir)prov\verbs\include;$(ProjectDir)prov\verbs\include\windows;%(AdditionalIncludeDirectories) + $(ProjectDir)prov\verbs\src;$(ProjectDir)prov\verbs\include;$(ProjectDir)prov\verbs\include\windows;%(AdditionalIncludeDirectories) @@ -981,18 +970,9 @@ - - - - - - - - - diff --git a/libfabric.vcxproj.filters b/libfabric.vcxproj.filters index 72962eb9394..5d7bd424e2f 100644 --- a/libfabric.vcxproj.filters +++ b/libfabric.vcxproj.filters @@ -79,15 +79,6 @@ {e89f94da-f0f0-4e62-aad4-d738963d7dd9} - - {bbe0be7c-1187-45ab-bd2e-5c1f61095a9b} - - - {9ba21998-278b-4f63-9ff2-8fb533eeee84} - - - {b3471da7-1f83-4d3f-b31c-89b197aa6327} - {5547753c-42f5-4cb6-a311-07911724f5cf} @@ -432,57 +423,6 @@ Source Files\src - - Source Files\prov\netdir\src - - - Source Files\prov\netdir\src - - - Source Files\prov\netdir\src - - - Source Files\prov\netdir\src - - - Source Files\prov\netdir\src - - - Source Files\prov\netdir\src - - - Source Files\prov\netdir\src - - - Source Files\prov\netdir\src - - - Source Files\prov\netdir\src - - - Source 
Files\prov\netdir\src - - - Source Files\prov\netdir\src - - - Source Files\prov\netdir\src - - - Source Files\prov\netdir\src - - - Source Files\prov\netdir\src - - - Source Files\prov\netdir\src - - - Source Files\prov\netdir\src - - - Source Files\prov\netdir\src - Source Files\prov\util @@ -645,6 +585,12 @@ Source Files\prov\verbs\src\windows + + Source Files\prov\verbs\src\windows + + + Source Files\prov\verbs\src\windows + @@ -833,33 +779,6 @@ Source Files\prov\rxm\include - - Source Files\prov\netdir\include - - - Source Files\prov\netdir\include - - - Source Files\prov\netdir\include - - - Source Files\prov\netdir\include - - - Source Files\prov\netdir\include - - - Source Files\prov\netdir\include - - - Source Files\prov\netdir\include - - - Source Files\prov\netdir\include - - - Source Files\prov\netdir\include - Header Files\windows\sys diff --git a/prov/netdir/NetDirect/README.NetworkDirect b/prov/netdir/NetDirect/README.NetworkDirect deleted file mode 100644 index a6d113a247c..00000000000 --- a/prov/netdir/NetDirect/README.NetworkDirect +++ /dev/null @@ -1,12 +0,0 @@ -Network Direct SDK/DDK may be obtained as a nuget package (preferred) from: - -https://www.nuget.org/packages/NetworkDirect - -or downloaded from: - -https://www.microsoft.com/en-us/download/details.aspx?id=36043 -on page press Download button and select NetworkDirect_DDK.zip. - -Extract header files from downloaded -NetworkDirect_DDK.zip:\NetDirect\include\ file into this directory, -or add path to NetDirect headers into VS include paths diff --git a/prov/netdir/src/netdir.h b/prov/netdir/src/netdir.h deleted file mode 100644 index 7fc361b6f79..00000000000 --- a/prov/netdir/src/netdir.h +++ /dev/null @@ -1,197 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. 
You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. -* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. 
-*/ - -#ifndef _FI_NETDIR_H_ -#define _FI_NETDIR_H_ - -#if HAVE_CONFIG_H -# include -#endif /* HAVE_CONFIG_H */ - -#include -#include -#include - -#include -#include "ofi_osd.h" - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - - -#define ND_MSG_IOV_LIMIT (256) -#define ND_MSG_INTERNAL_IOV_LIMIT (512) -#define OFI_ND_MAX_MR_CNT (1 << 16) - -#define OFI_ND_DOMAIN_CAPS (FI_LOCAL_COMM | FI_REMOTE_COMM) - -#define OFI_ND_EP_CAPS (FI_MSG | FI_RMA | \ - FI_SEND | FI_RECV | \ - FI_READ | FI_WRITE | \ - FI_REMOTE_READ | FI_REMOTE_WRITE) - -#define OFI_ND_TX_OP_FLAGS (FI_INJECT | FI_COMPLETION | FI_TRANSMIT_COMPLETE | \ - FI_INJECT_COMPLETE | FI_DELIVERY_COMPLETE | \ - FI_SELECTIVE_COMPLETION) - -#define OFI_ND_MSG_ORDER (OFI_ORDER_RAR_SET | OFI_ORDER_RAW_SET | FI_ORDER_RAS | \ - OFI_ORDER_WAW_SET | FI_ORDER_WAS | FI_ORDER_SAW | FI_ORDER_SAS ) - -extern struct gl_data { - int inline_thr; - int prepost_cnt; - int prepost_buf_cnt; - int flow_control_cnt; - int total_avail; -} gl_data; - -extern struct fi_provider ofi_nd_prov; -extern struct util_prov ofi_nd_util_prov; -extern struct fi_info ofi_nd_info; - -extern const char ofi_nd_prov_name[]; - -int ofi_nd_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, - struct fid_cq **cq_fid, void *context); -int ofi_nd_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr, - struct fid_eq **peq, void *context); -int ofi_nd_endpoint(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **ep_fid, void *context); -int ofi_nd_passive_endpoint(struct fid_fabric *fabric, struct fi_info *info, - struct fid_pep **pep, void *context); -int ofi_nd_domain_open(struct fid_fabric *fabric, struct fi_info *info, - struct fid_domain **domain, void *context); -int ofi_nd_srx_ctx(struct fid_domain *domain, - struct fi_rx_attr *attr, struct fid_ep **rx_ep, - void *context); -int ofi_nd_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric, - void *context); - -int ofi_nd_getinfo(uint32_t version, const 
char *node, const char *service, - uint64_t flags, const struct fi_info *hints, struct fi_info **info); -void ofi_nd_fini(void); - -int ofi_nd_mr_reg(struct fid *fid, const void *buf, size_t len, - uint64_t access, uint64_t offset, uint64_t requested_key, - uint64_t flags, struct fid_mr **mr, void *context); -int ofi_nd_mr_regv(struct fid *fid, const struct iovec *iov, - size_t count, uint64_t access, - uint64_t offset, uint64_t requested_key, - uint64_t flags, struct fid_mr **mr, void *context); -int ofi_nd_mr_regattr(struct fid *fid, const struct fi_mr_attr *attr, - uint64_t flags, struct fid_mr **mr); -int ofi_nd_cntr_open(struct fid_domain *pdomain, struct fi_cntr_attr *attr, - struct fid_cntr **pcntr, void *context); - -typedef int(*ofi_nd_adapter_cb_t)(const ND2_ADAPTER_INFO* info, const char *name); - -void ofi_nd_send_event(ND2_RESULT *result); -void ofi_nd_read_event(ND2_RESULT *result); -void ofi_nd_write_event(ND2_RESULT *result); - -HRESULT ofi_nd_startup(ofi_nd_adapter_cb_t cb); -HRESULT ofi_nd_shutdown(); - -int ofi_nd_lookup_adapter(const char *name, IND2Adapter **adapter, struct sockaddr** addr); - -int ofi_nd_is_valid_addr(const SOCKADDR *addr); -int ofi_nd_addr_cmp(const void* vaddr1, const void* vaddr2); - -int ofi_nd_is_same_file(const wchar_t* path1, const wchar_t* path2); -int ofi_nd_file_exists(const wchar_t* path); -int ofi_nd_is_directory(const wchar_t* path); -const wchar_t *ofi_nd_filename(const wchar_t *path); - -static inline size_t unique(void *base, size_t num, size_t width, - int(*cmp)(const void *, const void *)) -{ - char *dst = (char*)base; - char *src = (char*)base + width; - - size_t i; - size_t n = 1; - - if (num < 2) - return num; - - for (i = 1; i < num; i++) { - if (cmp(dst, src)) { - dst += width; - if (dst != src) - memcpy(dst, src, width); - n++; - } - src += width; - } - - return n; -} - -#define H2F(x) ofi_nd_hresult_2_fierror(x) - -static inline int ofi_nd_hresult_2_fierror(HRESULT hr) -{ - switch (hr) { - case S_OK: - 
case ND_PENDING: - return FI_SUCCESS; - case ND_BUFFER_OVERFLOW: - return -FI_EOVERFLOW; - case ND_CONNECTION_REFUSED: - return -FI_ECONNREFUSED; - case ND_TIMEOUT: - return -FI_ETIMEDOUT; - default: - return -FI_EOTHER; - } -} - -#define OFI_ND_TIMEOUT_INIT(timeout) \ - uint64_t sfinish = ((timeout) >= 0) ? \ - (ofi_gettime_ms() + (timeout) * 10000) : -1; - -#define OFI_ND_TIMEDOUT() ((sfinish > 0) ? ofi_gettime_ms() >= sfinish : 0) - -#ifdef ENABLE_DEBUG -# define NODEFAULT assert(0) -#else -# define NODEFAULT __assume(0) -#endif - - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif /* _FI_NETDIR_H_ */ - diff --git a/prov/netdir/src/netdir_buf.h b/prov/netdir/src/netdir_buf.h deleted file mode 100644 index d399c20c3c4..00000000000 --- a/prov/netdir/src/netdir_buf.h +++ /dev/null @@ -1,255 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. -* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -*/ - -#ifndef _FI_NETDIR_BUF_H_ -#define _FI_NETDIR_BUF_H_ - -#include - -#include "rdma/fabric.h" - -#include "ofi.h" -#include "ofi_osd.h" - -#include "netdir.h" - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -/* Implementation of lock-free thread safe allocator of fixed size data */ -/* - * Lock free allocator is implemented using atomics add/compare-and-swap/sub. - * There is three levels of data: - * footer - high level structure, holds links to first chunk, first non-used - * | element, number of allocated elelements and number of used elements. - * | - * +-chunk - one-directional linked list where stored items - * | - * +-item - one directional linked list where stored user's data - * - * chunk elements are linked to be freed only, nothing else used linked nature of - * chunk. - * item elemens may have two states: used/not-used. All not-used (free) elements - * are linked into one-directional linked list, pointer to first non-used - * element is located at footer. - * - * when user allocates new element function buf_alloc looks for number of used - * elements and compares it with allocated number of elements: used atomic - * API ofi_atomic_inc32 to increment counter by 1 and check if there - * is non-used items. In case if no more available elements exist - * (used > counter) - allocate new chunk. - * - * All pointers are updated using atomic API compare-and-swap which guarantee - * thread safe of access to allocator. Only init/fini calls are not thread safe. 
- * - * Call of ofi_nd_buf_init_*** API is not mandatory - default initialization - * by NULL is enough - * - * Note: underscored (__ofi_nd_buf_****) functions are used to implement footer - * inside of another objects - */ - -#define OFI_ND_BUF_DEFCOUNT 1024 -#define ICEP InterlockedCompareExchangePointer -#define countof(x) _countof(x) /*(sizeof(x) / sizeof(*(x)))*/ - -#define OFI_ND_NB_BUF_IMP(name) struct nd_buf_footer_##name nd_footer_##name = {0}; - -#define OFI_ND_NB_BUF_IMP_ALLOC(name, a, f) \ - struct nd_buf_footer_##name nd_footer_##name = {.alloc_chunk = a, .free_chunk = f}; - -#define OFI_ND_NB_BUF(name) OFI_ND_NB_BUF_TYPED(name, name) - -#define OFI_ND_NB_BUF_TYPED(name, type) \ - struct nd_buf_item_##name { \ - struct nd_buf_item_##name *next; /* next free element */ \ - type data; /* user's data */ \ - }; \ - \ - struct nd_buf_chunk_##name { /* cache of elements for fast alloc */ \ - struct nd_buf_chunk_##name *next; \ - struct nd_buf_item_##name item[OFI_ND_BUF_DEFCOUNT]; \ - }; \ - \ - struct nd_buf_footer_##name { \ - LONG count; /* number of elements */ \ - LONG used; /* count of used elements */ \ - struct nd_buf_chunk_##name*(*alloc_chunk)( \ - struct nd_buf_footer_##name *footer, size_t *count); \ - void(*free_chunk)(struct nd_buf_chunk_##name *chunk); \ - volatile struct nd_buf_chunk_##name *chunk_head; \ - volatile struct nd_buf_item_##name *item_free; \ - }; \ - \ - extern struct nd_buf_footer_##name nd_footer_##name; \ - \ - static inline int __ofi_nd_buf_init_##name(struct nd_buf_footer_##name *footer) \ - { \ - assert(footer); \ - memset(footer, 0, sizeof(*footer)); \ - return FI_SUCCESS; \ - } \ - \ - static inline int ofi_nd_buf_init_##name(void) \ - { \ - return __ofi_nd_buf_init_##name(&nd_footer_##name); \ - } \ - \ - static inline void __ofi_nd_buf_fini_##name(struct nd_buf_footer_##name *footer) \ - { \ - typedef struct nd_buf_chunk_##name nd_buf_chunk_##name; \ - assert(footer); \ - assert(!footer->used); \ - \ - 
nd_buf_chunk_##name *next = (nd_buf_chunk_##name*)footer->chunk_head; \ - while (next) { \ - nd_buf_chunk_##name *chunk = next; \ - next = chunk->next; \ - if(!footer->free_chunk) \ - free(chunk); \ - else \ - footer->free_chunk(chunk); \ - } \ - memset(footer, 0, sizeof(*footer)); \ - } \ - \ - static inline void ofi_nd_buf_fini_##name(void) \ - { \ - __ofi_nd_buf_fini_##name(&nd_footer_##name); \ - } \ - \ - /* Do NOT clean item after allocation because there may be user's data */ \ - static inline type *__ofi_nd_buf_alloc_##name(struct nd_buf_footer_##name *footer) \ - { \ - typedef struct nd_buf_chunk_##name nd_buf_chunk_##name; \ - typedef struct nd_buf_item_##name nd_buf_item_##name; \ - \ - assert(footer); \ - \ - if (InterlockedIncrement(&footer->used) > footer->count) { \ - /* allocate new chunk of data */ \ - size_t i; \ - nd_buf_chunk_##name *data; \ - nd_buf_chunk_##name *next; \ - nd_buf_item_##name *next_free; \ - size_t count; \ - \ - if(!footer->alloc_chunk) { \ - data = (nd_buf_chunk_##name*)malloc(sizeof(*data)); \ - memset(data, 0, sizeof(*data)); \ - count = countof(data->item); \ - } \ - else { \ - data = footer->alloc_chunk(footer, &count); \ - } \ - if (!data || !count) \ - return 0; \ - for (i = 0; i < count - 1; i++) { \ - data->item[i].next = &data->item[i + 1]; \ - } \ - /* insert new created free elements into linked list */ \ - /* data->item[0] will be returned to user as used element */ \ - /* items data->item[1..N] are linked to non-used list elements */ \ - do { \ - next_free = (nd_buf_item_##name*)footer->item_free; \ - data->item[count - 1].next = next_free; \ - } while (ICEP((volatile PVOID*)&footer->item_free, \ - (void*)&data->item[1], (void*)next_free) != next_free); \ - \ - InterlockedAdd(&footer->count, (LONG)count); \ - \ - /* add new chunk into footer */ \ - do { \ - next = (nd_buf_chunk_##name*)footer->chunk_head; \ - data->next = next; \ - } while (ICEP((volatile PVOID*)&footer->chunk_head, \ - (void*)data, (void*)next) 
!= next); \ - return &data->item[0].data; \ - } \ - \ - nd_buf_item_##name *top_free; \ - nd_buf_item_##name *next_free; \ - do { \ - assert(footer->item_free); \ - top_free = (nd_buf_item_##name *)footer->item_free; \ - next_free = (nd_buf_item_##name *)footer->item_free->next; \ - } while (ICEP((volatile PVOID*)&footer->item_free, \ - (void*)next_free, (void*)top_free) != top_free); \ - \ - return &top_free->data; \ - } \ - \ - static inline type *ofi_nd_buf_alloc_##name(void) \ - { \ - return __ofi_nd_buf_alloc_##name(&nd_footer_##name); \ - } \ - \ - static inline \ - void __ofi_nd_buf_free_##name(type *data, struct nd_buf_footer_##name *footer) \ - { \ - typedef struct nd_buf_item_##name nd_buf_item_##name; \ - \ - assert(footer); \ - assert(data); \ - \ - nd_buf_item_##name *item = container_of(data, struct nd_buf_item_##name, data); \ - \ - do { \ - item->next = (nd_buf_item_##name*)footer->item_free; \ - } while (ICEP((volatile PVOID*)&footer->item_free, \ - (void*)item, (void*)item->next) != item->next); \ - \ - LONG dec = InterlockedDecrement(&footer->used); \ - assert(dec >= 0); \ - OFI_UNUSED(dec); \ - } \ - \ - static inline void ofi_nd_buf_free_##name(type *data) \ - { \ - __ofi_nd_buf_free_##name(data, &nd_footer_##name); \ - } - -#define ND_BUF_ALLOC(name) ofi_nd_buf_alloc_##name() -#define ND_BUF_FREEPTR(name) ofi_nd_buf_free_##name -#define ND_BUF_FREE(name, ptr) ND_BUF_FREEPTR(name)(ptr) -#define ND_BUF_CHUNK(name) struct nd_buf_chunk_##name -#define ND_BUF_FOOTER(name) struct nd_buf_footer_##name -#define ND_BUF_FINIPTR(name) ofi_nd_buf_fini_##name -#define ND_BUF_FINI(name) ND_BUF_FINIPTR(name)() - - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif /* _FI_NETDIR_BUF_H_ */ diff --git a/prov/netdir/src/netdir_cntr.c b/prov/netdir/src/netdir_cntr.c deleted file mode 100644 index cd3a1978192..00000000000 --- a/prov/netdir/src/netdir_cntr.c +++ /dev/null @@ -1,208 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. 
All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. -* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. 
-*/ - -#ifdef _WIN32 - -#include "netdir.h" -#include "netdir_ov.h" -#include "netdir_iface.h" - -#include "rdma/fabric.h" -#include "ofi_util.h" - -static int ofi_nd_cntr_close(struct fid *fid); -static uint64_t ofi_nd_cntr_read(struct fid_cntr *cntr); -static uint64_t ofi_nd_cntr_readerr(struct fid_cntr *cntr); -static int ofi_nd_cntr_add(struct fid_cntr *cntr, uint64_t value); -static int ofi_nd_cntr_set(struct fid_cntr *cntr, uint64_t value); -static int ofi_nd_cntr_wait(struct fid_cntr *cntr, - uint64_t threshold, int timeout); - -static struct fi_ops ofi_nd_fi_ops = { - .size = sizeof(ofi_nd_fi_ops), - .close = ofi_nd_cntr_close, - .bind = fi_no_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open, -}; - -static struct fid ofi_nd_fid = { - .fclass = FI_CLASS_CNTR, - .context = NULL, - .ops = &ofi_nd_fi_ops -}; - -static struct fi_ops_cntr ofi_nd_cntr_ops = { - .size = sizeof(ofi_nd_cntr_ops), - .read = ofi_nd_cntr_read, - .readerr = ofi_nd_cntr_readerr, - .add = ofi_nd_cntr_add, - .set = ofi_nd_cntr_set, - .wait = ofi_nd_cntr_wait -}; - -static int ofi_nd_cntr_close(struct fid *fid) -{ - assert(fid->fclass == FI_CLASS_CNTR); - if (fid->fclass != FI_CLASS_CQ) - return -FI_EINVAL; - - struct nd_cntr *cntr = container_of(fid, struct nd_cntr, fid.fid); - - free(cntr); - - return FI_SUCCESS; -} - -int ofi_nd_cntr_open(struct fid_domain *pdomain, struct fi_cntr_attr *attr, - struct fid_cntr **pcntr, void *context) -{ - OFI_UNUSED(context); - - assert(pdomain); - assert(pdomain->fid.fclass == FI_CLASS_DOMAIN); - - if (attr) { - if (attr->wait_obj != FI_WAIT_NONE && - attr->wait_obj != FI_WAIT_UNSPEC) - return -FI_EBADFLAGS; - } - - struct nd_cntr *cntr = (struct nd_cntr*)calloc(1, sizeof(*cntr)); - if (!cntr) - return -FI_ENOMEM; - - struct nd_cntr def = { - .fid = { - .fid = ofi_nd_fid, - .ops = &ofi_nd_cntr_ops - }, - }; - - *cntr = def; - - *pcntr = &cntr->fid; - - return FI_SUCCESS; -} - -static uint64_t ofi_nd_cntr_read(struct fid_cntr *pcntr) -{ - 
assert(pcntr); - assert(pcntr->fid.fclass == FI_CLASS_CNTR); - - struct nd_cntr *cntr = container_of(pcntr, struct nd_cntr, fid); - return cntr->counter; -} - -static uint64_t ofi_nd_cntr_readerr(struct fid_cntr *pcntr) -{ - assert(pcntr); - assert(pcntr->fid.fclass == FI_CLASS_CNTR); - - struct nd_cntr *cntr = container_of(pcntr, struct nd_cntr, fid); - return cntr->err; -} - -static int ofi_nd_cntr_add(struct fid_cntr *pcntr, uint64_t value) -{ - assert(pcntr); - assert(pcntr->fid.fclass == FI_CLASS_CNTR); - - if (pcntr->fid.fclass != FI_CLASS_CNTR) - return -FI_EINVAL; - - struct nd_cntr *cntr = container_of(pcntr, struct nd_cntr, fid); - - cntr->counter += value; - WakeByAddressAll((void*)&cntr->counter); - - return FI_SUCCESS; -} - -static int ofi_nd_cntr_set(struct fid_cntr *pcntr, uint64_t value) -{ - assert(pcntr); - assert(pcntr->fid.fclass == FI_CLASS_CNTR); - - if (pcntr->fid.fclass != FI_CLASS_CNTR) - return -FI_EINVAL; - - struct nd_cntr *cntr = container_of(pcntr, struct nd_cntr, fid); - - cntr->counter = value; - WakeByAddressAll((void*)&cntr->counter); - - return FI_SUCCESS; -} - -static int ofi_nd_cntr_wait(struct fid_cntr *pcntr, - uint64_t threshold, int timeout) -{ - assert(pcntr); - assert(pcntr->fid.fclass == FI_CLASS_CNTR); - - if (pcntr->fid.fclass != FI_CLASS_CNTR) - return -FI_EINVAL; - - struct nd_cntr *cntr = container_of(pcntr, struct nd_cntr, fid); - - /* process corner timeouts separately to optimize */ - if (!timeout) { /* no wait */ - return (cntr->counter >= (LONGLONG)threshold) ? 
- FI_SUCCESS : -FI_ETIMEDOUT; - } - else if (timeout < 0) { /* infinite wait */ - while (cntr->counter < (LONG64)threshold) { - LONG64 val = cntr->counter; - WaitOnAddress(&cntr->counter, &val, - sizeof(val), INFINITE); - } - return FI_SUCCESS; - } - else { /* timeout wait */ - OFI_ND_TIMEOUT_INIT(timeout); - - do { - if (cntr->counter >= (LONG64)threshold) - return FI_SUCCESS; - LONG64 val = cntr->counter; - WaitOnAddress(&cntr->counter, &val, - sizeof(val), timeout); - } while (!OFI_ND_TIMEDOUT()); - } - - return FI_SUCCESS; -} - -#endif /* _WIN32 */ - diff --git a/prov/netdir/src/netdir_cq.c b/prov/netdir/src/netdir_cq.c deleted file mode 100644 index b21328a03af..00000000000 --- a/prov/netdir/src/netdir_cq.c +++ /dev/null @@ -1,1067 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. -* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -*/ - -#ifdef _WIN32 - -#include "netdir.h" -#include "netdir_cq.h" -#include "netdir_iface.h" -#include "netdir_unexp.h" - -#include "rdma/fabric.h" -#include "ofi_util.h" - -static int ofi_nd_cq_close(struct fid *fid); -static ssize_t ofi_nd_cq_read(struct fid_cq *cq, void *buf, size_t count); -static ssize_t ofi_nd_cq_readfrom(struct fid_cq *cq, void *buf, size_t count, - fi_addr_t *src_addr); -static ssize_t ofi_nd_cq_readerr(struct fid_cq *cq, struct fi_cq_err_entry *buf, - uint64_t flags); -static ssize_t ofi_nd_cq_sread(struct fid_cq *cq, void *buf, size_t count, - const void *cond, int timeout); -static ssize_t ofi_nd_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count, - fi_addr_t *src_addr, const void *cond, - int timeout); -static int ofi_nd_cq_signal(struct fid_cq* fid_cq); -static const char *ofi_nd_cq_strerror(struct fid_cq *cq, int prov_errno, - const void *err_data, char *buf, - size_t len); -ssize_t ofi_nd_ep_injectdata(struct fid_ep *ep, const void *buf, size_t len, - uint64_t data, fi_addr_t dest_addr); - -static struct fi_ops ofi_nd_fi_ops = { - .size = sizeof(ofi_nd_fi_ops), - .close = ofi_nd_cq_close, - .bind = fi_no_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open, -}; - -static struct fid ofi_nd_fid = { - .fclass = FI_CLASS_CQ, - .context = NULL, - .ops = &ofi_nd_fi_ops -}; - -static struct fi_ops_cq ofi_nd_cq_ops = { - .size = sizeof(ofi_nd_cq_ops), - .read = ofi_nd_cq_read, - .readfrom = ofi_nd_cq_readfrom, - .readerr = ofi_nd_cq_readerr, - .sread = ofi_nd_cq_sread, - .sreadfrom = ofi_nd_cq_sreadfrom, - .signal = ofi_nd_cq_signal, - .strerror = ofi_nd_cq_strerror -}; - -typedef struct nd_cq_action { - nd_event_base base; - struct nd_cq *cq; -} 
nd_cq_action; - -OFI_ND_NB_BUF(nd_cq_action); - -OFI_ND_NB_BUF_IMP(nd_cq_action); -OFI_ND_NB_BUF_IMP(nd_cq_entry); - -OFI_ND_NB_BUF_IMP(nd_send_entry); -OFI_ND_NB_BUF_IMP(nd_sge); - -/* State-Event matrix callbacks. Must have the following signature - * "void (nd_cq_entry*, void*)" */ -static inline void ofi_nd_handle_unknown(nd_cq_entry *entry, void *misc); -static inline void ofi_nd_unexp_2_read(nd_cq_entry *entry, void *unexpected); -static inline void ofi_nd_read_2_send_ack(nd_cq_entry *entry, void *res); -static inline void ofi_nd_event_2_cq(nd_cq_entry *entry, void *misc); -static inline void ofi_nd_unexp_ack_2_cq(nd_cq_entry *entry, void *unexpected); - -/* Auxillary functions to ensure workability of callbacks of S-E matrix - * and are used implicitly inside these callbakcs */ -static void ofi_nd_unexp_2_cq(nd_cq_entry *entry, nd_unexpected_entry *unexp); -static void ofi_nd_read_2_cq(nd_cq_entry *entry, ND2_RESULT *result); - -#define UNKNWN ofi_nd_handle_unknown -#define UNEXP_2_READ ofi_nd_unexp_2_read -#define READ_2_SENDACK ofi_nd_read_2_send_ack -#define EVENT_2_CQ ofi_nd_event_2_cq -#define UNEXP_ACK_2_CQ ofi_nd_unexp_ack_2_cq - -typedef void(*cq_matrix_cb)(struct nd_cq_entry *entry, void *misc); - -static cq_matrix_cb cq_matrix[MAX_STATE][MAX_EVENT] = { - { EVENT_2_CQ, UNEXP_2_READ, UNKNWN }, - { UNKNWN, READ_2_SENDACK, UNKNWN }, - { UNKNWN, UNKNWN, UNEXP_ACK_2_CQ } -}; - -void ofi_nd_dispatch_cq_event(ofi_nd_cq_event event, nd_cq_entry *entry, void *misc) -{ - ofi_nd_cq_state state = entry->state; - cq_matrix_cb cb = cq_matrix[state][event]; - ND_LOG_DEBUG(FI_LOG_EP_DATA, "Dispatch Event - %d:%d\n", - state, event); - cb(entry, misc); -} - -static int ofi_nd_cq_close(struct fid *fid) -{ - assert(fid->fclass == FI_CLASS_CQ); - if (fid->fclass != FI_CLASS_CQ) - return -FI_EINVAL; - - struct nd_cq *cq = container_of(fid, struct nd_cq, fid.fid); - - if (cq->iocp && cq->iocp != INVALID_HANDLE_VALUE) - CloseHandle(cq->iocp); - if (cq->err && cq->err != 
INVALID_HANDLE_VALUE) - CloseHandle(cq->err); - - free(cq); - - return FI_SUCCESS; -} - -int ofi_nd_cq_open(struct fid_domain *pdomain, struct fi_cq_attr *attr, - struct fid_cq **pcq_fid, void *context) -{ - OFI_UNUSED(context); - - assert(pdomain); - assert(pdomain->fid.fclass == FI_CLASS_DOMAIN); - - if (pdomain->fid.fclass != FI_CLASS_DOMAIN) - return -FI_EINVAL; - - ND_REGISTER_FINI(ND_BUF_FINIPTR(nd_cq_action)); - ND_REGISTER_FINI(ND_BUF_FINIPTR(nd_cq_entry)); - - if (pdomain->fid.fclass != FI_CLASS_DOMAIN) - return -FI_EINVAL; - - HRESULT hr; - - if (attr) { - if (attr->wait_obj != FI_WAIT_NONE && - attr->wait_obj != FI_WAIT_UNSPEC) - return -FI_EBADFLAGS; - } - - struct nd_cq *cq = (struct nd_cq*)calloc(1, sizeof(*cq)); - if (!cq) - return -FI_ENOMEM; - - struct nd_cq def = { - .fid = { - .fid = ofi_nd_fid, - .ops = &ofi_nd_cq_ops - }, - .format = attr ? attr->format : FI_CQ_FORMAT_CONTEXT - }; - - *cq = def; - - if (cq->format == FI_CQ_FORMAT_UNSPEC) { - cq->format = FI_CQ_FORMAT_CONTEXT; - if (attr) - attr->format = cq->format; - } - - struct nd_domain *domain = container_of(pdomain, struct nd_domain, fid); - assert(domain->adapter); - assert(domain->adapter_file); - OFI_UNUSED(domain); - - cq->iocp = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0); - if (!cq->iocp || cq->iocp == INVALID_HANDLE_VALUE) { - hr = -FI_EINVAL; - goto hr_fail; - } - cq->err = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0); - if (!cq->err || cq->err == INVALID_HANDLE_VALUE) { - hr = -FI_EINVAL; - goto hr_fail; - } - - *pcq_fid = &cq->fid; - - return FI_SUCCESS; - -hr_fail: - ofi_nd_cq_close(&cq->fid.fid); - ND_LOG_WARN(FI_LOG_CQ, ofi_nd_strerror((DWORD)hr, NULL)); - return H2F(hr); -} - -static uint64_t ofi_nd_cq_sanitize_flags(uint64_t flags) -{ - return (flags & (FI_SEND | FI_RECV | FI_RMA | FI_ATOMIC | - FI_MSG | FI_TAGGED | - FI_READ | FI_WRITE | - FI_REMOTE_READ | FI_REMOTE_WRITE | - FI_REMOTE_CQ_DATA | FI_MULTI_RECV)); -} - -static void 
ofi_nd_cq_ov2buf(struct nd_cq *cq, OVERLAPPED_ENTRY *ov, - void* buf, ULONG count) -{ - ULONG i; - struct nd_msgprefix *prefix; - - switch (cq->format) { - case FI_CQ_FORMAT_CONTEXT: - { - struct fi_cq_entry *entry = (struct fi_cq_entry*)buf; - for (i = 0; i < count; i++) { - struct nd_cq_entry *cqen = container_of(ov[i].lpOverlapped, struct nd_cq_entry, base.ov); - entry[i].op_context = cqen->context; - ofi_nd_free_cq_entry(cqen); - } - } - break; - case FI_CQ_FORMAT_MSG: - { - struct fi_cq_msg_entry *entry = (struct fi_cq_msg_entry*)buf; - for (i = 0; i < count; i++) { - struct nd_cq_entry *cqen = container_of(ov[i].lpOverlapped, struct nd_cq_entry, base.ov); - entry[i].op_context = cqen->context; - entry[i].flags = ofi_nd_cq_sanitize_flags(cqen->flags); - /* for send/receive operations there message header used, - and common size of transferred message is bit - bigger, in this case decrement transferred message - size by header size */ - size_t header_len = (cqen->result.RequestType == Nd2RequestTypeSend || - cqen->result.RequestType == Nd2RequestTypeReceive) ? - sizeof(prefix->header) : 0; - entry[i].len = cqen->result.BytesTransferred - header_len; - ofi_nd_free_cq_entry(cqen); - } - } - break; - case FI_CQ_FORMAT_DATA: - { - struct fi_cq_data_entry *entry = (struct fi_cq_data_entry*)buf; - for (i = 0; i < count; i++) { - struct nd_cq_entry *cqen = container_of(ov[i].lpOverlapped, struct nd_cq_entry, base.ov); - entry[i].op_context = cqen->context; - entry[i].flags = ofi_nd_cq_sanitize_flags(cqen->flags); - size_t header_len = (cqen->result.RequestType == Nd2RequestTypeSend || - cqen->result.RequestType == Nd2RequestTypeReceive) ? 
- sizeof(prefix->header) : 0; - entry[i].len = cqen->result.BytesTransferred - header_len; - entry[i].buf = cqen->buf; - ofi_nd_free_cq_entry(cqen); - } - } - break; - case FI_CQ_FORMAT_TAGGED: - { - struct fi_cq_tagged_entry *entry = (struct fi_cq_tagged_entry*)buf; - for (i = 0; i < count; i++) { - struct nd_cq_entry *cqen = container_of(ov[i].lpOverlapped, struct nd_cq_entry, base.ov); - entry[i].op_context = cqen->context; - entry[i].flags = ofi_nd_cq_sanitize_flags(cqen->flags); - size_t header_len = (cqen->result.RequestType == Nd2RequestTypeSend || - cqen->result.RequestType == Nd2RequestTypeReceive) ? - sizeof(prefix->header) : 0; - entry[i].len = cqen->result.BytesTransferred - header_len; - entry[i].buf = cqen->buf; - entry[i].tag = 0; - ofi_nd_free_cq_entry(cqen); - } - } - break; - default: - ND_LOG_WARN(FI_LOG_CQ, "incorrect CQ format: %d\n", cq->format); - break; - } -} - -static ssize_t ofi_nd_cq_read(struct fid_cq *pcq, void *buf, size_t count) -{ - assert(pcq); - assert(pcq->fid.fclass == FI_CLASS_CQ); - - if (pcq->fid.fclass != FI_CLASS_CQ) - return -FI_EINVAL; - - struct nd_cq *cq = container_of(pcq, struct nd_cq, fid); - - ULONG cnt = (ULONG)count; - ULONG dequeue = 0; - ssize_t res = 0; - OVERLAPPED_ENTRY _ov[256]; - - if (!cq->count) - return -FI_EAGAIN; - - OVERLAPPED_ENTRY *ov = (cnt <= countof(_ov)) ? - _ov : malloc(cnt * sizeof(*ov)); - - if (!ov) { - ND_LOG_WARN(FI_LOG_CQ, "failed to allocate OV\n"); - return -FI_ENOMEM; - } - - assert(cq->iocp && cq->iocp != INVALID_HANDLE_VALUE); - if (!GetQueuedCompletionStatusEx(cq->iocp, ov, cnt, &dequeue, 0, FALSE) || - !dequeue) { - res = cq->count ? 
-FI_EAVAIL : -FI_EAGAIN; - goto fn_complete; - } - - ofi_nd_cq_ov2buf(cq, ov, buf, dequeue); - res = (ssize_t)dequeue; - InterlockedAdd(&cq->count, -(LONG)dequeue); - assert(cq->count >= 0); - -fn_complete: - if (ov != _ov) - free(ov); - return res; -} - -static ssize_t ofi_nd_cq_readfrom(struct fid_cq *cq, void *buf, size_t count, - fi_addr_t *src_addr) -{ - size_t i; - for(i = 0; i < count; i++) - src_addr[i] = FI_ADDR_NOTAVAIL; - return ofi_nd_cq_read(cq, buf, count); -} - -static ssize_t ofi_nd_cq_readerr(struct fid_cq *pcq, struct fi_cq_err_entry *buf, - uint64_t flags) -{ - assert(pcq); - assert(pcq->fid.fclass == FI_CLASS_CQ); - assert(buf); - - OFI_UNUSED(flags); - - if (pcq->fid.fclass != FI_CLASS_CQ) - return -FI_EINVAL; - - struct nd_cq *cq = container_of(pcq, struct nd_cq, fid); - - ULONG_PTR key = 0; - DWORD bytes; - OVERLAPPED *ov; - - if (!cq->count) - return -FI_EAGAIN; - - assert(cq->err && cq->err != INVALID_HANDLE_VALUE); - if (!GetQueuedCompletionStatus(cq->err, &bytes, &key, &ov, 0)) - return -FI_EAGAIN; - - struct nd_cq_entry *entry = container_of(ov, struct nd_cq_entry, base.ov); - - buf->op_context = entry->result.RequestContext; - buf->flags = entry->flags; - buf->len = entry->len; - buf->buf = entry->buf; - buf->data = entry->data; - buf->tag = 0; /* while tagged send/recv isn't added */ - buf->olen = 0; - buf->err = -H2F(entry->result.Status); - buf->prov_errno = entry->result.Status; - buf->err_data_size = 0; - - InterlockedDecrement(&cq->count); - assert(cq->count >= 0); - - return 0; -} - -static ssize_t ofi_nd_cq_sread(struct fid_cq *pcq, void *buf, size_t count, - const void *cond, int timeout) -{ - assert(pcq); - assert(pcq->fid.fclass == FI_CLASS_CQ); - - OFI_UNUSED(cond); - - if (pcq->fid.fclass != FI_CLASS_CQ) - return -FI_EINVAL; - - struct nd_cq *cq = container_of(pcq, struct nd_cq, fid); - - ULONG cnt = (ULONG)count; - ULONG dequeue = 0; - ssize_t res = 0; - OVERLAPPED_ENTRY _ov[256]; - - OVERLAPPED_ENTRY *ov = (cnt <= 
countof(_ov)) ? - _ov : malloc(cnt * sizeof(*ov)); - - if (!ov) { - ND_LOG_WARN(FI_LOG_CQ, "failed to allocate OV\n"); - return -FI_ENOMEM; - } - - LONG zero = 0; - OFI_ND_TIMEOUT_INIT(timeout); - - do { - do { - if (!WaitOnAddress( - &cq->count, &zero, sizeof(cq->count), - (DWORD)timeout) && timeout >= 0) { - res = -FI_EAGAIN; - goto fn_complete; - } - } while (!cq->count && !OFI_ND_TIMEDOUT()); - - if (cq->count <= 0) { - res = -FI_EAGAIN; - goto fn_complete; - } - - - - assert(cq->iocp && cq->iocp != INVALID_HANDLE_VALUE); - if (!GetQueuedCompletionStatusEx(cq->iocp, ov, cnt, &dequeue, 0, FALSE) || - !dequeue) { - if (cq->count) { - res = -FI_EAVAIL; - goto fn_complete; - } - else { - continue; - } - } - - ofi_nd_cq_ov2buf(cq, ov, buf, dequeue); - res = (ssize_t)dequeue; - InterlockedAdd(&cq->count, -(LONG)dequeue); - assert(cq->count >= 0); - goto fn_complete; - } while (!OFI_ND_TIMEDOUT()); - -fn_complete: - if (ov != _ov) - free(ov); - return res; -} - -static ssize_t ofi_nd_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count, - fi_addr_t *src_addr, const void *cond, - int timeout) -{ - size_t i; - for (i = 0; i < count; i++) - src_addr[i] = FI_ADDR_NOTAVAIL; - return ofi_nd_cq_sread(cq, buf, count, cond, timeout); -} - -static int ofi_nd_cq_signal(struct fid_cq* fid_cq) -{ - assert(fid_cq); - assert(fid_cq->fid.fclass == FI_CLASS_CQ); - - if (fid_cq->fid.fclass != FI_CLASS_CQ) - return -FI_EINVAL; - - struct nd_cq* cq = container_of(fid_cq, struct nd_cq, fid); - - InterlockedDecrement(&cq->count); - WakeByAddressAll(&cq->count); - - return FI_SUCCESS; -} - -static const char *ofi_nd_cq_strerror(struct fid_cq *cq, int prov_errno, - const void *err_data, char *buf, - size_t len) -{ - OFI_UNUSED(cq); - OFI_UNUSED(err_data); - - if (buf && len) - return strncpy(buf, fi_strerror(-prov_errno), len); - return fi_strerror(-prov_errno); -} - -static void ofi_nd_cq_event_free(nd_event_base *base) -{ - nd_cq_action *action = container_of(base, nd_cq_action, base); 
- ND_BUF_FREE(nd_cq_action, action); -} - -void ofi_nd_handle_unknown(nd_cq_entry *entry, void *unexp) -{ - OFI_UNUSED(entry); - OFI_UNUSED(unexp); - - ND_LOG_DEBUG(FI_LOG_CQ, "Unknown event-state, " - "the event can't be handled\n"); - - /* Shouldn't go here */ - assert(0); - - return; -} - -void ofi_nd_event_2_cq(nd_cq_entry *entry, void *misc) -{ - /* Memory region is set in CQ entry only in case of RMA operation. - * Use this fact to realize what kind of operation is completed */ - if (!entry->mr_count) - ofi_nd_unexp_2_cq(entry, (nd_unexpected_entry *)misc); - else - ofi_nd_read_2_cq(entry, (ND2_RESULT *)misc); -} - -/* do NOT release unexpected here, becuase it just an allocated on stack entry */ -void ofi_nd_unexp_ack_2_cq(nd_cq_entry *entry, void *unexpected) -{ - nd_unexpected_entry *unexp = (nd_unexpected_entry *)unexpected; - struct nd_ep *ep = unexp->ep; - ND2_RESULT *result = &unexp->result; - - struct nd_cq_entry *parent_entry = entry->aux_entry; - - if (ep->cntr_send) { - if (result->Status != S_OK) { - InterlockedIncrement64(&ep->cntr_send->err); - } - InterlockedIncrement64(&ep->cntr_send->counter); - WakeByAddressAll((void*)&ep->cntr_send->counter); - } - - int notify = ofi_nd_util_completion_blackmagic( - ep->info->tx_attr->op_flags, ep->send_flags, parent_entry->flags) || - result->Status != S_OK; - - if (notify) { - PostQueuedCompletionStatus( - parent_entry->result.Status == S_OK ? ep->cq_send->iocp : ep->cq_send->err, - 0, 0, &parent_entry->base.ov); - InterlockedIncrement(&ep->cq_send->count); - WakeByAddressAll((void*)&ep->cq_send->count); - } - else { /* if notification is not requested - just free entry */ - ofi_nd_free_cq_entry(parent_entry); - } - - /* This CQ is no longer needed for us, let's release it. 
- * Set address of parent entry (that's used for initial - * send opertation) to NULL, it's just to not release it - * during free parent CQ entry below */ - entry->aux_entry = NULL; - ofi_nd_free_cq_entry(entry); -} - -void ofi_nd_unexp_2_cq(nd_cq_entry *entry, nd_unexpected_entry *unexp) -{ - assert(entry); - assert(unexp); - assert(unexp->ep); - assert(unexp->ep->fid.fid.fclass == FI_CLASS_EP || - unexp->ep->fid.fid.fclass == FI_CLASS_SRX_CTX); - - /* copy data to user's buffer */ - size_t i; - char *buf = unexp->buf->received_buf.data; - - size_t len = unexp->result.BytesTransferred - sizeof(unexp->buf->header); - for (i = 0; i < entry->iov_cnt && len; i++) { - size_t cp = min(entry->iov[i].iov_len, len); - memcpy(entry->iov[i].iov_base, buf, cp); - len -= cp; - buf += cp; - } - - entry->data = unexp->buf->header.data; /* copy send data */ - - int status = (unexp->result.Status == S_OK && - (unexp->result.BytesTransferred - sizeof(unexp->buf->header)) <= entry->len) ? - S_OK : (unexp->result.Status != S_OK ? 
- H2F(unexp->result.Status) : FI_ETRUNC); - - struct nd_ep *ep = unexp->ep; - - ofi_nd_release_unexp_entry(unexp); - - if (ep->cntr_recv) { - if (status != S_OK) { - InterlockedIncrement64(&ep->cntr_recv->err); - } - InterlockedIncrement64(&ep->cntr_recv->counter); - WakeByAddressAll((void*)&ep->cntr_recv->counter); - } - - int notify = ofi_nd_util_completion_blackmagic( - ep->info->rx_attr->op_flags, - ep->recv_flags, entry->flags); - - if (status == S_OK) { - if (notify && ep->cq_recv) { - PostQueuedCompletionStatus( - ep->cq_recv->iocp, 0, 0, &entry->base.ov); - InterlockedIncrement(&ep->cq_recv->count); - WakeByAddressAll((void*)&ep->cq_recv->count); - } - else { /* if notification is not requested - just free entry */ - ofi_nd_free_cq_entry(entry); - } - } - else { - if (ep->cq_recv) { - PostQueuedCompletionStatus( - ep->cq_recv->err, 0, 0, &entry->base.ov); - InterlockedIncrement(&ep->cq_recv->count); - WakeByAddressAll((void*)&ep->cq_recv->count); - } - else { /* TODO add warning here */ - ofi_nd_free_cq_entry(entry); - } - } -} - -void ofi_nd_unexp_2_read(nd_cq_entry *entry, void *unexpected) -{ - nd_unexpected_entry *unexp = (nd_unexpected_entry *)unexpected; - - assert(entry); - assert(unexp); - assert(unexp->ep); - assert(unexp->ep->fid.fid.fclass == FI_CLASS_EP || - unexp->ep->fid.fid.fclass == FI_CLASS_SRX_CTX); - - size_t location_cnt = unexp->buf->header.location_cnt; - struct nd_msg_location *locations = unexp->buf->received_buf.locations; - struct nd_ep *ep = unexp->ep; - HRESULT hr = 0; - size_t i; - - ofi_nd_release_unexp_entry(unexp); - - struct nd_cq_entry *rma_entries[ND_MSG_IOV_LIMIT]; - - struct iovec from_iovecs[ND_MSG_IOV_LIMIT]; - for (i = 0; i < location_cnt; i++) { - from_iovecs[i].iov_base = (void *)locations[i].addr; - from_iovecs[i].iov_len = locations[i].len; - } - - struct iovec new_iovecs[ND_MSG_INTERNAL_IOV_LIMIT]; - size_t new_iovecs_count = 0; - size_t from_split_map[ND_MSG_INTERNAL_IOV_LIMIT]; - size_t 
to_split_map[ND_MSG_INTERNAL_IOV_LIMIT]; - uint64_t remote_addr[ND_MSG_INTERNAL_IOV_LIMIT]; - - ofi_nd_repack_iovecs(from_iovecs, location_cnt, - entry->iov, entry->iov_cnt, - new_iovecs, &new_iovecs_count, - from_split_map, to_split_map, remote_addr); - assert(new_iovecs_count <= ND_MSG_INTERNAL_IOV_LIMIT); - - entry->wait_completion.comp_count = 0; - entry->wait_completion.total_count = new_iovecs_count; - - InitializeCriticalSection(&entry->wait_completion.comp_lock); - - for (i = 0; i < new_iovecs_count; i++) { - rma_entries[i] = ofi_nd_buf_alloc_nd_cq_entry(); - if (!rma_entries[i]) - goto fn_fail_alloc; - memset(rma_entries[i], 0, sizeof(*rma_entries[i])); - - rma_entries[i]->len = entry->len; - rma_entries[i]->data = entry->data; - rma_entries[i]->flags = entry->flags; - rma_entries[i]->domain = entry->domain; - rma_entries[i]->context = entry->context; - rma_entries[i]->iov[0].iov_base = new_iovecs[i].iov_base; - rma_entries[i]->iov[0].iov_len = new_iovecs[i].iov_len; - rma_entries[i]->iov_cnt = 1; - rma_entries[i]->seq = entry->seq; - /* Store native CQ entry not to be forgotten for free */ - rma_entries[i]->aux_entry = entry; - rma_entries[i]->rma_location.count = location_cnt; - rma_entries[i]->rma_location.locations = locations; - - void *tobuf = rma_entries[i]->iov[0].iov_base; - ULONG tobuf_len = (ULONG)rma_entries[i]->iov[0].iov_len; - - hr = ep->domain->adapter->lpVtbl->CreateMemoryRegion( - ep->domain->adapter, &IID_IND2MemoryRegion, - ep->domain->adapter_file, (void**)&rma_entries[i]->mr[0]); - if (FAILED(hr)) - goto fn_fail; - rma_entries[i]->mr_count = 1; - - hr = ofi_nd_util_register_mr( - rma_entries[i]->mr[0], tobuf, tobuf_len, - ND_MR_FLAG_ALLOW_LOCAL_WRITE | - ND_MR_FLAG_ALLOW_REMOTE_READ | - ND_MR_FLAG_ALLOW_REMOTE_WRITE); - if (FAILED(hr)) - goto fn_fail; - - ND2_SGE sge = { - .Buffer = tobuf, - .BufferLength = tobuf_len, - .MemoryRegionToken = rma_entries[i]->mr[0]->lpVtbl->GetLocalToken(rma_entries[i]->mr[0]) - }; - - 
rma_entries[i]->state = LARGE_MSG_RECV_REQ; - - hr = ep->qp->lpVtbl->Read(ep->qp, rma_entries[i], &sge, 1, - remote_addr[i], locations[from_split_map[i]].remote_mr_token, 0); - if (FAILED(hr)) - goto fn_fail; - } - return; - -fn_fail_alloc: - while (i--) - ofi_nd_free_cq_entry(rma_entries[i]); - /* TODO: generate cq_err if RMA read fails */ - ND_LOG_WARN(FI_LOG_EP_DATA, ofi_nd_strerror((DWORD)hr, NULL)); - return; -fn_fail: - while (i) - ofi_nd_free_cq_entry(rma_entries[i--]); - /* TODO: generate cq_err if RMA read fails */ - ND_LOG_WARN(FI_LOG_EP_DATA, ofi_nd_strerror((DWORD)hr, NULL)); -} - -void ofi_nd_read_2_cq(nd_cq_entry *entry, ND2_RESULT *result) -{ - struct nd_ep *ep = (struct nd_ep*)result->QueuePairContext; - - assert(ep); - assert(ep->fid.fid.fclass == FI_CLASS_EP); - assert(entry); - - if (entry->inline_buf) { - assert(result->BytesTransferred <= (ULONG)gl_data.inline_thr); - assert(!entry->mr); - memcpy(entry->iov[0].iov_base, entry->inline_buf->buffer, - result->BytesTransferred); - } - - if (ep->cntr_read) { - if (result->Status != S_OK) { - InterlockedIncrement64(&ep->cntr_read->err); - } - InterlockedIncrement64(&ep->cntr_read->counter); - WakeByAddressAll((void*)&ep->cntr_read->counter); - } - - int notify = ofi_nd_util_completion_blackmagic( - ep->info->rx_attr->op_flags, ep->recv_flags, entry->flags) || - result->Status != S_OK; - - if (notify) { - PostQueuedCompletionStatus( - entry->result.Status == S_OK ? 
ep->cq_recv->iocp : ep->cq_recv->err, - 0, 0, &entry->base.ov); - InterlockedIncrement(&ep->cq_recv->count); - WakeByAddressAll((void*)&ep->cq_recv->count); - } - else { /* if notification is not requested - just free entry */ - ofi_nd_free_cq_entry(entry); - } -} - -void ofi_nd_read_2_send_ack(nd_cq_entry *entry, void *res) -{ - ND2_RESULT *result = res; - struct nd_ep *ep = (struct nd_ep*)result->QueuePairContext; - size_t i; - - assert(ep); - assert(ep->fid.fid.fclass == FI_CLASS_EP); - assert(entry); - - HRESULT hr; - struct nd_cq_entry *ack_entry = ofi_nd_buf_alloc_nd_cq_entry(); - if (!ack_entry) { - ND_LOG_WARN(FI_LOG_EP_DATA, "Unable to allocate buffer for CQ entry"); - return; - } - memset(ack_entry, 0, sizeof(*ack_entry)); - - ack_entry->data = entry->data; - ack_entry->flags = entry->flags; - ack_entry->domain = entry->domain; - ack_entry->context = entry->context; - ack_entry->iov_cnt = entry->iov_cnt; - ack_entry->seq = entry->seq; - ack_entry->state = NORMAL_STATE; - - ack_entry->prefix = __ofi_nd_buf_alloc_nd_msgprefix( - &ep->domain->msgfooter); - if (!ack_entry->prefix) { - hr = ND_NO_MEMORY; - goto fn_fail; - } - - nd_flow_cntrl_flags flow_control_flags = { - .req_ack = 0, - .ack = 0, - .empty = 0 - }; - - struct nd_msgheader header_def = { - .data = entry->data, - .event = LARGE_MSG_ACK, - .flags = flow_control_flags, - .location_cnt = entry->rma_location.count - }; - ack_entry->prefix->header = header_def; - ack_entry->event = LARGE_MSG_ACK; - ack_entry->flow_cntrl_flags = flow_control_flags; - - ack_entry->notify_buf = __ofi_nd_buf_alloc_nd_notifybuf( - &ep->domain->notifybuf); - if (!ack_entry->notify_buf) { - hr = ND_NO_MEMORY; - goto fn_fail; - } - - /* Fill the header by values of received header from - * originator of large message transmission */ - for (i = 0; i < entry->rma_location.count; i++) { - struct nd_msg_location *parent_location = - &entry->rma_location.locations[i]; - struct nd_msg_location location_def = { - .addr = 
parent_location->addr, - .len = parent_location->len, - .remote_mr_token = parent_location->remote_mr_token, - }; - - ack_entry->notify_buf->location[i] = location_def; - } - - /* Generate CQ to notify that data successfuly read - * and can be obtained by user. Use intial CQ for that */ - ofi_nd_read_2_cq(entry->aux_entry, result); - - /* Set intial CQ entry to NULL just to avoid releasing of - * CQ entry for which CQ event haven't generated yet to - * requestor of large message RECV operation */ - entry->aux_entry = NULL; - ofi_nd_free_cq_entry(entry); - - /* Gracefully complete receiving large message - - * ACK to peer should be sent */ - ND2_SGE sge[2] = { - { - .Buffer = &ack_entry->prefix->header, - .BufferLength = (ULONG)sizeof(ack_entry->prefix->header), - .MemoryRegionToken = ack_entry->prefix->token - }, - { - .Buffer = &ack_entry->notify_buf->location, - .BufferLength = (ULONG)(sizeof(*ack_entry->notify_buf->location) * entry->rma_location.count), - .MemoryRegionToken = ack_entry->notify_buf->token - } - }; - - nd_sge *sge_entry = ofi_nd_buf_alloc_nd_sge(); - if (!sge_entry) { - ND_LOG_WARN(FI_LOG_EP_DATA, "SGE entry buffer can't be allocated"); - hr = ND_NO_MEMORY; - goto fn_fail; - } - memset(sge_entry, 0, sizeof(*sge_entry)); - - sge_entry->count = 2; - for (i = 0; i < sge_entry->count; i++) - sge_entry->entries[i] = sge[i]; - - nd_send_entry *send_entry = ofi_nd_buf_alloc_nd_send_entry(); - if (!send_entry) { - ND_LOG_WARN(FI_LOG_EP_DATA, "Send entry buffer can't be allocated"); - hr = ND_NO_MEMORY; - goto fn_fail; - } - memset(send_entry, 0, sizeof(*send_entry)); - - send_entry->cq_entry = ack_entry; - send_entry->sge = sge_entry; - send_entry->ep = ep; - - /* Push the transmission of ACK into - * the Send Queue for furhter handling */ - ack_entry->send_entry = send_entry; - ofi_nd_queue_push(&ep->send_queue, &send_entry->queue_item); - - /* Let's progress Send Queue for current EP if possible */ - ofi_nd_ep_progress(ep); - - return; - -fn_fail: - 
ofi_nd_free_cq_entry(ack_entry); - ND_LOG_WARN(FI_LOG_EP_DATA, ofi_nd_strerror((DWORD)hr, NULL)); -} - -void ofi_nd_send_ack(nd_cq_entry *entry, struct nd_ep *ep) -{ - assert(ep); - assert(ep->fid.fid.fclass == FI_CLASS_EP); - assert(entry); - - HRESULT hr; - struct nd_cq_entry *ack_entry = NULL; - struct nd_queue_item *qentry = NULL; - nd_send_entry *send_entry = NULL; - - EnterCriticalSection(&ep->send_op.send_lock); - if (ofi_nd_queue_peek(&ep->send_queue, &qentry)) { - send_entry = container_of(qentry, nd_send_entry, queue_item); - struct nd_msgheader *header = (struct nd_msgheader *) - send_entry->sge->entries[0].Buffer; - header->flags.ack = 1; - } - else { - ack_entry = ofi_nd_buf_alloc_nd_cq_entry(); - if (!ack_entry) { - ND_LOG_WARN(FI_LOG_EP_DATA, "Unable to allocate buffer " - "for CQ entry"); - LeaveCriticalSection(&ep->send_op.send_lock); - return; - } - memset(ack_entry, 0, sizeof(*ack_entry)); - - ack_entry->data = entry->data; - ack_entry->flags = entry->flags; - ack_entry->domain = entry->domain; - ack_entry->context = entry->context; - ack_entry->iov_cnt = entry->iov_cnt; - ack_entry->seq = entry->seq; - ack_entry->state = NORMAL_STATE; - - ack_entry->prefix = __ofi_nd_buf_alloc_nd_msgprefix( - &ep->domain->msgfooter); - if (!ack_entry->prefix) { - hr = ND_NO_MEMORY; - goto fn_fail; - } - - nd_flow_cntrl_flags flow_control_flags = { - .req_ack = 0, - .ack = 1, - .empty = 1 - }; - - struct nd_msgheader header_def = { - .data = entry->data, - .event = NORMAL_EVENT, - .flags = flow_control_flags, - .location_cnt = 0 - }; - ack_entry->prefix->header = header_def; - ack_entry->event = NORMAL_EVENT; - ack_entry->flow_cntrl_flags = flow_control_flags; - - ND2_SGE sge = { - .Buffer = &ack_entry->prefix->header, - .BufferLength = (ULONG)sizeof(ack_entry->prefix->header), - .MemoryRegionToken = ack_entry->prefix->token - }; - - hr = ep->qp->lpVtbl->Send(ep->qp, ack_entry, &sge, 1, 0); - if (FAILED(hr)) - ND_LOG_WARN(FI_LOG_CQ, "Send failed from Send 
Queue\n"); - } - LeaveCriticalSection(&ep->send_op.send_lock); - - /* Let's progress Send Queue for current EP if possible */ - ofi_nd_ep_progress(ep); - - return; -fn_fail: - ofi_nd_free_cq_entry(ack_entry); -} - -void ofi_nd_repack_iovecs(const struct iovec *from_iovecs, const size_t from_count, - const struct iovec *to_iovecs, const size_t to_count, - struct iovec new_iovecs[ND_MSG_INTERNAL_IOV_LIMIT], - size_t *new_count, - size_t from_split_map[ND_MSG_INTERNAL_IOV_LIMIT], - size_t to_split_map[ND_MSG_INTERNAL_IOV_LIMIT], - uint64_t remote_addr[ND_MSG_INTERNAL_IOV_LIMIT]) -{ - size_t from_iter = 0; - size_t to_iter = 0; - size_t new_iter = 0; - size_t to_offset = 0; - size_t from_offset = 0; - - for(;;) { - new_iovecs[new_iter].iov_base = (char *)to_iovecs[to_iter].iov_base + to_offset; - remote_addr[new_iter] = - (uint64_t)((char *)from_iovecs[from_iter].iov_base + from_offset); - from_split_map[new_iter] = from_iter; - to_split_map[new_iter] = to_iter; - ND_LOG_DEBUG(FI_LOG_EP_DATA, "\nFL = %lu, FO = %lu, FI = %lu " - "\nTL = %lu, TO = %lu, TI = %lu\n", - from_iovecs[from_iter].iov_len, from_offset, from_iter, - to_iovecs[to_iter].iov_len, to_offset, to_iter); - - if (from_iovecs[from_iter].iov_len - from_offset < to_iovecs[to_iter].iov_len - to_offset) { - new_iovecs[new_iter].iov_len = from_iovecs[from_iter].iov_len - from_offset; - to_offset += from_iovecs[from_iter].iov_len - from_offset; - from_iter++; - from_offset = 0; - } - else if (to_iovecs[to_iter].iov_len - to_offset < from_iovecs[from_iter].iov_len - from_offset) { - new_iovecs[new_iter].iov_len = to_iovecs[to_iter].iov_len - to_offset; - from_offset += to_iovecs[to_iter].iov_len - to_offset; - to_iter++; - to_offset = 0; - } - else { - new_iovecs[new_iter].iov_len = to_iovecs[to_iter].iov_len; - from_iter++; - to_iter++; - to_offset = 0; - from_offset = 0; - } - - new_iter++; - /* Check that whether some iovecs was emptied */ - if ((from_iter == from_count) && (!from_offset) || - ((to_iter == 
to_count) && (!to_offset))) - break; - } - *new_count = new_iter; -} - -#endif /* _WIN32 */ - diff --git a/prov/netdir/src/netdir_cq.h b/prov/netdir/src/netdir_cq.h deleted file mode 100644 index 98d6959850e..00000000000 --- a/prov/netdir/src/netdir_cq.h +++ /dev/null @@ -1,210 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. -* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. 
-*/ - -#ifndef _FI_NETDIR_CQ_H_ -#define _FI_NETDIR_CQ_H_ - -#include -#include -#include - -#include "ndspi.h" - -#include "rdma/fabric.h" -#include "ofi_mem.h" - -#include "netdir.h" -#include "netdir_buf.h" -#include "netdir_log.h" -#include "netdir_util.h" -#include "netdir_iface.h" -#include "netdir_queue.h" - -#include "rdma/fi_eq.h" - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -static inline void ofi_nd_free_cq_entry(struct nd_cq_entry *entry) -{ - assert(entry); - - if (entry->prefix) - __ofi_nd_buf_free_nd_msgprefix(entry->prefix, - &entry->domain->msgfooter); - - if (entry->inline_buf) - __ofi_nd_buf_free_nd_inlinebuf(entry->inline_buf, - &entry->domain->inlinebuf); - - while (entry->mr_count) { - entry->mr_count--; - entry->mr[entry->mr_count]->lpVtbl->Release(entry->mr[entry->mr_count]); - } - - /* Means that waiting of completion are used. The completion - * critical section must be released */ - if (entry->wait_completion.total_count != 0) - DeleteCriticalSection(&entry->wait_completion.comp_lock); - - /* Release nested entry */ - if (entry->aux_entry) - ofi_nd_free_cq_entry(entry->aux_entry); - - ND_BUF_FREE(nd_cq_entry, entry); -} - -static inline ssize_t ofi_nd_cq_cancel(fid_t fid, void *context) -{ - assert(context); - - ssize_t ret = -ENOENT; - struct nd_cq_entry *entry = (struct nd_cq_entry *)ND_FI_CONTEXT(context); - CRITICAL_SECTION *prepost_lock; - struct nd_queue_queue *prepost; - struct nd_srx *srx; - struct nd_ep *ep; - - switch (fid->fclass) { - case FI_CLASS_SRX_CTX: - srx = container_of(fid, struct nd_srx, fid.fid); - prepost_lock = &srx->prepost_lock; - prepost = &srx->prepost; - break; - case FI_CLASS_EP: - ep = container_of(fid, struct nd_ep, fid.fid); - prepost_lock = &ep->prepost_lock; - prepost = &ep->prepost; - break; - default: - ND_LOG_WARN(FI_LOG_EP_DATA, "Invalid endpoint type \n"); - return -FI_EINVAL; - } - - if (entry) { - struct nd_queue_item *item = &entry->queue_item; - - 
EnterCriticalSection(prepost_lock); - - ofi_nd_queue_pop(prepost, &item); - ofi_nd_free_cq_entry(entry); - ND_FI_CONTEXT(context) = 0; - LeaveCriticalSection(prepost_lock); - - ret = 0; - } - - return ret; -} - -/* do NOT forget to add progress of Send Queue in all places - * where entry is enqueued for specific EP. Just to don't - * rely on ND's asynchronous invocation of providers callback - * about completion of an operation */ -static inline void ofi_nd_ep_progress(struct nd_ep *ep) -{ - HRESULT hr; - struct nd_queue_item *qentry = NULL; - nd_send_entry *send_entry = NULL; - - EnterCriticalSection(&ep->send_op.send_lock); - while (ofi_nd_queue_peek(&ep->send_queue, &qentry) && - !(ep->send_op.flags.is_send_blocked)) { - ep->send_op.used_counter++; - send_entry = container_of(qentry, nd_send_entry, queue_item); - ofi_nd_queue_pop(&ep->send_queue, &qentry); - - if (!(ep->send_op.used_counter % gl_data.prepost_cnt)) { - ep->send_op.flags.is_send_blocked = 1; - ep->send_op.used_counter = 0; - struct nd_msgheader *header = (struct nd_msgheader *) - send_entry->sge->entries[0].Buffer; - header->flags.req_ack = 1; - } - - /* If there is prepost entry (it means that this SEND event - * expects an answer). 
In this case, push CQ entry to prepost - * queue to receive event(answer) */ - if (send_entry->prepost_entry) { - ND_LOG_DEBUG(FI_LOG_EP_DATA, "Posted entry(state = %d) that " - "expects an answer from peer to which the send " - "event is belong\n", send_entry->prepost_entry->state); - ofi_nd_queue_push(&ep->internal_prepost, - &send_entry->prepost_entry->queue_item); - } - - hr = send_entry->ep->qp->lpVtbl->Send(send_entry->ep->qp, - send_entry->cq_entry, - send_entry->sge->entries, - send_entry->sge->count, 0); - if (FAILED(hr)) - ND_LOG_WARN(FI_LOG_CQ, "Send failed from Send Queue\n"); - } - LeaveCriticalSection(&ep->send_op.send_lock); -} - -#if 0 -static inline void ofi_nd_progress(void *arg) -{ - struct nd_domain *domain = arg; - struct dlist_entry *item; - struct nd_ep *ep; - - while (domain->do_progress) { - dlist_foreach(&domain->ep_list, item) { - ep = container_of(item, struct nd_ep, entry); - ofi_nd_ep_progress(ep); - } - } -} -#endif - -void ofi_nd_repack_iovecs(const struct iovec *from_iovecs, const size_t from_count, - const struct iovec *to_iovecs, const size_t to_count, - struct iovec new_iovecs[ND_MSG_INTERNAL_IOV_LIMIT], - size_t *new_count, - size_t from_split_map[ND_MSG_INTERNAL_IOV_LIMIT], - size_t to_split_map[ND_MSG_INTERNAL_IOV_LIMIT], - uint64_t remote_addr[ND_MSG_INTERNAL_IOV_LIMIT]); - - -void ofi_nd_dispatch_cq_event(ofi_nd_cq_event event, nd_cq_entry *entry, - void *misc); -void ofi_nd_send_ack(nd_cq_entry *entry, struct nd_ep *ep); - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif /* _FI_NETDIR_CQ_H_ */ - diff --git a/prov/netdir/src/netdir_domain.c b/prov/netdir/src/netdir_domain.c deleted file mode 100644 index 590e1aad41b..00000000000 --- a/prov/netdir/src/netdir_domain.c +++ /dev/null @@ -1,545 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. 
You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. -* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. 
-*/ - -#ifdef _WIN32 - -#include -#define WIN32_NO_STATUS - -#include "netdir.h" -#include "netdir_ov.h" -#include "netdir_log.h" -#include "netdir_util.h" -#include "netdir_iface.h" -#include "netdir_unexp.h" -#include "netdir_cq.h" - -#include "ofi.h" -#include "ofi_util.h" -#include "ofi_enosys.h" -#include "rdma/fabric.h" -#include "rdma/fi_domain.h" - -static int ofi_nd_domain_close(fid_t fid); -static int ofi_nd_domain_bind(struct fid *fid, struct fid *bfid, - uint64_t flags); - -struct nd_msgchunk { - IND2MemoryRegion *mr; - ND_BUF_CHUNK(nd_msgprefix) chunk; -}; - -struct nd_inlinechunk { - IND2MemoryRegion *mr; - char* base; - ND_BUF_CHUNK(nd_inlinebuf) chunk; -}; - -struct nd_notifychunk { - IND2MemoryRegion *mr; - ND_BUF_CHUNK(nd_notifybuf) chunk; -}; - -static ND_BUF_CHUNK(nd_msgprefix) -*ofi_nd_alloc_msgprefix_chunk(ND_BUF_FOOTER(nd_msgprefix) *footer, - size_t *count); -static void ofi_nd_free_msgprefix_chunk(ND_BUF_CHUNK(nd_msgprefix) *chunk); - -static ND_BUF_CHUNK(nd_inlinebuf) -*ofi_nd_alloc_inlinebuf_chunk(ND_BUF_FOOTER(nd_inlinebuf) *footer, size_t *count); -static void ofi_nd_free_inlinebuf_chunk(ND_BUF_CHUNK(nd_inlinebuf) *pchunk); - -static ND_BUF_CHUNK(nd_notifybuf) -*ofi_nd_alloc_notifybuf_chunk(ND_BUF_FOOTER(nd_notifybuf) *footer, size_t *count); -static void ofi_nd_free_notifybuf_chunk(ND_BUF_CHUNK(nd_notifybuf) *pchunk); - -static HRESULT ofi_nd_domain_notify(struct nd_domain *domain); -static void ofi_nd_domain_event(struct nd_event_base* base, DWORD bytes); -static void ofi_nd_domain_err(struct nd_event_base* base, DWORD bytes, DWORD err); - -static struct fi_ops_domain ofi_nd_domain_ops = { - .size = sizeof(ofi_nd_domain_ops), - .av_open = fi_no_av_open, - .cq_open = ofi_nd_cq_open, - .endpoint = ofi_nd_endpoint, - .scalable_ep = fi_no_scalable_ep, - .cntr_open = ofi_nd_cntr_open, - .poll_open = fi_no_poll_open, - .stx_ctx = fi_no_stx_context, - .srx_ctx = ofi_nd_srx_ctx -}; - -static struct fi_ops_mr ofi_nd_mr_ops = { - .size = 
sizeof(ofi_nd_mr_ops), - .reg = ofi_nd_mr_reg, - .regv = ofi_nd_mr_regv, - .regattr = ofi_nd_mr_regattr -}; - -static struct fi_ops ofi_nd_fi_ops = { - .size = sizeof(ofi_nd_fi_ops), - .close = ofi_nd_domain_close, - .bind = ofi_nd_domain_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open -}; - -static struct fid ofi_nd_fid = { - .fclass = FI_CLASS_DOMAIN, - .context = NULL, - .ops = &ofi_nd_fi_ops -}; - -static int ofi_nd_domain_close(fid_t fid) -{ - assert(fid->fclass == FI_CLASS_DOMAIN); - - struct nd_domain *domain = container_of(fid, struct nd_domain, fid.fid); - - DWORD ref = 0; -#if 0 - domain->do_progress = 0; - pthread_join(domain->progress_thread, NULL); -#endif - - if (domain->cq) { - domain->cq->lpVtbl->CancelOverlappedRequests(domain->cq); - while (!domain->cq_canceled || nd_async_progress) - SwitchToThread(); - domain->cq->lpVtbl->Release(domain->cq); - } - if (domain->info) - fi_freeinfo(domain->info); - if (domain->adapter_file && domain->adapter_file != INVALID_HANDLE_VALUE) - CloseHandle(domain->adapter_file); - if (domain->adapter) { - ref = domain->adapter->lpVtbl->Release(domain->adapter); - ND_LOG_DEBUG(FI_LOG_EP_CTRL, "domain->adapter ref count: %d\n", ref); - } - - __ofi_nd_buf_fini_nd_msgprefix(&domain->msgfooter); - __ofi_nd_buf_fini_nd_inlinebuf(&domain->inlinebuf); - - free(domain); - - return 0; -} - -int ofi_nd_domain_open(struct fid_fabric *fabric, struct fi_info *info, - struct fid_domain **pdomain, void *context) -{ - OFI_UNUSED(context); - - assert(fabric); - assert(fabric->fid.fclass == FI_CLASS_FABRIC); - assert(info); - assert(info->domain_attr); - assert(info->domain_attr->name); - - if (!info || !info->domain_attr || !info->domain_attr->name) - return -FI_EINVAL; - - HRESULT hr; - int res; - struct sockaddr* addr; - - struct nd_domain *domain = (struct nd_domain*)calloc(1, sizeof(*domain)); - if (!domain) - return -FI_ENOMEM; - - struct nd_domain def = { - .fid = { - .fid = ofi_nd_fid, - .ops = &ofi_nd_domain_ops, 
- .mr = &ofi_nd_mr_ops - }, - .info = fi_dupinfo(info), - .msgfooter = { - .alloc_chunk = ofi_nd_alloc_msgprefix_chunk, - .free_chunk = ofi_nd_free_msgprefix_chunk - }, - .inlinebuf = { - .alloc_chunk = ofi_nd_alloc_inlinebuf_chunk, - .free_chunk = ofi_nd_free_inlinebuf_chunk - }, - .notifybuf = { - .alloc_chunk = ofi_nd_alloc_notifybuf_chunk, - .free_chunk = ofi_nd_free_notifybuf_chunk - } - }; - - *domain = def; - - dlist_init(&domain->ep_list); -#if 0 - domain->do_progress = 1; - if (pthread_create(&domain->progress_thread, NULL, - ofi_nd_progress, domain)) { - ofi_nd_domain_close(&domain->fid.fid); - return -FI_ENOMEM;; - } -#endif - - res = ofi_nd_lookup_adapter(info->domain_attr->name, &domain->adapter, &addr); - if (res || !domain->adapter) { - ofi_nd_domain_close(&domain->fid.fid); - return res; - } - - memcpy(&domain->addr, addr, ofi_sizeofaddr(addr)); - - hr = domain->adapter->lpVtbl->CreateOverlappedFile(domain->adapter, - &domain->adapter_file); - - if (FAILED(hr)) - goto hr_failed; - - if (!BindIoCompletionCallback(domain->adapter_file, domain_io_cb, 0)) { - hr = HRESULT_FROM_WIN32(GetLastError()); - goto hr_failed; - } - - domain->ainfo.InfoVersion = ND_VERSION_2; - ULONG len = sizeof(domain->ainfo); - hr = domain->adapter->lpVtbl->Query(domain->adapter, &domain->ainfo, - &len); - if (FAILED(hr)) - goto hr_failed; - - hr = domain->adapter->lpVtbl->CreateCompletionQueue( - domain->adapter, &IID_IND2CompletionQueue, domain->adapter_file, - domain->ainfo.MaxCompletionQueueDepth, 0, 0, - (void**)&domain->cq); - if (FAILED(hr)) - goto hr_failed; - - *pdomain = &domain->fid; - - ND_LOG_DEBUG(FI_LOG_DOMAIN, "domain notification OV: %p\n", &domain->ov.ov); - hr = ofi_nd_domain_notify(domain); - if (FAILED(hr)) - goto hr_failed; - - return FI_SUCCESS; - -hr_failed: - ofi_nd_domain_close(&domain->fid.fid); - return H2F(hr); -} - -static int ofi_nd_domain_bind(struct fid *fid, struct fid *bfid, - uint64_t flags) -{ - assert(fid->fclass == FI_CLASS_DOMAIN); - - 
struct nd_domain *domain = container_of(fid, struct nd_domain, fid.fid); - - switch (bfid->fclass) { - case FI_CLASS_EQ: - domain->eq = container_of(bfid, struct nd_eq, fid.fid); - domain->eq_flags = flags; - break; - default: - ND_LOG_WARN(FI_LOG_DOMAIN, - "ofi_nd_domain_bind: incorrect bind object class: %d", - bfid->fclass); - return -FI_EINVAL; - } - - return FI_SUCCESS; -} - -static ND_BUF_CHUNK(nd_msgprefix) -*ofi_nd_alloc_msgprefix_chunk(ND_BUF_FOOTER(nd_msgprefix) *footer, size_t *count) -{ - struct nd_domain *dom = container_of(footer, struct nd_domain, msgfooter); - assert(dom->fid.fid.fclass == FI_CLASS_DOMAIN); - - HRESULT hr; - size_t i; - UINT32 token; - - struct nd_msgchunk *chunk = malloc(sizeof(*chunk)); - if (!chunk) - return 0; - memset(chunk, 0, sizeof(*chunk)); - assert(count); - *count = countof(chunk->chunk.item); - - assert(dom->adapter); - - hr = dom->adapter->lpVtbl->CreateMemoryRegion( - dom->adapter, &IID_IND2MemoryRegion, dom->adapter_file, (void**)&chunk->mr); - if (FAILED(hr)) - goto fn_fail; - - hr = ofi_nd_util_register_mr( - chunk->mr, &chunk->chunk, sizeof(chunk->chunk), - ND_MR_FLAG_ALLOW_LOCAL_WRITE); - if (FAILED(hr)) - goto fn_fail_mr; - - token = chunk->mr->lpVtbl->GetLocalToken(chunk->mr); - - for (i = 0; i < countof(chunk->chunk.item); i++) - chunk->chunk.item[i].data.token = token; - - return &chunk->chunk; - -fn_fail_mr: - chunk->mr->lpVtbl->Release(chunk->mr); -fn_fail: - free(chunk); - return 0; -} - -static void ofi_nd_free_msgprefix_chunk(ND_BUF_CHUNK(nd_msgprefix) *pchunk) -{ - assert(pchunk); - - struct nd_msgchunk *chunk = container_of(pchunk, struct nd_msgchunk, chunk); - if (chunk->mr) { - ofi_nd_util_unregister_mr(chunk->mr); - chunk->mr->lpVtbl->Release(chunk->mr); - } - free(chunk); -} - -static ND_BUF_CHUNK(nd_inlinebuf) -*ofi_nd_alloc_inlinebuf_chunk(ND_BUF_FOOTER(nd_inlinebuf) *footer, size_t *count) -{ - struct nd_domain *dom = container_of(footer, struct nd_domain, inlinebuf); - assert(dom->fid.fid.fclass 
== FI_CLASS_DOMAIN); - - HRESULT hr; - size_t i; - UINT32 token; - - struct nd_inlinechunk *chunk = malloc(sizeof(*chunk)); - if (!chunk) - return 0; - memset(chunk, 0, sizeof(*chunk)); - assert(count); - *count = countof(chunk->chunk.item); - - size_t len = gl_data.inline_thr * countof(chunk->chunk.item); - chunk->base = malloc(len); - if (!chunk->base) - goto fn_fail; - - assert(dom->adapter); - - hr = dom->adapter->lpVtbl->CreateMemoryRegion( - dom->adapter, &IID_IND2MemoryRegion, dom->adapter_file, (void**)&chunk->mr); - if (FAILED(hr)) - goto fn_fail_base; - - hr = ofi_nd_util_register_mr( - chunk->mr, chunk->base, len, - ND_MR_FLAG_ALLOW_LOCAL_WRITE | - ND_MR_FLAG_ALLOW_REMOTE_READ | - ND_MR_FLAG_ALLOW_REMOTE_WRITE); - if (FAILED(hr)) - goto fn_fail_mr; - - token = chunk->mr->lpVtbl->GetLocalToken(chunk->mr); - - for (i = 0; i < countof(chunk->chunk.item); i++) { - chunk->chunk.item[i].data.token = token; - chunk->chunk.item[i].data.buffer = chunk->base + (i * gl_data.inline_thr); - } - - return &chunk->chunk; - -fn_fail_mr: - chunk->mr->lpVtbl->Release(chunk->mr); -fn_fail_base: - free(chunk->base); -fn_fail: - free(chunk); - return 0; -} - -static void ofi_nd_free_inlinebuf_chunk(ND_BUF_CHUNK(nd_inlinebuf) *pchunk) -{ - assert(pchunk); - - struct nd_inlinechunk *chunk = container_of(pchunk, struct nd_inlinechunk, chunk); - if (chunk->mr) { - ofi_nd_util_unregister_mr(chunk->mr); - chunk->mr->lpVtbl->Release(chunk->mr); - } - if (chunk->base) - free(chunk->base); - free(chunk); -} - -static ND_BUF_CHUNK(nd_notifybuf) -*ofi_nd_alloc_notifybuf_chunk(ND_BUF_FOOTER(nd_notifybuf) *footer, size_t *count) -{ - struct nd_domain *dom = container_of(footer, struct nd_domain, notifybuf); - assert(dom->fid.fid.fclass == FI_CLASS_DOMAIN); - - HRESULT hr; - size_t i; - UINT32 token; - - struct nd_notifychunk *chunk = malloc(sizeof(*chunk)); - if (!chunk) - return 0; - memset(chunk, 0, sizeof(*chunk)); - assert(count); - *count = countof(chunk->chunk.item); - - 
assert(dom->adapter); - - hr = dom->adapter->lpVtbl->CreateMemoryRegion( - dom->adapter, &IID_IND2MemoryRegion, dom->adapter_file, (void**)&chunk->mr); - if (FAILED(hr)) - goto fn_fail; - - hr = ofi_nd_util_register_mr( - chunk->mr, &chunk->chunk, sizeof(chunk->chunk), - ND_MR_FLAG_ALLOW_LOCAL_WRITE); - if (FAILED(hr)) - goto fn_fail_mr; - - token = chunk->mr->lpVtbl->GetLocalToken(chunk->mr); - - for (i = 0; i < countof(chunk->chunk.item); i++) - chunk->chunk.item[i].data.token = token; - - return &chunk->chunk; - -fn_fail_mr: - chunk->mr->lpVtbl->Release(chunk->mr); - fn_fail: - free(chunk); - return 0; -} - -static void ofi_nd_free_notifybuf_chunk(ND_BUF_CHUNK(nd_notifybuf) *pchunk) -{ - assert(pchunk); - - struct nd_notifychunk *chunk = container_of(pchunk, struct nd_notifychunk, chunk); - if (chunk->mr) { - ofi_nd_util_unregister_mr(chunk->mr); - chunk->mr->lpVtbl->Release(chunk->mr); - } - free(chunk); -} - -static HRESULT ofi_nd_domain_notify(struct nd_domain *domain) -{ - assert(domain); - assert(domain->fid.fid.fclass == FI_CLASS_DOMAIN); - assert(domain->cq); - - nd_event_base ov = { - .event_cb = ofi_nd_domain_event, - .err_cb = ofi_nd_domain_err - }; - - domain->ov = ov; - return domain->cq->lpVtbl->Notify(domain->cq, ND_CQ_NOTIFY_ANY, &domain->ov.ov); -} - -static void ofi_nd_domain_event(struct nd_event_base* base, DWORD bytes) -{ - OFI_UNUSED(bytes); - - assert(base); - struct nd_domain *domain = container_of(base, struct nd_domain, ov); - - assert(domain->fid.fid.fclass == FI_CLASS_DOMAIN); - assert(domain->cq); - - ND2_RESULT result[256]; - DWORD count; - nd_unexpected_ctx *ctx; - do { - count = domain->cq->lpVtbl->GetResults(domain->cq, result, countof(result)); - size_t i; - for (i = 0; i < count; i++) { - ND_LOG_DEBUG(FI_LOG_EP_DATA, "Domain event is %d with status %s\n", - result[i].RequestType, - ofi_nd_error_str(result[i].Status)); - switch (result[i].RequestType) { - case Nd2RequestTypeReceive: - ctx = (nd_unexpected_ctx 
*)result[i].RequestContext; - if (!OFI_ND_IS_SERVICE_EVENT(ctx->entry->header.event)) - ofi_nd_unexp_event(&result[i]); - else - ofi_nd_unexp_service_event(&result[i]); - break; - case Nd2RequestTypeSend: - ofi_nd_send_event(&result[i]); - break; - case Nd2RequestTypeRead: - ofi_nd_read_event(&result[i]); - break; - case Nd2RequestTypeWrite: - ofi_nd_write_event(&result[i]); - break; - default: - /* shouldn't go here */ - NODEFAULT; - } - - /* Let's walk through sending queue to send data - * that are ready to be transmitted */ - struct nd_ep *ep = (struct nd_ep*)result[i].QueuePairContext; - ofi_nd_ep_progress(ep); - } - } while (count == countof(result)); - - ofi_nd_domain_notify(domain); -} - -static void ofi_nd_domain_err(struct nd_event_base* base, DWORD bytes, DWORD err) -{ - OFI_UNUSED(err); - if (err == STATUS_CANCELLED) { - struct nd_domain *domain = container_of(base, struct nd_domain, ov); - - assert(domain->fid.fid.fclass == FI_CLASS_DOMAIN); - assert(domain->cq); - domain->cq_canceled = 1; - return; - } - - ofi_nd_domain_event(base, bytes); -} - -#endif /* _WIN32 */ - diff --git a/prov/netdir/src/netdir_ep.c b/prov/netdir/src/netdir_ep.c deleted file mode 100644 index c956fbbcef8..00000000000 --- a/prov/netdir/src/netdir_ep.c +++ /dev/null @@ -1,880 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. 
-* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -*/ - -#ifdef _WIN32 - -#include -#define WIN32_NO_STATUS - -#include "netdir.h" -#include "netdir_ov.h" -#include "netdir_log.h" -#include "netdir_util.h" -#include "netdir_iface.h" -#include "netdir_unexp.h" -#include "netdir_cq.h" - -#include "rdma/fabric.h" -#include "rdma/fi_endpoint.h" - -#include "ofi.h" -#include "ofi_util.h" - -static int ofi_nd_ep_control(struct fid *fid, int command, void *arg); -static int ofi_nd_ep_close(struct fid *fid); -static int ofi_nd_ep_bind(fid_t ep, fid_t cq, uint64_t flags); -static int ofi_nd_ep_getname(fid_t fid, void *addr, size_t *addrlen); -static int ofi_nd_ep_getpeer(struct fid_ep *fid, void *addr, size_t *addrlen); -static int ofi_nd_ep_connect(struct fid_ep *ep, const void *addr, - const void *param, size_t paramlen); -static int ofi_nd_ep_accept(struct fid_ep *ep, const void *param, - size_t paramlen); -static int ofi_nd_ep_shutdown(struct fid_ep *ep, uint64_t flags); - -static ND_BUF_CHUNK(nd_ep_msgprefix) - *ofi_nd_ep_alloc_chunk(ND_BUF_FOOTER(nd_ep_msgprefix) *footer, - size_t *count); -static void ofi_nd_ep_free_chunk(ND_BUF_CHUNK(nd_ep_msgprefix) *chunk); - -static void ofi_nd_ep_disconnected_free(struct nd_event_base* base); -static void ofi_nd_ep_disconnected(struct nd_event_base* base, DWORD bytes); -static void 
ofi_nd_ep_disconnected_err(struct nd_event_base* base, DWORD bytes, DWORD err); -static ssize_t ofi_nd_ep_cancel(fid_t fid, void *context); -int ofi_nd_ep_getopt(struct fid* ep, int level, int optname, - void* optval, size_t* optlen); - -static struct fi_ops ofi_nd_fi_ops = { - .size = sizeof(ofi_nd_fi_ops), - .close = ofi_nd_ep_close, - .bind = ofi_nd_ep_bind, - .control = ofi_nd_ep_control, - .ops_open = fi_no_ops_open, -}; - -static struct fi_ops_cm ofi_nd_cm_ops = { - .size = sizeof(ofi_nd_cm_ops), - .setname = fi_no_setname, - .getname = ofi_nd_ep_getname, - .getpeer = ofi_nd_ep_getpeer, - .connect = fi_no_connect, - .listen = fi_no_listen, - .accept = fi_no_accept, - .reject = fi_no_reject, - .shutdown = ofi_nd_ep_shutdown, - .join = fi_no_join, -}; - -extern struct fi_ops_msg ofi_nd_ep_msg; -extern struct fi_ops_rma ofi_nd_ep_rma; - -static struct fi_ops_ep ofi_nd_ep_ops = { - .size = sizeof(ofi_nd_ep_ops), - .cancel = ofi_nd_ep_cancel, - .getopt = ofi_nd_ep_getopt, - .setopt = fi_no_setopt, - .tx_ctx = fi_no_tx_ctx, - .rx_ctx = fi_no_rx_ctx, - .rx_size_left = fi_no_rx_size_left, - .tx_size_left = fi_no_tx_size_left, -}; - -typedef struct nd_ep_connect_data { - struct { - struct nd_msg_location send_res; - } flow_control; - - struct { - void *param; - size_t paramlen; - } user_data; - - struct { - void *data; - size_t size; - } total_conn_data; -} nd_ep_connect_data; - -typedef struct nd_ep_connect { - nd_event_base base; - struct nd_ep *ep; - struct nd_eq *eq; - IND2Connector *connector; - int active; -} nd_ep_connect; - -typedef struct nd_ep_completed { - nd_event_base base; - struct nd_ep *ep; - struct nd_eq *eq; - IND2Connector *connector; -} nd_ep_completed; - -OFI_ND_NB_BUF(nd_ep_connect); -OFI_ND_NB_BUF(nd_ep_completed); - -OFI_ND_NB_BUF_IMP(nd_ep_connect); -OFI_ND_NB_BUF_IMP(nd_ep_completed); - -OFI_ND_NB_BUF_TYPED(nd_connreq, struct nd_connreq); - -int ofi_nd_endpoint(struct fid_domain *pdomain, struct fi_info *info, - struct fid_ep **ep_fid, void 
*context) -{ - assert(info); - assert(pdomain); - assert(pdomain->fid.fclass == FI_CLASS_DOMAIN); - - ND_REGISTER_FINI(ND_BUF_FINIPTR(nd_ep_connect)); - ND_REGISTER_FINI(ND_BUF_FINIPTR(nd_ep_completed)); - ND_REGISTER_FINI(ND_BUF_FINIPTR(ofi_nd_util_ov)); - ND_REGISTER_FINI(ND_BUF_FINIPTR(nd_send_entry)); - ND_REGISTER_FINI(ND_BUF_FINIPTR(nd_sge)); - - HRESULT hr; - - struct nd_domain *domain = container_of(pdomain, struct nd_domain, fid); - struct nd_connreq *connreq = 0; - struct nd_ep *ep = (struct nd_ep*) calloc(1, sizeof(*ep)); - if (!ep) - return -FI_ENOMEM; - - struct nd_ep def = { - .fid = { - .fid = { - .fclass = FI_CLASS_EP, - .context = context, - .ops = &ofi_nd_fi_ops - }, - .ops = &ofi_nd_ep_ops, - .cm = &ofi_nd_cm_ops, - .msg = &ofi_nd_ep_msg, - .rma = &ofi_nd_ep_rma - }, - .info = fi_dupinfo(info), - .domain = domain, - .eq = domain->eq, - .disconnect_ov = { - .free = ofi_nd_ep_disconnected_free, - .event_cb = ofi_nd_ep_disconnected, - .err_cb = ofi_nd_ep_disconnected_err - } - }; - - *ep = def; - - /* Initialzie flow control counter */ - ep->send_op.used_counter = 0; - InitializeCriticalSection(&ep->send_op.send_lock); - - if (info->handle) { - assert(info->handle->fclass == FI_CLASS_CONNREQ); - if (info->handle->fclass != FI_CLASS_CONNREQ) { - hr = E_HANDLE; - goto fn_fail; - } - connreq = container_of(info->handle, struct nd_connreq, - handle); - } - - InitializeCriticalSection(&ep->prepost_lock); - - assert(domain->adapter); - - if (connreq) { - assert(connreq->connector); - ep->connector = connreq->connector; - ND_BUF_FREE(nd_connreq, connreq); - ep->fid.cm->accept = ofi_nd_ep_accept; - } - else { - hr = domain->adapter->lpVtbl->CreateConnector(domain->adapter, - &IID_IND2Connector, - domain->adapter_file, - (void**)&ep->connector); - if (FAILED(hr)) - goto fn_fail; - - hr = ep->connector->lpVtbl->Bind(ep->connector, - &domain->addr.addr, - (ULONG)ofi_sizeofaddr(&domain->addr.addr)); - if (FAILED(hr)) - goto fn_fail; - - ep->fid.cm->connect = 
ofi_nd_ep_connect; - } - - dlist_insert_tail(&ep->entry, &domain->ep_list); - - /* do NOT create real ND endpoint here: we could not know - how CQ will be attached here */ - - *ep_fid = &ep->fid; - hr = ofi_nd_unexp_init(ep); - - return 0; - -fn_fail: - ofi_nd_ep_close(&ep->fid.fid); - ND_LOG_WARN(FI_LOG_EP_CTRL, ofi_nd_strerror((DWORD)hr, NULL)); - return H2F(hr); -} - -static int ofi_nd_ep_control(struct fid *fid, int command, void *arg) -{ - OFI_UNUSED(arg); - - assert(fid->fclass == FI_CLASS_EP); - - HRESULT hr; - - if (command != FI_ENABLE) - return -FI_EINVAL; - - struct nd_ep *ep = container_of(fid, struct nd_ep, fid.fid); - - if (ep->qp) - return FI_SUCCESS; /* already enabled */ - - hr = ep->domain->adapter->lpVtbl->CreateQueuePair( - ep->domain->adapter, &IID_IND2QueuePair, - (IUnknown*)ep->domain->cq, - (IUnknown*)ep->domain->cq, - ep, - (ULONG) ep->info->rx_attr->size, - (ULONG) ep->info->tx_attr->size, - (ULONG) ep->info->rx_attr->iov_limit, - (ULONG) ep->info->tx_attr->iov_limit, - 0, (void**)&ep->qp); - if (FAILED(hr)) - return H2F(hr); - - /* Initialzie unexpected functionality */ - InitializeCriticalSection(&ep->unexpected.unexp_lock); - ofi_nd_unexp_run(ep); - - return FI_SUCCESS; -} - -static int ofi_nd_ep_close(struct fid *fid) -{ - ND_LOG_DEBUG(FI_LOG_EP_CTRL, "closing ep\n"); - - assert(fid->fclass == FI_CLASS_EP); - - struct nd_ep *ep = container_of(fid, struct nd_ep, fid.fid); - - ofi_nd_ep_shutdown(&ep->fid, 0); - - int res; - if (ep->connector) { - res = (int)ep->connector->lpVtbl->Release(ep->connector); - ND_LOG_DEBUG(FI_LOG_EP_CTRL, "ep->connector ref count: %d\n", res); - } - if (ep->qp) { - res = (int)ep->qp->lpVtbl->Release(ep->qp); - ND_LOG_DEBUG(FI_LOG_EP_CTRL, "ep->qp ref count: %d\n", res); - } - if (ep->info) - fi_freeinfo(ep->info); - - DeleteCriticalSection(&ep->prepost_lock); - /* Release Critical Section for unexpected events */ - DeleteCriticalSection(&ep->unexpected.unexp_lock); - - /* Retrieve this endpoint from domain EP 
list */ - dlist_remove(&ep->entry); - DeleteCriticalSection(&ep->send_op.send_lock); - free(ep); - ep = NULL; - - return 0; -} - -static void ofi_nd_ep_completed_free(nd_event_base *base) -{ - assert(base); - - nd_ep_completed *compl = container_of(base, nd_ep_completed, base); - assert(compl->connector); - compl->connector->lpVtbl->Release(compl->connector); - ND_BUF_FREE(nd_ep_completed, compl); -} - -static void ofi_nd_ep_completed(nd_event_base *base, DWORD bytes) -{ - OFI_UNUSED(bytes); - assert(base); - assert(base->free); - - nd_ep_completed *compl = container_of(base, nd_ep_completed, base); - assert(compl->connector); - OFI_UNUSED(compl); - - base->free(base); -} - -static void ofi_nd_ep_completed_err(nd_event_base *base, DWORD bytes, - DWORD error) -{ - OFI_UNUSED(bytes); - assert(base); - assert(base->free); - - nd_ep_completed *compl = container_of(base, nd_ep_completed, base); - - struct nd_eq_event *err = ND_BUF_ALLOC(nd_eq_event); - if (!err) { - ND_LOG_WARN(FI_LOG_EP_CTRL, - "failed to allocate error event\n"); - goto fn_completed; - } - - memset(err, 0, sizeof(*err)); - err->error.err = -H2F(error); - err->error.prov_errno = (int)error; - err->error.fid = &compl->ep->fid.fid; - ofi_nd_eq_push_err(compl->eq, err); - -fn_completed: - base->free(base); -} - -static void ofi_nd_ep_accepted_free(nd_event_base *base) -{ - assert(base); - - nd_ep_connect *connect = container_of(base, nd_ep_connect, base); - if (connect->connector) - connect->connector->lpVtbl->Release(connect->connector); - ND_BUF_FREE(nd_ep_connect, connect); -} - -static void ofi_nd_ep_accepted(nd_event_base *base, DWORD bytes) -{ - assert(base); - OFI_UNUSED(bytes); - - HRESULT hr; - ULONG len = 0; - nd_ep_connect *connect = container_of(base, nd_ep_connect, base); - struct nd_eq_event *err; - nd_ep_completed *compl = NULL; - - assert(connect->connector); - assert(connect->ep); - assert(connect->eq); - - struct nd_eq_event *ev = ND_BUF_ALLOC(nd_eq_event); - if (!ev) { - hr = 
ND_NO_MEMORY; - goto fn_fail_ev; - } - memset(ev, 0, sizeof(*ev)); - ev->eq_event = FI_CONNECTED; - - hr = connect->connector->lpVtbl->GetPrivateData( - connect->connector, NULL, &len); - - if (connect->active) { - hr = connect->connector->lpVtbl->GetPrivateData( - connect->connector, NULL, &len); - - if (FAILED(hr) && hr != ND_BUFFER_OVERFLOW) { - ND_LOG_WARN(FI_LOG_EP_CTRL, - "failed to get connection data\n"); - goto fn_fail_data; - } - - if (len) { - ev->data = malloc(len); - if (!ev->data) { - ND_LOG_WARN(FI_LOG_EP_CTRL, - "failed to allocate connection data\n"); - hr = ND_NO_MEMORY; - ev->len = 0; - goto fn_fail_data; - } - - hr = connect->connector->lpVtbl->GetPrivateData( - connect->connector, ev->data, &len); - if (FAILED(hr)) { - ND_LOG_WARN(FI_LOG_EP_CTRL, - "failed to copy connection data\n"); - free(ev->data); - ev->len = 0; - goto fn_fail_data; - } - } - ev->len = (size_t)len; - - compl = ND_BUF_ALLOC(nd_ep_completed); - if (!compl) { - ND_LOG_WARN(FI_LOG_EP_CTRL, - "failed to allocate connection-complete event\n"); - goto fn_fail_data; - } - memset(compl, 0 , sizeof(*compl)); - compl->base.event_cb = ofi_nd_ep_completed; - compl->base.err_cb = ofi_nd_ep_completed_err; - compl->base.free = ofi_nd_ep_completed_free; - compl->ep = connect->ep; - compl->eq = connect->eq; - compl->connector = connect->connector; - connect->connector->lpVtbl->AddRef(connect->connector); - - hr = connect->connector->lpVtbl->CompleteConnect(connect->connector, - &compl->base.ov); - if (FAILED(hr)) { - ND_LOG_WARN(FI_LOG_EP_CTRL, - "failed to complete connection\n"); - ND_BUF_FREE(nd_ep_completed, compl); - goto fn_fail_compl; - } - } - - ND_LOG_DEBUG(FI_LOG_EP_CTRL, "register disconnect notification: %p\n", - &connect->ep->disconnect_ov.ov); - hr = connect->connector->lpVtbl->NotifyDisconnect( - connect->connector, &connect->ep->disconnect_ov.ov); - if (FAILED(hr)) { - ND_LOG_WARN(FI_LOG_EP_CTRL, - "failed to notify disconnect\n"); - ND_BUF_FREE(nd_ep_completed, compl); - 
goto fn_fail_compl; - } - - struct fi_eq_cm_entry *cm = (struct fi_eq_cm_entry*)&ev->operation; - cm->fid = &connect->ep->fid.fid; - ofi_nd_eq_push(connect->eq, ev); - ofi_nd_ep_accepted_free(&connect->base); - connect->ep->connected = 1; - return; - -fn_fail_compl: - if (len) { - free(ev->data); - ev->len = 0; - } - connect->connector->lpVtbl->Release(connect->connector); - -fn_fail_data: - ofi_nd_buf_free_nd_eq_event(ev); - -fn_fail_ev: - err = ofi_nd_buf_alloc_nd_eq_event(); - if (!err) { - ND_LOG_WARN(FI_LOG_EP_CTRL, - "failed to allocate error event\n"); - ofi_nd_ep_accepted_free(&connect->base); - return; - } - memset(err, 0, sizeof(*err)); - err->error.err = -H2F(hr); - err->error.prov_errno = (int)hr; - err->error.fid = &connect->ep->fid.fid; - ofi_nd_eq_push_err(connect->eq, err); - ofi_nd_ep_accepted_free(&connect->base); -} - -static void ofi_nd_ep_rejected(nd_event_base *base, DWORD bytes, DWORD error) -{ - assert(base); - OFI_UNUSED(bytes); - - nd_ep_connect *connect = container_of(base, nd_ep_connect, base); - - assert(connect->connector); - assert(connect->ep); - assert(connect->eq); - - HRESULT hr = S_OK; - - struct nd_eq_event *err = ofi_nd_buf_alloc_nd_eq_event(); - if (!err) { - ND_LOG_WARN(FI_LOG_EP_CTRL, - "failed to allocate error event\n"); - ofi_nd_ep_accepted_free(&connect->base); - return; - } - memset(err, 0, sizeof(*err)); - err->error.err = -H2F(error); - err->error.prov_errno = (int)error; - err->error.fid = &connect->ep->fid.fid; - ofi_nd_eq_push_err(connect->eq, err); - - if (error == ND_CONNECTION_REFUSED) { - ULONG len = 0; - hr = connect->connector->lpVtbl->GetPrivateData( - connect->connector, NULL, &len); - - if (FAILED(hr) && hr != ND_BUFFER_OVERFLOW) { - ND_LOG_WARN(FI_LOG_EP_CTRL, - "failed to get connection data\n"); - goto fn_complete; - } - - if (len) { - err->error.err_data = malloc((size_t)len); - if (!err->error.err_data) { - ND_LOG_WARN(FI_LOG_EP_CTRL, - "failed to allocate connection data\n"); - hr = ND_NO_MEMORY; - 
goto fn_complete; - } - hr = connect->connector->lpVtbl->GetPrivateData( - connect->connector, err->error.err_data, &len); - - if (FAILED(hr)) { - ND_LOG_WARN(FI_LOG_EP_CTRL, - "failed to copy connection data\n"); - goto fn_complete; - } - err->error.err_data_size = (size_t)len; - } - } - -fn_complete: - ofi_nd_ep_accepted_free(&connect->base); -} - -static int ofi_nd_ep_connect(struct fid_ep *pep, const void *addr, - const void *param, size_t paramlen) -{ - assert(pep->fid.fclass == FI_CLASS_EP); - - struct nd_ep *ep = container_of(pep, struct nd_ep, fid); - - if (!addr) - return -FI_EINVAL; - - int res = fi_enable(&ep->fid); - if (res) - return res; - - assert(ep->connector); - assert(ep->qp); - - HRESULT hr; - - struct nd_ep_connect *wait = ofi_nd_buf_alloc_nd_ep_connect(); - if (!wait) - return -FI_ENOMEM; - - memset(wait, 0, sizeof(*wait)); - wait->ep = ep; - wait->eq = ep->eq; - wait->connector = ep->connector; - wait->base.event_cb = ofi_nd_ep_accepted; - wait->base.err_cb = ofi_nd_ep_rejected; - wait->base.free = ofi_nd_ep_accepted_free; - wait->active = 1; - ep->connector->lpVtbl->AddRef(ep->connector); - - hr = ep->connector->lpVtbl->Connect( - ep->connector, (IUnknown*)ep->qp, - (struct sockaddr*)addr, (ULONG)ofi_sizeofaddr((struct sockaddr*)addr), - ep->domain->ainfo.MaxInboundReadLimit, - ep->domain->ainfo.MaxOutboundReadLimit, - param, (ULONG)paramlen, &wait->base.ov); - return H2F(hr); -} - -static int ofi_nd_ep_accept(struct fid_ep *pep, const void *param, size_t paramlen) -{ - assert(pep->fid.fclass == FI_CLASS_EP); - - struct nd_ep *ep = container_of(pep, struct nd_ep, fid); - - int res = fi_enable(&ep->fid); - if (res) - return res; - - assert(ep->connector); - assert(ep->qp); - - HRESULT hr; - - struct nd_ep_connect *accept = ofi_nd_buf_alloc_nd_ep_connect(); - if (!accept) - return -FI_ENOMEM; - - memset(accept, 0, sizeof(*accept)); - accept->ep = ep; - accept->eq = ep->eq; - accept->connector = ep->connector; - accept->base.event_cb = 
ofi_nd_ep_accepted; - accept->base.err_cb = ofi_nd_ep_rejected; - accept->base.free = ofi_nd_ep_accepted_free; - accept->connector->lpVtbl->AddRef(accept->connector); - - ND_LOG_DEBUG(FI_LOG_EP_CTRL, "sending accept message\n"); - - hr = ep->connector->lpVtbl->Accept( - ep->connector, (IUnknown*)ep->qp, - ep->domain->ainfo.MaxInboundReadLimit, - ep->domain->ainfo.MaxOutboundReadLimit, - param, (ULONG)paramlen, &accept->base.ov); - if (FAILED(hr)) - ND_LOG_WARN(FI_LOG_EP_CTRL, "failed to send accept message: %x\n", - hr); - - return H2F(hr); -} - -static int ofi_nd_ep_getname(fid_t fid, void *addr, size_t *addrlen) -{ - assert(fid && fid->fclass == FI_CLASS_EP); - - if (fid->fclass != FI_CLASS_EP) - return -FI_EINVAL; - - HRESULT hr; - ULONG len = (ULONG)*addrlen; - struct nd_ep *ep = container_of(fid, struct nd_ep, fid.fid); - - if (!ep->connector) - return -FI_EOPBADSTATE; - - hr = ep->connector->lpVtbl->GetLocalAddress(ep->connector, - (struct sockaddr *)addr, - &len); - if (*addrlen < len) { - ND_LOG_INFO(FI_LOG_EP_CTRL, - "Provided buffer (size = %"PRIu64") is too small, required = %"PRIu64, - addrlen, len); - *addrlen = (size_t)len; - return -FI_ETOOSMALL; - } - *addrlen = (size_t)len; - - return H2F(hr); -} - -static int ofi_nd_ep_getpeer(struct fid_ep *pep, void *addr, size_t *addrlen) -{ - assert(pep); - assert(pep->fid.fclass == FI_CLASS_EP); - - if (pep->fid.fclass != FI_CLASS_EP) - return -FI_EINVAL; - - HRESULT hr; - ULONG len = (ULONG)*addrlen; - struct nd_ep *ep = container_of(pep, struct nd_ep, fid.fid); - - if (!ep->connector) - return -FI_EOPBADSTATE; - - hr = ep->connector->lpVtbl->GetPeerAddress(ep->connector, - (struct sockaddr*)addr, &len); - - *addrlen = (size_t)len; - - return H2F(hr); -} - -static int ofi_nd_ep_bind(fid_t pep, fid_t bfid, uint64_t flags) -{ - assert(pep->fclass == FI_CLASS_EP); - - if (pep->fclass != FI_CLASS_EP) - return -FI_EINVAL; - - struct nd_ep *ep = container_of(pep, struct nd_ep, fid.fid); - - switch (bfid->fclass) { 
- case FI_CLASS_EQ: - ep->eq = container_of(bfid, struct nd_eq, fid.fid); - return FI_SUCCESS; - case FI_CLASS_CQ: - if (flags & FI_TRANSMIT) { - ep->cq_send = container_of(bfid, struct nd_cq, fid.fid); - ep->send_flags = flags; - } - if (flags & FI_RECV) { - ep->cq_recv = container_of(bfid, struct nd_cq, fid.fid); - ep->recv_flags = flags; - } - if (flags & FI_REMOTE_READ || flags & FI_REMOTE_WRITE) - return -FI_EBADFLAGS; - return FI_SUCCESS; - case FI_CLASS_CNTR: - if (flags & FI_SEND) - ep->cntr_send = container_of(bfid, struct nd_cntr, fid.fid); - if (flags & FI_RECV) - ep->cntr_recv = container_of(bfid, struct nd_cntr, fid.fid); - if (flags & FI_READ) - ep->cntr_read = container_of(bfid, struct nd_cntr, fid.fid); - if (flags & FI_WRITE) - ep->cntr_write = container_of(bfid, struct nd_cntr, fid.fid); - if (flags & FI_REMOTE_READ || flags & FI_REMOTE_WRITE) - return -FI_EBADFLAGS; - return FI_SUCCESS; - case FI_CLASS_SRX_CTX: - ep->srx = container_of(bfid, struct nd_srx, fid.fid); - return FI_SUCCESS; - default: - ND_LOG_WARN(FI_LOG_EP_CTRL, - "ofi_nd_ep_bind: unknown bind class: %d", - (int)bfid->fclass); - return -FI_EINVAL; - } -} - -static int ofi_nd_ep_shutdown(struct fid_ep *pep, uint64_t flags) -{ - assert(pep); - assert(pep->fid.fclass == FI_CLASS_EP); - - OFI_UNUSED(flags); - - if (pep->fid.fclass != FI_CLASS_EP) - return -FI_EINVAL; - - struct nd_ep *ep = container_of(pep, struct nd_ep, fid.fid); - - if (!ep->qp) - return FI_SUCCESS; - - ofi_nd_unexp_fini(ep); - - HRESULT hr = S_OK; - ofi_nd_util_ov *ov = NULL; - if (ep->connected) { - ep->connected = 0; - - ov = ND_BUF_ALLOC(ofi_nd_util_ov); - if (!ov) - return -FI_ENOMEM; - - hr = ep->connector->lpVtbl->Disconnect(ep->connector, &ov->base.ov); - if (FAILED(hr)) - goto fn_fail; - - hr = ofi_nd_util_ov_wait(ep->connector, ov); - } - - return H2F(hr); - -fn_fail: - if (ov) - ND_BUF_FREE(ofi_nd_util_ov, ov); - return H2F(hr); -} - -static void ofi_nd_ep_disconnected_free(struct nd_event_base* base) -{ - 
OFI_UNUSED(base); -} - -static void ofi_nd_ep_disconnected(struct nd_event_base* base, DWORD bytes) -{ - OFI_UNUSED(bytes); - - struct nd_ep *ep = container_of(base, struct nd_ep, disconnect_ov); - assert(ep->fid.fid.fclass == FI_CLASS_EP); - - ep->connected = 0; - - struct nd_eq_event *ev = ND_BUF_ALLOC(nd_eq_event); - if (!ev) { - return; - } - memset(ev, 0, sizeof(*ev)); - struct fi_eq_cm_entry *cm = (struct fi_eq_cm_entry*)&ev->operation; - ev->eq_event = FI_SHUTDOWN; - cm->fid = &ep->fid.fid; - ofi_nd_eq_push(ep->eq, ev); - - //ofi_nd_ep_shutdown(&ep->fid, 0); -} - -static void ofi_nd_ep_disconnected_err(struct nd_event_base* base, DWORD bytes, - DWORD err) -{ - if (err == STATUS_CONNECTION_DISCONNECTED) { - ofi_nd_ep_disconnected(base, bytes); - } - else { - struct nd_ep *ep = container_of(base, struct nd_ep, disconnect_ov); - - struct nd_eq_event *ev = ND_BUF_ALLOC(nd_eq_event); - if (!ev) { - return; - } - memset(ev, 0, sizeof(*ev)); - ev->eq_event = FI_SHUTDOWN; - ev->error.err = H2F(err); - ev->error.prov_errno = err; - ev->error.fid = &ep->fid.fid; - ofi_nd_eq_push_err(ep->eq, ev); - } -} - -static ssize_t ofi_nd_ep_cancel(fid_t fid, void *context) -{ - assert(fid); - assert(fid->fclass == FI_CLASS_EP); - assert(context); - - if (!context) { - ND_LOG_WARN(FI_LOG_EP_DATA, "Context is NULL \n"); - return -FI_EINVAL; - } - - return ofi_nd_cq_cancel(fid, context); -} - -int ofi_nd_ep_getopt(struct fid* fid, int level, int optname, - void* optval, size_t* optlen) -{ - assert(fid->fclass == FI_CLASS_EP); - struct nd_ep* ep = container_of(fid, struct nd_ep, fid.fid); - - assert(optval); - assert(optlen); - - if (level != FI_OPT_ENDPOINT || optname != FI_OPT_CM_DATA_SIZE) - return -FI_ENOPROTOOPT; - - if (*optlen < sizeof(size_t)) { - *optlen = sizeof(size_t); - return -FI_ETOOSMALL; - } - - *((size_t*)optval) = ep->domain->ainfo.MaxCallerData; - *optlen = sizeof(size_t); - - return 0; -} - -#endif /* _WIN32 */ diff --git a/prov/netdir/src/netdir_ep_msg.c 
b/prov/netdir/src/netdir_ep_msg.c deleted file mode 100644 index 422474774d9..00000000000 --- a/prov/netdir/src/netdir_ep_msg.c +++ /dev/null @@ -1,666 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. -* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. 
-*/ - -#ifdef _WIN32 - -#include -#define WIN32_NO_STATUS - -#include "netdir.h" -#include "netdir_ov.h" -#include "netdir_cq.h" -#include "netdir_log.h" -#include "netdir_iface.h" -#include "netdir_unexp.h" - -#include "rdma/fabric.h" -#include "rdma/fi_endpoint.h" - -#include "ofi.h" -#include "ofi_util.h" - -static ssize_t ofi_nd_ep_recv(struct fid_ep *ep, void *buf, size_t len, - void *desc, fi_addr_t src_addr, void *context); -static ssize_t ofi_nd_ep_send(struct fid_ep *ep, const void *buf, size_t len, - void *desc, fi_addr_t src_addr, void *context); -static ssize_t ofi_nd_ep_recvmsg(struct fid_ep *ep_fid, const struct fi_msg *msg, - uint64_t flags); -static ssize_t ofi_nd_ep_recvv(struct fid_ep *ep_fid, const struct iovec *iov, - void **desc, size_t count, fi_addr_t src_addr, - void *context); -static ssize_t ofi_nd_ep_sendmsg(struct fid_ep *ep_fid, const struct fi_msg *msg, - uint64_t flags); -static ssize_t ofi_nd_ep_sendv(struct fid_ep *ep_fid, const struct iovec *iov, - void **desc, size_t count, fi_addr_t dest_addr, - void *context); -static ssize_t ofi_nd_ep_inject(struct fid_ep *ep_fid, const void *buf, size_t len, - fi_addr_t dest_addr); -static ssize_t ofi_nd_ep_senddata(struct fid_ep *ep, const void *buf, size_t len, - void *desc, uint64_t data, fi_addr_t dest_addr, - void *context); -ssize_t ofi_nd_ep_injectdata(struct fid_ep *ep, const void *buf, size_t len, - uint64_t data, fi_addr_t dest_addr); - -struct fi_ops_msg ofi_nd_ep_msg = { - .size = sizeof(ofi_nd_ep_msg), - .recv = ofi_nd_ep_recv, - .recvv = ofi_nd_ep_recvv, - .recvmsg = ofi_nd_ep_recvmsg, - .send = ofi_nd_ep_send, - .sendv = ofi_nd_ep_sendv, - .sendmsg = ofi_nd_ep_sendmsg, - .inject = ofi_nd_ep_inject, - .senddata = ofi_nd_ep_senddata, - .injectdata = ofi_nd_ep_injectdata -}; - -static int ofi_nd_ep_sendmsg_inline(struct nd_ep *ep, - struct nd_cq_entry *entry, - const struct fi_msg *msg, - size_t len) -{ - int res; - size_t i; - - nd_flow_cntrl_flags flow_control_flags = { - 
.req_ack = 0, - .ack = 0, - .empty = 0 - }; - - struct nd_msgheader header_def = { - .data = entry->data, - .event = NORMAL_EVENT, - .flags = flow_control_flags, - .location_cnt = 0 - }; - entry->prefix->header = header_def; - entry->event = NORMAL_EVENT; - entry->flow_cntrl_flags = flow_control_flags; - - - nd_sge *sge_entry = ofi_nd_buf_alloc_nd_sge(); - if (!sge_entry) { - ND_LOG_WARN(FI_LOG_EP_DATA, "SGE entry buffer can't be allocated"); - res = -FI_ENOMEM; - goto fn_fail_1; - } - memset(sge_entry, 0, sizeof(*sge_entry)); - - if (entry->flags & FI_INJECT) { - if (len) { - entry->inline_buf = __ofi_nd_buf_alloc_nd_inlinebuf(&ep->domain->inlinebuf); - if (!entry->inline_buf) { - res = -FI_ENOMEM; - goto fn_fail_2; - } - - char *buf = (char*)entry->inline_buf->buffer; - for (i = 0; i < msg->iov_count; i++) { - memcpy(buf, msg->msg_iov[i].iov_base, msg->msg_iov[i].iov_len); - buf += msg->msg_iov[i].iov_len; - } - } - - ND2_SGE sge[2] = { - { - .Buffer = &entry->prefix->header, - .BufferLength = (ULONG)sizeof(entry->prefix->header), - .MemoryRegionToken = entry->prefix->token - }, - { - .Buffer = len ? entry->inline_buf->buffer : 0, - .BufferLength = (ULONG)len, - .MemoryRegionToken = len ? 
entry->inline_buf->token : 0 - } - }; - - sge_entry->count = 2; - for (i = 0; i < sge_entry->count; i++) - sge_entry->entries[i] = sge[i]; - } - else { - ND2_SGE sge = { - .Buffer = &entry->prefix->header, - .BufferLength = (ULONG)sizeof(entry->prefix->header), - .MemoryRegionToken = entry->prefix->token - }; - sge_entry->entries[0] = sge; - - for (i = 0; i < msg->iov_count; i++) { - ND2_SGE sge_def = { - .Buffer = msg->msg_iov[i].iov_base, - .BufferLength = (ULONG)msg->msg_iov[i].iov_len, - .MemoryRegionToken = (UINT32)(uintptr_t)msg->desc[i] - }; - sge_entry->entries[i + 1] = sge_def; - } - - sge_entry->count = (ULONG)msg->iov_count + 1; - } - - nd_send_entry *send_entry = ofi_nd_buf_alloc_nd_send_entry(); - if (!send_entry) { - ND_LOG_WARN(FI_LOG_EP_DATA, "Send entry buffer can't be allocated"); - res = -FI_ENOMEM; - goto fn_fail_3; - } - memset(send_entry, 0, sizeof(*send_entry)); - - send_entry->cq_entry = entry; - send_entry->sge = sge_entry; - send_entry->ep = ep; - - /* Push the user's transmission request into - * the Send Queue for furhter handling */ - entry->send_entry = send_entry; - ofi_nd_queue_push(&ep->send_queue, &send_entry->queue_item); - - return FI_SUCCESS; -fn_fail_3: - if (entry->inline_buf) - __ofi_nd_buf_free_nd_inlinebuf(entry->inline_buf, - &ep->domain->inlinebuf); -fn_fail_2: - ofi_nd_buf_free_nd_sge(sge_entry); -fn_fail_1: - ND_LOG_WARN(FI_LOG_EP_DATA, "The error happened during handling Send"); - return res; -} - -static int ofi_nd_ep_prepare_sendmsg_large(struct nd_ep *ep, - struct nd_cq_entry *entry, - struct nd_cq_entry *wait_ack_entry, - const struct fi_msg *msg) -{ - size_t i; - HRESULT hr; - - for (i = 0; i < msg->iov_count; i++) { - uint64_t addr = (uint64_t)msg->msg_iov[i].iov_base; - size_t len = msg->msg_iov[i].iov_len; - - /* Register MR to share data via RMA, store MR descriptor - * in allocated CQ entry for receiving ACK */ - hr = ep->domain->adapter->lpVtbl->CreateMemoryRegion( - ep->domain->adapter, 
&IID_IND2MemoryRegion, - ep->domain->adapter_file, (void**)&wait_ack_entry->mr[i]); - if (FAILED(hr)) { - /* TODO: we leak previously created MRs */ - ND_LOG_WARN(FI_LOG_EP_DATA, ofi_nd_strerror((DWORD)hr, NULL)); - return H2F(hr); - } - wait_ack_entry->mr_count++; - - hr = ofi_nd_util_register_mr( - wait_ack_entry->mr[i], (void *)addr, len, - ND_MR_FLAG_ALLOW_LOCAL_WRITE | - ND_MR_FLAG_ALLOW_REMOTE_READ | - ND_MR_FLAG_ALLOW_REMOTE_WRITE); - if (FAILED(hr)) { - /* TODO: we leak previously created MRs */ - return H2F(hr); - } - - struct nd_msg_location location_def = { - .addr = addr, - .len = len, - .remote_mr_token = wait_ack_entry->mr[i]->lpVtbl->GetRemoteToken( - wait_ack_entry->mr[i]) - }; - - entry->notify_buf->location[i] = location_def; - } - - return FI_SUCCESS; -} - - - -static int ofi_nd_ep_sendmsg_large(struct nd_ep *ep, - struct nd_cq_entry *entry, - const struct fi_msg *msg) -{ - int res; - size_t i; - struct nd_cq_entry *wait_ack_entry; - - nd_flow_cntrl_flags flow_control_flags = { - .req_ack = 0, - .ack = 0, - .empty = 0 - }; - - struct nd_msgheader header_def = { - .data = entry->data, - .event = LARGE_MSG_REQ, - .flags = flow_control_flags, - .location_cnt = msg->iov_count - }; - entry->prefix->header = header_def; - entry->event = LARGE_MSG_REQ; - entry->flow_cntrl_flags = flow_control_flags; - - entry->notify_buf = __ofi_nd_buf_alloc_nd_notifybuf( - &ep->domain->notifybuf); - if (!entry->notify_buf) { - res = -FI_ENOMEM; - goto fn_fail_1; - } - - /* The CQ entry to wait ACK of read completion from peer */ - wait_ack_entry = ofi_nd_buf_alloc_nd_cq_entry(); - if (!wait_ack_entry) { - res = -FI_ENOMEM; - goto fn_fail_2; - } - memset(wait_ack_entry, 0, sizeof(*wait_ack_entry)); - wait_ack_entry->notify_buf = __ofi_nd_buf_alloc_nd_notifybuf( - &ep->domain->notifybuf); - if (!wait_ack_entry->notify_buf) { - res = -FI_ENOMEM; - goto fn_fail_3; - } - wait_ack_entry->buf = wait_ack_entry->notify_buf; - wait_ack_entry->len = sizeof(struct nd_notifybuf); - 
wait_ack_entry->data = msg->data; - wait_ack_entry->flags = FI_MSG | FI_RECV; - wait_ack_entry->domain = ep->domain; - wait_ack_entry->context = msg->context; - wait_ack_entry->seq = entry->seq; - wait_ack_entry->state = LARGE_MSG_WAIT_ACK; - wait_ack_entry->aux_entry = entry; - - res = ofi_nd_ep_prepare_sendmsg_large(ep, entry, wait_ack_entry, msg); - if (res) - goto fn_fail_4; - - entry->state = LARGE_MSG_WAIT_ACK; - ND2_SGE sge[2] = { - { - .Buffer = &entry->prefix->header, - .BufferLength = (ULONG)sizeof(entry->prefix->header), - .MemoryRegionToken = entry->prefix->token - }, - { - .Buffer = entry->notify_buf->location, - .BufferLength = (ULONG)(sizeof(*entry->notify_buf->location) * msg->iov_count), - .MemoryRegionToken = entry->notify_buf->token - } - }; - - nd_sge *sge_entry = ofi_nd_buf_alloc_nd_sge(); - if (!sge_entry) { - ND_LOG_WARN(FI_LOG_EP_DATA, "SGE entry buffer can't be allocated"); - res = -FI_ENOMEM; - goto fn_fail_4; - } - memset(sge_entry, 0, sizeof(*sge_entry)); - - sge_entry->count = 2; - for (i = 0; i < sge_entry->count; i++) - sge_entry->entries[i] = sge[i]; - - nd_send_entry *send_entry = ofi_nd_buf_alloc_nd_send_entry(); - if (!send_entry) { - ND_LOG_WARN(FI_LOG_EP_DATA, "Send entry buffer can't be allocated"); - res = -FI_ENOMEM; - goto fn_fail_5; - } - memset(send_entry, 0, sizeof(*send_entry)); - - send_entry->cq_entry = entry; - send_entry->sge = sge_entry; - send_entry->ep = ep; - send_entry->prepost_entry = wait_ack_entry; - - /* Push the user's transmission request into - * the Send Queue for furhter handling */ - entry->send_entry = send_entry; - ofi_nd_queue_push(&ep->send_queue, &send_entry->queue_item); - - return FI_SUCCESS; -fn_fail_5: - ofi_nd_buf_free_nd_sge(sge_entry); -fn_fail_4: - __ofi_nd_buf_free_nd_notifybuf(wait_ack_entry->notify_buf, - &ep->domain->notifybuf); -fn_fail_3: - ofi_nd_free_cq_entry(wait_ack_entry); -fn_fail_2: - __ofi_nd_buf_free_nd_notifybuf(entry->notify_buf, - &ep->domain->notifybuf); -fn_fail_1: - 
ND_LOG_WARN(FI_LOG_EP_DATA, "The error happened during handling Send"); - return res; -} - -static ssize_t -ofi_nd_ep_sendmsg(struct fid_ep *pep, const struct fi_msg *msg, uint64_t flags) -{ - assert(pep->fid.fclass == FI_CLASS_EP); - assert(msg); - - if (pep->fid.fclass != FI_CLASS_EP) - return -FI_EINVAL; - - size_t i; - size_t len = 0; - ssize_t res = FI_SUCCESS; - struct nd_ep *ep = container_of(pep, struct nd_ep, fid); - - if (!ep->qp) - return -FI_EOPBADSTATE; - - for (i = 0; i < msg->iov_count; i++) { - if (msg->msg_iov[i].iov_len && !msg->msg_iov[i].iov_base) - return -FI_EINVAL; - len += msg->msg_iov[i].iov_len; - } - - if ((msg->iov_count > min(ep->domain->ainfo.MaxReceiveSge, ND_MSG_IOV_LIMIT) - 1) || - (len > ep->domain->info->ep_attr->max_msg_size)) - return -FI_EINVAL; - - struct nd_cq_entry *entry = ofi_nd_buf_alloc_nd_cq_entry(); - if (!entry) - return -FI_ENOMEM; - memset(entry, 0, sizeof(*entry)); - - entry->buf = (msg->iov_count == 1) ? msg->msg_iov[0].iov_base : 0; - entry->len = len; - entry->data = msg->data; - entry->flags = flags | FI_MSG | FI_SEND; - entry->domain = ep->domain; - entry->context = msg->context; - entry->seq = InterlockedAdd64(&ep->domain->msg_cnt, 1); - - /* since send operation can't be canceled, set NULL into - * the 1st pointer of internal data of context */ - if (msg->context) - ND_FI_CONTEXT(msg->context) = 0; - - entry->prefix = __ofi_nd_buf_alloc_nd_msgprefix( - &ep->domain->msgfooter); - if (!entry->prefix) { - res = -FI_ENOMEM; - goto fn_fail_1; - } - - if (entry->len <= gl_data.inline_thr) - res = ofi_nd_ep_sendmsg_inline(ep, entry, msg, len); - else - res = ofi_nd_ep_sendmsg_large(ep, entry, msg); - if (res) - goto fn_fail_2; - /* Let's progress Send Queue for current EP if possible */ - ofi_nd_ep_progress(ep); - - return FI_SUCCESS; -fn_fail_2: - __ofi_nd_buf_free_nd_msgprefix(entry->prefix, &ep->domain->msgfooter); -fn_fail_1: - ofi_nd_buf_free_nd_cq_entry(entry); - return res; -} - -static ssize_t 
ofi_nd_ep_inject(struct fid_ep *pep, const void *buf, size_t len, - fi_addr_t dest_addr) -{ - return ofi_nd_ep_injectdata(pep, buf, len, 0, dest_addr); -} - -ssize_t -ofi_nd_ep_injectdata(struct fid_ep *pep, const void *buf, size_t len, - uint64_t data, fi_addr_t dest_addr) -{ - struct iovec iov = { - .iov_base = (void*)buf, - .iov_len = len - }; - - struct fi_msg msg = { - .msg_iov = &iov, - .desc = 0, - .iov_count = 1, - .addr = dest_addr, - .context = 0, - .data = data - }; - - return ofi_nd_ep_sendmsg(pep, &msg, FI_INJECT); -} - -static ssize_t ofi_nd_ep_senddata(struct fid_ep *pep, const void *buf, size_t len, void *desc, - uint64_t data, fi_addr_t dest_addr, void *context) -{ - struct iovec iov = { - .iov_base = (void*)buf, - .iov_len = len - }; - - struct fi_msg msg = { - .msg_iov = &iov, - .desc = &desc, - .iov_count = 1, - .addr = dest_addr, - .context = context, - .data = data - }; - - assert(pep->fid.fclass == FI_CLASS_EP); - - if (pep->fid.fclass != FI_CLASS_EP) - return -FI_EINVAL; - - struct nd_ep *ep = container_of(pep, struct nd_ep, fid); - - return ofi_nd_ep_sendmsg(pep, &msg, ep->info->tx_attr->op_flags); -} - -static ssize_t ofi_nd_ep_send(struct fid_ep *pep, const void *buf, size_t len, - void *desc, fi_addr_t dest_addr, void *context) -{ - return ofi_nd_ep_senddata(pep, buf, len, desc, 0, dest_addr, context); -} - -static ssize_t ofi_nd_ep_sendv(struct fid_ep *pep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t dest_addr, - void *context) -{ - struct fi_msg msg = { - .msg_iov = iov, - .desc = desc, - .iov_count = count, - .addr = dest_addr, - .context = context, - .data = 0 - }; - - assert(pep->fid.fclass == FI_CLASS_EP); - - if (pep->fid.fclass != FI_CLASS_EP) - return -FI_EINVAL; - - struct nd_ep *ep = container_of(pep, struct nd_ep, fid); - - return ofi_nd_ep_sendmsg(pep, &msg, ep->info->tx_attr->op_flags); -} - -static ssize_t ofi_nd_ep_recvmsg(struct fid_ep *pep, const struct fi_msg *msg, - uint64_t flags) -{ - 
assert(pep->fid.fclass == FI_CLASS_EP); - assert(msg); - - if (pep->fid.fclass != FI_CLASS_EP) - return -FI_EINVAL; - - size_t i; - size_t len = 0; - - struct nd_ep *ep = container_of(pep, struct nd_ep, fid); - - if (!ep->qp) - return -FI_EOPBADSTATE; - - for (i = 0; i < msg->iov_count; i++) { - if (msg->msg_iov[i].iov_len && !msg->msg_iov[i].iov_base) - return -FI_EINVAL; - len += msg->msg_iov[i].iov_len; - } - - if ((msg->iov_count > min(ep->domain->ainfo.MaxReceiveSge, ND_MSG_IOV_LIMIT) - 1) || - (len > ep->domain->info->ep_attr->max_msg_size)) - return -FI_EINVAL; - - struct nd_cq_entry *entry = ofi_nd_buf_alloc_nd_cq_entry(); - if (!entry) - return -FI_ENOMEM; - memset(entry, 0, sizeof(*entry)); - - entry->buf = (msg->iov_count == 1) ? msg->msg_iov[0].iov_base : NULL; - entry->len = len; - entry->data = msg->data; - entry->flags = flags | FI_MSG | FI_RECV; - entry->domain = ep->domain; - entry->context = msg->context; - entry->iov_cnt = msg->iov_count; - entry->seq = InterlockedAdd64(&ep->domain->msg_cnt, 1); - - for (i = 0; i < msg->iov_count; i++) - entry->iov[i] = msg->msg_iov[i]; - - /* store allocated entry in 1st pointer of internal data of context */ - if (msg->context) - ND_FI_CONTEXT(msg->context) = entry; - - ofi_nd_queue_push(&ep->prepost, &entry->queue_item); - - ofi_nd_unexp_match(ep); - - return FI_SUCCESS; -} - -static ssize_t ofi_nd_ep_recvv(struct fid_ep *pep, const struct iovec *iov, - void **desc, - size_t count, fi_addr_t src_addr, void *context) -{ - struct fi_msg msg = { - .msg_iov = iov, - .desc = desc, - .iov_count = count, - .addr = src_addr, - .context = context, - .data = 0 - }; - - assert(pep->fid.fclass == FI_CLASS_EP); - - if (pep->fid.fclass != FI_CLASS_EP) - return -FI_EINVAL; - - struct nd_ep *ep = container_of(pep, struct nd_ep, fid); - - return ofi_nd_ep_recvmsg(pep, &msg, ep->info->rx_attr->op_flags); -} - -static ssize_t ofi_nd_ep_recv(struct fid_ep *pep, void *buf, size_t len, - void *desc, fi_addr_t src_addr, void 
*context) -{ - struct iovec iov = { - .iov_base = buf, - .iov_len = len - }; - - return ofi_nd_ep_recvv(pep, &iov, &desc, 1, src_addr, context); -} - -void ofi_nd_send_event(ND2_RESULT *result) -{ - assert(result); - assert(result->RequestType == Nd2RequestTypeSend); - - nd_cq_entry *entry = (nd_cq_entry*)result->RequestContext; - assert(entry); - - struct nd_ep *ep = (struct nd_ep*)result->QueuePairContext; - assert(ep); - assert(ep->fid.fid.fclass == FI_CLASS_EP); - - ND_LOG_EVENT_INFO(entry); - - /* Send entry is no more needed */ - if (entry->send_entry) - ofi_nd_free_send_entry(entry->send_entry); - - if (entry->state == LARGE_MSG_WAIT_ACK) { - /* If send operation isn't able to transmit large message, don't - * notify user as long as we didn't received ACK of completion - * read of shared buffer. This CQ entry will be released - * when auxillary CQ for ACK will be received */ - return; - } - else if (entry->event == LARGE_MSG_ACK || - (entry->flow_cntrl_flags.ack && entry->flow_cntrl_flags.empty)) { - /* Silently release this CQ entry. From now we can consider - * that read of large message is completed successfuly */ - ofi_nd_free_cq_entry(entry); - return; - } - - if (ep->cntr_send) { - if (result->Status != S_OK) { - InterlockedIncrement64(&ep->cntr_send->err); - } - InterlockedIncrement64(&ep->cntr_send->counter); - WakeByAddressAll((void*)&ep->cntr_send->counter); - } - - int notify = ofi_nd_util_completion_blackmagic( - ep->info->tx_attr->op_flags, ep->send_flags, entry->flags) || - result->Status != S_OK; - - if (notify) { - PostQueuedCompletionStatus( - entry->result.Status == S_OK ? 
ep->cq_send->iocp : ep->cq_send->err, - 0, 0, &entry->base.ov); - InterlockedIncrement(&ep->cq_send->count); - WakeByAddressAll((void*)&ep->cq_send->count); - } - else { /* if notification is not requested - just free entry */ - ofi_nd_free_cq_entry(entry); - } -} - -#endif /* _WIN32 */ - diff --git a/prov/netdir/src/netdir_ep_rma.c b/prov/netdir/src/netdir_ep_rma.c deleted file mode 100644 index 2132720249d..00000000000 --- a/prov/netdir/src/netdir_ep_rma.c +++ /dev/null @@ -1,710 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. -* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. 
-*/ - -#ifdef _WIN32 - -#include "netdir.h" -#include "netdir_ov.h" -#include "netdir_cq.h" -#include "netdir_log.h" -#include "netdir_iface.h" - -#include "rdma/fabric.h" -#include "rdma/fi_endpoint.h" - -#include "ofi.h" -#include "ofi_util.h" - -static ssize_t -ofi_nd_ep_read(struct fid_ep *ep, void *buf, size_t len, void *desc, - fi_addr_t src_addr, uint64_t addr, uint64_t key, void *context); -static ssize_t -ofi_nd_ep_readv(struct fid_ep *ep, const struct iovec *iov, void **desc, - size_t count, fi_addr_t src_addr, uint64_t addr, uint64_t key, - void *context); -static ssize_t -ofi_nd_ep_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg, - uint64_t flags); -static ssize_t -ofi_nd_ep_write(struct fid_ep *ep, const void *buf, size_t len, void *desc, - fi_addr_t dest_addr, uint64_t addr, uint64_t key, void *context); -static ssize_t -ofi_nd_ep_writev(struct fid_ep *ep, const struct iovec *iov, void **desc, - size_t count, fi_addr_t dest_addr, uint64_t addr, uint64_t key, - void *context); -static ssize_t -ofi_nd_ep_writemsg(struct fid_ep *ep, const struct fi_msg_rma *msg, - uint64_t flags); -static ssize_t -ofi_nd_ep_inject(struct fid_ep *ep, const void *buf, size_t len, - fi_addr_t dest_addr, uint64_t addr, uint64_t key); -static ssize_t -ofi_nd_ep_writedata(struct fid_ep *ep, const void *buf, size_t len, void *desc, - uint64_t data, fi_addr_t dest_addr, uint64_t addr, uint64_t key, - void *context); -static ssize_t -ofi_nd_ep_writeinjectdata(struct fid_ep *ep, const void *buf, size_t len, - uint64_t data, fi_addr_t dest_addr, uint64_t addr, - uint64_t key); -ssize_t ofi_nd_ep_injectdata(struct fid_ep *ep, const void *buf, size_t len, - uint64_t data, fi_addr_t dest_addr); -static void -ofi_nd_split_msg_iov_2_rma_iov(const struct fi_rma_iov *rma_iovecs, const size_t rma_count, - const struct iovec *msg_iovecs, const size_t msg_count, - struct fi_rma_iov res_iovecs[ND_MSG_INTERNAL_IOV_LIMIT], size_t *res_count, - size_t 
from_split_map[ND_MSG_INTERNAL_IOV_LIMIT], - size_t to_split_map[ND_MSG_INTERNAL_IOV_LIMIT], - uint64_t remote_addr[ND_MSG_INTERNAL_IOV_LIMIT]); - -struct fi_ops_rma ofi_nd_ep_rma = { - .size = sizeof(ofi_nd_ep_rma), - .read = ofi_nd_ep_read, - .readv = ofi_nd_ep_readv, - .readmsg = ofi_nd_ep_readmsg, - .write = ofi_nd_ep_write, - .writev = ofi_nd_ep_writev, - .writemsg = ofi_nd_ep_writemsg, - .inject = ofi_nd_ep_inject, - .writedata = ofi_nd_ep_writedata, - .injectdata = ofi_nd_ep_writeinjectdata -}; - - -static ssize_t -ofi_nd_ep_read(struct fid_ep *ep, void *buf, size_t len, void *desc, - fi_addr_t src_addr, uint64_t addr, uint64_t key, void *context) -{ - struct iovec iov = { - .iov_base = buf, - .iov_len = len - }; - return ofi_nd_ep_readv(ep, &iov, &desc, 1, src_addr, addr, key, context); -} - -static ssize_t -ofi_nd_ep_readv(struct fid_ep *pep, const struct iovec *iov, void **desc, - size_t count, fi_addr_t src_addr, uint64_t addr, uint64_t key, - void *context) -{ - struct fi_rma_iov rma_iov = { - .addr = addr, - .len = iov[0].iov_len, - .key = key - }; - - struct fi_msg_rma msg = { - .msg_iov = iov, - .desc = desc, - .iov_count = count, - .addr = src_addr, - .rma_iov = &rma_iov, - .rma_iov_count = 1, - .context = context, - .data = 0 - }; - - assert(pep->fid.fclass == FI_CLASS_EP); - - if (pep->fid.fclass != FI_CLASS_EP) - return -FI_EINVAL; - - struct nd_ep *ep = container_of(pep, struct nd_ep, fid); - - return ofi_nd_ep_readmsg(pep, &msg, ep->info->rx_attr->op_flags); -} - -static ssize_t -ofi_nd_ep_readmsg(struct fid_ep *pep, const struct fi_msg_rma *msg, - uint64_t flags) -{ - assert(pep->fid.fclass == FI_CLASS_EP); - assert(msg); - - if (pep->fid.fclass != FI_CLASS_EP) - return -FI_EINVAL; - - size_t msg_len = 0, rma_len = 0, i; - HRESULT hr = 0; - - struct nd_ep *ep = container_of(pep, struct nd_ep, fid); - - if (!ep->qp) - return -FI_EOPBADSTATE; - - for (i = 0; i < msg->iov_count; i++) { - if (msg->msg_iov[i].iov_len && !msg->msg_iov[i].iov_base) - 
return -FI_EINVAL; - msg_len += msg->msg_iov[i].iov_len; - } - - for (i = 0; i < msg->rma_iov_count; i++) { - if (msg->rma_iov[i].len && !msg->rma_iov[i].addr) - return -FI_EINVAL; - rma_len += msg->rma_iov[i].len; - } - - /* Check the following: */ - if ((msg_len != rma_len) || /* - msg and rma len are correlated */ - /* - iov counts are less or equal than supported */ - (msg->iov_count > ND_MSG_IOV_LIMIT || - msg->rma_iov_count > ND_MSG_IOV_LIMIT) || - /* - transmitted length is less or equal than max possible */ - (msg_len > ep->domain->info->ep_attr->max_msg_size)) - return -FI_EINVAL; - - struct nd_cq_entry *main_entry = ofi_nd_buf_alloc_nd_cq_entry(); - if (!main_entry) - return -FI_ENOMEM; - memset(main_entry, 0, sizeof(*main_entry)); - main_entry->data = msg->data; - main_entry->flags = flags; - main_entry->domain = ep->domain; - main_entry->context = msg->context; - main_entry->seq = InterlockedAdd64(&ep->domain->msg_cnt, 1); - - /* since write operation can't be canceled, set NULL into - * the 1st pointer of internal data of context */ - if (msg->context) - ND_FI_CONTEXT(msg->context) = 0; - - struct fi_rma_iov rma_iovecs[ND_MSG_INTERNAL_IOV_LIMIT]; - size_t rma_count = 0; - size_t from_split_map[ND_MSG_INTERNAL_IOV_LIMIT]; - size_t to_split_map[ND_MSG_INTERNAL_IOV_LIMIT]; - uint64_t remote_addr[ND_MSG_INTERNAL_IOV_LIMIT]; - - ofi_nd_split_msg_iov_2_rma_iov(msg->rma_iov, msg->rma_iov_count, - msg->msg_iov, msg->iov_count, - rma_iovecs, &rma_count, - from_split_map, to_split_map, remote_addr); - - assert(rma_count <= ND_MSG_INTERNAL_IOV_LIMIT); - - main_entry->wait_completion.comp_count = 0; - main_entry->wait_completion.total_count = rma_count; - - InitializeCriticalSection(&main_entry->wait_completion.comp_lock); - - struct nd_cq_entry *entries[ND_MSG_IOV_LIMIT]; - - for (i = 0; i < rma_count; i++) { - entries[i] = ofi_nd_buf_alloc_nd_cq_entry(); - if (!entries[i]) - goto fn_fail; - memset(entries[i], 0, sizeof(*entries[i])); - - entries[i]->data = 
msg->data; - entries[i]->flags = flags; - entries[i]->domain = ep->domain; - entries[i]->context = msg->context; - entries[i]->seq = main_entry->seq; - entries[i]->aux_entry = main_entry; - - hr = ep->domain->adapter->lpVtbl->CreateMemoryRegion( - ep->domain->adapter, &IID_IND2MemoryRegion, - ep->domain->adapter_file, (void**)&entries[i]->mr[0]); - if (FAILED(hr)) - goto fn_fail; - entries[i]->mr_count = 1; - - hr = ofi_nd_util_register_mr( - entries[i]->mr[0], - (const void *)remote_addr[i], - rma_iovecs[i].len, - ND_MR_FLAG_ALLOW_LOCAL_WRITE | - ND_MR_FLAG_ALLOW_REMOTE_READ | - ND_MR_FLAG_ALLOW_REMOTE_WRITE); - if (FAILED(hr)) - goto fn_fail; - - ND2_SGE sge = { - .Buffer = (void *)remote_addr[i], - .BufferLength = (ULONG)rma_iovecs[i].len, - .MemoryRegionToken = (UINT32)(uintptr_t)msg->desc[to_split_map[i]] - }; - - hr = ep->qp->lpVtbl->Read(ep->qp, entries[i], &sge, 1, - (UINT64)rma_iovecs[i].addr, (UINT32)rma_iovecs[i].key, 0); - if (FAILED(hr)) - goto fn_fail; - } - - return FI_SUCCESS; - -fn_fail: - while (i-- > 0) - ofi_nd_free_cq_entry(entries[i]); - ND_LOG_WARN(FI_LOG_EP_DATA, ofi_nd_strerror((DWORD)hr, NULL)); - return H2F(hr); -} - -static ssize_t -ofi_nd_ep_write(struct fid_ep *ep, const void *buf, size_t len, void *desc, - fi_addr_t dest_addr, uint64_t addr, uint64_t key, void *context) -{ - struct iovec iov = { - .iov_base = (void*)buf, - .iov_len = len - }; - return ofi_nd_ep_writev(ep, &iov, &desc, 1, dest_addr, addr, key, context); -} - -static ssize_t -ofi_nd_ep_writev(struct fid_ep *pep, const struct iovec *iov, void **desc, - size_t count, fi_addr_t dest_addr, uint64_t addr, uint64_t key, - void *context) -{ - struct fi_rma_iov rma_iov = { - .addr = addr, - .len = iov[0].iov_len, - .key = key - }; - - struct fi_msg_rma msg = { - .msg_iov = iov, - .desc = desc, - .iov_count = count, - .addr = dest_addr, - .rma_iov = &rma_iov, - .rma_iov_count = 1, - .context = context, - .data = 0 - }; - - assert(pep->fid.fclass == FI_CLASS_EP); - - if 
(pep->fid.fclass != FI_CLASS_EP) - return -FI_EINVAL; - - struct nd_ep *ep = container_of(pep, struct nd_ep, fid); - - return ofi_nd_ep_writemsg(pep, &msg, ep->info->tx_attr->op_flags); -} - -static ssize_t -ofi_nd_ep_writemsg(struct fid_ep *pep, const struct fi_msg_rma *msg, - uint64_t flags) -{ - assert(pep->fid.fclass == FI_CLASS_EP); - assert(msg); - - if (pep->fid.fclass != FI_CLASS_EP) - return -FI_EINVAL; - - size_t msg_len = 0, rma_len = 0, i; - HRESULT hr = 0; - - struct nd_cq_entry *entries[ND_MSG_IOV_LIMIT]; - struct nd_ep *ep = container_of(pep, struct nd_ep, fid); - - if (!ep->qp) - return -FI_EOPBADSTATE; - - for (i = 0; i < msg->iov_count; i++) { - if (msg->msg_iov[i].iov_len && !msg->msg_iov[i].iov_base) - return -FI_EINVAL; - msg_len += msg->msg_iov[i].iov_len; - } - - if ((msg_len > ep->domain->info->ep_attr->max_msg_size) && - (flags & FI_INJECT)) - return -FI_EINVAL; - - for (i = 0; i < msg->rma_iov_count; i++) { - if (msg->rma_iov[i].len && !msg->rma_iov[i].addr) - return -FI_EINVAL; - rma_len += msg->rma_iov[i].len; - } - - /* Check the following: */ - if ((msg_len != rma_len) || /* - msg and rma len are correlated */ - /* - iov counts are less or equal than supported */ - ((msg->iov_count > ND_MSG_IOV_LIMIT || - msg->rma_iov_count > ND_MSG_IOV_LIMIT)) || - /* - transmitted length is less or equal than max possible */ - (msg_len > ep->domain->info->ep_attr->max_msg_size) || - /* - if INJECT, data should be inlined */ - ((flags & FI_INJECT) && - (msg_len > ep->domain->info->tx_attr->inject_size))) - return -FI_EINVAL; - - struct nd_cq_entry *main_entry = ofi_nd_buf_alloc_nd_cq_entry(); - if (!main_entry) - return -FI_ENOMEM; - memset(main_entry, 0, sizeof(*main_entry)); - main_entry->data = msg->data; - main_entry->flags = flags; - main_entry->domain = ep->domain; - main_entry->context = msg->context; - main_entry->seq = InterlockedAdd64(&ep->domain->msg_cnt, 1); - - /* since write operation can't be canceled, set NULL into - * the 1st pointer 
of internal data of context */ - if (msg->context) - ND_FI_CONTEXT(msg->context) = 0; - - /* TODO */ - if (msg_len > (size_t)gl_data.inline_thr) { - struct fi_rma_iov rma_iovecs[ND_MSG_INTERNAL_IOV_LIMIT]; - size_t rma_count = 0; - size_t from_split_map[ND_MSG_INTERNAL_IOV_LIMIT]; - size_t to_split_map[ND_MSG_INTERNAL_IOV_LIMIT]; - uint64_t remote_addr[ND_MSG_INTERNAL_IOV_LIMIT]; - - ofi_nd_split_msg_iov_2_rma_iov(msg->rma_iov, msg->rma_iov_count, - msg->msg_iov, msg->iov_count, - rma_iovecs, &rma_count, - from_split_map, to_split_map, remote_addr); - - assert(rma_count <= ND_MSG_INTERNAL_IOV_LIMIT); - - main_entry->wait_completion.comp_count = 0; - main_entry->wait_completion.total_count = rma_count; - - InitializeCriticalSection(&main_entry->wait_completion.comp_lock); - - for (i = 0; i < rma_count; i++) { - entries[i] = ofi_nd_buf_alloc_nd_cq_entry(); - if (!entries[i]) - goto fn_fail; - memset(entries[i], 0, sizeof(*entries[i])); - - entries[i]->data = msg->data; - entries[i]->flags = flags; - entries[i]->domain = ep->domain; - entries[i]->context = msg->context; - entries[i]->seq = main_entry->seq; - entries[i]->aux_entry = main_entry; - - ND2_SGE sge = { - .Buffer = (void *)remote_addr[i], - .BufferLength = (ULONG)rma_iovecs[i].len, - .MemoryRegionToken = (UINT32)(uintptr_t)msg->desc[to_split_map[i]] - }; - - hr = ep->qp->lpVtbl->Write(ep->qp, entries[i], &sge, 1, - (UINT64)rma_iovecs[i].addr, (UINT32)rma_iovecs[i].key, 0); - if (FAILED(hr)) - goto fn_fail; - } - - return FI_SUCCESS; - } - else { - if (msg_len) { - main_entry->inline_buf = __ofi_nd_buf_alloc_nd_inlinebuf(&ep->domain->inlinebuf); - if (!main_entry->inline_buf) - return -FI_ENOMEM; - - char *buf = (char*)main_entry->inline_buf->buffer; - for (i = 0; i < msg->iov_count; i++) { - memcpy(buf, msg->msg_iov[i].iov_base, msg->msg_iov[i].iov_len); - buf += msg->msg_iov[i].iov_len; - } - } - - for (i = 0; i < msg->rma_iov_count; i++) { - char *buf = (char *)main_entry->inline_buf->buffer; - - 
entries[i] = ofi_nd_buf_alloc_nd_cq_entry(); - if (!entries[i]) - goto fn_fail; - memset(entries[i], 0, sizeof(*entries[i])); - - entries[i]->data = msg->data; - entries[i]->flags = flags; - entries[i]->domain = ep->domain; - entries[i]->context = msg->context; - entries[i]->seq = main_entry->seq; - entries[i]->aux_entry = main_entry; - - ND2_SGE sge = { - .Buffer = (void *)(buf + msg->rma_iov[i].len), - .BufferLength = (ULONG)msg->rma_iov[i].len, - .MemoryRegionToken = main_entry->inline_buf->token - }; - - hr = ep->qp->lpVtbl->Write(ep->qp, entries[i], &sge, 1, - (UINT64)msg->rma_iov[i].addr, (UINT32)msg->rma_iov[i].key, 0); - if (FAILED(hr)) - goto fn_fail; - } - - return FI_SUCCESS; - } -fn_fail: - while (i-- > 0) - ofi_nd_free_cq_entry(entries[i]); - ND_LOG_WARN(FI_LOG_EP_DATA, ofi_nd_strerror((DWORD)hr, NULL)); - return H2F(hr); -} - -static ssize_t -ofi_nd_ep_inject(struct fid_ep *pep, const void *buf, size_t len, - fi_addr_t dest_addr, uint64_t addr, uint64_t key) -{ - struct iovec iov = { - .iov_base = (void*)buf, - .iov_len = len - }; - - struct fi_rma_iov rma_iov = { - .addr = addr, - .len = len, - .key = key - }; - - struct fi_msg_rma msg = { - .msg_iov = &iov, - .desc = 0, - .iov_count = 1, - .addr = dest_addr, - .rma_iov = &rma_iov, - .rma_iov_count = 1, - .context = 0, - .data = 0 - }; - - return ofi_nd_ep_writemsg(pep, &msg, FI_INJECT); -} - -static ssize_t -ofi_nd_ep_writedata(struct fid_ep *pep, const void *buf, size_t len, void *desc, - uint64_t data, fi_addr_t dest_addr, uint64_t addr, uint64_t key, - void *context) -{ - struct iovec iov = { - .iov_base = (void*)buf, - .iov_len = len - }; - - struct fi_rma_iov rma_iov = { - .addr = addr, - .len = len, - .key = key - }; - - struct fi_msg_rma msg = { - .msg_iov = &iov, - .desc = &desc, - .iov_count = 1, - .addr = dest_addr, - .rma_iov = &rma_iov, - .rma_iov_count = 1, - .context = context, - .data = data - }; - - assert(pep->fid.fclass == FI_CLASS_EP); - - if (pep->fid.fclass != FI_CLASS_EP) - 
return -FI_EINVAL; - - struct nd_ep *ep = container_of(pep, struct nd_ep, fid); - - return ofi_nd_ep_writemsg(pep, &msg, ep->info->tx_attr->op_flags | FI_REMOTE_CQ_DATA); -} - -static ssize_t -ofi_nd_ep_writeinjectdata(struct fid_ep *ep, const void *buf, size_t len, - uint64_t data, fi_addr_t dest_addr, uint64_t addr, - uint64_t key) -{ - struct iovec iov = { - .iov_base = (void*)buf, - .iov_len = len - }; - - struct fi_rma_iov rma_iov = { - .addr = addr, - .len = len, - .key = key - }; - - struct fi_msg_rma msg = { - .msg_iov = &iov, - .desc = 0, - .iov_count = 1, - .addr = dest_addr, - .rma_iov = &rma_iov, - .rma_iov_count = 1, - .context = 0, - .data = data - }; - - return ofi_nd_ep_writemsg(ep, &msg, FI_INJECT | FI_REMOTE_CQ_DATA); -} - -void ofi_nd_read_event(ND2_RESULT *result) -{ - assert(result); - assert(result->RequestType == Nd2RequestTypeRead); - - nd_cq_entry *entry = (nd_cq_entry*)result->RequestContext; - assert(entry); - - ND_LOG_EVENT_INFO(entry); - - /* Check whether the operation is complex, i.e. read operation - * may consists from several subtasks of read */ - if (entry->aux_entry) { - EnterCriticalSection(&entry->aux_entry->wait_completion.comp_lock); - entry->aux_entry->wait_completion.comp_count++; - ND_LOG_DEBUG(FI_LOG_EP_DATA, "READ Event comp_count = %d, total_count = %d\n", - entry->aux_entry->wait_completion.comp_count, - entry->aux_entry->wait_completion.total_count); - if (entry->aux_entry->wait_completion.comp_count < entry->aux_entry->wait_completion.total_count) { - /* Should wait some remaining completion events about read operation */ - LeaveCriticalSection(&entry->aux_entry->wait_completion.comp_lock); - entry->aux_entry = NULL; - ofi_nd_free_cq_entry(entry); - return; - } - LeaveCriticalSection(&entry->aux_entry->wait_completion.comp_lock); - } - - /*TODO: Handle erroneous case "result->Status != S_OK" */ - ofi_nd_dispatch_cq_event(entry->state == LARGE_MSG_RECV_REQ ? 
- LARGE_MSG_REQ : NORMAL_EVENT, entry, result); -} - -void ofi_nd_write_event(ND2_RESULT *result) -{ - assert(result); - assert(result->RequestType == Nd2RequestTypeWrite); - - nd_cq_entry *entry = (nd_cq_entry*)result->RequestContext; - assert(entry); - - struct nd_ep *ep = (struct nd_ep*)result->QueuePairContext; - assert(ep); - assert(ep->fid.fid.fclass == FI_CLASS_EP); - - ND_LOG_EVENT_INFO(entry); - - /* Check whether the operation is complex, i.e. write operation - * may consist from several subtasks of write */ - if (entry->aux_entry) { - EnterCriticalSection(&entry->aux_entry->wait_completion.comp_lock); - entry->aux_entry->wait_completion.comp_count++; - - if (entry->aux_entry->wait_completion.comp_count < entry->aux_entry->wait_completion.total_count) { - /* Should wait some remaining completion events about write operation */ - LeaveCriticalSection(&entry->aux_entry->wait_completion.comp_lock); - entry->aux_entry = NULL; - ofi_nd_free_cq_entry(entry); - return; - } - LeaveCriticalSection(&entry->aux_entry->wait_completion.comp_lock); - } - - if (!entry->context) { - /* This means that this write was an internal event, - * just release it */ - ofi_nd_free_cq_entry(entry); - return; - } - - if (entry->flags & FI_REMOTE_CQ_DATA) { - if (ofi_nd_ep_injectdata( - &ep->fid, 0, 0, entry->data, - FI_ADDR_UNSPEC) != FI_SUCCESS) - ND_LOG_WARN(FI_LOG_CQ, "failed to write-inject"); - } - - if (ep->cntr_write) { - if (result->Status != S_OK) { - InterlockedIncrement64(&ep->cntr_write->err); - } - InterlockedIncrement64(&ep->cntr_write->counter); - WakeByAddressAll((void*)&ep->cntr_write->counter); - } - - int notify = ofi_nd_util_completion_blackmagic( - ep->info->tx_attr->op_flags, ep->send_flags, entry->flags) || - result->Status != S_OK; - - if (notify) { - PostQueuedCompletionStatus( - entry->result.Status == S_OK ? 
ep->cq_send->iocp : ep->cq_send->err, - 0, 0, &entry->base.ov); - InterlockedIncrement(&ep->cq_send->count); - WakeByAddressAll((void*)&ep->cq_send->count); - } - else { /* if notification is not requested - just free entry */ - ofi_nd_free_cq_entry(entry); - } -} - -void ofi_nd_split_msg_iov_2_rma_iov(const struct fi_rma_iov *rma_iovecs, const size_t rma_count, - const struct iovec *msg_iovecs, const size_t msg_count, - struct fi_rma_iov res_iovecs[ND_MSG_INTERNAL_IOV_LIMIT], - size_t *res_count, - size_t from_split_map[ND_MSG_INTERNAL_IOV_LIMIT], - size_t to_split_map[ND_MSG_INTERNAL_IOV_LIMIT], - uint64_t remote_addr[ND_MSG_INTERNAL_IOV_LIMIT]) -{ - size_t i; - - struct iovec from_rma_iovecs[ND_MSG_IOV_LIMIT]; - size_t from_rma_count = rma_count; - - struct iovec res_msg_iovecs[ND_MSG_IOV_LIMIT]; - size_t res_msg_count = 0; - - - /* Convert RMA iovecs to MSG iovecs to be able to reuse - * them in @ofi_nd_repack_iovecs */ - for (i = 0; i < rma_count; i++) { - from_rma_iovecs[i].iov_base = (void *)rma_iovecs[i].addr; - from_rma_iovecs[i].iov_len = rma_iovecs[i].len; - } - - ofi_nd_repack_iovecs(from_rma_iovecs, from_rma_count, - msg_iovecs, msg_count, - res_msg_iovecs, &res_msg_count, - from_split_map, to_split_map, remote_addr); - - /* Extract MSG iov to RMA iovecs and returns them */ - for (i = 0; i < res_msg_count; i++) { - res_iovecs[i].addr = remote_addr[i]; - res_iovecs[i].len = res_msg_iovecs[i].iov_len; - res_iovecs[i].key = rma_iovecs[from_split_map[i]].key; - - remote_addr[i] = (uint64_t)res_msg_iovecs[i].iov_base; - } - - *res_count = res_msg_count; -} - -#endif /* _WIN32 */ - diff --git a/prov/netdir/src/netdir_ep_srx.c b/prov/netdir/src/netdir_ep_srx.c deleted file mode 100644 index e42dc05044c..00000000000 --- a/prov/netdir/src/netdir_ep_srx.c +++ /dev/null @@ -1,337 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. 
You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. -* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. 
-*/ - -#ifdef _WIN32 - -#include "netdir.h" -#include "netdir_ov.h" -#include "netdir_cq.h" -#include "netdir_log.h" -#include "netdir_iface.h" -#include "netdir_unexp.h" - -#include "rdma/fabric.h" -#include "rdma/fi_endpoint.h" - -#include "ofi.h" -#include "ofi_util.h" - -static ssize_t ofi_nd_srx_recv(struct fid_ep *ep, void *buf, size_t len, - void *desc, fi_addr_t src_addr, void *context); -static ssize_t ofi_nd_srx_recvmsg(struct fid_ep *ep_fid, const struct fi_msg *msg, - uint64_t flags); -static ssize_t ofi_nd_srx_recvv(struct fid_ep *ep_fid, const struct iovec *iov, void **desc, - size_t count, fi_addr_t src_addr, void *context); -static ssize_t ofi_nd_no_send(struct fid_ep *ep, const void *buf, size_t len, - void *desc, fi_addr_t src_addr, void *context); -static ssize_t ofi_nd_no_sendmsg(struct fid_ep *ep_fid, const struct fi_msg *msg, - uint64_t flags); -static ssize_t ofi_nd_no_sendv(struct fid_ep *ep_fid, const struct iovec *iov, void **desc, - size_t count, fi_addr_t dest_addr, void *context); -static ssize_t ofi_nd_no_inject(struct fid_ep *ep_fid, const void *buf, size_t len, - fi_addr_t dest_addr); -static ssize_t ofi_nd_no_senddata(struct fid_ep *ep, const void *buf, size_t len, void *desc, - uint64_t data, fi_addr_t dest_addr, void *context); -static ssize_t ofi_nd_no_injectdata(struct fid_ep *ep, const void *buf, size_t len, - uint64_t data, fi_addr_t dest_addr); -static int ofi_nd_srx_close(struct fid *fid); -static ssize_t ofi_nd_srx_cancel(fid_t fid, void *context); -extern int ofi_nd_ep_getopt(struct fid* ep, int level, int optname, - void* optval, size_t* optlen); - -struct fi_ops_msg ofi_nd_srx_msg = { - .size = sizeof(ofi_nd_srx_msg), - .recv = ofi_nd_srx_recv, - .recvv = ofi_nd_srx_recvv, - .recvmsg = ofi_nd_srx_recvmsg, - .send = ofi_nd_no_send, - .sendv = ofi_nd_no_sendv, - .sendmsg = ofi_nd_no_sendmsg, - .inject = ofi_nd_no_inject, - .senddata = ofi_nd_no_senddata, - .injectdata = ofi_nd_no_injectdata -}; - -static struct fi_ops 
ofi_nd_fi_ops = { - .size = sizeof(ofi_nd_fi_ops), - .close = ofi_nd_srx_close, - .bind = fi_no_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open -}; - -static struct fid ofi_nd_fid = { - .fclass = FI_CLASS_SRX_CTX, - .context = NULL, - .ops = &ofi_nd_fi_ops -}; - -static struct fi_ops_ep ofi_nd_ep_ops = { - .size = sizeof(ofi_nd_ep_ops), - .cancel = ofi_nd_srx_cancel, - .getopt = ofi_nd_ep_getopt, - .setopt = fi_no_setopt, - .tx_ctx = fi_no_tx_ctx, - .rx_ctx = fi_no_rx_ctx, - .rx_size_left = fi_no_rx_size_left, - .tx_size_left = fi_no_tx_size_left -}; - -int ofi_nd_srx_ctx(struct fid_domain *pdomain, - struct fi_rx_attr *attr, struct fid_ep **rx_ep, - void *context) -{ - OFI_UNUSED(attr); - struct nd_domain *domain = container_of(pdomain, struct nd_domain, fid); - struct nd_srx *srx = (struct nd_srx*) calloc(1, sizeof(*srx)); - if (!srx) - return -FI_ENOMEM; - - struct nd_srx def = { - .fid = { - .fid = { - .fclass = FI_CLASS_SRX_CTX, - .context = context, - .ops = &ofi_nd_fi_ops - }, - .ops = &ofi_nd_ep_ops, - .msg = &ofi_nd_srx_msg - }, - .domain = domain, - .attr = { - .caps = FI_MSG | FI_RECV, - .mode = 0, - .op_flags = 0, - .comp_order = FI_ORDER_STRICT, - .total_buffered_recv = 0, - .size = (size_t)gl_data.inline_thr, - .iov_limit = (size_t)min(domain->ainfo.MaxReceiveSge, ND_MSG_IOV_LIMIT) - 1 - } - }; - - *srx = def; - /* TODO */ - dlist_init(&srx->received); - - *rx_ep = &srx->fid; - - InitializeCriticalSection(&srx->prepost_lock); - - return FI_SUCCESS; -} - -static ssize_t ofi_nd_srx_recvmsg(struct fid_ep *pep, const struct fi_msg *msg, - uint64_t flags) -{ - assert(pep->fid.fclass == FI_CLASS_SRX_CTX); - assert(msg); - - if (pep->fid.fclass != FI_CLASS_SRX_CTX) - return -FI_EINVAL; - - size_t i; - size_t len = 0; - - struct nd_srx *srx = container_of(pep, struct nd_srx, fid); - - if (msg->iov_count > min(srx->domain->ainfo.MaxReceiveSge, ND_MSG_IOV_LIMIT) - 1) - return -FI_EINVAL; - - for (i = 0; i < msg->iov_count; i++) { - if 
(msg->msg_iov[i].iov_len && !msg->msg_iov[i].iov_base) - return -FI_EINVAL; - len += msg->msg_iov[i].iov_len; - } - - struct nd_cq_entry *entry = ofi_nd_buf_alloc_nd_cq_entry(); - if (!entry) - return -FI_ENOMEM; - memset(entry, 0, sizeof(*entry)); - - entry->buf = (msg->iov_count == 1) ? msg->msg_iov[0].iov_base : 0; - entry->len = len; - entry->data = msg->data; - entry->flags = flags | FI_MSG | FI_RECV; - entry->domain = srx->domain; - entry->context = msg->context; - entry->iov_cnt = msg->iov_count; - entry->seq = InterlockedAdd64(&srx->domain->msg_cnt, 1); - - for (i = 0; i < msg->iov_count; i++) { - entry->iov[i] = msg->msg_iov[i]; - } - - /* store allocated entry in 1st pointer of internal data of context */ - if (msg->context) - ND_FI_CONTEXT(msg->context) = entry; - - ofi_nd_queue_push(&srx->prepost, &entry->queue_item); - - ofi_nd_srx_match(srx); - - return FI_SUCCESS; -} - -static ssize_t ofi_nd_srx_recvv(struct fid_ep *pep, const struct iovec *iov, - void **desc, - size_t count, fi_addr_t src_addr, void *context) -{ - struct fi_msg msg = { - .msg_iov = iov, - .desc = desc, - .iov_count = count, - .addr = src_addr, - .context = context, - .data = 0 - }; - - assert(pep->fid.fclass == FI_CLASS_SRX_CTX); - - if (pep->fid.fclass != FI_CLASS_SRX_CTX) - return -FI_EINVAL; - - struct nd_srx *rx_ctx = container_of(pep, struct nd_srx, fid); - - return ofi_nd_srx_recvmsg(pep, &msg, rx_ctx->attr.op_flags); -} - -static ssize_t ofi_nd_srx_recv(struct fid_ep *pep, void *buf, size_t len, - void *desc, fi_addr_t src_addr, void *context) -{ - struct iovec iov = { - .iov_base = buf, - .iov_len = len - }; - - return ofi_nd_srx_recvv(pep, &iov, &desc, 1, src_addr, context); -} - -static int ofi_nd_srx_close(struct fid *fid) -{ - assert(fid); - assert(fid->fclass == FI_CLASS_SRX_CTX); - - if (fid->fclass != FI_CLASS_SRX_CTX) - return -FI_EINVAL; - - struct nd_srx *srx = container_of(fid, struct nd_srx, fid.fid); - - DeleteCriticalSection(&srx->prepost_lock); - if (srx->srx) 
- srx->srx->lpVtbl->Release(srx->srx); - free(srx); - - return FI_SUCCESS; -} - -static ssize_t ofi_nd_no_send(struct fid_ep *ep, const void *buf, size_t len, - void *desc, fi_addr_t src_addr, void *context) -{ - OFI_UNUSED(ep); - OFI_UNUSED(buf); - OFI_UNUSED(len); - OFI_UNUSED(desc); - OFI_UNUSED(src_addr); - OFI_UNUSED(context); - return -FI_ENOSYS; -} - -static ssize_t ofi_nd_no_sendmsg(struct fid_ep *ep_fid, const struct fi_msg *msg, - uint64_t flags) -{ - OFI_UNUSED(ep_fid); - OFI_UNUSED(msg); - OFI_UNUSED(flags); - return -FI_ENOSYS; -} - -static ssize_t ofi_nd_no_sendv(struct fid_ep *ep_fid, const struct iovec *iov, void **desc, - size_t count, fi_addr_t dest_addr, void *context) -{ - OFI_UNUSED(ep_fid); - OFI_UNUSED(iov); - OFI_UNUSED(desc); - OFI_UNUSED(count); - OFI_UNUSED(dest_addr); - OFI_UNUSED(context); - return -FI_ENOSYS; -} -static ssize_t ofi_nd_no_inject(struct fid_ep *ep_fid, const void *buf, size_t len, - fi_addr_t dest_addr) -{ - OFI_UNUSED(ep_fid); - OFI_UNUSED(buf); - OFI_UNUSED(len); - OFI_UNUSED(dest_addr); - return -FI_ENOSYS; -} - -static ssize_t ofi_nd_no_senddata(struct fid_ep *ep, const void *buf, size_t len, void *desc, - uint64_t data, fi_addr_t dest_addr, void *context) -{ - OFI_UNUSED(ep); - OFI_UNUSED(buf); - OFI_UNUSED(len); - OFI_UNUSED(desc); - OFI_UNUSED(data); - OFI_UNUSED(dest_addr); - OFI_UNUSED(context); - return -FI_ENOSYS; -} - -static ssize_t ofi_nd_no_injectdata(struct fid_ep *ep, const void *buf, size_t len, - uint64_t data, fi_addr_t dest_addr) -{ - OFI_UNUSED(ep); - OFI_UNUSED(buf); - OFI_UNUSED(len); - OFI_UNUSED(data); - OFI_UNUSED(dest_addr); - return -FI_ENOSYS; -} - -static ssize_t ofi_nd_srx_cancel(fid_t fid, void *context) -{ - assert(fid); - assert(fid->fclass == FI_CLASS_SRX_CTX); - assert(context); - - if (!context) { - ND_LOG_WARN(FI_LOG_EP_DATA, "Context is NULL \n"); - return -FI_EINVAL; - } - - return ofi_nd_cq_cancel(fid, context); -} - -#endif /* _WIN32 */ - diff --git a/prov/netdir/src/netdir_eq.c 
b/prov/netdir/src/netdir_eq.c deleted file mode 100644 index 9deeef24aa7..00000000000 --- a/prov/netdir/src/netdir_eq.c +++ /dev/null @@ -1,411 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. -* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. 
-*/ - -#ifdef _WIN32 - -#include "netdir.h" -#include "netdir_ov.h" -#include "netdir_log.h" -#include "netdir_iface.h" - -#include "ofi_util.h" - -static int ofi_nd_eq_close(struct fid *fid); -static ssize_t ofi_nd_eq_read(struct fid_eq *eq, uint32_t *event, - void *buf, size_t len, uint64_t flags); -static ssize_t ofi_nd_eq_write(struct fid_eq *eq, uint32_t ev, - const void *buf, size_t len, uint64_t flags); -static ssize_t ofi_nd_eq_readerr(struct fid_eq *eq, struct fi_eq_err_entry *buf, - uint64_t flags); -static ssize_t ofi_nd_eq_sread(struct fid_eq *eq, uint32_t *event, - void *buf, size_t len, int timeout, - uint64_t flags); -static const char *ofi_nd_eq_strerror(struct fid_eq *eq, int prov_errno, - const void *err_data, char *buf, - size_t len); - -static struct fi_ops ofi_nd_fi_ops = { - .size = sizeof(ofi_nd_fi_ops), - .close = ofi_nd_eq_close, - .bind = fi_no_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open, -}; - -static struct fi_ops_eq ofi_nd_eq_ops = { - .size = sizeof(ofi_nd_eq_ops), - .read = ofi_nd_eq_read, - .readerr = ofi_nd_eq_readerr, - .write = ofi_nd_eq_write, - .sread = ofi_nd_eq_sread, - .strerror = ofi_nd_eq_strerror -}; - -int ofi_nd_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr, - struct fid_eq **peq, void *context) -{ - assert(fabric); - assert(fabric->fid.fclass == FI_CLASS_FABRIC); - - if (attr) { - if (attr->wait_obj != FI_WAIT_NONE && attr->wait_obj != FI_WAIT_UNSPEC) - return -FI_EBADFLAGS; - } - - struct nd_eq *eq = (struct nd_eq*)calloc(1, sizeof(*eq)); - if (!eq) - return -FI_ENOMEM; - - struct nd_eq def = { - .fid = { - .fid = { - .fclass = FI_CLASS_EQ, - .context = context, - .ops = &ofi_nd_fi_ops - }, - .ops = &ofi_nd_eq_ops - } - }; - - *eq = def; - - InitializeCriticalSection(&eq->lock); - - eq->iocp = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0); - if (!eq->iocp || eq->iocp == INVALID_HANDLE_VALUE) { - ofi_nd_eq_close(&eq->fid.fid); - return H2F(HRESULT_FROM_WIN32(GetLastError())); - 
} - - eq->err = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0); - if (!eq->err || eq->err == INVALID_HANDLE_VALUE) { - ofi_nd_eq_close(&eq->fid.fid); - return H2F(HRESULT_FROM_WIN32(GetLastError())); - } - - *peq = &eq->fid; - - return FI_SUCCESS; -} - -static int ofi_nd_eq_close(struct fid *fid) -{ - assert(fid->fclass == FI_CLASS_EQ); - - struct nd_eq *eq = container_of(fid, struct nd_eq, fid.fid); - - if (eq->iocp && eq->iocp != INVALID_HANDLE_VALUE) - CloseHandle(eq->iocp); - if (eq->err && eq->err != INVALID_HANDLE_VALUE) - CloseHandle(eq->err); - - DeleteCriticalSection(&eq->lock); - - free(eq); - return FI_SUCCESS; -} - -static inline ssize_t ofi_nd_eq_ev2buf(struct nd_eq_event *ev, - void *buf, size_t len) -{ - assert(ev); - - size_t copylen = 0; - char* dst = (char *)buf; - - if (!ev->is_custom) { - switch (ev->eq_event) { - case FI_CONNREQ: - case FI_CONNECTED: - case FI_SHUTDOWN: - copylen = min(sizeof(struct fi_eq_cm_entry), len); - break; - case FI_AV_COMPLETE: - case FI_MR_COMPLETE: - copylen = min(sizeof(struct fi_eq_entry), len); - break; - default: - ND_LOG_WARN(FI_LOG_EQ, "unknown event type: %d\n", - ev->eq_event); - copylen = min(sizeof(struct fi_eq_entry), len); - break; - } - } - - if (copylen) - memcpy(dst, &ev->operation, copylen); - - if (ev->len) { - assert(ev->data); - if (len > copylen) { - dst += copylen; - memcpy(dst, ev->data, min(len - copylen, ev->len)); - copylen += min(len - copylen, ev->len); - } - } - return (ssize_t)copylen; -} - -static ssize_t ofi_nd_eq_read(struct fid_eq *peq, uint32_t *pev, - void *buf, size_t len, uint64_t flags) -{ - assert(peq); - assert(pev); - assert(peq->fid.fclass == FI_CLASS_EQ); - - struct nd_eq *eq = container_of(peq, struct nd_eq, fid); - - DWORD bytes; - ULONG_PTR key; - OVERLAPPED *ov; - ssize_t res = 0; - - struct nd_eq_event *ev = 0; - - if (!eq->count) - return -FI_EAGAIN; - - /* we have to use critical section here because concurrent thread - may read event with FI_PEEK flag */ - 
EnterCriticalSection(&eq->lock); - - /* check again because it may be changed on critical section barrier */ - if (!eq->count) { - res = -FI_EAGAIN; - goto fn_complete; - } - - /* if there is peeked item - use it, else - try to read from queue */ - if (eq->peek) { - ev = eq->peek; - } - else { - assert(eq->iocp); - if (GetQueuedCompletionStatus(eq->iocp, &bytes, &key, &ov, 0)) { - ev = container_of(ov, struct nd_eq_event, ov); - } - } - - /* in case if no event available, but counter is non-zero - error available */ - if (!ev && eq->count) { - res = -FI_EAVAIL; - goto fn_complete; - } - - res = ofi_nd_eq_ev2buf(ev, buf, len); - *pev = ev->eq_event; - - if (flags & FI_PEEK) { - eq->peek = ev; - /* we updated peek ptr, notify other waiters about this */ - WakeByAddressAll((void*)&eq->count); - } - else { - eq->peek = NULL; - InterlockedDecrement(&eq->count); - assert(eq->count >= 0); - } - -fn_complete: - LeaveCriticalSection(&eq->lock); - return res; -} - -static ssize_t ofi_nd_eq_readerr(struct fid_eq *peq, - struct fi_eq_err_entry *buf, uint64_t flags) -{ - assert(peq); - assert(peq->fid.fclass == FI_CLASS_EQ); - assert(buf); - - OFI_UNUSED(flags); - - struct nd_eq *eq = container_of(peq, struct nd_eq, fid); - - DWORD bytes; - ULONG_PTR key; - OVERLAPPED *ov; - - struct nd_eq_event *ev = NULL; - - if (!eq->errdata) { - free(eq->errdata); - eq->errdata = NULL; - } - - assert(eq->err); - if (!GetQueuedCompletionStatus(eq->err, &bytes, &key, &ov, 0)) - return -FI_EAGAIN; - - InterlockedDecrement(&eq->count); - assert(eq->count >= 0); - ev = container_of(ov, struct nd_eq_event, ov); - - if (buf->err_data && buf->err_data_size) { - memcpy(buf, &ev->error, offsetof(struct fi_eq_err_entry, err_data)); - buf->err_data_size = MIN(buf->err_data_size, ev->error.err_data_size); - memcpy(buf->err_data, ev->error.err_data, buf->err_data_size); - /* to be sure that the errdata in EQ is NULL */ - eq->errdata = NULL; - } else { - /* for compatibility purposes (release < 1.5 or 
passed err_data_size is 0) */ - memcpy(buf, &ev->error, sizeof(ev->error)); - eq->errdata = ev->error.err_data; - } - - return 0; -} - -static ssize_t ofi_nd_eq_sread(struct fid_eq *peq, uint32_t *pev, - void *buf, size_t len, int timeout, - uint64_t flags) -{ - assert(peq); - assert(pev); - assert(peq->fid.fclass == FI_CLASS_EQ); - - struct nd_eq *eq = container_of(peq, struct nd_eq, fid); - - DWORD bytes; - ULONG_PTR key; - OVERLAPPED *ov; - ssize_t res = 0; - - struct nd_eq_event *ev = 0; - - LONG zero = 0; - - for (;;) { - do { - if (!WaitOnAddress( - &eq->count, &zero, sizeof(eq->count), - (DWORD)timeout) && timeout >= 0) - return -FI_EAGAIN; - } while (!eq->count); - - /* we have to use critical section here because concurrent thread - may read event with FI_PEEK flag */ - EnterCriticalSection(&eq->lock); - - if (!eq->count) { - LeaveCriticalSection(&eq->lock); - if (timeout >= 0) - return -FI_EAGAIN; - else - continue; - } - - /* if there is peeked item - use it, else - try to read from queue */ - if (eq->peek) { - ev = eq->peek; - } - else { - assert(eq->iocp); - if (GetQueuedCompletionStatus( - eq->iocp, &bytes, &key, &ov, 0)) { - ev = container_of(ov, struct nd_eq_event, ov); - } - } - - /* in case if no event available, but counter is non-zero - error available */ - if (!ev && eq->count) { - res = -FI_EAVAIL; - goto fn_complete; - } - - res = ofi_nd_eq_ev2buf(ev, buf, len); - *pev = ev->eq_event; - - if (flags & FI_PEEK) { - eq->peek = ev; - /* we updated peek ptr, notify other waiters about this */ - WakeByAddressAll((void*)&eq->count); - } - else { - eq->peek = NULL; - InterlockedDecrement(&eq->count); - assert(eq->count >= 0); - } - -fn_complete: - LeaveCriticalSection(&eq->lock); - return res; - } -} - -static const char *ofi_nd_eq_strerror(struct fid_eq *eq, int prov_errno, - const void *err_data, char *buf, size_t len) -{ - OFI_UNUSED(eq); - OFI_UNUSED(err_data); - - if (buf && len) - return strncpy(buf, fi_strerror(-prov_errno), len); - return 
fi_strerror(-prov_errno); -} - -static ssize_t ofi_nd_eq_write(struct fid_eq *peq, uint32_t ev, - const void *buf, size_t len, uint64_t flags) -{ - OFI_UNUSED(flags); - - assert(peq); - assert(peq->fid.fclass == FI_CLASS_EQ); - - struct nd_eq *eq = container_of(peq, struct nd_eq, fid); - - nd_eq_event *custom = ofi_nd_buf_alloc_nd_eq_event(); - if (!custom) - return -FI_ENOMEM; - memset(custom, 0, sizeof(*custom)); - - custom->is_custom = 1; - custom->eq_event = ev; - if (len) { - assert(buf); - custom->data = malloc(len); - if (!custom->data) { - ofi_nd_eq_free_event(custom); - return -FI_ENOMEM; - } - custom->len = len; - memcpy(custom->data, buf, len); - } - - ofi_nd_eq_push(eq, custom); - - return len; -} - - -#endif /* _WIN32 */ - diff --git a/prov/netdir/src/netdir_fabric.c b/prov/netdir/src/netdir_fabric.c deleted file mode 100644 index 7c98f30615d..00000000000 --- a/prov/netdir/src/netdir_fabric.c +++ /dev/null @@ -1,121 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. -* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. 
-* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -*/ - -#ifdef _WIN32 - -#include -#include - -#include "netdir.h" -#include "ofi_util.h" -#include "ofi_enosys.h" -#include "rdma/fabric.h" - -#include "netdir_ov.h" -#include "netdir_iface.h" - -static int ofi_nd_fabric_close(fid_t fid); - -static struct fi_ops ofi_nd_fi_ops = { - .size = sizeof(ofi_nd_fi_ops), - .close = ofi_nd_fabric_close, - .bind = fi_no_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open, -}; - -static struct fid ofi_nd_fid = { - .fclass = FI_CLASS_FABRIC, - .context = NULL, - .ops = &ofi_nd_fi_ops -}; - -static struct fi_ops_fabric ofi_nd_fabric_ops = { - .size = sizeof(ofi_nd_fabric_ops), - .domain = ofi_nd_domain_open, - .passive_ep = ofi_nd_passive_endpoint, - .eq_open = ofi_nd_eq_open, - .wait_open = fi_no_wait_open, - .trywait = fi_no_trywait -}; - -static int ofi_nd_fabric_close(fid_t fid) -{ - struct nd_fabric *fabric; - fabric = container_of(fid, struct nd_fabric, fid.fid); - free(fabric); - /* due to issues in cleanup NetworkDirect on library - unload make clening here, on fabric close */ - ofi_nd_shutdown(); - return FI_SUCCESS; -} - -int ofi_nd_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fab, - void *context) -{ - OFI_UNUSED(context); - - if (attr) { - if (attr->name && strcmp(attr->name, ofi_nd_prov.name)) - return -FI_EINVAL; - if (attr->prov_name && strcmp(attr->prov_name, ofi_nd_prov.name)) - return -FI_EINVAL; - if (attr->prov_version && attr->prov_version != ofi_nd_prov.version) - return -FI_EINVAL; - } - - struct nd_fabric 
*fabric = (struct nd_fabric*)calloc(1, sizeof(*fabric)); - if (!fabric) - return -FI_ENOMEM; - - struct nd_fabric def = { - .fid = { - .fid = ofi_nd_fid, - .ops = &ofi_nd_fabric_ops - } - }; - - *fabric = def; - - *fab = &fabric->fid; - - fi_param_get_int(&ofi_nd_prov, "inlinethr", &gl_data.inline_thr); - fi_param_get_int(&ofi_nd_prov, "prepostcnt", &gl_data.prepost_cnt); - fi_param_get_int(&ofi_nd_prov, "prepostbufcnt", &gl_data.prepost_buf_cnt); - - gl_data.total_avail = gl_data.prepost_cnt * gl_data.prepost_buf_cnt; - - return FI_SUCCESS; -} - -#endif /* _WIN32 */ - diff --git a/prov/netdir/src/netdir_iface.h b/prov/netdir/src/netdir_iface.h deleted file mode 100644 index 0b5829713ec..00000000000 --- a/prov/netdir/src/netdir_iface.h +++ /dev/null @@ -1,273 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. -* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -*/ - -#ifndef _FI_NETDIR_IFACE_H_ -#define _FI_NETDIR_IFACE_H_ - -#include -#include -#include - -#include - -#include "rdma/fabric.h" - -#include "ofi_mem.h" -#include "ofi_list.h" -#include "rdma/fi_eq.h" -#include "rdma/fi_domain.h" -#include "rdma/fi_endpoint.h" - -#include "netdir_buf.h" -#include "netdir_queue.h" -#include "netdir_ov.h" - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -typedef void(*nd_free_event_t)(struct nd_event_base* base); -typedef void(*nd_event_t)(struct nd_event_base* base, DWORD bytes); -typedef void(*nd_err_t)(struct nd_event_base* base, DWORD bytes, DWORD err); - -typedef struct nd_event_base { - OVERLAPPED ov; - - nd_free_event_t free; - nd_event_t event_cb; - nd_err_t err_cb; -} nd_event_base; - -struct nd_fabric { - struct fid_fabric fid; -}; - -typedef struct nd_flow_cntrl_flags { - unsigned req_ack : 1; - unsigned ack : 1; - unsigned empty : 1; -} nd_flow_cntrl_flags; - -struct nd_msgheader { - uint64_t data; - enum ofi_nd_cq_event event; - nd_flow_cntrl_flags flags; - size_t location_cnt; -}; - -struct nd_msgprefix { - UINT32 token; - struct nd_msgheader header; -}; - -struct nd_inlinebuf { - UINT32 token; - void* buffer; -}; - -struct nd_msg_location { - uint64_t addr; - size_t len; - uint32_t remote_mr_token; -}; - -struct nd_notifybuf { - UINT32 token; - struct nd_msg_location location[ND_MSG_IOV_LIMIT]; -}; - -OFI_ND_NB_BUF_TYPED(nd_msgprefix, struct nd_msgprefix); -OFI_ND_NB_BUF_TYPED(nd_inlinebuf, struct nd_inlinebuf); -OFI_ND_NB_BUF_TYPED(nd_notifybuf, struct nd_notifybuf); - -struct nd_domain { - struct fid_domain fid; - struct nd_eq *eq; - struct fi_info *info; - - uint64_t eq_flags; - - IND2Adapter *adapter; - IND2CompletionQueue *cq; 
- - nd_event_base ov; - - HANDLE adapter_file; - ND2_ADAPTER_INFO ainfo; - - LONG64 msg_cnt; - - LONG cq_canceled; - - ND_BUF_FOOTER(nd_msgprefix) msgfooter; - ND_BUF_FOOTER(nd_inlinebuf) inlinebuf; - ND_BUF_FOOTER(nd_notifybuf) notifybuf; - - union { - struct sockaddr addr; - struct sockaddr_in addr4; - struct sockaddr_in6 addr6; - } addr; -#if 0 - pthread_t progress_thread; - int do_progress; -#endif - struct dlist_entry ep_list; -}; - -struct nd_pep { - struct fid_pep fid; - struct fi_info *info; - - struct nd_eq *eq; - - IND2Adapter *adapter; - IND2Listener *listener; - - HANDLE adapter_file; -}; - -struct nd_eq { - struct fid_eq fid; - size_t cnum; - HANDLE iocp; - HANDLE err; - volatile LONG count; /* total number of available events, - including peek, queued & errors */ - struct nd_eq_event *peek; - - CRITICAL_SECTION lock; - void* errdata; -}; - - -struct nd_cq { - struct fid_cq fid; - enum fi_cq_format format; - - HANDLE iocp; - HANDLE err; - volatile LONG count; /* total number of available events, - including queued & errors */ -}; - -struct nd_cntr { - struct fid_cntr fid; - volatile LONG64 counter; - volatile LONG64 err; -}; - -struct nd_connreq { - struct fid handle; - IND2Connector *connector; -}; - -struct nd_unexpected { - IND2MemoryRegion *mr; - UINT32 token; - struct nd_unexpected_buf **unexpected; -#if 0 - size_t used_counter; -#endif - CRITICAL_SECTION unexp_lock; - struct dlist_entry received; - LONG active; -}; - -typedef struct nd_flow_block_flags { - unsigned is_send_blocked : 1; -} nd_flow_block_flags; - -struct nd_ep { - struct fid_ep fid; - struct fi_info *info; - - struct nd_domain *domain; - struct nd_eq *eq; - struct nd_srx *srx; - - struct nd_cq *cq_send; - struct nd_cq *cq_recv; - - uint64_t send_flags; - uint64_t recv_flags; - - struct nd_cntr *cntr_send; - struct nd_cntr *cntr_recv; - struct nd_cntr *cntr_read; - struct nd_cntr *cntr_write; - - IND2Connector *connector; - IND2QueuePair *qp; - - struct nd_unexpected unexpected; - 
struct nd_queue_queue prepost; - struct nd_queue_queue internal_prepost; - - nd_event_base disconnect_ov; - - CRITICAL_SECTION prepost_lock; - LONG shutdown; - LONG connected; - - struct dlist_entry entry; - struct { - nd_flow_block_flags flags; - size_t used_counter; - CRITICAL_SECTION send_lock; - } send_op; - struct nd_queue_queue send_queue; -}; - -struct nd_srx { - struct fid_ep fid; - struct fi_rx_attr attr; - IND2SharedReceiveQueue *srx; - struct nd_domain *domain; - struct dlist_entry received; - CRITICAL_SECTION prepost_lock; - struct nd_queue_queue prepost; -}; - -struct nd_mr { - struct fid_mr fid; - - IND2MemoryRegion *mr; - IND2MemoryWindow *wnd; -}; - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif /* _FI_NETDIR_IFACE_H_ */ - diff --git a/prov/netdir/src/netdir_init.c b/prov/netdir/src/netdir_init.c deleted file mode 100644 index 3c8b0cc9336..00000000000 --- a/prov/netdir/src/netdir_init.c +++ /dev/null @@ -1,205 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. -* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. 
-* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -*/ - -#ifdef _WIN32 - -#include "netdir.h" -#include "netdir_buf.h" -#include "ofi_prov.h" -#include "ofi_util.h" -#include "ofi_mem.h" - -#include "netdir_ov.h" -#include "netdir_iface.h" - -#include "netdir_queue.h" - -const char ofi_nd_prov_name[] = "netdir"; - -struct fi_provider ofi_nd_prov = { - .name = ofi_nd_prov_name, - .version = OFI_VERSION_DEF_PROV, - .fi_version = OFI_VERSION_LATEST, - .getinfo = ofi_nd_getinfo, - .fabric = ofi_nd_fabric, - .cleanup = ofi_nd_fini -}; - -static void ofi_nd_alter_defaults(uint32_t version, const struct fi_info *hints, - const struct fi_info *base_info, - struct fi_info *dest_info); - -struct util_prov ofi_nd_util_prov = { - .prov = &ofi_nd_prov, - .info = 0, - .alter_defaults = &ofi_nd_alter_defaults, - .flags = UTIL_RX_SHARED_CTX, -}; - -struct gl_data gl_data = { - /* 8 KByte */ - .inline_thr = 8192, - .prepost_cnt = 8, - .prepost_buf_cnt = 1, - .flow_control_cnt = 1, - .total_avail = 64 -}; - -static size_t nd_default_tx_iov_limit = 8; -static size_t nd_default_tx_size = 384; -static size_t nd_default_rx_iov_limit = 8; -static size_t nd_default_rx_size = 384; - -static void ofi_nd_alter_defaults(uint32_t version, const struct fi_info *hints, - const struct fi_info *base_info, - struct fi_info *dest_info) -{ - dest_info->tx_attr->iov_limit = min(base_info->tx_attr->iov_limit, - nd_default_tx_iov_limit); - dest_info->tx_attr->size = min(base_info->tx_attr->size, - nd_default_tx_size); - dest_info->rx_attr->iov_limit = 
min(base_info->rx_attr->iov_limit, - nd_default_rx_iov_limit); - dest_info->rx_attr->size = min(base_info->rx_attr->size, - nd_default_rx_size); -} - -int ofi_nd_getinfo(uint32_t version, const char *node, const char *service, - uint64_t flags, const struct fi_info *hints, - struct fi_info **info) -{ - if (ofi_nd_util_prov.info) { - return util_getinfo(&ofi_nd_util_prov, version, node, service, flags, - hints, info); - } else { - *info = NULL; - return -FI_EINVAL; - } -} - -void ofi_nd_fini(void) -{ - if (ofi_nd_util_prov.info) { - fi_freeinfo((void*)ofi_nd_util_prov.info); - ofi_nd_util_prov.info = 0; - } - ofi_nd_shutdown(); - nd_buf_fini_apply(); -} - -extern struct fi_provider ofi_nd_prov; - -static int ofi_nd_adapter_cb(const ND2_ADAPTER_INFO *adapter, const char *name) -{ - struct fi_info *info = fi_allocinfo(); - if (!info) - return -FI_ENOMEM; - - info->tx_attr->caps = FI_MSG | FI_SEND; - info->tx_attr->mode = FI_CONTEXT; - info->tx_attr->comp_order = FI_ORDER_STRICT; - info->tx_attr->inject_size = (size_t)gl_data.inline_thr; - info->tx_attr->size = (size_t)adapter->MaxInitiatorQueueDepth; - /* TODO: if optimization will be needed, we can use adapter->MaxInitiatorSge, - * and use ND SGE to send/write iovecs */ - info->tx_attr->iov_limit = (size_t)min(adapter->MaxInitiatorSge, ND_MSG_IOV_LIMIT); - info->tx_attr->rma_iov_limit = 1; - info->tx_attr->op_flags = OFI_ND_TX_OP_FLAGS; - info->tx_attr->msg_order = OFI_ND_MSG_ORDER; - - info->rx_attr->caps = FI_MSG | FI_RECV; - info->rx_attr->mode = FI_CONTEXT; - info->rx_attr->comp_order = FI_ORDER_STRICT; - info->rx_attr->total_buffered_recv = 0; - info->rx_attr->size = (size_t)adapter->MaxReceiveQueueDepth; - /* TODO: if optimization will be needed, we can use adapter->MaxInitiatorSge, - * and use ND SGE to recv iovecs */ - info->rx_attr->iov_limit = (size_t)min(adapter->MaxReceiveSge, ND_MSG_IOV_LIMIT); - info->rx_attr->msg_order = OFI_ND_MSG_ORDER; - - info->ep_attr->type = FI_EP_MSG; - info->ep_attr->protocol = 
FI_PROTO_NETWORKDIRECT; - info->ep_attr->protocol_version = 0; - info->ep_attr->max_msg_size = (size_t)adapter->MaxTransferLength; - - info->domain_attr->caps = OFI_ND_DOMAIN_CAPS; - info->domain_attr->name = strdup(name); - info->domain_attr->threading = FI_THREAD_SAFE; - info->domain_attr->control_progress = FI_PROGRESS_AUTO; - info->domain_attr->data_progress = FI_PROGRESS_AUTO; - info->domain_attr->resource_mgmt = FI_RM_DISABLED; - info->domain_attr->av_type = FI_AV_UNSPEC; - info->domain_attr->mr_mode = FI_MR_BASIC | OFI_MR_BASIC_MAP | FI_MR_LOCAL; - info->domain_attr->cq_cnt = (size_t)adapter->MaxCompletionQueueDepth; - info->domain_attr->mr_iov_limit = ND_MSG_IOV_LIMIT; - info->domain_attr->mr_cnt = OFI_ND_MAX_MR_CNT; - - info->fabric_attr->name = strdup(ofi_nd_prov_name); - info->fabric_attr->prov_version = OFI_VERSION_DEF_PROV; - - info->caps = OFI_ND_EP_CAPS | OFI_ND_DOMAIN_CAPS; - info->mode = FI_CONTEXT; - info->addr_format = FI_SOCKADDR; - - if (!ofi_nd_util_prov.info) { - ofi_nd_util_prov.info = info; - } else { - struct fi_info *finfo = (struct fi_info *) ofi_nd_util_prov.info; - - while (finfo->next) - finfo = finfo->next; - finfo->next = info; - } - - return FI_SUCCESS; -} - -NETDIR_INI -{ - fi_param_define(&ofi_nd_prov, "inlinethr", FI_PARAM_INT, - "Inline threshold: size of buffer to be send using pre-allocated buffer"); - fi_param_define(&ofi_nd_prov, "largemsgthr", FI_PARAM_INT, - "Large msg threshold: size of user data that is considered as large message"); - fi_param_define(&ofi_nd_prov, "prepostcnt", FI_PARAM_INT, - "Prepost Buffer Count: number of buffers to be preposted per EP and " - "not required internal ACK"); - fi_param_define(&ofi_nd_prov, "prepostbufcnt", FI_PARAM_INT, - "Count of Entries in Array of Preposted Buffers: number of set of buffer " - "in each entry array of buffers to be preposted per EP"); - - ofi_nd_startup(ofi_nd_adapter_cb); - return &ofi_nd_prov; -} - - -#endif /* _WIN32 */ - diff --git 
a/prov/netdir/src/netdir_log.h b/prov/netdir/src/netdir_log.h deleted file mode 100644 index dd716d0f959..00000000000 --- a/prov/netdir/src/netdir_log.h +++ /dev/null @@ -1,195 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. -* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -*/ - -#ifndef _FI_NETDIR_LOG_H_ -#define _FI_NETDIR_LOG_H_ - -#include "config.h" - -#include - -#include "rdma/providers/fi_log.h" - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -extern struct fi_provider ofi_nd_prov; - -#define ND_LOG_WARN(subsystem, ...) FI_WARN(&ofi_nd_prov, subsystem, __VA_ARGS__) -#define ND_LOG_INFO(subsystem, ...) FI_INFO(&ofi_nd_prov, subsystem, __VA_ARGS__) -#define ND_LOG_DEBUG(subsystem, ...) 
FI_DBG(&ofi_nd_prov, subsystem, __VA_ARGS__) - -#if ENABLE_DEBUG -# define ND_LOG_EVENT_INFO(entry) \ - ND_LOG_DEBUG(FI_LOG_EP_DATA, "\nflags: req_ack - %d, ack - %d, empty - %d\n" \ - "common: state - %d, event - %d\n", \ - (entry)->flow_cntrl_flags.req_ack, \ - (entry)->flow_cntrl_flags.ack, \ - (entry)->flow_cntrl_flags.empty, \ - (entry)->state, \ - (entry)->event) -#else -# define ND_LOG_EVENT_INFO(entry) -#endif - -#define FI_ND_GUID_FORMAT "%08lX-%04hX-%04hX-%02hhX%02hhX-%02hhX%02hhX%02hhX%02hhX%02hhX%02hhX" -#define FI_ND_GUID_ARG(guid) \ - (guid).Data1, (guid).Data2, (guid).Data3, \ - (guid).Data4[0], (guid).Data4[1], (guid).Data4[2], \ - (guid).Data4[3], (guid).Data4[4], (guid).Data4[5], \ - (guid).Data4[6], (guid).Data4[7] - -/* ofi_nd_strerror generates string message based on err value (GetLastError) - returned string is valid till next call of ofi_nd_strerror -*/ -static inline char *ofi_nd_strerror(DWORD err, HMODULE module) -{ - static char *message = NULL; - size_t size; - - /* if message is allocated - free it */ - if (message) - LocalFree(message); - - size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | - FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS | - (module ? FORMAT_MESSAGE_FROM_HMODULE : 0), - module, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), - (LPSTR)&message, 0, NULL); - - return size ? 
message : (char*)""; -} -static inline char * -ofi_nd_get_last_error_str(HRESULT hr, char *errmsg, SIZE_T max_msg_len) -{ - LPVOID lpMsgBuf; - DWORD dw = (DWORD)hr; - errno_t rc; - - FormatMessage( - FORMAT_MESSAGE_ALLOCATE_BUFFER | - FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, - dw, - MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), - (LPTSTR)&lpMsgBuf, - 0, NULL); - - strcpy_s(errmsg, max_msg_len, "NTStatus: "); - rc = strncat_s(errmsg, max_msg_len - strlen(errmsg), - lpMsgBuf, _TRUNCATE); - - LocalFree(lpMsgBuf); - - return errmsg; -} - -#define OFI_NDERR(err, str) \ - case err: \ - str = #err ; \ - break - -#define ND_FLUSHED 0x10000L /* undocumented ND error code */ -#define ND_DISCONNECTED 0xc000020C - -static char *ofi_nd_error_str(HRESULT hr) -{ - static char lerr[128]; - char *err_str = NULL; - - switch (hr) { - OFI_NDERR(ND_SUCCESS, err_str); - OFI_NDERR(ND_FLUSHED, err_str); - OFI_NDERR(ND_TIMEOUT, err_str); - OFI_NDERR(ND_PENDING, err_str); - OFI_NDERR(ND_BUFFER_OVERFLOW, err_str); - OFI_NDERR(ND_DEVICE_BUSY, err_str); - OFI_NDERR(ND_NO_MORE_ENTRIES, err_str); - OFI_NDERR(ND_UNSUCCESSFUL, err_str); - OFI_NDERR(ND_ACCESS_VIOLATION, err_str); - OFI_NDERR(ND_INVALID_HANDLE, err_str); - OFI_NDERR(ND_INVALID_DEVICE_REQUEST, err_str); - OFI_NDERR(ND_INVALID_PARAMETER, err_str); - OFI_NDERR(ND_NO_MEMORY, err_str); - OFI_NDERR(ND_INVALID_PARAMETER_MIX, err_str); - OFI_NDERR(ND_DATA_OVERRUN, err_str); - OFI_NDERR(ND_SHARING_VIOLATION, err_str); - OFI_NDERR(ND_INSUFFICIENT_RESOURCES, err_str); - OFI_NDERR(ND_DEVICE_NOT_READY, err_str); - OFI_NDERR(ND_IO_TIMEOUT, err_str); - OFI_NDERR(ND_NOT_SUPPORTED, err_str); - OFI_NDERR(ND_INTERNAL_ERROR, err_str); - OFI_NDERR(ND_INVALID_PARAMETER_1, err_str); - OFI_NDERR(ND_INVALID_PARAMETER_2, err_str); - OFI_NDERR(ND_INVALID_PARAMETER_3, err_str); - OFI_NDERR(ND_INVALID_PARAMETER_4, err_str); - OFI_NDERR(ND_INVALID_PARAMETER_5, err_str); - OFI_NDERR(ND_INVALID_PARAMETER_6, err_str); - 
OFI_NDERR(ND_INVALID_PARAMETER_7, err_str); - OFI_NDERR(ND_INVALID_PARAMETER_8, err_str); - OFI_NDERR(ND_INVALID_PARAMETER_9, err_str); - OFI_NDERR(ND_INVALID_PARAMETER_10, err_str); - OFI_NDERR(ND_CANCELED, err_str); - OFI_NDERR(ND_REMOTE_ERROR, err_str); - OFI_NDERR(ND_INVALID_ADDRESS, err_str); - OFI_NDERR(ND_INVALID_DEVICE_STATE, err_str); - OFI_NDERR(ND_INVALID_BUFFER_SIZE, err_str); - OFI_NDERR(ND_TOO_MANY_ADDRESSES, err_str); - OFI_NDERR(ND_ADDRESS_ALREADY_EXISTS, err_str); - OFI_NDERR(ND_CONNECTION_REFUSED, err_str); - OFI_NDERR(ND_CONNECTION_INVALID, err_str); - OFI_NDERR(ND_CONNECTION_ACTIVE, err_str); - OFI_NDERR(ND_HOST_UNREACHABLE, err_str); - OFI_NDERR(ND_CONNECTION_ABORTED, err_str); - OFI_NDERR(ND_DEVICE_REMOVED, err_str); - OFI_NDERR(ND_DISCONNECTED, err_str); - default: - err_str = ofi_nd_get_last_error_str(hr, lerr, sizeof(lerr)); - if (err_str == NULL) { - _snprintf(lerr, sizeof(lerr), "Unknown ND error %#08ld", hr); - err_str = lerr; - } - break; - } - return err_str; -} -#undef NDERR - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif /* _FI_NETDIR_LOG_H_ */ - diff --git a/prov/netdir/src/netdir_mr.c b/prov/netdir/src/netdir_mr.c deleted file mode 100644 index 02dbf98442a..00000000000 --- a/prov/netdir/src/netdir_mr.c +++ /dev/null @@ -1,355 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. 
-* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -*/ - -#ifdef _WIN32 - -#include "netdir.h" - -#include "netdir_ov.h" -#include "netdir_iface.h" - -#include "ofi.h" -#include "ofi_util.h" - -static int ofi_nd_mr_close(struct fid *fid); - -static struct fi_ops ofi_nd_fi_ops = { - .size = sizeof(ofi_nd_fi_ops), - .close = ofi_nd_mr_close, - .bind = fi_no_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open, -}; - -static struct fid ofi_nd_fid = { - .fclass = FI_CLASS_MR, - .context = NULL, - .ops = &ofi_nd_fi_ops -}; - -typedef struct ofi_nd_mr_ov { - nd_event_base base; - struct nd_eq *eq; - fid_t fid; - void *context; - LONG cnt; -} ofi_nd_mr_ov; - -static void ofi_nd_mr_ov_free(struct nd_event_base* base); -static void ofi_nd_mr_ov_event(struct nd_event_base* base, DWORD bytes); -static void ofi_nd_mr_ov_err(struct nd_event_base* base, DWORD bytes, DWORD err); - -OFI_ND_NB_BUF_TYPED(nd_mr, struct nd_mr); -OFI_ND_NB_BUF_IMP(nd_mr); - -OFI_ND_NB_BUF_TYPED(mr_ov, ofi_nd_mr_ov); -OFI_ND_NB_BUF_IMP(mr_ov); - -static inline void ofi_nd_mr_fini_handler() -{ - ND_REGISTER_FINI(ND_BUF_FINIPTR(nd_mr)); - ND_REGISTER_FINI(ND_BUF_FINIPTR(mr_ov)); -} - -int ofi_nd_mr_reg(struct fid *fid, const void *buf, size_t len, - uint64_t access, uint64_t offset, uint64_t requested_key, - uint64_t flags, struct fid_mr **pmr, void 
*context) -{ - OFI_UNUSED(requested_key); - - assert(fid->fclass == FI_CLASS_DOMAIN); - assert(!offset); - - HRESULT hr; - - ofi_nd_mr_fini_handler(); - - if (fid->fclass != FI_CLASS_DOMAIN) - return -FI_EINVAL; - if (offset) - return -FI_EINVAL; - if (flags) - return -FI_EINVAL; - - struct nd_domain *domain = container_of(fid, struct nd_domain, fid.fid); - - assert(domain->adapter); - assert(domain->adapter_file); - - struct nd_mr *mr = ofi_nd_buf_alloc_nd_mr(); - if (!mr) - return -FI_ENOMEM; - - struct nd_mr def = { - .fid = { - .fid = ofi_nd_fid - } - }; - - *mr = def; - - hr = domain->adapter->lpVtbl->CreateMemoryRegion( - domain->adapter, &IID_IND2MemoryRegion, domain->adapter_file, - (void**)&mr->mr); - - if (FAILED(hr)) - goto fn_fail; - - ULONG ind2flag = 0; - - if (access & FI_REMOTE_READ) - ind2flag |= ND_MR_FLAG_ALLOW_REMOTE_READ; - if (access & FI_REMOTE_WRITE) - ind2flag |= ND_MR_FLAG_ALLOW_REMOTE_WRITE; - if ((access & FI_WRITE) || (access & FI_RECV)) - ind2flag |= ND_MR_FLAG_ALLOW_LOCAL_WRITE; - - /* there is bug in mlx4 module: it always generates - IO completion (even for cases when hEvent value - of OVERLAPPED structure is initialized). 
To - workaround this we have to use dynamically allocated - ov */ - ofi_nd_mr_ov *ov = ND_BUF_ALLOC(mr_ov); - if (!ov) { - hr = ND_NO_MEMORY; - goto fn_fail; - } - memset(ov, 0, sizeof(*ov)); - - ofi_nd_mr_ov ovdef = { - .base = { - .free = ofi_nd_mr_ov_free, - .event_cb = ofi_nd_mr_ov_event, - .err_cb = ofi_nd_mr_ov_err - }, - .eq = domain->eq, - .fid = &mr->fid.fid, - .context = context - }; - - *ov = ovdef; - if (!(domain->eq_flags & FI_MR_COMPLETE)) { - ov->cnt = 2; - ov->base.ov.hEvent = CreateEvent(0, TRUE, FALSE, NULL); - } - - hr = mr->mr->lpVtbl->Register(mr->mr, buf, len, ind2flag, &ov->base.ov); - if (FAILED(hr)) { - ofi_nd_mr_ov_free(&ov->base); - goto fn_fail; - } - - if (!(domain->eq_flags & FI_MR_COMPLETE)) { - /* sync memory registration */ - hr = mr->mr->lpVtbl->GetOverlappedResult(mr->mr, &ov->base.ov, TRUE); - if (!InterlockedDecrement(&ov->cnt)) - ofi_nd_mr_ov_free(&ov->base); - if (FAILED(hr)) - goto fn_fail; - mr->fid.key = mr->mr->lpVtbl->GetRemoteToken(mr->mr); - mr->fid.mem_desc = (void *)(uintptr_t)mr->mr->lpVtbl->GetLocalToken(mr->mr); - } - else { - /* async memory registration */ - hr = mr->mr->lpVtbl->Register( - mr->mr, buf, len, ind2flag, &ov->base.ov); - if (FAILED(hr)) { - ofi_nd_mr_ov_free(&ov->base); - goto fn_fail; - } - } - - *pmr = &mr->fid; - - return FI_SUCCESS; - -fn_fail: - ofi_nd_mr_close(&mr->fid.fid); - return H2F(hr); -} - -int ofi_nd_mr_regv(struct fid *fid, const struct iovec *iov, - size_t count, uint64_t access, - uint64_t offset, uint64_t requested_key, - uint64_t flags, struct fid_mr **mr, void *context) -{ - OFI_UNUSED(fid); - OFI_UNUSED(iov); - OFI_UNUSED(count); - OFI_UNUSED(access); - OFI_UNUSED(offset); - OFI_UNUSED(requested_key); - OFI_UNUSED(flags); - OFI_UNUSED(fid); - OFI_UNUSED(mr); - OFI_UNUSED(context); - - /* This functionality wasn't implemented due to impossibility - * to do it by means of ND services. 
To avoid problems in future, - * just to not implement it until no support from ND */ - - ofi_nd_mr_fini_handler(); - assert(0); - return FI_SUCCESS; -} - -int ofi_nd_mr_regattr(struct fid *fid, const struct fi_mr_attr *attr, - uint64_t flags, struct fid_mr **mr) -{ - OFI_UNUSED(fid); - OFI_UNUSED(attr); - OFI_UNUSED(flags); - OFI_UNUSED(mr); - - ofi_nd_mr_fini_handler(); - assert(0); - return FI_SUCCESS; -} - -static int ofi_nd_mr_close(struct fid *fid) -{ - ND_LOG_DEBUG(FI_LOG_MR, "closing mr\n"); - assert(fid->fclass == FI_CLASS_MR); - if (fid->fclass != FI_CLASS_MR) - return -FI_EINVAL; - - struct nd_mr *mr = container_of(fid, struct nd_mr, fid.fid); - - if (mr->mr) - mr->mr->lpVtbl->Release(mr->mr); - if (mr->wnd) - mr->wnd->lpVtbl->Release(mr->wnd); - - ofi_nd_buf_free_nd_mr(mr); - - return FI_SUCCESS; -} - -static void ofi_nd_mr_ov_free(struct nd_event_base* base) -{ - ofi_nd_mr_ov *ov = container_of(base, ofi_nd_mr_ov, base); - if (ov->base.ov.hEvent && ov->base.ov.hEvent != INVALID_HANDLE_VALUE) - CloseHandle(ov->base.ov.hEvent); - - ofi_nd_buf_free_mr_ov(ov); -} - -static void ofi_nd_mr_ov_event(struct nd_event_base* base, DWORD bytes) -{ - OFI_UNUSED(bytes); - - HRESULT hr; - - ofi_nd_mr_ov *ov = container_of(base, ofi_nd_mr_ov, base); - - if (ov->cnt) { /* this is sync mr reg operation */ - if (!InterlockedDecrement(&ov->cnt)) - ofi_nd_mr_ov_free(&ov->base); - return; - } - - assert(ov->eq); - assert(ov->fid); - assert(ov->fid->fclass == FI_CLASS_MR); - - struct nd_mr *mr = container_of(ov->fid, struct nd_mr, fid.fid); - assert(mr->mr); - mr->fid.key = mr->mr->lpVtbl->GetRemoteToken(mr->mr); - mr->fid.mem_desc = (void *)(uintptr_t)mr->mr->lpVtbl->GetLocalToken(mr->mr); - - struct fi_eq_entry entry = {.fid = ov->fid, .context = ov->context}; - ofi_nd_mr_ov_free(base); - - struct nd_eq_event *err; - struct nd_eq_event *ev = ofi_nd_buf_alloc_nd_eq_event(); - if (!ev) { - hr = ND_NO_MEMORY; - goto fn_fail; - } - memset(ev, 0, sizeof(*ev)); - ev->eq_event = 
FI_MR_COMPLETE; - ev->operation = entry; - ofi_nd_eq_push(ov->eq, ev); - return; - -fn_fail: - err = ofi_nd_buf_alloc_nd_eq_event(); - if (!err) { - ND_LOG_WARN(FI_LOG_EP_CTRL, - "failed to allocate error event\n"); - return; - } - memset(err, 0, sizeof(*err)); - err->error.err = -H2F(hr); - err->error.prov_errno = (int)hr; - err->error.fid = ov->fid; - ofi_nd_eq_push_err(ov->eq, err); -} - -static void ofi_nd_mr_ov_err(struct nd_event_base* base, DWORD bytes, - DWORD error) -{ - OFI_UNUSED(bytes); - - ofi_nd_mr_ov *ov = container_of(base, ofi_nd_mr_ov, base); - - assert(ov->eq); - assert(ov->fid); - assert(ov->fid->fclass == FI_CLASS_MR); - - struct nd_mr *mr = container_of(ov->fid, struct nd_mr, fid.fid); - assert(mr->mr); - OFI_UNUSED(mr); - - struct fi_eq_err_entry entry = { - .fid = ov->fid, - .context = ov->context, - .err = H2F(error), - .prov_errno = error - }; - - ofi_nd_mr_ov_free(base); - - struct nd_eq_event *err = ofi_nd_buf_alloc_nd_eq_event(); - if (!err) { - ND_LOG_WARN(FI_LOG_EP_CTRL, - "failed to allocate error event\n"); - return; - } - memset(err, 0, sizeof(*err)); - err->error = entry; - ofi_nd_eq_push_err(ov->eq, err); -} - - -#endif /* _WIN32 */ - diff --git a/prov/netdir/src/netdir_ov.c b/prov/netdir/src/netdir_ov.c deleted file mode 100644 index c0736be2fa5..00000000000 --- a/prov/netdir/src/netdir_ov.c +++ /dev/null @@ -1,159 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. 
You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. -* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. 
-*/ - -#ifdef _WIN32 - -#include -#define WIN32_NO_STATUS - -#include "netdir_ov.h" -#include "netdir_log.h" -#include "netdir_util.h" -#include "netdir_iface.h" - -#include "netdir_queue.h" - -OFI_ND_NB_BUF_IMP(nd_eq_event); -OFI_ND_NB_BUF_IMP(nd_buf_fini); - -volatile nd_buf_fini *nd_buf_fini_head = 0; - -LONG nd_async_progress = 0; - -void CALLBACK domain_io_cb(DWORD err, DWORD bytes, LPOVERLAPPED ov) -{ - assert(ov); - - InterlockedIncrement(&nd_async_progress); - - nd_event_base *base = container_of(ov, nd_event_base, ov); - - ND_LOG_DEBUG(FI_LOG_EP_CTRL, - "IO callback: err: %s, bytes: %d\n", - ofi_nd_error_str(err), bytes); - - if (err) { - assert(base->err_cb); - base->err_cb(base, bytes, err); - } - else { - assert(base->event_cb); - base->event_cb(base, bytes); - } - - InterlockedDecrement(&nd_async_progress); - - return; -} - -static void ofi_nd_util_mr_ov_free(struct nd_event_base* base) -{ - assert(base); - ofi_nd_util_ov *ov = container_of(base, ofi_nd_util_ov, base); - ov->cnt = 2; - ResetEvent(ov->base.ov.hEvent); - ND_BUF_FREE(ofi_nd_util_ov, ov); -} - -static void ofi_nd_util_mr_ov_event(struct nd_event_base* base, DWORD bytes) -{ - OFI_UNUSED(bytes); - - ofi_nd_util_ov *ov = container_of(base, ofi_nd_util_ov, base); - - if (!InterlockedDecrement(&ov->cnt)) - ov->base.free(&ov->base); -} - -static void ofi_nd_util_mr_ov_err(struct nd_event_base* base, DWORD bytes, - DWORD error) -{ - OFI_UNUSED(bytes); - OFI_UNUSED(error); - - ofi_nd_util_ov *ov = container_of(base, ofi_nd_util_ov, base); - - if (!InterlockedDecrement(&ov->cnt)) - ov->base.free(&ov->base); -} - -static ND_BUF_CHUNK(ofi_nd_util_ov) -*ofi_nd_alloc_ov_chunk(ND_BUF_FOOTER(ofi_nd_util_ov) *footer, size_t *count) -{ - OFI_UNUSED(footer); - - ND_BUF_CHUNK(ofi_nd_util_ov) *chunk = malloc(sizeof(*chunk)); - if (!chunk) - return 0; - assert(count); - *count = countof(chunk->item); - memset(chunk, 0, sizeof(*chunk)); - - size_t i; - for (i = 0; i < countof(chunk->item); i++) { - 
chunk->item[i].data.cnt = 2; - chunk->item[i].data.base.free = ofi_nd_util_mr_ov_free; - chunk->item[i].data.base.event_cb = ofi_nd_util_mr_ov_event; - chunk->item[i].data.base.err_cb = ofi_nd_util_mr_ov_err; - chunk->item[i].data.base.ov.hEvent = CreateEvent(0, TRUE, FALSE, 0); - if (!chunk->item[i].data.base.ov.hEvent || - chunk->item[i].data.base.ov.hEvent == INVALID_HANDLE_VALUE) - goto fn_fail; - } - - return chunk; - -fn_fail: - for (i = 0; i < countof(chunk->item); i++) { - if (chunk->item[i].data.base.ov.hEvent && - chunk->item[i].data.base.ov.hEvent != INVALID_HANDLE_VALUE) - CloseHandle(chunk->item[i].data.base.ov.hEvent); - } - free(chunk); - return 0; -} - -static void ofi_nd_free_ov_chunk(struct nd_buf_chunk_ofi_nd_util_ov *chunk) -{ - assert(chunk); - size_t i; - for(i = 0; i < countof(chunk->item); i++) - if (chunk->item[i].data.base.ov.hEvent && - chunk->item[i].data.base.ov.hEvent != INVALID_HANDLE_VALUE) - CloseHandle(chunk->item[i].data.base.ov.hEvent); - free(chunk); -} - -OFI_ND_NB_BUF_IMP_ALLOC(ofi_nd_util_ov, ofi_nd_alloc_ov_chunk, ofi_nd_free_ov_chunk); - -#endif /* _WIN32 */ - diff --git a/prov/netdir/src/netdir_ov.h b/prov/netdir/src/netdir_ov.h deleted file mode 100644 index ec002913bc8..00000000000 --- a/prov/netdir/src/netdir_ov.h +++ /dev/null @@ -1,249 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. 
-* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -*/ - -#ifndef _FI_NETDIR_OV_H_ -#define _FI_NETDIR_OV_H_ - -#include -#include -#include - -#include "ndspi.h" - -#include "rdma/fabric.h" -#include "ofi_mem.h" - -#include "netdir.h" -#include "netdir_buf.h" -#include "netdir_log.h" -#include "netdir_iface.h" -#include "netdir_queue.h" - -#include "rdma/fi_eq.h" - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -typedef enum ofi_nd_cq_state { - NORMAL_STATE = 0, - LARGE_MSG_RECV_REQ = 1, - LARGE_MSG_WAIT_ACK = 2, - MAX_STATE = 3 -} ofi_nd_cq_state; - -typedef enum ofi_nd_cq_event { - NORMAL_EVENT = 0, - LARGE_MSG_REQ = 1, - LARGE_MSG_ACK = 2, - MAX_EVENT = 3 -} ofi_nd_cq_event; - -typedef struct nd_eq_event { - OVERLAPPED ov; - int is_custom; - uint32_t eq_event; - union { - struct fi_eq_entry operation; - /* fi_eq_cm_entry could not be used here because it has - incomplete size */ - /*struct fi_eq_cm_entry connection;*/ - struct fi_eq_err_entry error; - }; - - /* connection data */ - void *data; - size_t len; -} nd_eq_event; - -typedef struct nd_send_entry nd_send_entry; - -typedef struct nd_cq_entry { - nd_event_base base; - struct nd_domain *domain; - struct nd_msgprefix *prefix; - struct nd_inlinebuf *inline_buf; - struct nd_notifybuf *notify_buf; - struct iovec iov[ND_MSG_IOV_LIMIT]; - size_t 
iov_cnt; - - /* used for RMA operations */ - size_t mr_count; - IND2MemoryRegion *mr[ND_MSG_IOV_LIMIT]; - ND2_RESULT result; - - uint64_t flags; - uint64_t seq; - void* buf; - size_t len; - uint64_t data; - struct nd_queue_item queue_item; - int completed; - void* context; - - struct { - struct nd_msg_location *locations; - /* != 0 only in case of large message - * receiving via RMA read */ - size_t count; - } rma_location; - struct { - /* these parameters are specified in - * parent's CQ entry to wait until all - * read/write operation will be completed */ - size_t comp_count; - size_t total_count; - - CRITICAL_SECTION comp_lock; - } wait_completion; - struct nd_cq_entry *aux_entry; - - ofi_nd_cq_state state; - ofi_nd_cq_event event; - nd_flow_cntrl_flags flow_cntrl_flags; - nd_send_entry *send_entry; -} nd_cq_entry; - -typedef struct nd_sge { - ND2_SGE entries[256]; - ULONG count; -} nd_sge; - -struct nd_send_entry { - struct nd_queue_item queue_item; - nd_sge *sge; - nd_cq_entry *cq_entry; - nd_cq_entry *prepost_entry; - struct nd_ep *ep; -}; - -typedef struct nd_buf_fini { - volatile struct nd_buf_fini *next; - void(*fini)(void); -} nd_buf_fini; - -#define ND_FI_CONTEXT(ptr) ((struct fi_context*)(ptr))->internal[0] - -OFI_ND_NB_BUF(nd_buf_fini); -OFI_ND_NB_BUF(nd_eq_event); -OFI_ND_NB_BUF(nd_cq_entry); -OFI_ND_NB_BUF(nd_sge); -OFI_ND_NB_BUF(nd_send_entry); - -extern LONG nd_async_progress; -extern volatile nd_buf_fini *nd_buf_fini_head; - -static inline void nd_buf_register_fini(void(*fini)(void)) -{ - assert(fini); - nd_buf_fini *fin = ND_BUF_ALLOC(nd_buf_fini); - if (fin) { - fin->fini = fini; - do { - fin->next = nd_buf_fini_head; - } while (InterlockedCompareExchangePointer( - (PVOID *)&nd_buf_fini_head, - fin, (PVOID)fin->next) != fin->next); - } - else { - ND_LOG_WARN(FI_LOG_CORE, "failed to allocate finalizer\n"); - } -} - -static inline void nd_buf_fini_apply() -{ - volatile nd_buf_fini *next = nd_buf_fini_head; - while (next) { - volatile nd_buf_fini 
*current = next; - next = current->next; - assert(current->fini); - current->fini(); - ND_BUF_FREE(nd_buf_fini, ((nd_buf_fini*)current)); - } -} - -#define ND_REGISTER_FINI(fini) \ -do { \ - static LONG init_done = 0; \ - if (!init_done) { \ - if (!InterlockedExchange(&init_done, 1)) \ - nd_buf_register_fini(fini); \ - } \ -} while (0) - -OFI_ND_NB_BUF(nd_event_base); - -static inline void ofi_nd_eq_free_event(nd_eq_event *ev) -{ - assert(ev); - - if (ev->data) - free(ev->data); - if (ev->eq_event == FI_CONNREQ) { - struct fi_eq_cm_entry *cm = (struct fi_eq_cm_entry*)&ev->operation; - if (cm->info) - fi_freeinfo(cm->info); - } - - ND_BUF_FREE(nd_eq_event, ev); -} - -void CALLBACK domain_io_cb(DWORD err, DWORD bytes, LPOVERLAPPED ov); - -static inline void ofi_nd_eq_push(struct nd_eq *eq, struct nd_eq_event *ev) -{ - assert(eq); - assert(ev); - - assert(eq->iocp); - PostQueuedCompletionStatus(eq->iocp, 0, 0, &ev->ov); - InterlockedIncrement(&eq->count); - WakeByAddressAll((void*)&eq->count); -} - -static inline void ofi_nd_eq_push_err(struct nd_eq *eq, struct nd_eq_event *ev) -{ - assert(eq); - assert(ev); - - assert(eq->err); - PostQueuedCompletionStatus(eq->err, 0, 0, &ev->ov); - InterlockedIncrement(&eq->count); - WakeByAddressAll((void*)&eq->count); -} - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif /* _FI_NETDIR_OV_H_ */ - diff --git a/prov/netdir/src/netdir_pep.c b/prov/netdir/src/netdir_pep.c deleted file mode 100644 index 208fd024824..00000000000 --- a/prov/netdir/src/netdir_pep.c +++ /dev/null @@ -1,481 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. 
You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. -* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. 
-*/ - -#ifdef _WIN32 - -#include -#include -#include - -#include "netdir.h" - -#include "ofi.h" -#include "ofi_osd.h" -#include "ofi_util.h" - -#include "netdir_ov.h" -#include "netdir_log.h" -#include "netdir_iface.h" - -static int ofi_nd_pep_getname(fid_t fid, void *addr, size_t *addrlen); -static int ofi_nd_pep_close(struct fid *fid); -static int ofi_nd_pep_listen(struct fid_pep *pep); -static int ofi_nd_pep_bind(struct fid *fid, struct fid *bfid, uint64_t flags); -static int ofi_nd_pep_reject(struct fid_pep *ppep, fid_t handle, - const void *param, size_t paramlen); - -static void ofi_nd_pep_connreq_free(nd_event_base *base); -static void ofi_nd_pep_connreq(nd_event_base *base, DWORD bytes); -static void ofi_nd_pep_connreq_err(nd_event_base *base, DWORD err, - DWORD bytes); -extern int ofi_nd_ep_getopt(struct fid *ep, int level, int optname, - void *optval, size_t *optlen); - -static struct fi_ops ofi_nd_fi_ops = { - .size = sizeof(ofi_nd_fi_ops), - .close = ofi_nd_pep_close, - .bind = ofi_nd_pep_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open, -}; - -static struct fid ofi_nd_fid = { - .fclass = FI_CLASS_PEP, - .context = NULL, - .ops = &ofi_nd_fi_ops -}; - -static struct fi_ops_cm ofi_nd_cm_ops = { - .size = sizeof(struct fi_ops_cm), - .setname = fi_no_setname, - .getname = ofi_nd_pep_getname, - .getpeer = fi_no_getpeer, - .connect = fi_no_connect, - .listen = ofi_nd_pep_listen, - .accept = fi_no_accept, - .reject = ofi_nd_pep_reject, - .shutdown = fi_no_shutdown, - .join = fi_no_join, -}; - -static struct fi_ops_ep ofi_nd_pep_ops = { - .size = sizeof(ofi_nd_pep_ops), - .cancel = fi_no_cancel, - .getopt = ofi_nd_ep_getopt, - .setopt = fi_no_setopt, - .tx_ctx = fi_no_tx_ctx, - .rx_ctx = fi_no_rx_ctx, - .rx_size_left = fi_no_rx_size_left, - .tx_size_left = fi_no_tx_size_left, -}; - -typedef struct nd_pep_connreq { - nd_event_base base; - struct nd_eq *eq; - struct fi_info *info; - IND2Connector *connector; - fid_t fid; -} nd_pep_connreq; - -static 
nd_event_base nd_pep_connreq_base_def = { - .free = ofi_nd_pep_connreq_free, - .event_cb = ofi_nd_pep_connreq, - .err_cb = ofi_nd_pep_connreq_err -}; - -OFI_ND_NB_BUF(nd_pep_connreq); -OFI_ND_NB_BUF_IMP(nd_pep_connreq); -OFI_ND_NB_BUF_TYPED(nd_connreq, struct nd_connreq); -OFI_ND_NB_BUF_IMP(nd_connreq); - -int ofi_nd_passive_endpoint(struct fid_fabric *fabric, struct fi_info *info, - struct fid_pep **ppep, void *context) -{ - OFI_UNUSED(context); - OFI_UNUSED(fabric); - - ND_REGISTER_FINI(ND_BUF_FINIPTR(nd_connreq)); - - assert(info); - assert(fabric); - assert(fabric->fid.fclass == FI_CLASS_FABRIC); - - struct nd_pep *pep = (struct nd_pep*)calloc(1, sizeof(*pep)); - if (!pep) - return -FI_ENOMEM; - - struct nd_pep def = { - .fid = { - .fid = { - .fclass = FI_CLASS_PEP, - .context = context, - .ops = &ofi_nd_fi_ops - }, - .ops = &ofi_nd_pep_ops, - .cm = &ofi_nd_cm_ops - }, - .info = fi_dupinfo(info) - }; - - *pep = def; - *ppep = &pep->fid; - - return FI_SUCCESS; -} - -static int ofi_nd_pep_getname(fid_t fid, void *addr, size_t *addrlen) -{ - assert(fid && fid->fclass == FI_CLASS_PEP); - - if (fid->fclass != FI_CLASS_PEP) - return -FI_EINVAL; - - HRESULT hr; - ULONG len = (ULONG)*addrlen; - struct nd_pep *pep = container_of(fid, struct nd_pep, fid.fid); - - if (!pep->listener) - return -FI_EOPBADSTATE; - - hr = pep->listener->lpVtbl->GetLocalAddress(pep->listener, - (struct sockaddr *)addr, - &len); - - if (*addrlen < len) { - ND_LOG_INFO(FI_LOG_EP_CTRL, - "Provided buffer (size = %"PRIu64") is too small, required = %"PRIu64, - addrlen, len); - *addrlen = (size_t)len; - return -FI_ETOOSMALL; - } - *addrlen = (size_t)len; - - return H2F(hr); -} - -static int ofi_nd_pep_close(struct fid *fid) -{ - assert(fid); - assert(fid->fclass == FI_CLASS_PEP); - - struct nd_pep *pep = container_of(fid, struct nd_pep, fid.fid); - - int ref; - if (pep->listener) { - ref = (int)pep->listener->lpVtbl->Release(pep->listener); - ND_LOG_DEBUG(FI_LOG_EP_CTRL, "pep->listener ref count: 
%d\n", ref); - } - if (pep->adapter) { - ref = (int)pep->adapter->lpVtbl->Release(pep->adapter); - ND_LOG_DEBUG(FI_LOG_EP_CTRL, "pep->adapter ref count: %d\n", ref); - } - if (pep->adapter_file && pep->adapter_file != INVALID_HANDLE_VALUE) - CloseHandle(pep->adapter_file); - if (pep->info) - fi_freeinfo(pep->info); - - free(pep); - - return FI_SUCCESS; -} - -static void ofi_nd_pep_connreq_free(nd_event_base *base) -{ - assert(base); - - nd_pep_connreq *connreq = container_of(base, nd_pep_connreq, base); - if (connreq->connector) - connreq->connector->lpVtbl->Release(connreq->connector); - ofi_nd_buf_free_nd_pep_connreq(connreq); -} - -static void ofi_nd_pep_connreq(nd_event_base *base, DWORD bytes) -{ - assert(base); - OFI_UNUSED(bytes); - - HRESULT hr; - ULONG len; - nd_pep_connreq *connreq = container_of(base, nd_pep_connreq, base); - struct nd_eq_event *err = 0; - - assert(connreq->connector); - assert(connreq->eq); - assert(connreq->fid); - assert(connreq->info); - - struct nd_eq_event *ev = ND_BUF_ALLOC(nd_eq_event); - if (!ev) { - ND_LOG_WARN(FI_LOG_EP_CTRL, "failed to allocate event\n"); - hr = ND_NO_MEMORY; - goto fn_fail_ev; - } - memset(ev, 0, sizeof(*ev)); - - ev->eq_event = FI_CONNREQ; - - struct fi_eq_cm_entry *cmev = (struct fi_eq_cm_entry*)&ev->operation; - cmev->fid = connreq->fid; - cmev->info = fi_dupinfo(connreq->info); - if (!cmev->info) { - ND_LOG_WARN(FI_LOG_EP_CTRL, "failed to copy info\n"); - hr = ND_NO_MEMORY; - goto fn_fail; - } - - struct nd_connreq *handle = ND_BUF_ALLOC(nd_connreq); - if (!handle) { - ND_LOG_WARN(FI_LOG_EP_CTRL, "failed to allocate handle\n"); - hr = ND_NO_MEMORY; - goto fn_fail; - } - memset(handle, 0, sizeof(*handle)); - handle->handle.fclass = FI_CLASS_CONNREQ; - handle->connector = connreq->connector; - handle->connector->lpVtbl->AddRef(handle->connector); - cmev->info->handle = &handle->handle; - - hr = connreq->connector->lpVtbl->GetPrivateData( - connreq->connector, NULL, &len); - if (FAILED(hr) && hr != 
ND_BUFFER_OVERFLOW) { - ND_LOG_WARN(FI_LOG_EP_CTRL, "failed to get private data\n"); - goto fn_fail_handle; - } - - if (len) { - ev->data = malloc(len); - if (!ev->data) { - ND_LOG_WARN(FI_LOG_EP_CTRL, "failed to allocate private data\n"); - ev->len = 0; - goto fn_fail_handle; - } - - hr = connreq->connector->lpVtbl->GetPrivateData( - connreq->connector, ev->data, &len); - if (FAILED(hr)) { - ND_LOG_WARN(FI_LOG_EP_CTRL, "failed to copy private data\n"); - free(ev->data); - ev->len = 0; - goto fn_fail_handle; - } - } - ev->len = (size_t)len; - - ofi_nd_eq_push(connreq->eq, ev); - ofi_nd_pep_connreq_free(&connreq->base); - return; - -fn_fail_handle: - handle->connector->lpVtbl->Release(handle->connector); - ND_BUF_FREE(nd_connreq, handle); -fn_fail: - ofi_nd_eq_free_event(ev); -fn_fail_ev: - err = ND_BUF_ALLOC(nd_eq_event); - if (!err) { - ND_LOG_WARN(FI_LOG_EP_CTRL, "failed to allocate error\n"); - return; - } - memset(err, 0, sizeof(*err)); - err->error.err = -H2F(hr); - err->error.prov_errno = (int)hr; - err->error.fid = connreq->fid; - ofi_nd_eq_push_err(connreq->eq, err); - ofi_nd_pep_connreq_free(&connreq->base); -} - -static void ofi_nd_pep_connreq_err(nd_event_base *base, DWORD error, DWORD bytes) -{ - assert(base); - OFI_UNUSED(bytes); - - nd_pep_connreq *connreq = container_of(base, nd_pep_connreq, base); - struct nd_eq_event *err = 0; - - assert(connreq->connector); - assert(connreq->eq); - assert(connreq->fid); - assert(connreq->info); - - err = ofi_nd_buf_alloc_nd_eq_event(); - - if (!err) { - ND_LOG_WARN(FI_LOG_EP_CTRL, "failed to allocate error\n"); - return; - } - memset(err, 0, sizeof(*err)); - err->error.err = FI_EOTHER; - err->error.prov_errno = (int)error; - err->error.fid = connreq->fid; - ofi_nd_eq_push_err(connreq->eq, err); - ofi_nd_pep_connreq_free(&connreq->base); -} - -static int ofi_nd_pep_listen(struct fid_pep *ppep) -{ - assert(ppep); - - int res = FI_SUCCESS; - HRESULT hr; - - if (ppep->fid.fclass != FI_CLASS_PEP) - return -FI_EINVAL; - 
- struct nd_pep *pep = container_of(ppep, struct nd_pep, fid); - - assert(pep->info); - assert(pep->info->domain_attr); - assert(pep->info->domain_attr->name); - - struct sockaddr* addr; - - if (!pep->adapter) { - struct sockaddr* listen_addr = NULL; - size_t listen_addr_len = 0; - - res = ofi_nd_lookup_adapter(pep->info->domain_attr->name, - &pep->adapter, &addr); - if (res != FI_SUCCESS) - return res; - assert(pep->adapter); - - hr = pep->adapter->lpVtbl->CreateOverlappedFile(pep->adapter, - &pep->adapter_file); - if (FAILED(hr)) - return H2F(hr); - assert(pep->adapter_file && - pep->adapter_file != INVALID_HANDLE_VALUE); - - BindIoCompletionCallback(pep->adapter_file, domain_io_cb, 0); - - hr = pep->adapter->lpVtbl->CreateListener(pep->adapter, - &IID_IND2Listener, - pep->adapter_file, - (void**)&pep->listener); - if (FAILED(hr)) - return H2F(hr); - assert(pep->listener); - - if (pep->info->src_addr) { - /* uses address that is specified in fi_info */ - listen_addr = pep->info->src_addr; - listen_addr_len = pep->info->src_addrlen; - } - else { - /* uses address on which provider are open */ - listen_addr = addr; - listen_addr_len = ofi_sizeofaddr(addr); - } - - hr = pep->listener->lpVtbl->Bind(pep->listener, - listen_addr, - (ULONG)sizeof(*listen_addr)); - if (FAILED(hr)) - return H2F(hr); - - hr = pep->listener->lpVtbl->Listen(pep->listener, 0); - if (FAILED(hr)) - return H2F(hr); - } - assert(pep->adapter); - - ND_REGISTER_FINI(ND_BUF_FINIPTR(nd_pep_connreq)); - - nd_pep_connreq *conn = ofi_nd_buf_alloc_nd_pep_connreq(); - if (!conn) - return -FI_ENOMEM; - memset(conn, 0, sizeof(*conn)); - - conn->base = nd_pep_connreq_base_def; - - hr = pep->adapter->lpVtbl->CreateConnector(pep->adapter, - &IID_IND2Connector, - pep->adapter_file, - (void**)&conn->connector); - if (FAILED(hr)) - return H2F(hr); - - conn->eq = pep->eq; - conn->info = pep->info; - conn->fid = &pep->fid.fid; - - hr = pep->listener->lpVtbl->GetConnectionRequest(pep->listener, - 
(IUnknown*)conn->connector, - &conn->base.ov); - if (FAILED(hr)) { - ND_LOG_WARN(FI_LOG_EP_CTRL, "failed to get connection request\n"); - } - - return H2F(hr); -} - -static int ofi_nd_pep_bind(struct fid *fid, struct fid *bfid, uint64_t flags) -{ - OFI_UNUSED(flags); - - if (fid->fclass != FI_CLASS_PEP) - return -FI_EINVAL; - if (bfid->fclass != FI_CLASS_EQ) - return -FI_EINVAL; - - struct nd_pep *pep = container_of(fid, struct nd_pep, fid.fid); - struct nd_eq *eq = container_of(bfid, struct nd_eq, fid.fid); - - pep->eq = eq; - - return FI_SUCCESS; -} - -static int ofi_nd_pep_reject(struct fid_pep *ppep, fid_t handle, - const void *param, size_t paramlen) -{ - assert(ppep); - - if (ppep->fid.fclass != FI_CLASS_PEP) - return -FI_EINVAL; - if (handle->fclass != FI_CLASS_CONNREQ) - return -FI_EINVAL; - - struct nd_connreq *connreq = container_of(handle, struct nd_connreq, handle); - - assert(connreq->connector); - connreq->connector->lpVtbl->Reject(connreq->connector, param, - (ULONG)paramlen); - - connreq->connector->lpVtbl->Release(connreq->connector); - ofi_nd_buf_free_nd_connreq(connreq); - - return FI_SUCCESS; -} - -#endif /* _WIN32 */ - diff --git a/prov/netdir/src/netdir_queue.h b/prov/netdir/src/netdir_queue.h deleted file mode 100644 index 2eebd9ddab9..00000000000 --- a/prov/netdir/src/netdir_queue.h +++ /dev/null @@ -1,187 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. 
You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. -* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. 
-*/ - -#ifndef _FI_NETDIR_QUEUE_H_ -#define _FI_NETDIR_QUEUE_H_ - -#include - -#include "rdma/fabric.h" - -#include "ofi.h" -#include "ofi_osd.h" - -#include "netdir.h" - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -struct nd_queue_item { - struct nd_queue_item *next; -}; - -__declspec(align(16)) struct nd_queue_queue { - union { - struct { - struct nd_queue_item *head; - struct nd_queue_item *tail; - }; - volatile LONG64 exchange[2]; - }; -}; - -/* push front call is non-blocking thread safe */ -static inline void ofi_nd_queue_push_front(struct nd_queue_queue *queue, - struct nd_queue_item *item) -{ - assert(queue); - - item->next = 0; - BOOLEAN success; - - struct { - struct nd_queue_item *head; - struct nd_queue_item *tail; - } src; - - do { - src.head = queue->head; - src.tail = queue->tail; - - LONG64 head = (LONG64)(src.head ? src.head : item); - LONG64 tail = (LONG64)item; - __declspec(align(16)) LONG64 compare[2] = { (LONG64)src.head, (LONG64)src.tail }; - success = InterlockedCompareExchange128( - queue->exchange, tail, head, compare); - } while (!success); - - if (src.tail) { - item->next = src.head; - src.head = item; - WakeByAddressAll(&src.head); - } -} - -/* push call is non-blocking thread safe */ -static inline void ofi_nd_queue_push(struct nd_queue_queue *queue, - struct nd_queue_item *item) -{ - assert(queue); - - item->next = 0; - BOOLEAN success; - - struct { - struct nd_queue_item *head; - struct nd_queue_item *tail; - } src; - - do { - src.head = queue->head; - src.tail = queue->tail; - - LONG64 head = (LONG64)(src.head ? 
src.head : item); - LONG64 tail = (LONG64)item; - __declspec(align(16)) LONG64 compare[2] = {(LONG64)src.head, (LONG64)src.tail}; - success = InterlockedCompareExchange128( - queue->exchange, tail, head, compare); - } while (!success); - - if (src.tail) { - src.tail->next = item; - WakeByAddressAll(&src.tail->next); - } -} - -/* pop call is NOT thread safe, it allows only one consumer, but it is - safe to be used with push operation without locks */ -static inline int ofi_nd_queue_pop(struct nd_queue_queue *queue, - struct nd_queue_item **item) -{ - assert(queue); - assert(item); - - BOOLEAN success; - struct { - struct nd_queue_item *head; - struct nd_queue_item *tail; - } src; - - do { - src.head = queue->head; - src.tail = queue->tail; - - if (!src.head) - return 0; - - /* here is potential thread race: object located at src.head - may be destroyed while we're waiting. that is why pop - operation is not thread safe */ - if (src.head != src.tail) { - /* in case if head and tail are not same - ensure that - head->next element is not NULL */ - void *zero = NULL; - while (!src.head->next) { - WaitOnAddress(&src.head->next, &zero, sizeof(zero), INFINITE); - } - } - - LONG64 head = (LONG64)src.head->next; - LONG64 tail = (LONG64)(src.head != src.tail ? 
src.tail : NULL); - __declspec(align(16)) LONG64 compare[2] = {(LONG64)src.head, (LONG64)src.tail}; - success = InterlockedCompareExchange128( - queue->exchange, tail, head, compare); - } while (!success); - - *item = src.head; - - return (*item) != NULL; -} - -/* peek call is NOT thread safe, it allows only one consumer */ -static inline int ofi_nd_queue_peek(struct nd_queue_queue *queue, - struct nd_queue_item **item) -{ - assert(queue); - assert(item); - - *item = queue->head; - return (*item) != 0; -} - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif /* _FI_NETDIR_QUEUE_H_ */ - diff --git a/prov/netdir/src/netdir_unexp.c b/prov/netdir/src/netdir_unexp.c deleted file mode 100644 index 76b4a69d06c..00000000000 --- a/prov/netdir/src/netdir_unexp.c +++ /dev/null @@ -1,532 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. -* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -*/ - -#ifdef _WIN32 - -#include -#define WIN32_NO_STATUS - -#include "ndspi.h" - -#include "netdir.h" -#include "netdir_cq.h" -#include "netdir_log.h" -#include "netdir_util.h" -#include "netdir_queue.h" -#include "netdir_iface.h" -#include "netdir_queue.h" -#include "netdir_unexp.h" - -#include - -#define PREPOSTLEN (sizeof(nd_unexpected_buf) + gl_data.inline_thr) - -static ND_BUF_CHUNK(nd_unexpected_entry) *ofi_nd_unexp_alloc_chunk( - ND_BUF_FOOTER(nd_unexpected_entry) *footer, size_t* count); -static void ofi_nd_unexp_free_chunk(ND_BUF_CHUNK(nd_unexpected_entry) *chunk); - -OFI_ND_NB_BUF(nd_unexpected_ctx); -OFI_ND_NB_BUF(nd_unexpected_entry); -OFI_ND_NB_BUF_IMP(nd_unexpected_ctx); -OFI_ND_NB_BUF_IMP_ALLOC(nd_unexpected_entry, - ofi_nd_unexp_alloc_chunk, - ofi_nd_unexp_free_chunk); - -HRESULT ofi_nd_unexp_init(struct nd_ep *ep) -{ - assert(ep); - assert(ep->fid.fid.fclass == FI_CLASS_EP); - - ND_REGISTER_FINI(ND_BUF_FINIPTR(nd_unexpected_ctx)); - ND_REGISTER_FINI(ND_BUF_FINIPTR(nd_unexpected_entry)); - - int i; - HRESULT hr; - int total_count = (gl_data.prepost_cnt + - gl_data.flow_control_cnt) * gl_data.prepost_buf_cnt; - - if (ep->unexpected.active) - return S_OK; - - dlist_init(&ep->unexpected.received); - - ep->unexpected.unexpected = malloc( - total_count * sizeof(*ep->unexpected.unexpected) - ); - if (!ep->unexpected.unexpected) - return ND_NO_MEMORY; - - size_t len = PREPOSTLEN * total_count; - - char* tmp = (char*)calloc(1, len); - if (!tmp) { - hr = ND_NO_MEMORY; - goto free_unexp; - } - - hr = ep->domain->adapter->lpVtbl->CreateMemoryRegion( - ep->domain->adapter, &IID_IND2MemoryRegion, - ep->domain->adapter_file, (void**)&ep->unexpected.mr); - if (FAILED(hr)) - goto free_tmp; - - hr = 
ofi_nd_util_register_mr(ep->unexpected.mr, tmp, - len, ND_MR_FLAG_ALLOW_LOCAL_WRITE); - if (FAILED(hr)) - goto release; - - ep->unexpected.token = - ep->unexpected.mr->lpVtbl->GetLocalToken(ep->unexpected.mr); - - for (i = 0; i < total_count; i++) { - ep->unexpected.unexpected[i] = - (struct nd_unexpected_buf*)(tmp + (PREPOSTLEN * i)); - } - - InterlockedIncrement(&ep->unexpected.active); - - return S_OK; - -release: - ep->unexpected.mr->lpVtbl->Release(ep->unexpected.mr); -free_tmp: - free(tmp); -free_unexp: - free(ep->unexpected.unexpected); - ep->unexpected.unexpected = NULL; - return hr; -} - -HRESULT ofi_nd_unexp_fini(struct nd_ep *ep) -{ - assert(ep); - assert(ep->fid.fid.fclass == FI_CLASS_EP); - - int total_count = (gl_data.prepost_cnt + - gl_data.flow_control_cnt) * gl_data.prepost_buf_cnt; - - if (InterlockedDecrement(&ep->unexpected.active)) - return S_OK; - - - ND_LOG_INFO(FI_LOG_EP_CTRL, "finalize unexpected queue\n" - "total_count = %d\n", - total_count); - - InterlockedAdd(&ep->shutdown, total_count); - if (ep->qp) { - ep->qp->lpVtbl->Flush(ep->qp); - /* wait until all preposted entries are canceled (GetResult() - * ND2_RESULT entries with Status == STATUS_CANCELLED) */ - while (InterlockedAdd(&ep->shutdown, 0) > 0) - /* yields execution to another thread - * that is ready to run on: */ - if (!SwitchToThread()) /* - the current processor */ - Sleep(0); /* - the another processor */ - } - - if (ep->unexpected.mr) { - ofi_nd_util_unregister_mr(ep->unexpected.mr); - ep->unexpected.mr->lpVtbl->Release(ep->unexpected.mr); - } - - if (ep->unexpected.unexpected) { - /* Free allocated a piece of memory for unexpected events */ - if (ep->unexpected.unexpected[0]) - free(ep->unexpected.unexpected[0]); - /* Free allocated set of unexpected entries */ - free(ep->unexpected.unexpected); - } - - return S_OK; -} - -static ND_BUF_CHUNK(nd_unexpected_entry) -*ofi_nd_unexp_alloc_chunk(ND_BUF_FOOTER(nd_unexpected_entry) *footer, size_t* count) -{ - OFI_UNUSED(footer); 
- - ND_BUF_CHUNK(nd_unexpected_entry) *chunk = malloc(sizeof(*chunk)); - if (!chunk) - return 0; - assert(count); - *count = countof(chunk->item); - memset(chunk, 0, sizeof(*chunk)); - - char *tmp = malloc(countof(chunk->item) * PREPOSTLEN); - if (!tmp) - goto fn_fail; - - size_t i; - for (i = 0; i < countof(chunk->item); i++) { - chunk->item[i].data.buf = (struct nd_unexpected_buf*)(tmp + (PREPOSTLEN * i)); - } - - return chunk; - -fn_fail: - free(chunk); - return 0; -} - -static void ofi_nd_unexp_free_chunk(ND_BUF_CHUNK(nd_unexpected_entry) *chunk) -{ - assert(chunk); - - if (chunk->item[0].data.buf) - free(chunk->item[0].data.buf); - free(chunk); -} - -static int ofi_nd_return_true(struct dlist_entry *item, const void *arg) -{ - OFI_UNUSED(item); - OFI_UNUSED(arg); - return 1; -} - -void ofi_nd_unexp_match(struct nd_ep *ep) -{ - assert(ep); - assert(ep->fid.fid.fclass == FI_CLASS_EP); - - int done = 0; - do { - nd_cq_entry *entry = NULL; - nd_cq_entry *ep_entry = NULL; - nd_cq_entry *srx_entry = NULL; - nd_unexpected_entry *unexp = NULL; - - if (ep->srx) - EnterCriticalSection(&ep->srx->prepost_lock); - else - EnterCriticalSection(&ep->prepost_lock); - - struct nd_queue_item *ep_qentry = NULL; - struct nd_queue_item *srx_qentry = NULL; - - struct dlist_entry *qunexp = NULL; - - if ((ofi_nd_queue_peek(&ep->prepost, &ep_qentry) || - (ep->srx && ofi_nd_queue_peek(&ep->srx->prepost, &srx_qentry))) && - (!dlist_empty(&ep->unexpected.received))) { - qunexp = dlist_find_first_match( - &ep->unexpected.received, ofi_nd_return_true, 0); - unexp = container_of( - qunexp, - nd_unexpected_entry, ep_list); - - if(ep_qentry) - ep_entry = container_of(ep_qentry, nd_cq_entry, queue_item); - if(srx_qentry) - srx_entry = container_of(srx_qentry, nd_cq_entry, queue_item); - - if (ep_entry && srx_entry) { - if (ep_entry->seq < srx_entry->seq) { - entry = ep_entry; - ofi_nd_queue_pop(&ep->prepost, &ep_qentry); - } - else { - entry = srx_entry; - ofi_nd_queue_pop(&ep->srx->prepost, 
&srx_qentry); - } - } - else if (ep_entry) { - entry = ep_entry; - ofi_nd_queue_pop(&ep->prepost, &ep_qentry); - } - else { - assert(ep->srx); - entry = srx_entry; - ofi_nd_queue_pop(&ep->srx->prepost, &srx_qentry); - } - - dlist_remove(qunexp); - /* remove element from srx queue */ - if(unexp->ep->srx) - dlist_remove(&unexp->srx_list); - } - else { - done = 1; - } - - if (ep->srx) - LeaveCriticalSection(&ep->srx->prepost_lock); - else - LeaveCriticalSection(&ep->prepost_lock); - if (!done) { - /* Set event that was received */ - entry->event = unexp->buf->header.event; - ND_LOG_EVENT_INFO(entry); - if (unexp->buf->header.flags.req_ack) - ofi_nd_send_ack(entry, ep); - ofi_nd_dispatch_cq_event(unexp->buf->header.event, entry, unexp); - } - } while (!done); -} - -void ofi_nd_srx_match(struct nd_srx *srx) -{ - assert(srx); - assert(srx->fid.fid.fclass == FI_CLASS_SRX_CTX); - - int done = 0; - do { - nd_cq_entry *entry = NULL; - nd_unexpected_entry *unexp = NULL; - - EnterCriticalSection(&srx->prepost_lock); - - struct nd_queue_item *qentry = NULL; - - if (ofi_nd_queue_peek(&srx->prepost, &qentry) && - !dlist_empty(&srx->received)) { - - entry = container_of(qentry, nd_cq_entry, queue_item); - ofi_nd_queue_pop(&srx->prepost, &qentry); - - struct dlist_entry *qunexp = dlist_remove_first_match( - &srx->received, ofi_nd_return_true, 0); - - unexp = container_of( - qunexp, nd_unexpected_entry, srx_list); - /* remove element from ep queue */ - dlist_remove(&unexp->ep_list); - } - else { - done = 1; - } - LeaveCriticalSection(&srx->prepost_lock); - - if (!done) { - if (unexp->buf->header.flags.req_ack) - ofi_nd_send_ack(entry, unexp->ep); - ofi_nd_dispatch_cq_event(unexp->buf->header.event, entry, unexp); - } - } while (!done); -} - -void ofi_nd_unexp_event(ND2_RESULT *result) -{ - assert(result); - assert(result->RequestType == Nd2RequestTypeReceive); - - struct nd_ep *ep = (struct nd_ep *)result->QueuePairContext; - assert(ep); - assert(ep->fid.fid.fclass == FI_CLASS_EP); 
- - nd_unexpected_ctx *ctx = (nd_unexpected_ctx *)result->RequestContext; - struct nd_unexpected_buf *buf = ctx->entry; - assert(ctx); - - if (ep->shutdown || result->Status == STATUS_CANCELLED) { - /* shutdown mode */ - ND_BUF_FREE(nd_unexpected_ctx, ctx); - InterlockedDecrement(&ep->shutdown); - return; - } - - if (ctx->entry->header.flags.ack) { - if (ctx->entry->header.flags.empty) { - /* Just drop this received unexpected entry - * since no CQ are posted to be cogherent with - * this unexp and this unexp isn't carrying - * payload data in itself */ - ND_BUF_FREE(nd_unexpected_ctx, ctx); - - ofi_nd_unexp_repost(ep, buf); - ep->send_op.flags.is_send_blocked = 0; - return; - } - - ep->send_op.flags.is_send_blocked = 0; - } - - nd_unexpected_entry *entry = ND_BUF_ALLOC(nd_unexpected_entry); - /* do NOT zero mem for entry: buf points to real buffer */ - if (!entry) { - ND_LOG_WARN(FI_LOG_EP_DATA, "Failed to allocate 'unexpected' buffer"); - return; - } - - entry->result = *result; - assert(entry->buf); - assert(result->BytesTransferred <= PREPOSTLEN); - memcpy(entry->buf, ctx->entry, result->BytesTransferred); - - entry->ep = ep; - - if (ep->srx) { - EnterCriticalSection(&ep->srx->prepost_lock); - dlist_insert_tail(&entry->srx_list, &ep->srx->received); - } - else { - EnterCriticalSection(&ep->prepost_lock); - } - dlist_insert_tail(&entry->ep_list, &ep->unexpected.received); - - if (ep->srx) { - LeaveCriticalSection(&ep->srx->prepost_lock); - } - else { - LeaveCriticalSection(&ep->prepost_lock); - } - - ND_BUF_FREE(nd_unexpected_ctx, ctx); - ofi_nd_unexp_repost(ep, buf); -#if 0 - ep->unexpected.used_counter++; - if (ep->unexpected.used_counter == (size_t)gl_data.total_avail) { - ep->unexpected.used_counter = 0; - ofi_nd_unexp_payload_run(ep); - } -#endif - ofi_nd_unexp_match(ep); -} - -void ofi_nd_unexp_service_event(ND2_RESULT *result) -{ - assert(result); - assert(result->RequestType == Nd2RequestTypeReceive); - - struct nd_ep *ep = (struct nd_ep 
*)result->QueuePairContext; - assert(ep); - assert(ep->fid.fid.fclass == FI_CLASS_EP); - - struct nd_queue_item *ep_qentry = NULL; - - nd_unexpected_ctx *ctx = (nd_unexpected_ctx *)result->RequestContext; - assert(ctx); - - if (ep->shutdown || result->Status == STATUS_CANCELLED) { - /* shutdown mode */ - ND_BUF_FREE(nd_unexpected_ctx, ctx); - InterlockedDecrement(&ep->shutdown); - return; - } - - struct nd_unexpected_buf *unexp_buf = ctx->entry; - assert(unexp_buf); - if (unexp_buf->header.flags.ack) - ep->send_op.flags.is_send_blocked = 0; - - if (OFI_ND_IS_SERVICE_EVENT(unexp_buf->header.event) && - (ofi_nd_queue_peek(&ep->internal_prepost, &ep_qentry))) { - if (ep_qentry) { - nd_unexpected_entry unexp = { 0 }; - nd_cq_entry *ep_entry = NULL; - - unexp.result = *result; - unexp.ep = ep; - - ND_LOG_DEBUG(FI_LOG_EP_CTRL, "Received internal event, let's " - "try to process it\n"); - ep_entry = container_of(ep_qentry, nd_cq_entry, queue_item); - ofi_nd_queue_pop(&ep->internal_prepost, &ep_qentry); - /* Set event that was received */ - ep_entry->event = unexp_buf->header.event; - ND_LOG_EVENT_INFO(ep_entry); - if (unexp_buf->header.flags.req_ack) - ofi_nd_send_ack(ep_entry, ep); - ofi_nd_dispatch_cq_event(unexp_buf->header.event, ep_entry, &unexp); - - /* just zero out it, because it should have been freed above */ - ep_entry = NULL; - } - else { - /* Shouldn't happen */ - ND_LOG_WARN(FI_LOG_EP_CTRL, "Received internal event, but " - "internal queue is empty\n"); - assert(0); - } - } - - ND_BUF_FREE(nd_unexpected_ctx, ctx); - ofi_nd_unexp_repost(ep, unexp_buf); -#if 0 - ep->unexpected.used_counter++; - if (ep->unexpected.used_counter == (size_t)gl_data.total_avail) { - ep->unexpected.used_counter = 0; - ofi_nd_unexp_payload_run(ep); - } -#endif -} - -HRESULT ofi_nd_unexp_repost(struct nd_ep *ep, struct nd_unexpected_buf *entry) -{ - assert(entry); - assert(ep); - assert(ep->fid.fid.fclass == FI_CLASS_EP); - - HRESULT hr; - - nd_unexpected_ctx *ctx = 
ND_BUF_ALLOC(nd_unexpected_ctx); - if (!ctx) - return ND_NO_MEMORY; - memset(ctx, 0, sizeof(*ctx)); - ctx->entry = entry; - ctx->ep = ep; - - assert(ep->unexpected.mr); - assert(ep->unexpected.token); - ND2_SGE sge = { - .Buffer = (void*)entry, - .BufferLength = (ULONG)(sizeof(nd_unexpected_buf) + - MIN(sizeof(struct nd_msg_location) * ND_MSG_IOV_LIMIT, - gl_data.inline_thr)), - .MemoryRegionToken = ep->unexpected.token - }; - - assert(ep->qp); - hr = ep->qp->lpVtbl->Receive(ep->qp, ctx, &sge, 1); - return hr; -} - -HRESULT ofi_nd_unexp_run(struct nd_ep *ep) -{ - int i; - int total_count = (gl_data.prepost_cnt + - gl_data.flow_control_cnt) * gl_data.prepost_buf_cnt; - - for (i = 0; i < total_count; i++) - ofi_nd_unexp_repost(ep, ep->unexpected.unexpected[i]); - - return S_OK; -} - -void ofi_nd_release_unexp_entry(nd_unexpected_entry *unexp) -{ - ND_BUF_FREE(nd_unexpected_entry, unexp); -} - -#endif /* _WIN32 */ - diff --git a/prov/netdir/src/netdir_unexp.h b/prov/netdir/src/netdir_unexp.h deleted file mode 100644 index 8801033f196..00000000000 --- a/prov/netdir/src/netdir_unexp.h +++ /dev/null @@ -1,84 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. 
-* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -*/ - -#ifndef _FI_NETDIR_UNEXP_H_ -#define _FI_NETDIR_UNEXP_H_ - -#include - -#include "netdir_iface.h" -#include "netdir.h" - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -#define OFI_ND_IS_SERVICE_EVENT(event) \ - ((event) == LARGE_MSG_ACK) - -typedef struct nd_unexpected_buf { - struct nd_msgheader header; - union received { - struct nd_msg_location locations[]; - char data[]; - } received_buf; -} nd_unexpected_buf; - -typedef struct nd_unexpected_ctx { - struct nd_ep *ep; - struct nd_unexpected_buf *entry; -} nd_unexpected_ctx; - -typedef struct nd_unexpected_entry { - struct dlist_entry ep_list; - struct dlist_entry srx_list; - ND2_RESULT result; - struct nd_unexpected_buf *buf; - struct nd_ep *ep; -} nd_unexpected_entry; - -HRESULT ofi_nd_unexp_init(struct nd_ep *ep); -HRESULT ofi_nd_unexp_fini(struct nd_ep *ep); -void ofi_nd_unexp_event(ND2_RESULT *result); -void ofi_nd_unexp_service_event(ND2_RESULT *result); -void ofi_nd_unexp_match(struct nd_ep *ep); -void ofi_nd_srx_match(struct nd_srx *srx); -HRESULT ofi_nd_unexp_repost(struct nd_ep *ep, struct nd_unexpected_buf *entry); -HRESULT ofi_nd_unexp_run(struct nd_ep *ep); -void ofi_nd_release_unexp_entry(nd_unexpected_entry *unexp); - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif /* 
_FI_NETDIR_UNEXP_H_ */ - diff --git a/prov/netdir/src/netdir_util.h b/prov/netdir/src/netdir_util.h deleted file mode 100644 index a976eae8763..00000000000 --- a/prov/netdir/src/netdir_util.h +++ /dev/null @@ -1,176 +0,0 @@ -/* -* Copyright (c) 2015-2016 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. -* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. 
-*/ - -#ifndef _FI_NETDIR_UTIL_H_ -#define _FI_NETDIR_UTIL_H_ - -#include -#include -#include -#include - -#include "ndspi.h" - -#include "rdma/fabric.h" -#include "ofi_mem.h" - -#include "netdir.h" -#include "netdir_buf.h" -#include "netdir_ov.h" -#include "netdir_log.h" -#include "netdir_iface.h" - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -typedef struct ofi_nd_util_ov { - nd_event_base base; - LONG cnt; -} ofi_nd_util_ov; - -OFI_ND_NB_BUF(ofi_nd_util_ov); - -static HRESULT ofi_nd_util_ov_wait(void *overlapped, ofi_nd_util_ov *ov) -{ - assert(overlapped); - assert(ov); - - HRESULT hr = ((IND2Overlapped*)overlapped)->lpVtbl->GetOverlappedResult(overlapped, &ov->base.ov, TRUE); - if (!InterlockedDecrement(&ov->cnt)) - ov->base.free(&ov->base); - return hr; -} - -static inline HRESULT -ofi_nd_util_register_mr(IND2MemoryRegion *mr, const void *buffer, size_t len, DWORD flags) -{ - HRESULT hr = S_OK; - - ofi_nd_util_ov *ov = ND_BUF_ALLOC(ofi_nd_util_ov); - if (!ov) { - hr = ND_NO_MEMORY; - goto fn_fail; - } - - hr = mr->lpVtbl->Register(mr, buffer, len, flags, &ov->base.ov); - if (FAILED(hr)) - goto fn_fail_ov; - - hr = ofi_nd_util_ov_wait(mr, ov); - if (FAILED(hr)) - goto fn_fail_ov; - - return S_OK; - -fn_fail_ov: - ov->base.free(&ov->base); -fn_fail: - return hr; -} - -static inline HRESULT -ofi_nd_util_unregister_mr(IND2MemoryRegion *mr) -{ - HRESULT hr = S_OK; - - ofi_nd_util_ov *ov = ND_BUF_ALLOC(ofi_nd_util_ov); - if (!ov) { - hr = ND_NO_MEMORY; - goto fn_fail; - } - - hr = mr->lpVtbl->Deregister(mr, &ov->base.ov); - if (FAILED(hr)) - goto fn_fail_ov; - - hr = ofi_nd_util_ov_wait(mr, ov); - if (FAILED(hr)) - goto fn_fail_ov; - - return S_OK; - -fn_fail_ov: - ov->base.free(&ov->base); -fn_fail: - return hr; -} - -static inline int -ofi_nd_util_can_be_inlined(const struct iovec *iov, size_t iovlen) -{ - assert(iov); - - size_t i; - for (i = 0; i < iovlen; i++) - if (iov[i].iov_len > (size_t)gl_data.inline_thr) - return 0; - return 1; -} - -/* 
return 1 if notification should be */ -static inline int -ofi_nd_util_completion_blackmagic(uint64_t info_flags, - uint64_t cq_flags, - uint64_t op_flags) -{ - OFI_UNUSED(info_flags); - if ((op_flags & FI_COMPLETION) || - (op_flags & (FI_INJECT_COMPLETE | - FI_TRANSMIT_COMPLETE | - FI_DELIVERY_COMPLETE))) - return 1; - else if (op_flags & FI_INJECT) - return 0; - else if (!(cq_flags & FI_SELECTIVE_COMPLETION)) - return 1; - else - return 0; -} - -static inline -void ofi_nd_free_send_entry(nd_send_entry *entry) -{ - assert(entry); - - if (entry->sge) - ND_BUF_FREE(nd_sge, entry->sge); - - ND_BUF_FREE(nd_send_entry, entry); -} - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif /* _FI_NETDIR_UTIL_H_ */ - diff --git a/prov/verbs/include/windows/verbs_nd.h b/prov/verbs/include/windows/verbs_nd.h index 27b0919c11d..1f91230a897 100644 --- a/prov/verbs/include/windows/verbs_nd.h +++ b/prov/verbs/include/windows/verbs_nd.h @@ -42,6 +42,12 @@ HRESULT nd_startup(); void nd_shutdown(); +int ofi_nd_is_valid_addr(const SOCKADDR *addr); +int ofi_nd_addr_cmp(const void *vaddr1, const void *vaddr2); +int ofi_nd_is_same_file(const wchar_t *path1, const wchar_t *path2); +int ofi_nd_file_exists(const wchar_t *path); +int ofi_nd_is_directory(const wchar_t *path); + char *ofi_nd_error_str(HRESULT hr); static inline int hresult2fi(HRESULT hr) { diff --git a/prov/netdir/src/netdir_addr.c b/prov/verbs/src/windows/verbs_nd_addr.c similarity index 97% rename from prov/netdir/src/netdir_addr.c rename to prov/verbs/src/windows/verbs_nd_addr.c index 3ae4a2fc673..1ec393580d6 100644 --- a/prov/netdir/src/netdir_addr.c +++ b/prov/verbs/src/windows/verbs_nd_addr.c @@ -30,13 +30,10 @@ * SOFTWARE. 
*/ -#ifdef _WIN32 - #include #include #include -#include "netdir.h" int ofi_nd_is_valid_addr(const SOCKADDR *addr) { @@ -90,6 +87,3 @@ int ofi_nd_addr_cmp(const void* vaddr1, const void* vaddr2) return 0; } - -#endif /* _WIN32 */ - diff --git a/prov/netdir/src/netdir_fs.c b/prov/verbs/src/windows/verbs_nd_fs.c similarity index 98% rename from prov/netdir/src/netdir_fs.c rename to prov/verbs/src/windows/verbs_nd_fs.c index 3dcb56825fb..9bd5feb520d 100644 --- a/prov/netdir/src/netdir_fs.c +++ b/prov/verbs/src/windows/verbs_nd_fs.c @@ -30,12 +30,10 @@ * SOFTWARE. */ -#ifdef _WIN32 - -#include "netdir.h" #include + #define FI_ND_SHARE_ATTR (FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE) static inline DWORD ofi_nd_file_attributes(const wchar_t* path) @@ -110,6 +108,3 @@ const wchar_t *ofi_nd_filename(const wchar_t *path) return &path[i] + 1; return path; } - -#endif /* _WIN32 */ - diff --git a/prov/verbs/src/windows/verbs_nd_init.c b/prov/verbs/src/windows/verbs_nd_init.c index 43a1244e322..22f0dcb70ee 100644 --- a/prov/verbs/src/windows/verbs_nd_init.c +++ b/prov/verbs/src/windows/verbs_nd_init.c @@ -32,15 +32,15 @@ */ #include +#include +#include + +#include #include "ndspi.h" -#include "ws2spi.h" #include "verbs_nd.h" -int ofi_nd_is_valid_addr(const SOCKADDR *addr); -int ofi_nd_addr_cmp(const void *vaddr1, const void *vaddr2); -int ofi_nd_is_same_file(const wchar_t *path1, const wchar_t *path2); -int ofi_nd_file_exists(const wchar_t *path); -int ofi_nd_is_directory(const wchar_t *path); +#include "netdir.h" +#include "netdir_log.h" /* Adapters must be sorted by nd_adapter::address. 
*/ static size_t ofi_nd_remove_dups(struct nd_adapter *adapters, size_t num) diff --git a/prov/netdir/src/netdir_ndinit.c b/prov/verbs/src/windows/verbs_nd_ndinit.c similarity index 99% rename from prov/netdir/src/netdir_ndinit.c rename to prov/verbs/src/windows/verbs_nd_ndinit.c index 945a50d735e..1ab13d3be0f 100644 --- a/prov/netdir/src/netdir_ndinit.c +++ b/prov/verbs/src/windows/verbs_nd_ndinit.c @@ -30,8 +30,6 @@ * SOFTWARE. */ -#ifdef _WIN32 - #include #include @@ -689,6 +687,3 @@ int ofi_nd_lookup_adapter(const char *name, IND2Adapter **adapter, struct sockad return -FI_EINVAL; } - -#endif /* _WIN32 */ - diff --git a/src/fabric.c b/src/fabric.c index 1706aa5b037..ecdde7ac22f 100644 --- a/src/fabric.c +++ b/src/fabric.c @@ -889,7 +889,6 @@ void fi_ini(void) ofi_register_provider(PSM3_INIT, NULL); ofi_register_provider(PSM2_INIT, NULL); - ofi_register_provider(NETDIR_INIT, NULL); ofi_register_provider(SHM_INIT, NULL); ofi_register_provider(SM2_INIT, NULL); From 692339f6a02f568d90f3f372c438775974cbc298 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 18 Sep 2023 18:59:09 -0700 Subject: [PATCH 06/34] prov/sockets: Remove provider Sockets provider is only supported in the v1.x series Signed-off-by: Sean Hefty --- Makefile.am | 1 - configure.ac | 1 - contrib/intel/jenkins/Jenkinsfile | 31 +- contrib/intel/jenkins/common.py | 2 - contrib/intel/jenkins/runtests.py | 2 +- contrib/intel/jenkins/summary.py | 8 +- contrib/intel/jenkins/tests.py | 22 +- docs/windows.txt | 1 - fabtests/Makefile.am | 7 +- fabtests/common/shared.c | 4 +- fabtests/fabtests.vcxproj | 1 - fabtests/fabtests.vcxproj.filters | 3 - fabtests/man/fabtests.7.md | 2 +- fabtests/scripts/runfabtests.cmd | 4 +- fabtests/scripts/runfabtests.sh | 4 +- fabtests/test_configs/sockets/all.test | 91 - fabtests/test_configs/sockets/complete.test | 476 --- fabtests/test_configs/sockets/quick.test | 228 -- fabtests/test_configs/sockets/sockets.exclude | 7 - fabtests/test_configs/sockets/verify.test | 258 -- 
fabtests/unit/common.c | 2 +- include/ofi_prov.h | 11 - libfabric.vcxproj | 236 +- libfabric.vcxproj.filters | 84 - man/fabric.7.md | 4 +- man/fi_pingpong.1.md | 6 +- man/fi_sockets.7.md | 114 - man/man7/fi_sockets.7 | 133 - prov/sockets/Makefile.include | 51 - prov/sockets/configure.m4 | 41 - prov/sockets/include/rdma/fi_direct.h | 37 - prov/sockets/include/rdma/fi_direct_atomic.h | 33 - .../include/rdma/fi_direct_atomic_def.h | 33 - prov/sockets/include/rdma/fi_direct_cm.h | 33 - prov/sockets/include/rdma/fi_direct_domain.h | 33 - .../sockets/include/rdma/fi_direct_endpoint.h | 33 - prov/sockets/include/rdma/fi_direct_eq.h | 33 - prov/sockets/include/rdma/fi_direct_rma.h | 33 - prov/sockets/include/rdma/fi_direct_tagged.h | 33 - prov/sockets/include/rdma/fi_direct_trigger.h | 33 - prov/sockets/include/sock.h | 1228 ------- prov/sockets/include/sock_util.h | 92 - prov/sockets/libfabric-sockets.spec.in | 52 - prov/sockets/provider_FABRIC_1.0.map | 1 - prov/sockets/src/sock_atomic.c | 587 ---- prov/sockets/src/sock_attr.c | 252 -- prov/sockets/src/sock_av.c | 680 ---- prov/sockets/src/sock_cntr.c | 603 ---- prov/sockets/src/sock_comm.c | 231 -- prov/sockets/src/sock_conn.c | 645 ---- prov/sockets/src/sock_cq.c | 769 ----- prov/sockets/src/sock_ctx.c | 228 -- prov/sockets/src/sock_dom.c | 217 -- prov/sockets/src/sock_ep.c | 1887 ----------- prov/sockets/src/sock_ep_dgram.c | 86 - prov/sockets/src/sock_ep_msg.c | 1304 -------- prov/sockets/src/sock_ep_rdm.c | 87 - prov/sockets/src/sock_eq.c | 447 --- prov/sockets/src/sock_fabric.c | 398 --- prov/sockets/src/sock_mr.c | 225 -- prov/sockets/src/sock_msg.c | 760 ----- prov/sockets/src/sock_poll.c | 240 -- prov/sockets/src/sock_progress.c | 2826 ----------------- prov/sockets/src/sock_rma.c | 502 --- prov/sockets/src/sock_rx_entry.c | 183 -- prov/sockets/src/sock_trigger.c | 348 -- prov/sockets/src/sock_wait.c | 321 -- src/fabric.c | 6 +- util/pingpong.c | 2 +- 69 files changed, 39 insertions(+), 17337 deletions(-) delete 
mode 100644 fabtests/test_configs/sockets/all.test delete mode 100644 fabtests/test_configs/sockets/complete.test delete mode 100644 fabtests/test_configs/sockets/quick.test delete mode 100644 fabtests/test_configs/sockets/sockets.exclude delete mode 100644 fabtests/test_configs/sockets/verify.test delete mode 100644 man/fi_sockets.7.md delete mode 100644 man/man7/fi_sockets.7 delete mode 100644 prov/sockets/Makefile.include delete mode 100644 prov/sockets/configure.m4 delete mode 100644 prov/sockets/include/rdma/fi_direct.h delete mode 100644 prov/sockets/include/rdma/fi_direct_atomic.h delete mode 100644 prov/sockets/include/rdma/fi_direct_atomic_def.h delete mode 100644 prov/sockets/include/rdma/fi_direct_cm.h delete mode 100644 prov/sockets/include/rdma/fi_direct_domain.h delete mode 100644 prov/sockets/include/rdma/fi_direct_endpoint.h delete mode 100644 prov/sockets/include/rdma/fi_direct_eq.h delete mode 100644 prov/sockets/include/rdma/fi_direct_rma.h delete mode 100644 prov/sockets/include/rdma/fi_direct_tagged.h delete mode 100644 prov/sockets/include/rdma/fi_direct_trigger.h delete mode 100644 prov/sockets/include/sock.h delete mode 100644 prov/sockets/include/sock_util.h delete mode 100644 prov/sockets/libfabric-sockets.spec.in delete mode 100644 prov/sockets/provider_FABRIC_1.0.map delete mode 100644 prov/sockets/src/sock_atomic.c delete mode 100644 prov/sockets/src/sock_attr.c delete mode 100644 prov/sockets/src/sock_av.c delete mode 100644 prov/sockets/src/sock_cntr.c delete mode 100644 prov/sockets/src/sock_comm.c delete mode 100644 prov/sockets/src/sock_conn.c delete mode 100644 prov/sockets/src/sock_cq.c delete mode 100644 prov/sockets/src/sock_ctx.c delete mode 100644 prov/sockets/src/sock_dom.c delete mode 100644 prov/sockets/src/sock_ep.c delete mode 100644 prov/sockets/src/sock_ep_dgram.c delete mode 100644 prov/sockets/src/sock_ep_msg.c delete mode 100644 prov/sockets/src/sock_ep_rdm.c delete mode 100644 prov/sockets/src/sock_eq.c delete mode 
100644 prov/sockets/src/sock_fabric.c delete mode 100644 prov/sockets/src/sock_mr.c delete mode 100644 prov/sockets/src/sock_msg.c delete mode 100644 prov/sockets/src/sock_poll.c delete mode 100644 prov/sockets/src/sock_progress.c delete mode 100644 prov/sockets/src/sock_rma.c delete mode 100644 prov/sockets/src/sock_rx_entry.c delete mode 100644 prov/sockets/src/sock_trigger.c delete mode 100644 prov/sockets/src/sock_wait.c diff --git a/Makefile.am b/Makefile.am index 2696ed9c243..6a7e6771988 100644 --- a/Makefile.am +++ b/Makefile.am @@ -448,7 +448,6 @@ prov_dist_man_pages= prov_extra_dist= EXTRA_DIST= -include prov/sockets/Makefile.include include prov/udp/Makefile.include include prov/verbs/Makefile.include include prov/efa/Makefile.include diff --git a/configure.ac b/configure.ac index 06ec6ea489f..c63964a2a5d 100644 --- a/configure.ac +++ b/configure.ac @@ -947,7 +947,6 @@ dnl Provider-specific checks FI_PROVIDER_INIT FI_PROVIDER_SETUP([psm2]) FI_PROVIDER_SETUP([psm3]) -FI_PROVIDER_SETUP([sockets]) FI_PROVIDER_SETUP([verbs]) FI_PROVIDER_SETUP([efa]) FI_PROVIDER_SETUP([udp]) diff --git a/contrib/intel/jenkins/Jenkinsfile b/contrib/intel/jenkins/Jenkinsfile index a6560da4438..703a4ed08fa 100644 --- a/contrib/intel/jenkins/Jenkinsfile +++ b/contrib/intel/jenkins/Jenkinsfile @@ -17,7 +17,7 @@ def run_python(version, command, output=null) { } def slurm_batch(partition, node_num, output, command) { - + try { sh """timeout $TIMEOUT sbatch --partition=${partition} -N ${node_num} \ --wait -o ${output} --open-mode=append --wrap=\'env; ${command}\' @@ -66,7 +66,7 @@ def run_middleware(providers, stage_name, test, partition, node_num, mpi=null, if (env.WEEKLY.toBoolean()) base_cmd = "${base_cmd} --weekly=${env.WEEKLY}" - + for (prov in providers) { if (prov[1]) { echo "Running ${prov[0]}-${prov[1]} ${stage_name}" @@ -295,8 +295,8 @@ pipeline { weekly = env.WEEKLY.toBoolean() } if (weekly) { - TIMEOUT="21600" - } + TIMEOUT="21600" + } skip = skip() RELEASE = release() if 
(skip && !weekly) { @@ -308,7 +308,7 @@ pipeline { stage ('prepare build') { when { equals expected: true, actual: DO_RUN } steps { - script { + script { echo "Copying build dirs." build("builddir") echo "Copying log dirs." @@ -342,15 +342,10 @@ pipeline { checkout scm echo "Building Libfabric reg" slurm_batch("squirtle,totodile", "1", - "${env.LOG_DIR}/libfabric_mpich_log", + "${env.LOG_DIR}/libfabric_mpich_log", """python$PYTHON_VERSION ${RUN_LOCATION}/build.py \ --build_item=libfabric_mpich """ ) - slurm_batch("squirtle,totodile", "1", - "${env.LOG_DIR}/build_mpich_log", - """python$PYTHON_VERSION ${RUN_LOCATION}/build.py \ - --build_item=mpich """ - ) } } } @@ -521,15 +516,6 @@ pipeline { } } } - stage('sockets') { - steps { - script { - dir (RUN_LOCATION) { - run_fabtests("sockets", "bulbasaur", "2", "sockets") - } - } - } - } stage('ucx') { steps { script { @@ -570,8 +556,7 @@ pipeline { steps { script { dir (RUN_LOCATION) { - run_middleware([["verbs", null], ["tcp", null], - ["sockets", null]], "SHMEM", "shmem", + run_middleware([["verbs", null], ["tcp", null]], "SHMEM", "shmem", "squirtle,totodile", "2") } } @@ -615,7 +600,7 @@ pipeline { dir (RUN_LOCATION) { run_middleware([["verbs", "rxm"]], "oneCCL-GPU-v3", "onecclgpu", "fabrics-ci", "2") - } + } } } } diff --git a/contrib/intel/jenkins/common.py b/contrib/intel/jenkins/common.py index ff3bd9e11c4..be9dc1cc9e3 100755 --- a/contrib/intel/jenkins/common.py +++ b/contrib/intel/jenkins/common.py @@ -102,7 +102,6 @@ def run(self): Prov('verbs', None), Prov('verbs', 'rxd'), Prov('verbs', 'rxm'), - Prov('sockets', None), Prov('tcp', None), Prov('udp', None), Prov('udp', 'rxd'), @@ -112,7 +111,6 @@ def run(self): default_prov_list = [ 'verbs', 'tcp', - 'sockets', 'udp', 'shm', 'psm3' diff --git a/contrib/intel/jenkins/runtests.py b/contrib/intel/jenkins/runtests.py index 6a9aec18c4a..b8b5a4454b3 100755 --- a/contrib/intel/jenkins/runtests.py +++ b/contrib/intel/jenkins/runtests.py @@ -17,7 +17,7 @@ def 
__call__(self, parser, namespace, values, option_string=None): parser = argparse.ArgumentParser() parser.add_argument('--prov', help="core provider", choices=['verbs', \ - 'tcp', 'udp', 'sockets', 'shm', 'psm3', 'ucx']) + 'tcp', 'udp', 'shm', 'psm3', 'ucx']) parser.add_argument('--util', help="utility provider", choices=['rxd', 'rxm']) parser.add_argument('--ofi_build_mode', help="specify the build configuration",\ choices = ['reg', 'dbg', 'dl'], default='reg') diff --git a/contrib/intel/jenkins/summary.py b/contrib/intel/jenkins/summary.py index 74f9ca0d6c9..9ea8bac7283 100755 --- a/contrib/intel/jenkins/summary.py +++ b/contrib/intel/jenkins/summary.py @@ -537,7 +537,7 @@ def __init__(self, logger, log_dir, prov, mpi, file_name, stage_name): self.mpi = mpi self.run = 'mpiexec' - + def read_file(self): with open(self.file_path,'r') as log_file: super().fast_forward(log_file) @@ -551,7 +551,7 @@ def check_exclude(self, line): self.excluded_tests.append(test) def check_name(self, line): - if (line.startswith('ok') or + if (line.startswith('ok') or line.startswith('not ok')): self.name = line.split('-')[1].split('#')[0].strip() @@ -696,7 +696,7 @@ def check_fail(self, line): if 'cancel' in elem: self.error += total self.errored_tests.append(f'cancel: {self.test_name}') - + def check_exclude(self, line): res_list = line.lstrip("results :").rstrip().split('|') for elem in res_list: @@ -867,7 +867,7 @@ def summarize_items(summary_item, logger, log_dir, mode): err += ret if ret else 0 if summary_item == 'shmem' or summary_item == 'all': - for prov in ['tcp', 'verbs', 'sockets']: + for prov in ['tcp', 'verbs']: ret= ShmemSummarizer( logger, log_dir, prov, f'SHMEM_{prov}_shmem_{mode}', diff --git a/contrib/intel/jenkins/tests.py b/contrib/intel/jenkins/tests.py index c7ea585c97f..992bd40d54b 100755 --- a/contrib/intel/jenkins/tests.py +++ b/contrib/intel/jenkins/tests.py @@ -152,14 +152,6 @@ def options(self): else: opts += "-t all " - if (self.core_prov == 'sockets' and 
self.ofi_build_mode == 'reg'): - complex_test_file = f'{self.libfab_installpath}/share/fabtests/'\ - f'test_configs/{self.core_prov}/quick.test' - if (os.path.isfile(complex_test_file)): - opts += "-u {complex_test_file} " - else: - print(f"{self.core_prov} Complex test file not found") - if (self.ofi_build_mode != 'reg' or self.core_prov == 'udp'): opts += "-e \'ubertest,multinode\' " @@ -527,7 +519,7 @@ def env(self): cmd += f"export FI_PROVIDER={self.core_prov}\\;{self.util_prov}; " else: cmd += f"export FI_PROVIDER={self.core_prov}; " - if (self.core_prov == 'tcp'): + if (self.core_prov == 'tcp'): cmd += "export FI_IFACE=eth0; " elif (self.core_prov == 'verbs'): cmd += "export FI_IFACE=ib0; " @@ -771,7 +763,7 @@ def execute_cmd(self): if (self.weekly): print(f'Weekly {self.mpi_type} mpichsuite tests') os.chdir(self.mpichsuitepath) - common.run_command(shlex.split(self.mpi.env + + common.run_command(shlex.split(self.mpi.env + configure_cmd + '\'')) self.exclude_tests(self.mpichsuitepath, self.core_prov) testcmd = 'make testing' @@ -783,7 +775,7 @@ def execute_cmd(self): else: print(f"PR {self.mpi_type} mpichsuite tests") os.chdir(self.mpichsuitepath) - common.run_command(shlex.split(self.mpi.env + + common.run_command(shlex.split(self.mpi.env + configure_cmd + '\'')) common.run_command(['make', '-j']) self.exclude_tests(self.mpichsuitepath, self.core_prov) @@ -998,7 +990,7 @@ def __init__(self, jobname, buildno, testname, core_prov, fabric, print(core_prov) self.daos_nodes = cloudbees_config.prov_node_map[core_prov] print(self.daos_nodes) - self.launch_node = self.daos_nodes[0] + self.launch_node = self.daos_nodes[0] self.cart_tests = { 'corpc_one_node' : {'tags' :'cart,corpc,one_node', 'numservers':1, 'numclients':0}, @@ -1032,12 +1024,12 @@ def set_paths(self, core_prov): def cmd(self): return f"env; echo {common.cloudbees_log_start_string}; "\ "python3.6 launch.py " - + def remote_launch_cmd(self, testname): # The following env variables must be set 
appropriately prior -# to running the daos/cart tests OFI_DOMAIN, OFI_INTERFACE, -# CRT_PHY_ADDR_STR, PATH, DAOS_TEST_SHARED_DIR DAOS_TEST_LOG_DIR, +# to running the daos/cart tests OFI_DOMAIN, OFI_INTERFACE, +# CRT_PHY_ADDR_STR, PATH, DAOS_TEST_SHARED_DIR DAOS_TEST_LOG_DIR, # LD_LIBRARY_PATH in the script being sourced below. launch_cmd = f"ssh {self.launch_node} \"source {self.ci_middlewares_path}/daos_ci_env_setup.sh && \ cd {self.cart_test_scripts} &&\" " diff --git a/docs/windows.txt b/docs/windows.txt index e49a8f09dcf..cad6e42d705 100644 --- a/docs/windows.txt +++ b/docs/windows.txt @@ -9,7 +9,6 @@ Dependencies: Limitations: - Supported only 64-bit achitectures - Supported Windows Vista or newer Windows version -- Supported 'sockets' provider only Build: - Open libfabric.sln file using Visual Studio diff --git a/fabtests/Makefile.am b/fabtests/Makefile.am index 025cabd8d7a..cee15821b19 100644 --- a/fabtests/Makefile.am +++ b/fabtests/Makefile.am @@ -97,11 +97,6 @@ nobase_dist_config_DATA = \ test_configs/lat_bw.test \ test_configs/tcp/all.test \ test_configs/tcp/tcp.exclude \ - test_configs/sockets/all.test \ - test_configs/sockets/quick.test \ - test_configs/sockets/complete.test \ - test_configs/sockets/verify.test \ - test_configs/sockets/sockets.exclude \ test_configs/udp/all.test \ test_configs/udp/lat_bw.test \ test_configs/udp/quick.test \ @@ -664,5 +659,5 @@ dist-hook: fabtests.spec cp fabtests.spec $(distdir) test: - ./scripts/runfabtests.sh -vvv -S $(os_excludes) -f ./test_configs/sockets/sockets.exclude sockets + ./scripts/runfabtests.sh -vvv -S $(os_excludes) -f ./test_configs/tcp/tcp.exclude tcp ./scripts/runfabtests.sh -vvv -S $(os_excludes) -f ./test_configs/udp/udp.exclude udp diff --git a/fabtests/common/shared.c b/fabtests/common/shared.c index d0631a1a911..e3e6065df18 100644 --- a/fabtests/common/shared.c +++ b/fabtests/common/shared.c @@ -3079,7 +3079,7 @@ void ft_usage(char *name, char *desc) ft_addr_usage(); FT_PRINT_OPTS_USAGE("-f ", 
"fabric name"); FT_PRINT_OPTS_USAGE("-d ", "domain name"); - FT_PRINT_OPTS_USAGE("-p ", "specific provider name eg sockets, verbs"); + FT_PRINT_OPTS_USAGE("-p ", "specific provider name eg tcp, verbs"); FT_PRINT_OPTS_USAGE("-e ", "Endpoint type: msg|rdm|dgram (default:rdm)"); FT_PRINT_OPTS_USAGE("", "Only the following tests support this option for now:"); FT_PRINT_OPTS_USAGE("", "fi_rma_bw"); @@ -3130,7 +3130,7 @@ void ft_mcusage(char *name, char *desc) ft_addr_usage(); FT_PRINT_OPTS_USAGE("-f ", "fabric name"); FT_PRINT_OPTS_USAGE("-d ", "domain name"); - FT_PRINT_OPTS_USAGE("-p ", "specific provider name eg sockets, verbs"); + FT_PRINT_OPTS_USAGE("-p ", "specific provider name eg tcp, verbs"); ft_hmem_usage(); FT_PRINT_OPTS_USAGE("-h", "display this help output"); diff --git a/fabtests/fabtests.vcxproj b/fabtests/fabtests.vcxproj index cdc5bd9e130..6f393187490 100644 --- a/fabtests/fabtests.vcxproj +++ b/fabtests/fabtests.vcxproj @@ -276,7 +276,6 @@ - diff --git a/fabtests/fabtests.vcxproj.filters b/fabtests/fabtests.vcxproj.filters index 7ae24f5b62c..e6fc8842b58 100644 --- a/fabtests/fabtests.vcxproj.filters +++ b/fabtests/fabtests.vcxproj.filters @@ -329,9 +329,6 @@ Source Files\test_configs - - Source Files\test_configs - Source Files\test_configs diff --git a/fabtests/man/fabtests.7.md b/fabtests/man/fabtests.7.md index 5b3754aaf65..e588fb11773 100644 --- a/fabtests/man/fabtests.7.md +++ b/fabtests/man/fabtests.7.md @@ -593,7 +593,7 @@ in fabtests and reports the number of pass/fail/notrun. By default if none of the options are provided, it runs all the tests using - - sockets provider + - tcp provider - 127.0.0.1 as both server and client address - for small number of optiond and iterations diff --git a/fabtests/scripts/runfabtests.cmd b/fabtests/scripts/runfabtests.cmd index dd94fabd4d5..0fba978875f 100644 --- a/fabtests/scripts/runfabtests.cmd +++ b/fabtests/scripts/runfabtests.cmd @@ -621,7 +621,7 @@ goto :global_main echo. 
runfabtests.cmd [OPTIONS] [provider] [host] [client] 1>&2 echo. 1>&2 echo.Run fabtests using provider between host and client (default 1>&2 - echo.'sockets' provider in loopback-mode). Report pass/fail/notrun status. 1>&2 + echo.'tcp' provider in loopback-mode). Report pass/fail/notrun status. 1>&2 echo. 1>&2 echo.Options... 1>&2 echo. -g good IP address from [host]'s perspective (default %GOOD_ADDR%) 1>&2 @@ -737,7 +737,7 @@ goto :global_main if "%GOOD_ADDR%" == "" set GOOD_ADDR=%S_INTERFACE% if "%PROV%" == "" ( - set PROV=sockets + set PROV=tcp call :main "%TEST_TYPE%" ) else ( call :main "%TEST_TYPE%" diff --git a/fabtests/scripts/runfabtests.sh b/fabtests/scripts/runfabtests.sh index 3c8b54a3191..3fbf846631f 100755 --- a/fabtests/scripts/runfabtests.sh +++ b/fabtests/scripts/runfabtests.sh @@ -42,7 +42,7 @@ trap cleanup_and_exit SIGINT # -# Default behavior with no args will use sockets provider with loopback +# Default behavior with no args will use tcp provider with loopback # declare BIN_PATH declare PROV="" @@ -871,7 +871,7 @@ function usage { errcho " $0 [OPTIONS] [provider] [host] [client]" errcho errcho "Run fabtests using provider between host and client (default" - errcho "'sockets' provider in loopback-mode). Report pass/fail/notrun status." + errcho "'tcp' provider in loopback-mode). Report pass/fail/notrun status." 
errcho errcho "Options:" errcho -e " -g\tgood IP address from 's perspective (default $GOOD_ADDR)" diff --git a/fabtests/test_configs/sockets/all.test b/fabtests/test_configs/sockets/all.test deleted file mode 100644 index f7c27f710d7..00000000000 --- a/fabtests/test_configs/sockets/all.test +++ /dev/null @@ -1,91 +0,0 @@ -#: "Suite of tests for the sockets provider" -{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - FT_TEST_BANDWIDTH, - ], - class_function: [ - FT_FUNC_SEND, - FT_FUNC_SENDV, - FT_FUNC_SENDMSG, - FT_FUNC_INJECT, - FT_FUNC_INJECTDATA, - ], - ep_type: [ - FI_EP_MSG, - FI_EP_RDM, - ], - av_type: [ - FI_AV_TABLE, - FI_AV_MAP, - ], - comp_type: [ - FT_COMP_QUEUE, - ], - test_class: [ - FT_CAP_MSG, - FT_CAP_TAGGED, - ], -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - ], - class_function: [ - FT_FUNC_SEND, - ], - ep_type: [ - FI_EP_MSG, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_QUEUE, - ], - eq_wait_obj: [ - FI_WAIT_NONE, - FI_WAIT_UNSPEC, - FI_WAIT_FD, - FI_WAIT_MUTEX_COND, - ], - cq_wait_obj: [ - FI_WAIT_NONE, - ], - test_class: [ - FT_CAP_MSG, - ], -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - FT_TEST_BANDWIDTH, - ], - class_function: [ - FT_FUNC_SEND, - ], - ep_type: [ - FI_EP_MSG, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_QUEUE, - ], - eq_wait_obj: [ - FI_WAIT_NONE, - ], - cq_wait_obj: [ - FI_WAIT_NONE, - FI_WAIT_UNSPEC, - FI_WAIT_FD, - FI_WAIT_MUTEX_COND, - ], - test_class: [ - FT_CAP_MSG, - ], -}, diff --git a/fabtests/test_configs/sockets/complete.test b/fabtests/test_configs/sockets/complete.test deleted file mode 100644 index b6c932daf42..00000000000 --- a/fabtests/test_configs/sockets/complete.test +++ /dev/null @@ -1,476 +0,0 @@ -#: "Suite of tests for the sockets provider" -{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - FT_TEST_BANDWIDTH, - ], - class_function: [ - FT_FUNC_SEND, - FT_FUNC_SENDV, - FT_FUNC_SENDMSG, - FT_FUNC_SENDDATA, - 
FT_FUNC_INJECT, - FT_FUNC_INJECTDATA, - ], - ep_type: [ - FI_EP_MSG, - FI_EP_RDM, - ], - av_type: [ - FI_AV_TABLE, - FI_AV_MAP, - ], - comp_type: [ - FT_COMP_QUEUE, - FT_COMP_CNTR, - ], - test_class: [ - FT_CAP_MSG, - FT_CAP_TAGGED, - ], - test_flags: FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - ], - class_function: [ - FT_FUNC_SEND, - ], - ep_type: [ - FI_EP_MSG, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_QUEUE, - FT_COMP_CNTR, - ], - eq_wait_obj: [ - FI_WAIT_NONE, - FI_WAIT_UNSPEC, - FI_WAIT_FD, - FI_WAIT_MUTEX_COND, - ], - test_class: [ - FT_CAP_MSG, - ], - test_flags: FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - FT_TEST_BANDWIDTH, - ], - class_function: [ - FT_FUNC_SEND, - ], - ep_type: [ - FI_EP_MSG, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_QUEUE, - ], - eq_wait_obj: [ - FI_WAIT_NONE, - ], - cq_wait_obj: [ - FI_WAIT_NONE, - FI_WAIT_UNSPEC, - FI_WAIT_FD, - FI_WAIT_MUTEX_COND, - ], - test_class: [ - FT_CAP_MSG, - ], - test_flags: FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - FT_TEST_BANDWIDTH, - ], - class_function: [ - FT_FUNC_SEND, - ], - ep_type: [ - FI_EP_MSG, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_CNTR, - ], - eq_wait_obj: [ - FI_WAIT_NONE, - ], - cntr_wait_obj: [ - FI_WAIT_NONE, - FI_WAIT_UNSPEC, - FI_WAIT_FD, - FI_WAIT_MUTEX_COND, - ], - test_class: [ - FT_CAP_MSG, - ], - test_flags: FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - FT_TEST_BANDWIDTH, - ], - class_function: [ - FT_FUNC_WRITE, - FT_FUNC_WRITEV, - FT_FUNC_WRITEMSG, - FT_FUNC_INJECT_WRITE, - FT_FUNC_WRITEDATA, - FT_FUNC_INJECT_WRITEDATA, - FT_FUNC_READ, - FT_FUNC_READV, - FT_FUNC_READMSG, - ], - ep_type: [ - FI_EP_MSG, - FI_EP_RDM, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_QUEUE, - FT_COMP_CNTR, - ], - test_class: [ - FT_CAP_RMA, - ], - test_flags: 
FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - FT_TEST_BANDWIDTH, - ], - class_function: [ - FT_FUNC_ATOMIC, - FT_FUNC_ATOMICV, - FT_FUNC_ATOMICMSG, - FT_FUNC_FETCH_ATOMIC, - FT_FUNC_FETCH_ATOMICV, - FT_FUNC_FETCH_ATOMICMSG, - FT_FUNC_INJECT_ATOMIC, - ], - op:[ - FI_MIN, - FI_MAX, - FI_SUM, - FI_PROD, - FI_LOR, - FI_LAND, - FI_BOR, - FI_BAND, - FI_LXOR, - FI_BXOR, - FI_ATOMIC_WRITE, - ], - datatype:[ - FI_INT8, - FI_UINT8, - FI_INT16, - FI_UINT16, - FI_INT32, - FI_UINT32, - FI_INT64, - FI_UINT64, - FI_FLOAT, - FI_DOUBLE, - FI_LONG_DOUBLE, - FI_FLOAT_COMPLEX, - FI_DOUBLE_COMPLEX, - FI_LONG_DOUBLE_COMPLEX, - ], - ep_type: [ - FI_EP_MSG, - FI_EP_RDM, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_QUEUE, - FT_COMP_CNTR, - ], - test_class: [ - FT_CAP_ATOMIC, - ], - test_flags: FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - FT_TEST_BANDWIDTH, - ], - class_function: [ - FT_FUNC_FETCH_ATOMIC, - FT_FUNC_FETCH_ATOMICV, - FT_FUNC_FETCH_ATOMICMSG, - ], - op:[ - FI_ATOMIC_READ, - ], - datatype:[ - FI_INT8, - FI_UINT8, - FI_INT16, - FI_UINT16, - FI_INT32, - FI_UINT32, - FI_INT64, - FI_UINT64, - FI_FLOAT, - FI_DOUBLE, - FI_LONG_DOUBLE, - FI_FLOAT_COMPLEX, - FI_DOUBLE_COMPLEX, - FI_LONG_DOUBLE_COMPLEX, - ], - ep_type: [ - FI_EP_MSG, - FI_EP_RDM, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_QUEUE, - FT_COMP_CNTR, - ], - test_class: [ - FT_CAP_ATOMIC, - ], - test_flags: FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - FT_TEST_BANDWIDTH, - ], - class_function: [ - FT_FUNC_COMPARE_ATOMIC, - FT_FUNC_COMPARE_ATOMICV, - FT_FUNC_COMPARE_ATOMICMSG, - ], - op:[ - FI_CSWAP, - FI_CSWAP_NE, - FI_CSWAP_LE, - FI_CSWAP_LT, - FI_CSWAP_GE, - FI_CSWAP_GT, - FI_MSWAP, - ], - datatype:[ - FI_INT8, - FI_UINT8, - FI_INT16, - FI_UINT16, - FI_INT32, - FI_UINT32, - FI_INT64, - FI_UINT64, - FI_FLOAT, - FI_DOUBLE, - FI_LONG_DOUBLE, - FI_FLOAT_COMPLEX, - 
FI_DOUBLE_COMPLEX, - FI_LONG_DOUBLE_COMPLEX, - ], - ep_type: [ - FI_EP_MSG, - FI_EP_RDM, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_QUEUE, - FT_COMP_CNTR, - ], - test_class: [ - FT_CAP_ATOMIC, - ], - test_flags: FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - FT_TEST_BANDWIDTH, - ], - class_function: [ - FT_FUNC_SENDMSG, - ], - ep_type: [ - FI_EP_RDM, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_CNTR, - ], - test_class: [ - FT_CAP_MSG, - FT_CAP_TAGGED, - ], - rx_cq_bind_flags: [ - FI_SELECTIVE_COMPLETION, - ], - rx_op_flags: [ - FI_COMPLETION, - ], - msg_flags: [ - FI_COMPLETION, - ], - test_flags: FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - FT_TEST_BANDWIDTH, - ], - class_function: [ - FT_FUNC_SENDMSG, - ], - ep_type: [ - FI_EP_RDM, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_CNTR, - ], - test_class: [ - FT_CAP_MSG, - FT_CAP_TAGGED, - ], - rx_cq_bind_flags: [ - FI_SELECTIVE_COMPLETION, - ], - msg_flags: [ - FI_COMPLETION, - ], - test_flags: FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - FT_TEST_BANDWIDTH, - ], - class_function: [ - FT_FUNC_SEND, - FT_FUNC_SENDV, - FT_FUNC_SENDDATA, - FT_FUNC_INJECT, - FT_FUNC_INJECTDATA, - ], - ep_type: [ - FI_EP_RDM, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_CNTR, - ], - test_class: [ - FT_CAP_MSG, - FT_CAP_TAGGED, - ], - rx_cq_bind_flags: [ - FI_SELECTIVE_COMPLETION, - ], - rx_op_flags: [ - FI_COMPLETION, - ], - test_flags: FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - FT_TEST_BANDWIDTH, - ], - class_function: [ - FT_FUNC_SENDMSG, - ], - ep_type: [ - FI_EP_RDM, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_QUEUE, - FT_COMP_CNTR, - ], - test_class: [ - FT_CAP_MSG, - FT_CAP_TAGGED, - ], - tx_cq_bind_flags: [ - FI_SELECTIVE_COMPLETION, - ], - msg_flags: [ - FI_COMPLETION, - ], - test_flags: 
FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - FT_TEST_BANDWIDTH, - ], - class_function: [ - FT_FUNC_SENDMSG, - ], - ep_type: [ - FI_EP_RDM, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_QUEUE, - FT_COMP_CNTR, - ], - test_class: [ - FT_CAP_MSG, - FT_CAP_TAGGED, - ], - tx_cq_bind_flags: [ - FI_SELECTIVE_COMPLETION, - ], - tx_op_flags: [ - FI_COMPLETION, - ], - test_flags: FT_FLAG_QUICKTEST -}, diff --git a/fabtests/test_configs/sockets/quick.test b/fabtests/test_configs/sockets/quick.test deleted file mode 100644 index b929c5d7e91..00000000000 --- a/fabtests/test_configs/sockets/quick.test +++ /dev/null @@ -1,228 +0,0 @@ -#: "Suite of tests for the sockets provider" -{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - FT_TEST_BANDWIDTH, - ], - class_function: [ - FT_FUNC_SEND, - FT_FUNC_SENDV, - FT_FUNC_SENDMSG, - FT_FUNC_SENDDATA, - FT_FUNC_INJECT, - FT_FUNC_INJECTDATA, - ], - ep_type: [ - FI_EP_MSG, - FI_EP_RDM, - ], - av_type: [ - FI_AV_TABLE, - FI_AV_MAP, - ], - comp_type: [ - FT_COMP_QUEUE, - ], - test_class: [ - FT_CAP_MSG, - FT_CAP_TAGGED, - ], - test_flags: FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - ], - class_function: [ - FT_FUNC_SEND, - ], - ep_type: [ - FI_EP_MSG, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_QUEUE, - ], - eq_wait_obj: [ - FI_WAIT_NONE, - FI_WAIT_UNSPEC, - FI_WAIT_FD, - FI_WAIT_MUTEX_COND, - ], - cq_wait_obj: [ - FI_WAIT_NONE, - ], - test_class: [ - FT_CAP_MSG, - ], - test_flags: FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - FT_TEST_BANDWIDTH, - ], - class_function: [ - FT_FUNC_SEND, - ], - ep_type: [ - FI_EP_MSG, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_QUEUE, - ], - eq_wait_obj: [ - FI_WAIT_NONE, - ], - cq_wait_obj: [ - FI_WAIT_NONE, - FI_WAIT_UNSPEC, - FI_WAIT_FD, - FI_WAIT_MUTEX_COND, - ], - test_class: [ - FT_CAP_MSG, - ], - test_flags: FT_FLAG_QUICKTEST -}, 
-{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - FT_TEST_BANDWIDTH, - ], - class_function: [ - FT_FUNC_WRITE, - FT_FUNC_WRITEV, - FT_FUNC_WRITEMSG, - FT_FUNC_INJECT_WRITE, - FT_FUNC_WRITEDATA, - FT_FUNC_INJECT_WRITEDATA, - FT_FUNC_READ, - FT_FUNC_READV, - FT_FUNC_READMSG, - ], - ep_type: [ - FI_EP_MSG, - FI_EP_RDM, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_QUEUE, - ], - test_class: [ - FT_CAP_RMA, - ], - test_flags: FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - FT_TEST_BANDWIDTH, - ], - class_function: [ - FT_FUNC_ATOMIC, - FT_FUNC_ATOMICV, - FT_FUNC_ATOMICMSG, - FT_FUNC_FETCH_ATOMIC, - FT_FUNC_FETCH_ATOMICV, - FT_FUNC_FETCH_ATOMICMSG, - FT_FUNC_INJECT_ATOMIC, - ], - op:[ - FI_SUM, - FI_PROD, - ], - datatype:[ - FI_INT8, - ], - ep_type: [ - FI_EP_MSG, - FI_EP_RDM, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_QUEUE, - ], - test_class: [ - FT_CAP_ATOMIC, - ], - test_flags: FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - FT_TEST_BANDWIDTH, - ], - class_function: [ - FT_FUNC_FETCH_ATOMIC, - FT_FUNC_FETCH_ATOMICV, - FT_FUNC_FETCH_ATOMICMSG, - ], - op:[ - FI_ATOMIC_READ, - ], - datatype:[ - FI_INT8, - ], - ep_type: [ - FI_EP_MSG, - FI_EP_RDM, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_QUEUE, - ], - test_class: [ - FT_CAP_ATOMIC, - ], - test_flags: FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_LATENCY, - FT_TEST_BANDWIDTH, - ], - class_function: [ - FT_FUNC_COMPARE_ATOMIC, - FT_FUNC_COMPARE_ATOMICV, - FT_FUNC_COMPARE_ATOMICMSG, - ], - op:[ - FI_CSWAP, - ], - datatype:[ - FI_INT8, - ], - ep_type: [ - FI_EP_MSG, - FI_EP_RDM, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_QUEUE, - ], - test_class: [ - FT_CAP_ATOMIC, - ], - test_flags: FT_FLAG_QUICKTEST -}, diff --git a/fabtests/test_configs/sockets/sockets.exclude b/fabtests/test_configs/sockets/sockets.exclude deleted file mode 100644 index 
974c8651d43..00000000000 --- a/fabtests/test_configs/sockets/sockets.exclude +++ /dev/null @@ -1,7 +0,0 @@ -# Regex patterns of tests to exclude in runfabtests.sh - -# Exclude all prefix tests --k --e dgram -dgram -multinode_coll diff --git a/fabtests/test_configs/sockets/verify.test b/fabtests/test_configs/sockets/verify.test deleted file mode 100644 index a284a8c2577..00000000000 --- a/fabtests/test_configs/sockets/verify.test +++ /dev/null @@ -1,258 +0,0 @@ -{ - prov_name: sockets, - test_type: [ - FT_TEST_UNIT, - ], - class_function: [ - FT_FUNC_SEND, - FT_FUNC_SENDV, - FT_FUNC_SENDMSG, - FT_FUNC_SENDDATA, - FT_FUNC_INJECT, - FT_FUNC_INJECTDATA, - ], - ep_type: [ - FI_EP_RDM, - ], - av_type: [ - FI_AV_MAP, - ], - comp_type: [ - FT_COMP_QUEUE, - ], - test_class: [ - FT_CAP_MSG, - FT_CAP_TAGGED, - ], - test_flags: FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_UNIT, - ], - class_function: [ - FT_FUNC_WRITE, - FT_FUNC_WRITEV, - FT_FUNC_WRITEMSG, - FT_FUNC_INJECT_WRITE, - FT_FUNC_WRITEDATA, - FT_FUNC_INJECT_WRITEDATA, - FT_FUNC_READ, - FT_FUNC_READV, - FT_FUNC_READMSG, - ], - ep_type: [ - FI_EP_RDM, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_QUEUE, - ], - test_class: [ - FT_CAP_RMA, - ], - test_flags: FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_UNIT, - ], - class_function: [ - FT_FUNC_ATOMIC, - FT_FUNC_ATOMICV, - FT_FUNC_ATOMICMSG, - FT_FUNC_FETCH_ATOMIC, - FT_FUNC_FETCH_ATOMICV, - FT_FUNC_FETCH_ATOMICMSG, - FT_FUNC_INJECT_ATOMIC, - ], - op:[ - FI_MIN, - FI_MAX, - FI_SUM, - FI_PROD, - FI_LOR, - FI_LAND, - FI_BOR, - FI_BAND, - FI_LXOR, - FI_BXOR, - FI_ATOMIC_WRITE, - ], - datatype:[ - FI_INT8, - FI_UINT8, - FI_INT16, - FI_UINT16, - FI_INT32, - FI_UINT32, - FI_INT64, - FI_UINT64, - FI_FLOAT, - FI_DOUBLE, - FI_LONG_DOUBLE, - FI_FLOAT_COMPLEX, - FI_DOUBLE_COMPLEX, - FI_LONG_DOUBLE_COMPLEX, - ], - ep_type: [ - FI_EP_RDM, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_QUEUE, - ], - 
test_class: [ - FT_CAP_ATOMIC, - ], - test_flags: FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_UNIT, - ], - class_function: [ - FT_FUNC_FETCH_ATOMIC, - FT_FUNC_FETCH_ATOMICV, - FT_FUNC_FETCH_ATOMICMSG, - ], - op:[ - FI_ATOMIC_READ, - ], - datatype:[ - FI_INT8, - FI_UINT8, - FI_INT16, - FI_UINT16, - FI_INT32, - FI_UINT32, - FI_INT64, - FI_UINT64, - FI_FLOAT, - FI_DOUBLE, - FI_LONG_DOUBLE, - FI_FLOAT_COMPLEX, - FI_DOUBLE_COMPLEX, - FI_LONG_DOUBLE_COMPLEX, - ], - ep_type: [ - FI_EP_RDM, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_QUEUE, - ], - test_class: [ - FT_CAP_ATOMIC, - ], - test_flags: FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_UNIT, - ], - class_function: [ - FT_FUNC_COMPARE_ATOMIC, - FT_FUNC_COMPARE_ATOMICV, - FT_FUNC_COMPARE_ATOMICMSG, - ], - op:[ - FI_CSWAP, - FI_CSWAP_NE, - FI_CSWAP_LE, - FI_CSWAP_LT, - FI_CSWAP_GE, - FI_CSWAP_GT, - FI_MSWAP, - ], - datatype:[ - FI_INT8, - FI_UINT8, - FI_INT16, - FI_UINT16, - FI_INT32, - FI_UINT32, - FI_INT64, - FI_UINT64, - FI_FLOAT, - FI_DOUBLE, - FI_LONG_DOUBLE, - FI_FLOAT_COMPLEX, - FI_DOUBLE_COMPLEX, - FI_LONG_DOUBLE_COMPLEX, - ], - ep_type: [ - FI_EP_RDM, - ], - av_type: [ - FI_AV_TABLE, - ], - comp_type: [ - FT_COMP_QUEUE, - ], - test_class: [ - FT_CAP_ATOMIC, - ], - test_flags: FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_UNIT, - ], - class_function: [ - FT_FUNC_SENDMSG, - FT_FUNC_SENDDATA, - FT_FUNC_INJECTDATA, - ], - ep_type: [ - FI_EP_RDM, - ], - av_type: [ - FI_AV_MAP, - ], - comp_type: [ - FT_COMP_QUEUE, - ], - test_class: [ - FT_CAP_MSG, - FT_CAP_TAGGED, - ], - msg_flags: FI_REMOTE_CQ_DATA, - test_flags: FT_FLAG_QUICKTEST -}, -{ - prov_name: sockets, - test_type: [ - FT_TEST_UNIT, - ], - class_function: [ - FT_FUNC_WRITEMSG, - FT_FUNC_WRITEDATA, - FT_FUNC_INJECT_WRITEDATA, - ], - ep_type: [ - FI_EP_RDM, - ], - av_type: [ - FI_AV_MAP, - ], - comp_type: [ - FT_COMP_QUEUE, - ], - test_class: [ - FT_CAP_RMA, - ], 
- msg_flags: FI_REMOTE_CQ_DATA, - test_flags: FT_FLAG_QUICKTEST -}, diff --git a/fabtests/unit/common.c b/fabtests/unit/common.c index 998a7d7b3cc..538f80dce27 100644 --- a/fabtests/unit/common.c +++ b/fabtests/unit/common.c @@ -46,7 +46,7 @@ void ft_unit_usage(char *name, char *desc) fprintf(stderr, "\nOptions:\n"); FT_PRINT_OPTS_USAGE("-f ", "specific fabric to use"); FT_PRINT_OPTS_USAGE("-d ", "domain name"); - FT_PRINT_OPTS_USAGE("-p ", "specific provider name eg sockets, verbs"); + FT_PRINT_OPTS_USAGE("-p ", "specific provider name eg tcp, verbs"); FT_PRINT_OPTS_USAGE("-h", "display this help output"); } diff --git a/include/ofi_prov.h b/include/ofi_prov.h index 506c1fd8f08..3c0337e667e 100644 --- a/include/ofi_prov.h +++ b/include/ofi_prov.h @@ -101,17 +101,6 @@ PSM3_INI ; # define PSM3_INIT NULL #endif -#if (HAVE_SOCKETS) && (HAVE_SOCKETS_DL) -# define SOCKETS_INI FI_EXT_INI -# define SOCKETS_INIT NULL -#elif (HAVE_SOCKETS) -# define SOCKETS_INI INI_SIG(fi_sockets_ini) -# define SOCKETS_INIT fi_sockets_ini() -SOCKETS_INI ; -#else -# define SOCKETS_INIT NULL -#endif - #if (HAVE_UDP) && (HAVE_UDP_DL) # define UDP_INI FI_EXT_INI # define UDP_INIT NULL diff --git a/libfabric.vcxproj b/libfabric.vcxproj index cffccc1383f..bc5ecf7e29b 100644 --- a/libfabric.vcxproj +++ b/libfabric.vcxproj @@ -250,7 +250,7 @@ Disabled WIN32;_WINSOCKAPI_=;_CRT_SECURE_NO_WARNINGS;_WINSOCK_DEPRECATED_NO_WARNINGS;_WINDOWS;_USRDLL;LIBFABRIC_EXPORTS;HAVE_CONFIG_H;ENABLE_DEBUG;%(PreprocessorDefinitions) 4127;4200;4204;4221;4115;4201;4100 - $(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\sockets\include;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\windows; + 
$(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\windows; true MultiThreadedDebug false @@ -356,7 +356,7 @@ true true WIN32;_WINSOCKAPI_=;_CRT_SECURE_NO_WARNINGS;_WINSOCK_DEPRECATED_NO_WARNINGS;_WINDOWS;_USRDLL;LIBFABRIC_EXPORTS;HAVE_CONFIG_H;%(PreprocessorDefinitions) - $(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\sockets\include;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\windows; + $(ProjectDir)include;$(ProjectDir)include\windows;$(ProjectDir)prov\hook\src;$(ProjectDir)prov\hook\include;$(ProjectDir)prov\hook\perf\include;$(ProjectDir)prov\efa\src;$(ProjectDir)prov\efa\src\rxr;$(ProjectDir)prov\efa\src\windows; 4127;4200;4204;4221;4115;4201;4100 true true @@ -465,236 +465,6 @@ - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - 
$(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - 
$(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - - - 
$(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - 
$(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - 
$(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - 
$(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - - - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - $(ProjectDir)prov\sockets\include;%(AdditionalIncludeDirectories) - @@ -973,8 +743,6 @@ - - diff --git a/libfabric.vcxproj.filters b/libfabric.vcxproj.filters index 5d7bd424e2f..c493a603c64 100644 --- a/libfabric.vcxproj.filters +++ b/libfabric.vcxproj.filters @@ -25,9 +25,6 @@ {ec212b69-2bcc-482c-8553-310e20942080} - - 
{87c89076-58b2-45b2-a92f-f83911949b06} - {02f3b800-de85-4324-b5e2-21818e485da4} @@ -46,12 +43,6 @@ {873977f2-5032-4efb-a766-b707ebe2f92e} - - {3458ec4f-718d-4517-8602-ac471fd7628a} - - - {53ba7405-75a4-408e-9ac8-e6802ba33d58} - {e8d08af7-8c3f-4e28-adbf-b6800d101dbf} @@ -258,72 +249,6 @@ Source Files\src\windows - - Source Files\prov\sockets\src - - - Source Files\prov\sockets\src - - - Source Files\prov\sockets\src - - - Source Files\prov\sockets\src - - - Source Files\prov\sockets\src - - - Source Files\prov\sockets\src - - - Source Files\prov\sockets\src - - - Source Files\prov\sockets\src - - - Source Files\prov\sockets\src - - - Source Files\prov\sockets\src - - - Source Files\prov\sockets\src - - - Source Files\prov\sockets\src - - - Source Files\prov\sockets\src - - - Source Files\prov\sockets\src - - - Source Files\prov\sockets\src - - - Source Files\prov\sockets\src - - - Source Files\prov\sockets\src - - - Source Files\prov\sockets\src - - - Source Files\prov\sockets\src - - - Source Files\prov\sockets\src - - - Source Files\prov\sockets\src - - - Source Files\prov\sockets\src - Source Files\prov\util @@ -522,9 +447,6 @@ Source Files\src - - Source Files - Source Files @@ -749,12 +671,6 @@ Header Files\windows\sys - - Source Files\prov\sockets\include - - - Source Files\prov\sockets\include - Header Files diff --git a/man/fabric.7.md b/man/fabric.7.md index 83c7896182a..93344c08da8 100644 --- a/man/fabric.7.md +++ b/man/fabric.7.md @@ -200,8 +200,8 @@ FI_LOG_SUBSYS environment variables. separated fashion. If the list begins with the '^' symbol, then the list will be negated. By default all providers are enabled. 
- Example: To enable logging from the psm3 and sockets provider: - FI_LOG_PROV="psm3,sockets" + Example: To enable logging from the psm3 and tcp provider: + FI_LOG_PROV="psm3,tcp" Example: To enable logging from providers other than psm3: FI_LOG_PROV="^psm3" diff --git a/man/fi_pingpong.1.md b/man/fi_pingpong.1.md index ab59d45e028..fa710c1a3d3 100644 --- a/man/fi_pingpong.1.md +++ b/man/fi_pingpong.1.md @@ -72,7 +72,7 @@ given domains cannot communicate, then the application will fail. ## Fabric Filtering *-p \* -: The name of the underlying fabric provider (e.g., sockets, psm3, etc.). +: The name of the underlying fabric provider (e.g., tcp, psm3, etc.). If a provider is not specified via the -p switch, the test will pick one from the list of available providers (as returned by fi_getinfo(3)). @@ -111,10 +111,10 @@ given domains cannot communicate, then the application will fail. ## A simple example ### Server: `fi_pingpong -p ` -`server$ fi_pingpong -p sockets` +`server$ fi_pingpong -p tcp` ### Client: `fi_pingpong -p ` -`client$ fi_pingpong -p sockets 192.168.0.123` +`client$ fi_pingpong -p tcp 192.168.0.123` ## An example with various options diff --git a/man/fi_sockets.7.md b/man/fi_sockets.7.md deleted file mode 100644 index 73d8d5e7622..00000000000 --- a/man/fi_sockets.7.md +++ /dev/null @@ -1,114 +0,0 @@ ---- -layout: page -title: fi_sockets(7) -tagline: Libfabric Programmer's Manual ---- -{% include JB/setup %} - -# NAME - -fi_sockets \- The Sockets Fabric Provider - -# OVERVIEW - -The sockets provider is being deprecated in favor of the tcp, udp, and -utility providers. Further work on the sockets provider will be minimal. -Most applications should instead use the tcp provider instead. - -The sockets provider is a general purpose provider that can be used on any -system that supports TCP sockets. 
The provider is not intended to provide -performance improvements over regular TCP sockets, but rather to allow -developers to write, test, and debug application code even on platforms -that do not have high-performance fabric hardware. The sockets provider -supports all libfabric provider requirements and interfaces. - -# SUPPORTED FEATURES - -The sockets provider supports all the features defined for the libfabric API. -Key features include: - -*Endpoint types* -: The provider supports all endpoint types: *FI_EP_MSG*, *FI_EP_RDM*, - and *FI_EP_DGRAM*. - -*Endpoint capabilities* -: The following data transfer interface is supported for a all endpoint - types: *fi_msg*. Additionally, these interfaces are supported - for reliable endpoints (*FI_EP_MSG* and *FI_EP_RDM*): *fi_tagged*, - *fi_atomic*, and *fi_rma*. - -*Modes* -: The sockets provider supports all operational modes including - *FI_CONTEXT* and *FI_MSG_PREFIX*. - -*Progress* -: Sockets provider supports both *FI_PROGRESS_AUTO* and *FI_PROGRESS_MANUAL*, - with a default set to auto. When progress is set to auto, a background - thread runs to ensure that progress is made for asynchronous requests. - -# LIMITATIONS - -Sockets provider attempts to emulate the entire API set, including all -defined options. In order to support development on a wide range of -systems, it is implemented over TCP sockets. As a result, the -performance numbers are lower compared to other providers implemented -over high-speed fabric, and lower than what an application might see -implementing to sockets directly. - -Does not support FI_ADDR_STR address format. - -# RUNTIME PARAMETERS - -The sockets provider checks for the following environment variables - - -*FI_SOCKETS_PE_WAITTIME* -: An integer value that specifies how many milliseconds to spin while waiting for progress in *FI_PROGRESS_AUTO* mode. - -*FI_SOCKETS_CONN_TIMEOUT* -: An integer value that specifies how many milliseconds to wait for one connection establishment. 
- -*FI_SOCKETS_MAX_CONN_RETRY* -: An integer value that specifies the number of socket connection retries before reporting as failure. - -*FI_SOCKETS_DEF_CONN_MAP_SZ* -: An integer to specify the default connection map size. - -*FI_SOCKETS_DEF_AV_SZ* -: An integer to specify the default address vector size. - -*FI_SOCKETS_DEF_CQ_SZ* -: An integer to specify the default completion queue size. - -*FI_SOCKETS_DEF_EQ_SZ* -: An integer to specify the default event queue size. - -*FI_SOCKETS_DGRAM_DROP_RATE* -: An integer value to specify the drop rate of dgram frame when endpoint is *FI_EP_DGRAM*. This is for debugging purpose only. - -*FI_SOCKETS_PE_AFFINITY* -: If specified, progress thread is bound to the indicated range(s) of Linux virtual processor ID(s). This option is currently not supported on OS X. The usage is - id_start[-id_end[:stride]][,]. - -*FI_SOCKETS_KEEPALIVE_ENABLE* -: A boolean to enable the keepalive support. - -*FI_SOCKETS_KEEPALIVE_TIME* -: An integer to specify the idle time in seconds before sending the first keepalive probe. Only relevant if *FI_SOCKETS_KEEPALIVE_ENABLE* is enabled. - -*FI_SOCKETS_KEEPALIVE_INTVL* -: An integer to specify the time in seconds between individual keepalive probes. Only relevant if *FI_SOCKETS_KEEPALIVE_ENABLE* is enabled. - -*FI_SOCKETS_KEEPALIVE_PROBES* -: An integer to specify the maximum number of keepalive probes sent before dropping the connection. Only relevant if *FI_SOCKETS_KEEPALIVE_ENABLE* is enabled. - -*FI_SOCKETS_IFACE* -: The prefix or the name of the network interface (default: any) - -# LARGE SCALE JOBS - -For large scale runs one can use these environment variables to set the default parameters e.g. size of the address vector(AV), completion queue (CQ), connection map etc. that satisfies the requirement of the particular benchmark. 
The recommended parameters for large scale runs are *FI_SOCKETS_MAX_CONN_RETRY*, *FI_SOCKETS_DEF_CONN_MAP_SZ*, *FI_SOCKETS_DEF_AV_SZ*, *FI_SOCKETS_DEF_CQ_SZ*, *FI_SOCKETS_DEF_EQ_SZ*. - -# SEE ALSO - -[`fabric`(7)](fabric.7.html), -[`fi_provider`(7)](fi_provider.7.html), -[`fi_getinfo`(3)](fi_getinfo.3.html) diff --git a/man/man7/fi_sockets.7 b/man/man7/fi_sockets.7 deleted file mode 100644 index 4f48b3ea613..00000000000 --- a/man/man7/fi_sockets.7 +++ /dev/null @@ -1,133 +0,0 @@ -.\" Automatically generated by Pandoc 2.9.2.1 -.\" -.TH "fi_sockets" "7" "2022\-12\-09" "Libfabric Programmer\[cq]s Manual" "#VERSION#" -.hy -.SH NAME -.PP -fi_sockets - The Sockets Fabric Provider -.SH OVERVIEW -.PP -The sockets provider is being deprecated in favor of the tcp, udp, and -utility providers. -Further work on the sockets provider will be minimal. -Most applications should instead use the tcp provider instead. -.PP -The sockets provider is a general purpose provider that can be used on -any system that supports TCP sockets. -The provider is not intended to provide performance improvements over -regular TCP sockets, but rather to allow developers to write, test, and -debug application code even on platforms that do not have -high-performance fabric hardware. -The sockets provider supports all libfabric provider requirements and -interfaces. -.SH SUPPORTED FEATURES -.PP -The sockets provider supports all the features defined for the libfabric -API. -Key features include: -.TP -\f[I]Endpoint types\f[R] -The provider supports all endpoint types: \f[I]FI_EP_MSG\f[R], -\f[I]FI_EP_RDM\f[R], and \f[I]FI_EP_DGRAM\f[R]. -.TP -\f[I]Endpoint capabilities\f[R] -The following data transfer interface is supported for a all endpoint -types: \f[I]fi_msg\f[R]. -Additionally, these interfaces are supported for reliable endpoints -(\f[I]FI_EP_MSG\f[R] and \f[I]FI_EP_RDM\f[R]): \f[I]fi_tagged\f[R], -\f[I]fi_atomic\f[R], and \f[I]fi_rma\f[R]. 
-.TP -\f[I]Modes\f[R] -The sockets provider supports all operational modes including -\f[I]FI_CONTEXT\f[R] and \f[I]FI_MSG_PREFIX\f[R]. -.TP -\f[I]Progress\f[R] -Sockets provider supports both \f[I]FI_PROGRESS_AUTO\f[R] and -\f[I]FI_PROGRESS_MANUAL\f[R], with a default set to auto. -When progress is set to auto, a background thread runs to ensure that -progress is made for asynchronous requests. -.SH LIMITATIONS -.PP -Sockets provider attempts to emulate the entire API set, including all -defined options. -In order to support development on a wide range of systems, it is -implemented over TCP sockets. -As a result, the performance numbers are lower compared to other -providers implemented over high-speed fabric, and lower than what an -application might see implementing to sockets directly. -.PP -Does not support FI_ADDR_STR address format. -.SH RUNTIME PARAMETERS -.PP -The sockets provider checks for the following environment variables - -.TP -\f[I]FI_SOCKETS_PE_WAITTIME\f[R] -An integer value that specifies how many milliseconds to spin while -waiting for progress in \f[I]FI_PROGRESS_AUTO\f[R] mode. -.TP -\f[I]FI_SOCKETS_CONN_TIMEOUT\f[R] -An integer value that specifies how many milliseconds to wait for one -connection establishment. -.TP -\f[I]FI_SOCKETS_MAX_CONN_RETRY\f[R] -An integer value that specifies the number of socket connection retries -before reporting as failure. -.TP -\f[I]FI_SOCKETS_DEF_CONN_MAP_SZ\f[R] -An integer to specify the default connection map size. -.TP -\f[I]FI_SOCKETS_DEF_AV_SZ\f[R] -An integer to specify the default address vector size. -.TP -\f[I]FI_SOCKETS_DEF_CQ_SZ\f[R] -An integer to specify the default completion queue size. -.TP -\f[I]FI_SOCKETS_DEF_EQ_SZ\f[R] -An integer to specify the default event queue size. -.TP -\f[I]FI_SOCKETS_DGRAM_DROP_RATE\f[R] -An integer value to specify the drop rate of dgram frame when endpoint -is \f[I]FI_EP_DGRAM\f[R]. -This is for debugging purpose only. 
-.TP -\f[I]FI_SOCKETS_PE_AFFINITY\f[R] -If specified, progress thread is bound to the indicated range(s) of -Linux virtual processor ID(s). -This option is currently not supported on OS X. -The usage is - id_start[-id_end[:stride]][,]. -.TP -\f[I]FI_SOCKETS_KEEPALIVE_ENABLE\f[R] -A boolean to enable the keepalive support. -.TP -\f[I]FI_SOCKETS_KEEPALIVE_TIME\f[R] -An integer to specify the idle time in seconds before sending the first -keepalive probe. -Only relevant if \f[I]FI_SOCKETS_KEEPALIVE_ENABLE\f[R] is enabled. -.TP -\f[I]FI_SOCKETS_KEEPALIVE_INTVL\f[R] -An integer to specify the time in seconds between individual keepalive -probes. -Only relevant if \f[I]FI_SOCKETS_KEEPALIVE_ENABLE\f[R] is enabled. -.TP -\f[I]FI_SOCKETS_KEEPALIVE_PROBES\f[R] -An integer to specify the maximum number of keepalive probes sent before -dropping the connection. -Only relevant if \f[I]FI_SOCKETS_KEEPALIVE_ENABLE\f[R] is enabled. -.TP -\f[I]FI_SOCKETS_IFACE\f[R] -The prefix or the name of the network interface (default: any) -.SH LARGE SCALE JOBS -.PP -For large scale runs one can use these environment variables to set the -default parameters e.g.\ size of the address vector(AV), completion -queue (CQ), connection map etc. -that satisfies the requirement of the particular benchmark. -The recommended parameters for large scale runs are -\f[I]FI_SOCKETS_MAX_CONN_RETRY\f[R], -\f[I]FI_SOCKETS_DEF_CONN_MAP_SZ\f[R], \f[I]FI_SOCKETS_DEF_AV_SZ\f[R], -\f[I]FI_SOCKETS_DEF_CQ_SZ\f[R], \f[I]FI_SOCKETS_DEF_EQ_SZ\f[R]. -.SH SEE ALSO -.PP -\f[C]fabric\f[R](7), \f[C]fi_provider\f[R](7), \f[C]fi_getinfo\f[R](3) -.SH AUTHORS -OpenFabrics. 
diff --git a/prov/sockets/Makefile.include b/prov/sockets/Makefile.include deleted file mode 100644 index 2e8024cd7a7..00000000000 --- a/prov/sockets/Makefile.include +++ /dev/null @@ -1,51 +0,0 @@ -# Makefile.include for sockets provider - -if HAVE_SOCKETS - -AM_CPPFLAGS += -I$(top_srcdir)/prov/sockets/include -I$(top_srcdir)/prov/sockets - -_sockets_files = \ - prov/sockets/src/sock_attr.c \ - prov/sockets/src/sock_av.c \ - prov/sockets/src/sock_dom.c \ - prov/sockets/src/sock_mr.c \ - prov/sockets/src/sock_eq.c \ - prov/sockets/src/sock_cq.c \ - prov/sockets/src/sock_cntr.c \ - prov/sockets/src/sock_poll.c \ - prov/sockets/src/sock_wait.c \ - prov/sockets/src/sock_ep_rdm.c \ - prov/sockets/src/sock_ep_dgram.c \ - prov/sockets/src/sock_ep_msg.c \ - prov/sockets/src/sock_fabric.c \ - prov/sockets/src/sock_ep.c \ - prov/sockets/src/sock_ctx.c \ - prov/sockets/src/sock_rx_entry.c \ - prov/sockets/src/sock_progress.c \ - prov/sockets/src/sock_comm.c \ - prov/sockets/src/sock_conn.c \ - prov/sockets/src/sock_msg.c \ - prov/sockets/src/sock_rma.c \ - prov/sockets/src/sock_atomic.c \ - prov/sockets/src/sock_trigger.c - -_sockets_headers = \ - prov/sockets/include/sock.h \ - prov/sockets/include/sock_util.h - -if HAVE_SOCKETS_DL -pkglib_LTLIBRARIES += libsockets-fi.la -libsockets_fi_la_SOURCES = $(_sockets_files) $(_sockets_headers) $(common_srcs) -libsockets_fi_la_LIBADD = $(linkback) $(sockets_LIBS) -libsockets_fi_la_LDFLAGS = -module -avoid-version -shared -export-dynamic -libsockets_fi_la_DEPENDENCIES = $(linkback) -else !HAVE_SOCKETS_DL -src_libfabric_la_SOURCES += $(_sockets_files) $(_sockets_headers) -src_libfabric_la_LIBADD += $(sockets_LIBS) -endif !HAVE_SOCKETS_DL - -prov_install_man_pages += man/man7/fi_sockets.7 - -endif HAVE_SOCKETS - -prov_dist_man_pages += man/man7/fi_sockets.7 diff --git a/prov/sockets/configure.m4 b/prov/sockets/configure.m4 deleted file mode 100644 index e39bf52c6cf..00000000000 --- a/prov/sockets/configure.m4 +++ /dev/null @@ -1,41 
+0,0 @@ -dnl Configury specific to the libfabric sockets provider - -dnl Called to configure this provider -dnl -dnl Arguments: -dnl -dnl $1: action if configured successfully -dnl $2: action if not configured successfully -dnl -AC_DEFUN([FI_SOCKETS_CONFIGURE],[ - # Determine if we can support the sockets provider - sockets_h_happy=0 - sockets_shm_happy=0 - AS_IF([test x"$enable_sockets" != x"no"], - [AC_CHECK_HEADER([sys/socket.h], [sockets_h_happy=1], - [sockets_h_happy=0]) - - - # check if shm_open is already present - AC_CHECK_FUNC([shm_open], - [sockets_shm_happy=1], - [sockets_shm_happy=0]) - - # look for shm_open in librt if not already present - AS_IF([test $sockets_shm_happy -eq 0], - [FI_CHECK_PACKAGE([sockets], - [sys/mman.h], - [rt], - [shm_open], - [], - [], - [], - [sockets_shm_happy=1], - [sockets_shm_happy=0])]) - ]) - - AC_CHECK_FUNCS([getifaddrs]) - - AS_IF([test $sockets_h_happy -eq 1 && \ - test $sockets_shm_happy -eq 1], [$1], [$2]) -]) diff --git a/prov/sockets/include/rdma/fi_direct.h b/prov/sockets/include/rdma/fi_direct.h deleted file mode 100644 index 6d42ac48b0c..00000000000 --- a/prov/sockets/include/rdma/fi_direct.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation. All rights reserved. - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. - * All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* Do not remove this file. All the include/rdma/fi_direct*.h files are created - * build the provider with FABRIC_DIRECT option. For details see man/fi_direct.7.md - */ diff --git a/prov/sockets/include/rdma/fi_direct_atomic.h b/prov/sockets/include/rdma/fi_direct_atomic.h deleted file mode 100644 index 971eba062c6..00000000000 --- a/prov/sockets/include/rdma/fi_direct_atomic.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* Do not remove this file. See fi_direct.h */ diff --git a/prov/sockets/include/rdma/fi_direct_atomic_def.h b/prov/sockets/include/rdma/fi_direct_atomic_def.h deleted file mode 100644 index 971eba062c6..00000000000 --- a/prov/sockets/include/rdma/fi_direct_atomic_def.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* Do not remove this file. See fi_direct.h */ diff --git a/prov/sockets/include/rdma/fi_direct_cm.h b/prov/sockets/include/rdma/fi_direct_cm.h deleted file mode 100644 index 971eba062c6..00000000000 --- a/prov/sockets/include/rdma/fi_direct_cm.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* Do not remove this file. See fi_direct.h */ diff --git a/prov/sockets/include/rdma/fi_direct_domain.h b/prov/sockets/include/rdma/fi_direct_domain.h deleted file mode 100644 index 971eba062c6..00000000000 --- a/prov/sockets/include/rdma/fi_direct_domain.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* Do not remove this file. 
See fi_direct.h */ diff --git a/prov/sockets/include/rdma/fi_direct_endpoint.h b/prov/sockets/include/rdma/fi_direct_endpoint.h deleted file mode 100644 index 971eba062c6..00000000000 --- a/prov/sockets/include/rdma/fi_direct_endpoint.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* Do not remove this file. See fi_direct.h */ diff --git a/prov/sockets/include/rdma/fi_direct_eq.h b/prov/sockets/include/rdma/fi_direct_eq.h deleted file mode 100644 index 971eba062c6..00000000000 --- a/prov/sockets/include/rdma/fi_direct_eq.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation, Inc. 
All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* Do not remove this file. See fi_direct.h */ diff --git a/prov/sockets/include/rdma/fi_direct_rma.h b/prov/sockets/include/rdma/fi_direct_rma.h deleted file mode 100644 index 971eba062c6..00000000000 --- a/prov/sockets/include/rdma/fi_direct_rma.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* Do not remove this file. See fi_direct.h */ diff --git a/prov/sockets/include/rdma/fi_direct_tagged.h b/prov/sockets/include/rdma/fi_direct_tagged.h deleted file mode 100644 index 971eba062c6..00000000000 --- a/prov/sockets/include/rdma/fi_direct_tagged.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* Do not remove this file. See fi_direct.h */ diff --git a/prov/sockets/include/rdma/fi_direct_trigger.h b/prov/sockets/include/rdma/fi_direct_trigger.h deleted file mode 100644 index 971eba062c6..00000000000 --- a/prov/sockets/include/rdma/fi_direct_trigger.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* Do not remove this file. See fi_direct.h */ diff --git a/prov/sockets/include/sock.h b/prov/sockets/include/sock.h deleted file mode 100644 index f96ce520fea..00000000000 --- a/prov/sockets/include/sock.h +++ /dev/null @@ -1,1228 +0,0 @@ -/* - * Copyright (c) 2014 Intel Corporation, Inc. All rights reserved. - * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2017 DataDirect Networks, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "config.h" - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifndef _SOCK_H_ -#define _SOCK_H_ - -/* 4k allocated for all sockets headers */ -#define SOCK_EP_MAX_MSG_SZ (OFI_MAX_SOCKET_BUF_SIZE - 4096) -#define SOCK_EP_MAX_INJECT_SZ ((1<<8) - 1) -#define SOCK_EP_MAX_BUFF_RECV (1<<26) -#define SOCK_EP_MAX_ORDER_RAW_SZ SOCK_EP_MAX_MSG_SZ -#define SOCK_EP_MAX_ORDER_WAR_SZ SOCK_EP_MAX_MSG_SZ -#define SOCK_EP_MAX_ORDER_WAW_SZ SOCK_EP_MAX_MSG_SZ -#define SOCK_EP_MEM_TAG_FMT FI_TAG_GENERIC -#define SOCK_EP_MAX_EP_CNT (128) -#define SOCK_EP_MAX_CQ_CNT (32) -#define SOCK_EP_MAX_CNTR_CNT (128) -#define SOCK_EP_MAX_TX_CNT (16) -#define SOCK_EP_MAX_RX_CNT (16) -#define SOCK_EP_MAX_IOV_LIMIT (8) -#define SOCK_EP_TX_SZ (256) -#define SOCK_EP_RX_SZ (256) -#define SOCK_EP_MIN_MULTI_RECV (64) -#define SOCK_EP_MAX_ATOMIC_SZ (4096) -#define SOCK_EP_MAX_CTX_BITS (16) -#define SOCK_EP_MSG_PREFIX_SZ (0) -#define SOCK_DOMAIN_MR_CNT (65535) - -#define SOCK_PE_POLL_TIMEOUT (100000) -#define SOCK_PE_MAX_ENTRIES (128) -#define SOCK_PE_WAITTIME (10) - -#define SOCK_EQ_DEF_SZ (1<<8) -#define SOCK_CQ_DEF_SZ (1<<8) -#define SOCK_AV_DEF_SZ (1<<8) -#define SOCK_CMAP_DEF_SZ (1<<10) -#define SOCK_EPOLL_WAIT_EVENTS 32 - -#define SOCK_CQ_DATA_SIZE (sizeof(uint64_t)) -#define SOCK_TAG_SIZE (sizeof(uint64_t)) -#define SOCK_MAX_NETWORK_ADDR_SZ (35) - -#define SOCK_PEP_LISTENER_TIMEOUT (10000) -#define SOCK_CM_COMM_TIMEOUT (2000) -#define SOCK_EP_MAX_RETRY (5) -#define SOCK_EP_MAX_CM_DATA_SZ (256) -#define SOCK_CM_DEF_BACKLOG (128) -#define SOCK_CM_DEF_TIMEOUT (15000) -#define SOCK_CM_DEF_RETRY (5) -#define SOCK_CM_CONN_IN_PROGRESS ((struct sock_conn *)(0x1L)) - -#define SOCK_EP_MSG_ORDER (OFI_ORDER_RAR_SET | OFI_ORDER_RAW_SET | FI_ORDER_RAS| \ - OFI_ORDER_WAR_SET | OFI_ORDER_WAW_SET | 
FI_ORDER_WAS | \ - FI_ORDER_SAR | FI_ORDER_SAW | FI_ORDER_SAS) - -#define SOCK_EP_COMP_ORDER (FI_ORDER_STRICT | FI_ORDER_DATA) - -#define SOCK_EP_CQ_FLAGS (FI_SEND | FI_TRANSMIT | FI_RECV | \ - FI_SELECTIVE_COMPLETION) -#define SOCK_EP_CNTR_FLAGS (FI_SEND | FI_RECV | FI_READ | \ - FI_WRITE | FI_REMOTE_READ | FI_REMOTE_WRITE) - -#define SOCK_EP_SET_TX_OP_FLAGS(_flags) do { \ - if (!((_flags) & FI_INJECT_COMPLETE)) \ - (_flags) |= FI_TRANSMIT_COMPLETE; \ - } while (0) - -#define SOCK_MODE (0) -#define SOCK_NO_COMPLETION (1ULL << 60) -#define SOCK_USE_OP_FLAGS (1ULL << 61) -#define SOCK_TRIGGERED_OP (1ULL << 62) -#define SOCK_PE_COMM_BUFF_SZ (1024) -#define SOCK_PE_OVERFLOW_COMM_BUFF_SZ (128) - -/* it must be adjusted if error data size in CQ/EQ - * will be larger than SOCK_EP_MAX_CM_DATA_SZ */ -#define SOCK_MAX_ERR_CQ_EQ_DATA_SZ SOCK_EP_MAX_CM_DATA_SZ - -enum { - SOCK_SIGNAL_RD_FD = 0, - SOCK_SIGNAL_WR_FD -}; - -enum { - SOCK_OPTS_NONBLOCK = 1<<0, - SOCK_OPTS_KEEPALIVE = 1<<1, - SOCK_OPTS_BUFSIZE = 1<<2, -}; - -#define SOCK_WIRE_PROTO_VERSION (2) - -extern struct fi_info sock_dgram_info; -extern struct fi_info sock_msg_info; - -extern struct util_prov sock_util_prov; -extern struct fi_domain_attr sock_domain_attr; -extern struct fi_fabric_attr sock_fabric_attr; -extern struct fi_tx_attr sock_msg_tx_attr; -extern struct fi_tx_attr sock_rdm_tx_attr; -extern struct fi_tx_attr sock_dgram_tx_attr; -extern struct fi_rx_attr sock_msg_rx_attr; -extern struct fi_rx_attr sock_rdm_rx_attr; -extern struct fi_rx_attr sock_dgram_rx_attr; -extern struct fi_ep_attr sock_msg_ep_attr; -extern struct fi_ep_attr sock_rdm_ep_attr; -extern struct fi_ep_attr sock_dgram_ep_attr; -extern struct fi_tx_attr sock_stx_attr; -extern struct fi_rx_attr sock_srx_attr; - -struct sock_service_entry { - int service; - struct dlist_entry entry; -}; - -struct sock_fabric { - struct fid_fabric fab_fid; - ofi_atomic32_t ref; -#if ENABLE_DEBUG - uint64_t num_send_msg; -#endif - struct dlist_entry 
service_list; - struct dlist_entry fab_list_entry; - ofi_mutex_t lock; -}; - -struct sock_conn { - int sock_fd; - int connected; - int address_published; - union ofi_sock_ip addr; - struct sock_pe_entry *rx_pe_entry; - struct sock_pe_entry *tx_pe_entry; - struct sock_ep_attr *ep_attr; - fi_addr_t av_index; - struct dlist_entry ep_entry; -}; - -struct sock_conn_map { - struct sock_conn *table; - ofi_epoll_t epoll_set; - struct ofi_epollfds_event *epoll_events; - int epoll_size; - int used; - int size; - ofi_mutex_t lock; -}; - -struct sock_conn_listener { - ofi_epoll_t epollfd; - struct fd_signal signal; - ofi_mutex_t signal_lock; /* acquire before map lock */ - pthread_t listener_thread; - int do_listen; - bool removed_from_epollfd; -}; - -struct sock_ep_cm_head { - ofi_epoll_t epollfd; - struct fd_signal signal; - pthread_mutex_t signal_lock; - pthread_t listener_thread; - struct dlist_entry msg_list; - int do_listen; - bool removed_from_epollfd; -}; - -struct sock_domain { - struct fi_info info; - struct fid_domain dom_fid; - struct sock_fabric *fab; - ofi_mutex_t lock; - ofi_atomic32_t ref; - - struct sock_eq *eq; - struct sock_eq *mr_eq; - - enum fi_progress progress_mode; - struct ofi_mr_map mr_map; - struct sock_pe *pe; - struct dlist_entry dom_list_entry; - struct fi_domain_attr attr; - struct sock_conn_listener conn_listener; - struct sock_ep_cm_head cm_head; -}; - -/* move to fi_trigger.h when removing experimental tag from work queues */ -enum { - SOCK_DEFERRED_WORK = FI_TRIGGER_THRESHOLD + 1 -}; - -/* move to fi_trigger.h when removing experimental tag from work queues */ -/* Overlay with fi_trigger_threshold and within fi_trigger_context */ -struct sock_trigger_work { - struct fid_cntr *triggering_cntr; - size_t threshold; - struct fid_cntr *completion_cntr; -}; - -/* must overlay fi_triggered_context */ -struct sock_triggered_context { - int event_type; - union { - struct fi_trigger_threshold threshold; - struct sock_trigger_work work; - void 
*internal[3]; - } trigger; -}; - -struct sock_trigger { - enum fi_op_type op_type; - size_t threshold; - struct dlist_entry entry; - - struct sock_triggered_context *context; - struct fid_ep *ep; - uint64_t flags; - - union { - struct { - struct fi_msg msg; - struct iovec msg_iov[SOCK_EP_MAX_IOV_LIMIT]; - } msg; - - struct { - struct fi_msg_tagged msg; - struct iovec msg_iov[SOCK_EP_MAX_IOV_LIMIT]; - } tmsg; - - struct { - struct fi_msg_rma msg; - struct iovec msg_iov[SOCK_EP_MAX_IOV_LIMIT]; - struct fi_rma_iov rma_iov[SOCK_EP_MAX_IOV_LIMIT]; - } rma; - - struct { - struct fi_msg_atomic msg; - struct fi_ioc msg_iov[SOCK_EP_MAX_IOV_LIMIT]; - struct fi_rma_ioc rma_iov[SOCK_EP_MAX_IOV_LIMIT]; - struct fi_ioc comparev[SOCK_EP_MAX_IOV_LIMIT]; - size_t compare_count; - struct fi_ioc resultv[SOCK_EP_MAX_IOV_LIMIT]; - size_t result_count; - } atomic; - } op; -}; - -struct sock_cntr { - struct fid_cntr cntr_fid; - struct sock_domain *domain; - ofi_atomic32_t value; - ofi_atomic32_t ref; - ofi_atomic32_t err_cnt; - ofi_atomic32_t last_read_val; - pthread_cond_t cond; - pthread_mutex_t mut; - struct fi_cntr_attr attr; - - struct dlist_entry rx_list; - struct dlist_entry tx_list; - ofi_mutex_t list_lock; - - ofi_mutex_t trigger_lock; - struct dlist_entry trigger_list; - - struct fid_wait *waitset; - int signal; - ofi_atomic32_t num_waiting; - int err_flag; -}; - -struct sock_mr { - struct fid_mr mr_fid; - struct sock_domain *domain; - uint64_t key; - uint64_t flags; - struct sock_cntr *cntr; - struct sock_cq *cq; -}; - -struct sock_av_addr { - union ofi_sock_ip addr; - uint8_t valid; - uint8_t reserved[7]; -}; - -struct sock_av_table_hdr { - uint64_t size; - uint64_t stored; -}; - -struct sock_av { - struct fid_av av_fid; - struct sock_domain *domain; - ofi_atomic32_t ref; - struct fi_av_attr attr; - uint64_t mask; - int rx_ctx_bits; - socklen_t addrlen; - struct sock_eq *eq; - struct sock_av_table_hdr *table_hdr; - struct sock_av_addr *table; - uint64_t *idx_arr; - struct 
util_shm shm; - int shared; - struct dlist_entry ep_list; - ofi_mutex_t list_lock; - ofi_mutex_t table_lock; -}; - -struct sock_fid_list { - struct dlist_entry entry; - struct fid *fid; -}; - -struct sock_poll { - struct fid_poll poll_fid; - struct sock_domain *domain; - struct dlist_entry fid_list; -}; - -struct sock_wait { - struct fid_wait wait_fid; - struct sock_fabric *fab; - struct dlist_entry fid_list; - enum fi_wait_obj type; - union { - int fd[2]; - struct sock_mutex_cond { - pthread_mutex_t mutex; - pthread_cond_t cond; - } mutex_cond; - } wobj; -}; - -enum { - /* wire protocol */ - SOCK_OP_SEND = 0, - SOCK_OP_TSEND = 1, - SOCK_OP_SEND_COMPLETE = 2, - - SOCK_OP_WRITE = 3, - SOCK_OP_WRITE_COMPLETE = 4, - SOCK_OP_WRITE_ERROR = 5, - - SOCK_OP_READ = 6, - SOCK_OP_READ_COMPLETE = 7, - SOCK_OP_READ_ERROR = 8, - - SOCK_OP_ATOMIC = 9, - SOCK_OP_ATOMIC_COMPLETE = 10, - SOCK_OP_ATOMIC_ERROR = 11, - - SOCK_OP_CONN_MSG = 12, - - /* internal */ - SOCK_OP_RECV, - SOCK_OP_TRECV, -}; - -/* - * Transmit context - ring buffer data: - * tx_op + flags + context + dest_addr + conn + [data] + [tag] + tx_iov - * 8B 8B 8B 8B 8B 8B 24B+ - * data - only present if flags indicate - * tag - only present for TSEND op - */ -struct sock_op { - uint8_t op; - uint8_t src_iov_len; - uint8_t dest_iov_len; - struct { - uint8_t op; - uint8_t datatype; - uint8_t res_iov_len; - uint8_t cmp_iov_len; - } atomic; - uint8_t reserved[1]; -}; - -struct sock_op_send { - struct sock_op op; - uint64_t flags; - uint64_t context; - uint64_t dest_addr; - uint64_t buf; - struct sock_ep *ep; - struct sock_conn *conn; -}; - -struct sock_op_tsend { - struct sock_op op; - uint64_t flags; - uint64_t context; - uint64_t dest_addr; - uint64_t buf; - struct sock_ep *ep; - struct sock_conn *conn; - uint64_t tag; -}; - -union sock_iov { - struct fi_rma_iov iov; - struct fi_rma_ioc ioc; -}; - -struct sock_eq_entry { - uint32_t type; - size_t len; - uint64_t flags; - struct dlist_entry entry; - char event[]; -}; - 
-struct sock_eq_err_data_entry { - struct dlist_entry entry; - int do_free; - char err_data[]; -}; - -struct sock_eq { - struct fid_eq eq; - struct fi_eq_attr attr; - struct sock_fabric *sock_fab; - - struct dlistfd_head list; - struct dlistfd_head err_list; - struct dlist_entry err_data_list; - ofi_mutex_t lock; - - struct fid_wait *waitset; - int signal; - int wait_fd; - char service[NI_MAXSERV]; -}; - -struct sock_comp { - uint8_t send_cq_event; - uint8_t recv_cq_event; - char reserved[2]; - - struct sock_cq *send_cq; - struct sock_cq *recv_cq; - - struct sock_cntr *send_cntr; - struct sock_cntr *recv_cntr; - struct sock_cntr *read_cntr; - struct sock_cntr *write_cntr; - struct sock_cntr *rem_read_cntr; - struct sock_cntr *rem_write_cntr; - - struct sock_eq *eq; -}; - -enum sock_cm_state { - SOCK_CM_STATE_DISCONNECTED = 0, - SOCK_CM_STATE_REQUESTED, - SOCK_CM_STATE_CONNECTED, -}; - -struct sock_pep_cm_entry { - int sock; - int do_listen; - int signal_fds[2]; - pthread_t listener_thread; -}; - -struct sock_ep_cm_entry { - int sock; - ofi_mutex_t lock; - enum sock_cm_state state; -}; - -struct sock_conn_handle { - int sock; - int do_listen; -}; - -struct sock_ep_attr { - size_t fclass; - - int tx_shared; - int rx_shared; - size_t buffered_len; - size_t min_multi_recv; - - ofi_atomic32_t ref; - struct sock_eq *eq; - struct sock_av *av; - struct sock_domain *domain; - - struct sock_rx_ctx *rx_ctx; - struct sock_tx_ctx *tx_ctx; - - struct sock_rx_ctx **rx_array; - struct sock_tx_ctx **tx_array; - ofi_atomic32_t num_rx_ctx; - ofi_atomic32_t num_tx_ctx; - - struct dlist_entry rx_ctx_entry; - struct dlist_entry tx_ctx_entry; - - struct fi_info info; - struct fi_ep_attr ep_attr; - - enum fi_ep_type ep_type; - union ofi_sock_ip *src_addr; - union ofi_sock_ip *dest_addr; - uint16_t msg_src_port; - uint16_t msg_dest_port; - - uint64_t peer_fid; - uint16_t key; - int is_enabled; - struct sock_ep_cm_entry cm; - struct sock_conn_handle conn_handle; - ofi_mutex_t lock; - - 
struct index_map av_idm; - struct sock_conn_map cmap; -}; - -struct sock_ep { - struct fid_ep ep; - struct fi_tx_attr tx_attr; - struct fi_rx_attr rx_attr; - struct sock_ep_attr *attr; - int is_alias; -}; - -struct sock_pep { - struct fid_pep pep; - struct sock_fabric *sock_fab; - - struct sock_ep_cm_head cm_head; - struct sock_pep_cm_entry cm; - union ofi_sock_ip src_addr; - struct fi_info info; - struct sock_eq *eq; - int name_set; -}; - -struct sock_rx_entry { - struct sock_op rx_op; - uint8_t is_buffered; - uint8_t is_busy; - uint8_t is_claimed; - uint8_t is_complete; - uint8_t is_tagged; - uint8_t is_pool_entry; - uint8_t reserved[2]; - - uint64_t used; - uint64_t total_len; - - uint64_t flags; - uint64_t context; - uint64_t addr; - uint64_t data; - uint64_t tag; - uint64_t ignore; - struct sock_comp *comp; - - union sock_iov iov[SOCK_EP_MAX_IOV_LIMIT]; - struct dlist_entry entry; - struct slist_entry pool_entry; - struct sock_rx_ctx *rx_ctx; -}; - -struct sock_rx_ctx { - struct fid_ep ctx; - - uint16_t rx_id; - int enabled; - int progress; - int is_ctrl_ctx; - int recv_cq_event; - int use_shared; - - size_t num_left; - size_t buffered_len; - size_t min_multi_recv; - uint64_t addr; - struct sock_comp comp; - struct sock_rx_ctx *srx_ctx; - - struct sock_ep_attr *ep_attr; - struct sock_av *av; - struct sock_eq *eq; - struct sock_domain *domain; - - struct dlist_entry pe_entry; - struct dlist_entry cq_entry; - - struct dlist_entry pe_entry_list; - struct dlist_entry rx_entry_list; - struct dlist_entry rx_buffered_list; - struct dlist_entry ep_list; - ofi_mutex_t lock; - - struct dlist_entry *progress_start; - - struct fi_rx_attr attr; - struct sock_rx_entry *rx_entry_pool; - struct slist pool_list; -}; - -struct sock_tx_ctx { - union { - struct fid_ep ctx; - struct fid_stx stx; - } fid; - size_t fclass; - - struct ofi_ringbuf rb; - ofi_mutex_t rb_lock; - - uint16_t tx_id; - uint8_t enabled; - uint8_t progress; - - int use_shared; - uint64_t addr; - struct 
sock_comp comp; - struct sock_rx_ctx *rx_ctrl_ctx; - struct sock_tx_ctx *stx_ctx; - - struct sock_ep_attr *ep_attr; - struct sock_av *av; - struct sock_eq *eq; - struct sock_domain *domain; - - struct dlist_entry pe_entry; - struct dlist_entry cq_entry; - - struct dlist_entry pe_entry_list; - struct dlist_entry ep_list; - - struct fi_tx_attr attr; - ofi_mutex_t lock; -}; - -struct sock_msg_hdr { - uint8_t version; - uint8_t op_type; - uint8_t rx_id; - uint8_t dest_iov_len; - uint16_t pe_entry_id; - uint8_t reserved[2]; - - uint64_t flags; - uint64_t msg_len; -}; - -struct sock_msg_send { - struct sock_msg_hdr msg_hdr; - /* user data */ - /* data */ -}; - -struct sock_msg_tsend { - struct sock_msg_hdr msg_hdr; - uint64_t tag; - /* user data */ - /* data */ -}; - -struct sock_rma_write_req { - struct sock_msg_hdr msg_hdr; - /* user data */ - /* dst iov(s)*/ - /* data */ -}; - -struct sock_atomic_req { - struct sock_msg_hdr msg_hdr; - struct sock_op op; - - /* user data */ - /* dst ioc(s)*/ - /* cmp iov(s) */ - /* data */ -}; - -struct sock_msg_response { - struct sock_msg_hdr msg_hdr; - uint16_t pe_entry_id; - int32_t err; - uint8_t reserved[2]; -}; - -struct sock_rma_read_req { - struct sock_msg_hdr msg_hdr; - /* src iov(s)*/ -}; - -struct sock_rma_read_response { - struct sock_msg_hdr msg_hdr; - uint16_t pe_entry_id; - uint8_t reserved[6]; - /* data */ -}; - -struct sock_atomic_response { - struct sock_msg_hdr msg_hdr; - uint16_t pe_entry_id; - uint8_t reserved[6]; - /* data */ -}; - -struct sock_tx_iov { - union sock_iov src; - union sock_iov dst; - union sock_iov res; - union sock_iov cmp; -}; - -struct sock_tx_pe_entry { - struct sock_op tx_op; - struct sock_comp *comp; - uint8_t header_sent; - uint8_t send_done; - uint8_t reserved[6]; - - struct sock_tx_ctx *tx_ctx; - struct sock_tx_iov tx_iov[SOCK_EP_MAX_IOV_LIMIT]; - char inject[SOCK_EP_MAX_INJECT_SZ]; -}; - -struct sock_rx_pe_entry { - struct sock_op rx_op; - - struct sock_comp *comp; - uint8_t header_read; 
- uint8_t pending_send; - uint8_t reserved[6]; - struct sock_rx_entry *rx_entry; - union sock_iov rx_iov[SOCK_EP_MAX_IOV_LIMIT]; - char *atomic_cmp; - char *atomic_src; -}; - -/* PE entry type */ -enum { - SOCK_PE_RX, - SOCK_PE_TX, -}; - -struct sock_pe_entry { - union { - struct sock_tx_pe_entry tx; - struct sock_rx_pe_entry rx; - } pe; - - struct sock_msg_hdr msg_hdr; - struct sock_msg_response response; - - uint64_t flags; - uint64_t context; - uint64_t addr; - uint64_t data; - uint64_t tag; - uint64_t buf; - - uint8_t type; - uint8_t is_complete; - uint8_t is_error; - uint8_t mr_checked; - uint8_t is_pool_entry; - uint8_t completion_reported; - uint8_t reserved[3]; - - uint64_t done_len; - uint64_t total_len; - uint64_t data_len; - uint64_t rem; - void *comm_addr; - struct sock_ep_attr *ep_attr; - struct sock_conn *conn; - struct sock_comp *comp; - - struct dlist_entry entry; - struct dlist_entry ctx_entry; - struct ofi_ringbuf comm_buf; - size_t cache_sz; -}; - -struct sock_pe { - struct sock_domain *domain; - int num_free_entries; - struct sock_pe_entry pe_table[SOCK_PE_MAX_ENTRIES]; - ofi_mutex_t lock; - ofi_mutex_t signal_lock; - pthread_mutex_t list_lock; - int wcnt, rcnt; - int signal_fds[2]; - uint64_t waittime; - - struct ofi_bufpool *pe_rx_pool; - struct ofi_bufpool *atomic_rx_pool; - struct dlist_entry free_list; - struct dlist_entry busy_list; - struct dlist_entry pool_list; - - struct dlist_entry tx_list; - struct dlist_entry rx_list; - - pthread_t progress_thread; - volatile int do_progress; - struct sock_pe_entry *pe_atomic; - ofi_epoll_t epoll_set; -}; - -typedef ssize_t (*sock_cq_report_fn) (struct sock_cq *cq, fi_addr_t addr, - struct sock_pe_entry *pe_entry); - -struct sock_cq_overflow_entry_t { - size_t len; - fi_addr_t addr; - struct dlist_entry entry; - char cq_entry[]; -}; - -struct sock_cq { - struct fid_cq cq_fid; - struct sock_domain *domain; - ssize_t cq_entry_size; - ofi_atomic32_t ref; - struct fi_cq_attr attr; - - struct ofi_ringbuf 
addr_rb; - struct ofi_ringbuffd cq_rbfd; - struct ofi_ringbuf cqerr_rb; - struct dlist_entry overflow_list; - pthread_mutex_t lock; - pthread_mutex_t list_lock; - - struct fid_wait *waitset; - int signal; - ofi_atomic32_t signaled; - - struct dlist_entry ep_list; - struct dlist_entry rx_list; - struct dlist_entry tx_list; - - sock_cq_report_fn report_completion; -}; - -struct sock_conn_hdr { - uint8_t type; - uint8_t reserved[3]; - uint16_t port; - uint16_t cm_data_sz; - /* cm data follows cm_data_sz */ -}; - -struct sock_conn_req { - struct sock_conn_hdr hdr; - union ofi_sock_ip src_addr; - uint64_t caps; - char cm_data[]; -}; - -enum { - SOCK_CONN_REQ, - SOCK_CONN_ACCEPT, - SOCK_CONN_REJECT, - SOCK_CONN_SHUTDOWN, -}; - -enum sock_conn_handle_state { - SOCK_CONN_HANDLE_ACTIVE, - SOCK_CONN_HANDLE_ACCEPTED, - SOCK_CONN_HANDLE_REJECTED, - SOCK_CONN_HANDLE_DELETED, - SOCK_CONN_HANDLE_FINALIZING, - SOCK_CONN_HANDLE_FINALIZED, -}; - -struct sock_conn_req_handle { - struct fid handle; - struct sock_conn_req *req; - int sock_fd; - uint8_t monitored; - enum sock_conn_handle_state state; - pthread_mutex_t finalized_mutex; - pthread_cond_t finalized_cond; - struct sock_pep *pep; - struct sock_ep *ep; - size_t paramlen; - union ofi_sock_ip dest_addr; - struct dlist_entry entry; - char cm_data[SOCK_EP_MAX_CM_DATA_SZ]; -}; - -union sock_tx_op { - struct sock_msg { - struct sock_op_send op; - uint64_t cq_data; - union { - char inject[SOCK_EP_MAX_INJECT_SZ]; - union sock_iov msg[SOCK_EP_MAX_IOV_LIMIT]; - } data; - } msg; - - struct sock_rma_write { - struct sock_op_send op; - union { - char inject[SOCK_EP_MAX_INJECT_SZ]; - union sock_iov msg[SOCK_EP_MAX_IOV_LIMIT]; - } data; - union sock_iov rma[SOCK_EP_MAX_IOV_LIMIT]; - } rma_write; - - struct sock_rma_read { - struct sock_op_send op; - union sock_iov msg[SOCK_EP_MAX_IOV_LIMIT]; - union sock_iov rma[SOCK_EP_MAX_IOV_LIMIT]; - } rma_read; - - struct sock_atomic { - struct sock_op_send op; - union { - char 
inject[SOCK_EP_MAX_INJECT_SZ]; - union sock_iov msg[SOCK_EP_MAX_IOV_LIMIT]; - } data; - union sock_iov rma[SOCK_EP_MAX_IOV_LIMIT]; - union sock_iov res[SOCK_EP_MAX_IOV_LIMIT]; - } atomic; -}; -#define SOCK_EP_TX_ENTRY_SZ (sizeof(union sock_tx_op)) - -size_t sock_get_tx_size(size_t size); -int sock_get_src_addr(union ofi_sock_ip *dest_addr, - union ofi_sock_ip *src_addr); -int sock_get_src_addr_from_hostname(union ofi_sock_ip *src_addr, - const char *service, uint16_t sa_family); - -struct fi_info *sock_fi_info(uint32_t version, enum fi_ep_type ep_type, - const struct fi_info *hints, void *src_addr, - void *dest_addr); -void free_fi_info(struct fi_info *info); - -int sock_msg_getinfo(uint32_t version, const char *node, const char *service, - uint64_t flags, const struct fi_info *hints, - struct fi_info **info); - -int sock_domain(struct fid_fabric *fabric, struct fi_info *info, - struct fid_domain **dom, void *context); -void sock_dom_add_to_list(struct sock_domain *domain); -int sock_dom_check_list(struct sock_domain *domain); -void sock_dom_remove_from_list(struct sock_domain *domain); -struct sock_domain *sock_dom_list_head(void); -int sock_dom_check_manual_progress(struct sock_fabric *fabric); -int sock_query_atomic(struct fid_domain *domain, - enum fi_datatype datatype, enum fi_op op, - struct fi_atomic_attr *attr, uint64_t flags); - -void sock_fab_add_to_list(struct sock_fabric *fabric); -int sock_fab_check_list(struct sock_fabric *fabric); -void sock_fab_remove_from_list(struct sock_fabric *fabric); -struct sock_fabric *sock_fab_list_head(void); - -int sock_alloc_endpoint(struct fid_domain *domain, struct fi_info *info, - struct sock_ep **ep, void *context, size_t fclass); -int sock_rdm_ep(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **ep, void *context); -int sock_rdm_sep(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **sep, void *context); - -int sock_dgram_ep(struct fid_domain *domain, struct fi_info *info, - struct 
fid_ep **ep, void *context); -int sock_dgram_sep(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **sep, void *context); - -int sock_msg_ep(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **ep, void *context); -int sock_msg_sep(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **sep, void *context); -int sock_msg_passive_ep(struct fid_fabric *fabric, struct fi_info *info, - struct fid_pep **pep, void *context); -int sock_ep_enable(struct fid_ep *ep); -int sock_ep_disable(struct fid_ep *ep); - -int sock_stx_ctx(struct fid_domain *domain, - struct fi_tx_attr *attr, struct fid_stx **stx, void *context); -int sock_srx_ctx(struct fid_domain *domain, - struct fi_rx_attr *attr, struct fid_ep **srx, void *context); - - -int sock_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, - struct fid_cq **cq, void *context); -int sock_cq_report_error(struct sock_cq *cq, struct sock_pe_entry *entry, - size_t olen, int err, int prov_errno, void *err_data, - size_t err_data_size); -int sock_cq_progress(struct sock_cq *cq); -void sock_cq_add_tx_ctx(struct sock_cq *cq, struct sock_tx_ctx *tx_ctx); -void sock_cq_remove_tx_ctx(struct sock_cq *cq, struct sock_tx_ctx *tx_ctx); -void sock_cq_add_rx_ctx(struct sock_cq *cq, struct sock_rx_ctx *rx_ctx); -void sock_cq_remove_rx_ctx(struct sock_cq *cq, struct sock_rx_ctx *rx_ctx); - - -int sock_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr, - struct fid_eq **eq, void *context); -int sock_eq_report_event(struct sock_eq *sock_eq, uint32_t event, - const void *buf, size_t len, uint64_t flags); -int sock_eq_report_error(struct sock_eq *sock_eq, fid_t fid, void *context, - uint64_t data, int err, int prov_errno, - void *err_data, size_t err_data_size); -int sock_eq_openwait(struct sock_eq *eq, const char *service); - -int sock_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, - struct fid_cntr **cntr, void *context); -void sock_cntr_inc(struct sock_cntr *cntr); -int 
sock_cntr_progress(struct sock_cntr *cntr); -void sock_cntr_add_tx_ctx(struct sock_cntr *cntr, struct sock_tx_ctx *tx_ctx); -void sock_cntr_remove_tx_ctx(struct sock_cntr *cntr, struct sock_tx_ctx *tx_ctx); -void sock_cntr_add_rx_ctx(struct sock_cntr *cntr, struct sock_rx_ctx *rx_ctx); -void sock_cntr_remove_rx_ctx(struct sock_cntr *cntr, struct sock_rx_ctx *rx_ctx); - - -struct sock_mr *sock_mr_verify_key(struct sock_domain *domain, uint64_t key, - uintptr_t *buf, size_t len, uint64_t access); -struct sock_mr *sock_mr_verify_desc(struct sock_domain *domain, void *desc, - void *buf, size_t len, uint64_t access); - -struct sock_rx_ctx *sock_rx_ctx_alloc(const struct fi_rx_attr *attr, - void *context, int use_shared); -void sock_rx_ctx_free(struct sock_rx_ctx *rx_ctx); - -struct sock_tx_ctx *sock_tx_ctx_alloc(const struct fi_tx_attr *attr, - void *context, int use_shared); -struct sock_tx_ctx *sock_stx_ctx_alloc(const struct fi_tx_attr *attr, void *context); -void sock_tx_ctx_free(struct sock_tx_ctx *tx_ctx); -void sock_tx_ctx_start(struct sock_tx_ctx *tx_ctx); -void sock_tx_ctx_write(struct sock_tx_ctx *tx_ctx, const void *buf, size_t len); -void sock_tx_ctx_commit(struct sock_tx_ctx *tx_ctx); -void sock_tx_ctx_abort(struct sock_tx_ctx *tx_ctx); -void sock_tx_ctx_write_op_send(struct sock_tx_ctx *tx_ctx, - struct sock_op *op, uint64_t flags, uint64_t context, - uint64_t dest_addr, uint64_t buf, struct sock_ep_attr *ep_attr, - struct sock_conn *conn); -void sock_tx_ctx_write_op_tsend(struct sock_tx_ctx *tx_ctx, - struct sock_op *op, uint64_t flags, uint64_t context, - uint64_t dest_addr, uint64_t buf, struct sock_ep_attr *ep_attr, - struct sock_conn *conn, uint64_t tag); -void sock_tx_ctx_read_op_send(struct sock_tx_ctx *tx_ctx, - struct sock_op *op, uint64_t *flags, uint64_t *context, - uint64_t *dest_addr, uint64_t *buf, struct sock_ep_attr **ep_attr, - struct sock_conn **conn); - -int sock_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr, - struct 
fid_poll **pollset); -int sock_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr, - struct fid_wait **waitset); -void sock_wait_signal(struct fid_wait *wait_fid); -int sock_wait_get_obj(struct fid_wait *fid, void *arg); -int sock_wait_close(fid_t fid); - - -int sock_av_open(struct fid_domain *domain, struct fi_av_attr *attr, - struct fid_av **av, void *context); -int sock_av_compare_addr(struct sock_av *av, fi_addr_t addr1, fi_addr_t addr2); -int sock_av_get_addr_index(struct sock_av *av, union ofi_sock_ip *addr); - -struct sock_conn *sock_ep_lookup_conn(struct sock_ep_attr *attr, fi_addr_t index, - union ofi_sock_ip *addr); -int sock_ep_get_conn(struct sock_ep_attr *ep_attr, struct sock_tx_ctx *tx_ctx, - fi_addr_t index, struct sock_conn **pconn); -void sock_ep_remove_conn(struct sock_ep_attr *ep_attr, struct sock_conn *conn); -int sock_ep_connect(struct sock_ep_attr *attr, fi_addr_t index, - struct sock_conn **conn); -ssize_t sock_conn_send_src_addr(struct sock_ep_attr *ep_attr, struct sock_tx_ctx *tx_ctx, - struct sock_conn *conn); -int sock_conn_listen(struct sock_ep_attr *ep_attr); -int sock_conn_start_listener_thread(struct sock_conn_listener *conn_listener); -int sock_conn_stop_listener_thread(struct sock_conn_listener *conn_listener); -void sock_conn_map_destroy(struct sock_ep_attr *ep_attr); -void sock_conn_release_entry(struct sock_conn_map *map, struct sock_conn *conn); -void sock_set_sockopts(int sock, int sock_opts); -int fd_set_nonblock(int fd); -int sock_conn_map_init(struct sock_ep *ep, int init_size); - -struct sock_pe *sock_pe_init(struct sock_domain *domain); -void sock_pe_add_tx_ctx(struct sock_pe *pe, struct sock_tx_ctx *ctx); -void sock_pe_add_rx_ctx(struct sock_pe *pe, struct sock_rx_ctx *ctx); -void sock_pe_signal(struct sock_pe *pe); -void sock_pe_poll_add(struct sock_pe *pe, int fd); -void sock_pe_poll_del(struct sock_pe *pe, int fd); - -int sock_pe_progress_ep_rx(struct sock_pe *pe, struct sock_ep_attr *ep_attr); -int 
sock_pe_progress_ep_tx(struct sock_pe *pe, struct sock_ep_attr *ep_attr); -int sock_pe_progress_rx_ctx(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx); -int sock_pe_progress_tx_ctx(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx); -void sock_pe_remove_tx_ctx(struct sock_tx_ctx *tx_ctx); -void sock_pe_remove_rx_ctx(struct sock_rx_ctx *rx_ctx); -void sock_pe_finalize(struct sock_pe *pe); - - -struct sock_rx_entry *sock_rx_new_entry(struct sock_rx_ctx *rx_ctx); -struct sock_rx_entry *sock_rx_new_buffered_entry(struct sock_rx_ctx *rx_ctx, - size_t len); -struct sock_rx_entry *sock_rx_get_entry(struct sock_rx_ctx *rx_ctx, - uint64_t addr, uint64_t tag, - uint8_t is_tagged); -struct sock_rx_entry *sock_rx_get_buffered_entry(struct sock_rx_ctx *rx_ctx, - uint64_t addr, uint64_t tag, - uint64_t ignore, uint8_t is_tagged); -ssize_t sock_rx_peek_recv(struct sock_rx_ctx *rx_ctx, fi_addr_t addr, - uint64_t tag, uint64_t ignore, void *context, uint64_t flags, - uint8_t is_tagged); -ssize_t sock_rx_claim_recv(struct sock_rx_ctx *rx_ctx, void *context, - uint64_t flags, uint64_t tag, uint64_t ignore, - uint8_t is_tagged, const struct iovec *msg_iov, - size_t iov_count); -void sock_rx_release_entry(struct sock_rx_entry *rx_entry); - -ssize_t sock_comm_send(struct sock_pe_entry *pe_entry, const void *buf, size_t len); -ssize_t sock_comm_recv(struct sock_pe_entry *pe_entry, void *buf, size_t len); -ssize_t sock_comm_peek(struct sock_conn *conn, void *buf, size_t len); -ssize_t sock_comm_discard(struct sock_pe_entry *pe_entry, size_t len); -int sock_comm_tx_done(struct sock_pe_entry *pe_entry); -ssize_t sock_comm_flush(struct sock_pe_entry *pe_entry); -int sock_comm_is_disconnected(struct sock_pe_entry *pe_entry); - -ssize_t sock_ep_recvmsg(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags); -ssize_t sock_ep_sendmsg(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags); -ssize_t sock_ep_trecvmsg(struct fid_ep *ep, - const struct fi_msg_tagged *msg, uint64_t 
flags); -ssize_t sock_ep_tsendmsg(struct fid_ep *ep, - const struct fi_msg_tagged *msg, uint64_t flags); -ssize_t sock_ep_rma_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg, - uint64_t flags); -ssize_t sock_ep_rma_writemsg(struct fid_ep *ep, const struct fi_msg_rma *msg, - uint64_t flags); -ssize_t sock_ep_tx_atomic(struct fid_ep *ep, - const struct fi_msg_atomic *msg, - const struct fi_ioc *comparev, void **compare_desc, - size_t compare_count, struct fi_ioc *resultv, - void **result_desc, size_t result_count, uint64_t flags); - -ssize_t sock_queue_work(struct sock_domain *dom, struct fi_deferred_work *work); -ssize_t sock_queue_rma_op(struct fid_ep *ep, const struct fi_msg_rma *msg, - uint64_t flags, enum fi_op_type op_type); -ssize_t sock_queue_atomic_op(struct fid_ep *ep, const struct fi_msg_atomic *msg, - const struct fi_ioc *comparev, size_t compare_count, - struct fi_ioc *resultv, size_t result_count, - uint64_t flags, enum fi_op_type op_type); -ssize_t sock_queue_tmsg_op(struct fid_ep *ep, const struct fi_msg_tagged *msg, - uint64_t flags, enum fi_op_type op_type); -ssize_t sock_queue_msg_op(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags, enum fi_op_type op_type); -ssize_t sock_queue_cntr_op(struct fi_deferred_work *work, uint64_t flags); -void sock_cntr_check_trigger_list(struct sock_cntr *cntr); - -static inline size_t sock_rx_avail_len(struct sock_rx_entry *rx_entry) -{ - return rx_entry->total_len - rx_entry->used; -} - -int sock_ep_cm_start_thread(struct sock_ep_cm_head *cm_head); -void sock_ep_cm_signal(struct sock_ep_cm_head *cm_head); -void sock_ep_cm_stop_thread(struct sock_ep_cm_head *cm_head); -void sock_ep_cm_wait_handle_finalized(struct sock_ep_cm_head *cm_head, - struct sock_conn_req_handle *handle); - -#endif diff --git a/prov/sockets/include/sock_util.h b/prov/sockets/include/sock_util.h deleted file mode 100644 index f360b47f1b0..00000000000 --- a/prov/sockets/include/sock_util.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - 
* Copyright (c) 2014 Intel Corporation, Inc. All rights reserved. - * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2017 DataDirect Networks, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _SOCK_UTIL_H_ -#define _SOCK_UTIL_H_ - -#include - -#include -#include -#include "sock.h" - -extern const char sock_fab_name[]; -extern const char sock_dom_name[]; -extern const char sock_prov_name[]; -extern struct fi_provider sock_prov; -extern int sock_pe_waittime; -extern int sock_conn_timeout; -extern int sock_conn_retry; -extern int sock_cm_def_map_sz; -extern int sock_av_def_sz; -extern int sock_cq_def_sz; -extern int sock_eq_def_sz; -#if ENABLE_DEBUG -extern int sock_dgram_drop_rate; -#endif -extern int sock_keepalive_enable; -extern int sock_keepalive_time; -extern int sock_keepalive_intvl; -extern int sock_keepalive_probes; -extern int sock_buf_sz; - -#define _SOCK_LOG_DBG(subsys, ...) FI_DBG(&sock_prov, subsys, __VA_ARGS__) -#define _SOCK_LOG_ERROR(subsys, ...) FI_WARN(&sock_prov, subsys, __VA_ARGS__) - -static inline int sock_drop_packet(struct sock_ep_attr *ep_attr) -{ -#if ENABLE_DEBUG - if (ep_attr->ep_type == FI_EP_DGRAM && sock_dgram_drop_rate > 0) { - ep_attr->domain->fab->num_send_msg++; - if (!(ep_attr->domain->fab->num_send_msg % sock_dgram_drop_rate)) - return 1; - } -#endif - return 0; -} - -static inline void *sock_mremap(void *old_address, size_t old_size, - size_t new_size) -{ -#ifdef __APPLE__ - return (void *) -1; -#elif defined __FreeBSD__ - return (void *) -1; -#else - return mremap(old_address, old_size, new_size, 0); -#endif -} - -#endif - diff --git a/prov/sockets/libfabric-sockets.spec.in b/prov/sockets/libfabric-sockets.spec.in deleted file mode 100644 index 256b6f5a223..00000000000 --- a/prov/sockets/libfabric-sockets.spec.in +++ /dev/null @@ -1,52 +0,0 @@ -%{!?configopts: %global configopts LDFLAGS=-Wl,--build-id} -%{!?provider: %define provider sockets} -%{!?provider_formal: %define provider_formal sockets} - -Name: libfabric-%{provider} -Version: @VERSION@ -Release: 1%{?dist} -Summary: Dynamic %{provider_formal} provider for user-space Open Fabric Interfaces -Group: System Environment/Libraries -License: GPLv2 
or BSD -Url: http://www.github.com/ofiwg/libfabric -Source: http://www.github.org/ofiwg/%{name}/releases/download/v{%version}/libfabric-%{version}.tar.bz2 -BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) -Requires: libfabric -BuildRequires: libfabric - -%description -libfabric provides a user-space API to access high-performance fabric -services, such as RDMA. - -This RPM provides the %{provider_formal} provider as a "plugin" to an existing -libfabric installation. This plugin will override older %{provider_formal} -provider functionality in the existing libfabric installation. - -%prep -%setup -q -n libfabric-%{version} - -%build -%configure %{configopts} --enable-%{provider}=dl -make %{?_smp_mflags} - -%install -rm -rf %{buildroot} -%makeinstall installdirs - -%clean -rm -rf %{buildroot} - -%files -%defattr(-,root,root,-) -%{_libdir}/libfabric/*.so - -%exclude %{_libdir}/libfabric.* -%exclude %{_libdir}/libfabric/*.la -%exclude %{_libdir}/pkgconfig -%exclude %{_bindir} -%exclude %{_mandir} -%exclude %{_includedir} - -%changelog -* Wed May 24 2017 Open Fabrics Interfaces Working Group -- First release of specfile for packaging a single dl provider. diff --git a/prov/sockets/provider_FABRIC_1.0.map b/prov/sockets/provider_FABRIC_1.0.map deleted file mode 100644 index b1029d55c67..00000000000 --- a/prov/sockets/provider_FABRIC_1.0.map +++ /dev/null @@ -1 +0,0 @@ -/* Do not remove this file. This is needed for FABRIC_DIRECT option. See man/fi_direct.7.md for details. */ diff --git a/prov/sockets/src/sock_atomic.c b/prov/sockets/src/sock_atomic.c deleted file mode 100644 index fee7882f2d8..00000000000 --- a/prov/sockets/src/sock_atomic.c +++ /dev/null @@ -1,587 +0,0 @@ -/* - * Copyright (c) 2014 Intel Corporation, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "sock.h" -#include "sock_util.h" -#include - -#define SOCK_LOG_DBG(...) _SOCK_LOG_DBG(FI_LOG_EP_DATA, __VA_ARGS__) -#define SOCK_LOG_ERROR(...) 
_SOCK_LOG_ERROR(FI_LOG_EP_DATA, __VA_ARGS__) - -ssize_t sock_ep_tx_atomic(struct fid_ep *ep, - const struct fi_msg_atomic *msg, - const struct fi_ioc *comparev, void **compare_desc, - size_t compare_count, struct fi_ioc *resultv, - void **result_desc, size_t result_count, uint64_t flags) -{ - ssize_t ret; - size_t i; - size_t datatype_sz; - struct sock_op tx_op; - union sock_iov tx_iov; - struct sock_conn *conn; - struct sock_tx_ctx *tx_ctx; - uint64_t total_len, src_len, dst_len, cmp_len, op_flags; - struct sock_ep *sock_ep; - struct sock_ep_attr *ep_attr; - - switch (ep->fid.fclass) { - case FI_CLASS_EP: - sock_ep = container_of(ep, struct sock_ep, ep); - tx_ctx = sock_ep->attr->tx_ctx->use_shared ? - sock_ep->attr->tx_ctx->stx_ctx : sock_ep->attr->tx_ctx; - ep_attr = sock_ep->attr; - op_flags = sock_ep->tx_attr.op_flags; - break; - case FI_CLASS_TX_CTX: - tx_ctx = container_of(ep, struct sock_tx_ctx, fid.ctx); - ep_attr = tx_ctx->ep_attr; - op_flags = tx_ctx->attr.op_flags; - break; - default: - SOCK_LOG_ERROR("Invalid EP type\n"); - return -FI_EINVAL; - } - - if (msg->iov_count > SOCK_EP_MAX_IOV_LIMIT || - msg->rma_iov_count > SOCK_EP_MAX_IOV_LIMIT) - return -FI_EINVAL; - - if (!tx_ctx->enabled) - return -FI_EOPBADSTATE; - - ret = sock_ep_get_conn(ep_attr, tx_ctx, msg->addr, &conn); - if (ret) - return ret; - - SOCK_EP_SET_TX_OP_FLAGS(flags); - if (flags & SOCK_USE_OP_FLAGS) - flags |= op_flags; - - if (msg->op == FI_ATOMIC_READ) { - flags &= ~FI_INJECT; - } - - if (flags & FI_TRIGGER) { - ret = sock_queue_atomic_op(ep, msg, comparev, compare_count, - resultv, result_count, flags, - FI_OP_ATOMIC); - if (ret != 1) - return ret; - } - - src_len = cmp_len = 0; - datatype_sz = ofi_datatype_size(msg->datatype); - for (i = 0; i < compare_count; i++) - cmp_len += (comparev[i].count * datatype_sz); - if (flags & FI_INJECT) { - for (i = 0; i < msg->iov_count; i++) - src_len += (msg->msg_iov[i].count * datatype_sz); - - if ((src_len + cmp_len) > SOCK_EP_MAX_INJECT_SZ) - 
return -FI_EINVAL; - - total_len = src_len + cmp_len; - } else { - total_len = (msg->iov_count + compare_count) * sizeof(union sock_iov); - } - - total_len += (sizeof(struct sock_op_send) + - (msg->rma_iov_count * sizeof(union sock_iov)) + - (result_count * sizeof(union sock_iov))); - - sock_tx_ctx_start(tx_ctx); - if (ofi_rbavail(&tx_ctx->rb) < total_len) { - ret = -FI_EAGAIN; - goto err; - } - - memset(&tx_op, 0, sizeof(tx_op)); - tx_op.op = SOCK_OP_ATOMIC; - tx_op.dest_iov_len = (uint8_t) msg->rma_iov_count; - tx_op.atomic.op = msg->op; - tx_op.atomic.datatype = msg->datatype; - tx_op.atomic.res_iov_len = (uint8_t) result_count; - tx_op.atomic.cmp_iov_len = (uint8_t) compare_count; - - if (flags & FI_INJECT) { - tx_op.src_iov_len = (uint8_t) src_len; - tx_op.atomic.cmp_iov_len = (uint8_t) cmp_len; - } else { - tx_op.src_iov_len = (uint8_t) msg->iov_count; - } - - sock_tx_ctx_write_op_send(tx_ctx, &tx_op, flags, - (uintptr_t) msg->context, msg->addr, - (uintptr_t) msg->msg_iov[0].addr, ep_attr, conn); - - if (flags & FI_REMOTE_CQ_DATA) - sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(uint64_t)); - - src_len = dst_len = 0; - if (flags & FI_INJECT) { - for (i = 0; i < msg->iov_count; i++) { - sock_tx_ctx_write(tx_ctx, msg->msg_iov[i].addr, - msg->msg_iov[i].count * datatype_sz); - src_len += (msg->msg_iov[i].count * datatype_sz); - } - for (i = 0; i < compare_count; i++) { - sock_tx_ctx_write(tx_ctx, comparev[i].addr, - comparev[i].count * datatype_sz); - dst_len += comparev[i].count * datatype_sz; - } - } else { - for (i = 0; i < msg->iov_count; i++) { - tx_iov.ioc.addr = (uintptr_t) msg->msg_iov[i].addr; - tx_iov.ioc.count = msg->msg_iov[i].count; - sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov)); - src_len += (tx_iov.ioc.count * datatype_sz); - } - for (i = 0; i < compare_count; i++) { - tx_iov.ioc.addr = (uintptr_t) comparev[i].addr; - tx_iov.ioc.count = comparev[i].count; - sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov)); - dst_len += (tx_iov.ioc.count * 
datatype_sz); - } - } - -#if ENABLE_DEBUG - if ((src_len > SOCK_EP_MAX_ATOMIC_SZ) || - (dst_len > SOCK_EP_MAX_ATOMIC_SZ)) { - SOCK_LOG_ERROR("Max atomic operation size exceeded!\n"); - ret = -FI_EINVAL; - goto err; - } else if (compare_count && (dst_len != src_len)) { - SOCK_LOG_ERROR("Buffer length mismatch\n"); - ret = -FI_EINVAL; - goto err; - } -#endif - - dst_len = 0; - for (i = 0; i < msg->rma_iov_count; i++) { - tx_iov.ioc.addr = msg->rma_iov[i].addr; - tx_iov.ioc.key = msg->rma_iov[i].key; - tx_iov.ioc.count = msg->rma_iov[i].count; - sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov)); - dst_len += (tx_iov.ioc.count * datatype_sz); - } - - if (msg->iov_count && (dst_len != src_len)) { - SOCK_LOG_ERROR("Buffer length mismatch\n"); - ret = -FI_EINVAL; - goto err; - } else { - src_len = dst_len; - } - - dst_len = 0; - for (i = 0; i < result_count; i++) { - tx_iov.ioc.addr = (uintptr_t) resultv[i].addr; - tx_iov.ioc.count = resultv[i].count; - sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov)); - dst_len += (tx_iov.ioc.count * datatype_sz); - } - -#if ENABLE_DEBUG - if (result_count && (dst_len != src_len)) { - SOCK_LOG_ERROR("Buffer length mismatch\n"); - ret = -FI_EINVAL; - goto err; - } -#endif - - sock_tx_ctx_commit(tx_ctx); - return 0; - -err: - sock_tx_ctx_abort(tx_ctx); - return ret; -} - -static ssize_t sock_ep_atomic_writemsg(struct fid_ep *ep, - const struct fi_msg_atomic *msg, uint64_t flags) -{ -#if ENABLE_DEBUG - switch (msg->op) { - case FI_MIN: - case FI_MAX: - case FI_SUM: - case FI_PROD: - case FI_LOR: - case FI_LAND: - case FI_BOR: - case FI_BAND: - case FI_LXOR: - case FI_BXOR: - case FI_ATOMIC_WRITE: - break; - default: - SOCK_LOG_ERROR("Invalid operation type\n"); - return -FI_EINVAL; - } -#endif - return sock_ep_tx_atomic(ep, msg, NULL, NULL, 0, NULL, NULL, 0, flags); -} - -static ssize_t sock_ep_atomic_writev(struct fid_ep *ep, - const struct fi_ioc *iov, void **desc, size_t count, - fi_addr_t dest_addr, uint64_t addr, uint64_t key, - 
enum fi_datatype datatype, enum fi_op op, void *context) -{ - struct fi_rma_ioc rma_iov = { - .addr = addr, - .count = ofi_total_ioc_cnt(iov, count), - .key = key, - }; - struct fi_msg_atomic msg = { - .msg_iov = iov, - .desc = desc, - .iov_count = count, - .addr = dest_addr, - .rma_iov = &rma_iov, - .rma_iov_count = 1, - .datatype = datatype, - .op = op, - .context = context, - .data = 0, - }; - - return sock_ep_atomic_writemsg(ep, &msg, SOCK_USE_OP_FLAGS); -} - -static ssize_t sock_ep_atomic_write(struct fid_ep *ep, const void *buf, - size_t count, void *desc, fi_addr_t dest_addr, - uint64_t addr, uint64_t key, - enum fi_datatype datatype, enum fi_op op, - void *context) -{ - const struct fi_ioc iov = { - .addr = (void *) buf, - .count = count, - }; - - return sock_ep_atomic_writev(ep, &iov, &desc, 1, dest_addr, addr, key, - datatype, op, context); -} - -static ssize_t sock_ep_atomic_inject(struct fid_ep *ep, const void *buf, - size_t count, fi_addr_t dest_addr, uint64_t addr, - uint64_t key, enum fi_datatype datatype, - enum fi_op op) -{ - struct fi_ioc msg_iov = { - .addr = (void *)buf, - .count = count, - }; - struct fi_rma_ioc rma_iov = { - .addr = addr, - .count = count, - .key = key, - }; - struct fi_msg_atomic msg = { - .msg_iov = &msg_iov, - .desc = NULL, - .iov_count = 1, - .addr = dest_addr, - .rma_iov = &rma_iov, - .rma_iov_count = 1, - .datatype = datatype, - .op = op, - .context = NULL, - .data = 0, - }; - - return sock_ep_atomic_writemsg(ep, &msg, FI_INJECT | - SOCK_NO_COMPLETION | SOCK_USE_OP_FLAGS); -} - -static ssize_t sock_ep_atomic_readwritemsg(struct fid_ep *ep, - const struct fi_msg_atomic *msg, - struct fi_ioc *resultv, void **result_desc, - size_t result_count, uint64_t flags) -{ - switch (msg->op) { - case FI_MIN: - case FI_MAX: - case FI_SUM: - case FI_PROD: - case FI_LOR: - case FI_LAND: - case FI_BOR: - case FI_BAND: - case FI_LXOR: - case FI_BXOR: - case FI_ATOMIC_READ: - case FI_ATOMIC_WRITE: - break; - default: - 
SOCK_LOG_ERROR("Invalid operation type\n"); - return -FI_EINVAL; - } - - return sock_ep_tx_atomic(ep, msg, NULL, NULL, 0, - resultv, result_desc, result_count, flags); -} - -static ssize_t sock_ep_atomic_readwritev(struct fid_ep *ep, - const struct fi_ioc *iov, void **desc, size_t count, - struct fi_ioc *resultv, void **result_desc, - size_t result_count, fi_addr_t dest_addr, - uint64_t addr, uint64_t key, - enum fi_datatype datatype, enum fi_op op, void *context) -{ - struct fi_rma_ioc rma_iov = { - .addr = addr, - .count = ofi_total_ioc_cnt(iov, count), - .key = key, - }; - struct fi_msg_atomic msg = { - .msg_iov = iov, - .desc = desc, - .iov_count = count, - .addr = dest_addr, - .rma_iov = &rma_iov, - .rma_iov_count = 1, - .datatype = datatype, - .op = op, - .context = context, - .data = 0 - }; - - return sock_ep_atomic_readwritemsg(ep, &msg, - resultv, result_desc, result_count, - SOCK_USE_OP_FLAGS); -} - -static ssize_t sock_ep_atomic_readwrite(struct fid_ep *ep, const void *buf, - size_t count, void *desc, - void *result, void *result_desc, - fi_addr_t dest_addr, uint64_t addr, - uint64_t key, enum fi_datatype datatype, - enum fi_op op, void *context) -{ - struct fi_ioc iov = { - .addr = (op == FI_ATOMIC_READ) ? 
NULL : (void *) buf, - .count = count - }; - struct fi_ioc res_iov = { - .addr = result, - .count = count - }; - - if (!buf && op != FI_ATOMIC_READ) - return -FI_EINVAL; - - return sock_ep_atomic_readwritev(ep, &iov, &desc, 1, - &res_iov, &result_desc, 1, - dest_addr, addr, key, - datatype, op, context); -} - -static ssize_t sock_ep_atomic_compwritemsg(struct fid_ep *ep, - const struct fi_msg_atomic *msg, - const struct fi_ioc *comparev, void **compare_desc, - size_t compare_count, struct fi_ioc *resultv, - void **result_desc, size_t result_count, uint64_t flags) -{ - switch (msg->op) { - case FI_CSWAP: - case FI_CSWAP_NE: - case FI_CSWAP_LE: - case FI_CSWAP_LT: - case FI_CSWAP_GE: - case FI_CSWAP_GT: - case FI_MSWAP: - break; - default: - SOCK_LOG_ERROR("Invalid operation type\n"); - return -FI_EINVAL; - } - - return sock_ep_tx_atomic(ep, msg, comparev, compare_desc, compare_count, - resultv, result_desc, result_count, flags); -} - -static ssize_t sock_ep_atomic_compwritev(struct fid_ep *ep, - const struct fi_ioc *iov, void **desc, size_t count, - const struct fi_ioc *comparev, void **compare_desc, - size_t compare_count, struct fi_ioc *resultv, - void **result_desc, size_t result_count, - fi_addr_t dest_addr, uint64_t addr, uint64_t key, - enum fi_datatype datatype, enum fi_op op, void *context) -{ - struct fi_rma_ioc rma_iov = { - .addr = addr, - .count = ofi_total_ioc_cnt(iov, count), - .key = key, - }; - struct fi_msg_atomic msg = { - .msg_iov = iov, - .desc = desc, - .iov_count = count, - .addr = dest_addr, - .rma_iov = &rma_iov, - .rma_iov_count = 1, - .datatype = datatype, - .op = op, - .context = context, - .data = 0 - }; - - return sock_ep_atomic_compwritemsg(ep, &msg, - comparev, compare_desc, compare_count, - resultv, result_desc, result_count, - SOCK_USE_OP_FLAGS); -} - -static ssize_t sock_ep_atomic_compwrite(struct fid_ep *ep, const void *buf, - size_t count, void *desc, - const void *compare, void *compare_desc, - void *result, void *result_desc, - 
fi_addr_t dest_addr, uint64_t addr, - uint64_t key, enum fi_datatype datatype, - enum fi_op op, void *context) -{ - struct fi_ioc iov = { - .addr = (void *) buf, - .count = count, - }; - struct fi_ioc resultv = { - .addr = result, - .count = count, - }; - struct fi_ioc comparev = { - .addr = (void *) compare, - .count = count, - }; - - return sock_ep_atomic_compwritev(ep, &iov, &desc, 1, - &comparev, &compare_desc, 1, - &resultv, &result_desc, 1, - dest_addr, addr, key, - datatype, op, context); -} - -/* Domain parameter is ignored, okay to pass in NULL */ -int sock_query_atomic(struct fid_domain *domain, - enum fi_datatype datatype, enum fi_op op, - struct fi_atomic_attr *attr, uint64_t flags) -{ - int ret; - - ret = ofi_atomic_valid(&sock_prov, datatype, op, flags); - if (ret) - return ret; - - attr->size = ofi_datatype_size(datatype); - if (attr->size == 0) - return -FI_EINVAL; - - attr->count = (SOCK_EP_MAX_ATOMIC_SZ / attr->size); - return 0; -} - -static int sock_ep_atomic_valid(struct fid_ep *ep, - enum fi_datatype datatype, enum fi_op op, size_t *count) -{ - struct fi_atomic_attr attr; - int ret; - - ret = sock_query_atomic(NULL, datatype, op, &attr, 0); - if (!ret) - *count = attr.count; - return ret; -} - -static int sock_ep_atomic_fetch_valid(struct fid_ep *ep, - enum fi_datatype datatype, enum fi_op op, size_t *count) -{ - struct fi_atomic_attr attr; - int ret; - - ret = sock_query_atomic(NULL, datatype, op, &attr, FI_FETCH_ATOMIC); - if (!ret) - *count = attr.count; - return ret; -} - -static int sock_ep_atomic_cswap_valid(struct fid_ep *ep, - enum fi_datatype datatype, enum fi_op op, size_t *count) -{ - struct fi_atomic_attr attr; - int ret; - - /* domain parameter is ignored - okay to pass in NULL */ - ret = sock_query_atomic(NULL, datatype, op, &attr, FI_COMPARE_ATOMIC); - if (!ret) - *count = attr.count; - return ret; -} - -struct fi_ops_atomic sock_ep_atomic = { - .size = sizeof(struct fi_ops_atomic), - .write = sock_ep_atomic_write, - .writev = 
sock_ep_atomic_writev, - .writemsg = sock_ep_atomic_writemsg, - .inject = sock_ep_atomic_inject, - .readwrite = sock_ep_atomic_readwrite, - .readwritev = sock_ep_atomic_readwritev, - .readwritemsg = sock_ep_atomic_readwritemsg, - .compwrite = sock_ep_atomic_compwrite, - .compwritev = sock_ep_atomic_compwritev, - .compwritemsg = sock_ep_atomic_compwritemsg, - .writevalid = sock_ep_atomic_valid, - .readwritevalid = sock_ep_atomic_fetch_valid, - .compwritevalid = sock_ep_atomic_cswap_valid, -}; diff --git a/prov/sockets/src/sock_attr.c b/prov/sockets/src/sock_attr.c deleted file mode 100644 index 6039c73b36d..00000000000 --- a/prov/sockets/src/sock_attr.c +++ /dev/null @@ -1,252 +0,0 @@ -/* - * Copyright (c) 2020 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "sock.h" - -#define SOCK_MSG_TX_CAPS (OFI_TX_MSG_CAPS | FI_TAGGED | OFI_TX_RMA_CAPS | \ - FI_ATOMICS | FI_NAMED_RX_CTX | FI_FENCE | FI_TRIGGER) -#define SOCK_MSG_RX_CAPS (OFI_RX_MSG_CAPS | FI_TAGGED | OFI_RX_RMA_CAPS | \ - FI_ATOMICS | FI_DIRECTED_RECV | FI_MULTI_RECV | \ - FI_RMA_EVENT | FI_SOURCE | FI_TRIGGER) - -#define SOCK_RDM_TX_CAPS (OFI_TX_MSG_CAPS | FI_TAGGED | OFI_TX_RMA_CAPS | \ - FI_ATOMICS | FI_NAMED_RX_CTX | FI_FENCE | FI_TRIGGER | \ - FI_RMA_PMEM) -#define SOCK_RDM_RX_CAPS (OFI_RX_MSG_CAPS | FI_TAGGED | OFI_RX_RMA_CAPS | \ - FI_ATOMICS | FI_DIRECTED_RECV | FI_MULTI_RECV | \ - FI_RMA_EVENT | FI_SOURCE | FI_TRIGGER | FI_RMA_PMEM) - -#define SOCK_DGRAM_TX_CAPS (OFI_TX_MSG_CAPS | FI_TAGGED | FI_NAMED_RX_CTX | \ - FI_FENCE | FI_TRIGGER) -#define SOCK_DGRAM_RX_CAPS (OFI_RX_MSG_CAPS | FI_TAGGED | FI_DIRECTED_RECV | \ - FI_MULTI_RECV | FI_SOURCE | FI_TRIGGER) - -#define SOCK_DOMAIN_CAPS (FI_LOCAL_COMM | FI_REMOTE_COMM | FI_SHARED_AV) - -#define SOCK_TX_OP_FLAGS (FI_COMMIT_COMPLETE | FI_COMPLETION | \ - FI_DELIVERY_COMPLETE | FI_INJECT | FI_INJECT_COMPLETE | \ - FI_MULTICAST | FI_TRANSMIT_COMPLETE) -#define SOCK_RX_OP_FLAGS (FI_COMMIT_COMPLETE | FI_COMPLETION | \ - FI_DELIVERY_COMPLETE | FI_INJECT | FI_INJECT_COMPLETE | \ - FI_MULTI_RECV | FI_TRANSMIT_COMPLETE) - -struct fi_ep_attr sock_msg_ep_attr = { - .type = FI_EP_MSG, - .protocol = FI_PROTO_SOCK_TCP, - .protocol_version = SOCK_WIRE_PROTO_VERSION, - .max_msg_size = SOCK_EP_MAX_MSG_SZ, - .msg_prefix_size = SOCK_EP_MSG_PREFIX_SZ, - .max_order_raw_size = SOCK_EP_MAX_ORDER_RAW_SZ, - .max_order_war_size = SOCK_EP_MAX_ORDER_WAR_SZ, - .max_order_waw_size = SOCK_EP_MAX_ORDER_WAW_SZ, - .mem_tag_format = SOCK_EP_MEM_TAG_FMT, - 
.tx_ctx_cnt = SOCK_EP_MAX_TX_CNT, - .rx_ctx_cnt = SOCK_EP_MAX_RX_CNT, -}; - -struct fi_tx_attr sock_msg_tx_attr = { - .caps = SOCK_MSG_TX_CAPS, - .mode = SOCK_MODE, - .op_flags = SOCK_TX_OP_FLAGS, - .msg_order = SOCK_EP_MSG_ORDER, - .inject_size = SOCK_EP_MAX_INJECT_SZ, - .size = SOCK_EP_TX_SZ, - .iov_limit = SOCK_EP_MAX_IOV_LIMIT, - .rma_iov_limit = SOCK_EP_MAX_IOV_LIMIT, -}; - -struct fi_rx_attr sock_msg_rx_attr = { - .caps = SOCK_MSG_RX_CAPS, - .mode = SOCK_MODE, - .op_flags = SOCK_RX_OP_FLAGS, - .msg_order = SOCK_EP_MSG_ORDER, - .comp_order = SOCK_EP_COMP_ORDER, - .total_buffered_recv = SOCK_EP_MAX_BUFF_RECV, - .size = SOCK_EP_RX_SZ, - .iov_limit = SOCK_EP_MAX_IOV_LIMIT, -}; - -struct fi_ep_attr sock_dgram_ep_attr = { - .type = FI_EP_DGRAM, - .protocol = FI_PROTO_SOCK_TCP, - .protocol_version = SOCK_WIRE_PROTO_VERSION, - .max_msg_size = SOCK_EP_MAX_MSG_SZ, - .msg_prefix_size = SOCK_EP_MSG_PREFIX_SZ, - .max_order_raw_size = SOCK_EP_MAX_ORDER_RAW_SZ, - .max_order_war_size = SOCK_EP_MAX_ORDER_WAR_SZ, - .max_order_waw_size = SOCK_EP_MAX_ORDER_WAW_SZ, - .mem_tag_format = SOCK_EP_MEM_TAG_FMT, - .tx_ctx_cnt = SOCK_EP_MAX_TX_CNT, - .rx_ctx_cnt = SOCK_EP_MAX_RX_CNT, -}; - -struct fi_ep_attr sock_rdm_ep_attr = { - .type = FI_EP_RDM, - .protocol = FI_PROTO_SOCK_TCP, - .protocol_version = SOCK_WIRE_PROTO_VERSION, - .max_msg_size = SOCK_EP_MAX_MSG_SZ, - .msg_prefix_size = SOCK_EP_MSG_PREFIX_SZ, - .max_order_raw_size = SOCK_EP_MAX_ORDER_RAW_SZ, - .max_order_war_size = SOCK_EP_MAX_ORDER_WAR_SZ, - .max_order_waw_size = SOCK_EP_MAX_ORDER_WAW_SZ, - .mem_tag_format = SOCK_EP_MEM_TAG_FMT, - .tx_ctx_cnt = SOCK_EP_MAX_TX_CNT, - .rx_ctx_cnt = SOCK_EP_MAX_RX_CNT, -}; - -struct fi_tx_attr sock_rdm_tx_attr = { - .caps = SOCK_RDM_TX_CAPS, - .mode = SOCK_MODE, - .op_flags = SOCK_TX_OP_FLAGS, - .msg_order = SOCK_EP_MSG_ORDER, - .inject_size = SOCK_EP_MAX_INJECT_SZ, - .size = SOCK_EP_TX_SZ, - .iov_limit = SOCK_EP_MAX_IOV_LIMIT, - .rma_iov_limit = SOCK_EP_MAX_IOV_LIMIT, -}; - -struct 
fi_rx_attr sock_rdm_rx_attr = { - .caps = SOCK_RDM_RX_CAPS, - .mode = SOCK_MODE, - .op_flags = SOCK_RX_OP_FLAGS, - .msg_order = SOCK_EP_MSG_ORDER, - .comp_order = SOCK_EP_COMP_ORDER, - .total_buffered_recv = SOCK_EP_MAX_BUFF_RECV, - .size = SOCK_EP_RX_SZ, - .iov_limit = SOCK_EP_MAX_IOV_LIMIT, -}; - -struct fi_tx_attr sock_dgram_tx_attr = { - .caps = SOCK_DGRAM_TX_CAPS, - .mode = SOCK_MODE, - .op_flags = SOCK_TX_OP_FLAGS, - .msg_order = SOCK_EP_MSG_ORDER, - .inject_size = SOCK_EP_MAX_INJECT_SZ, - .size = SOCK_EP_TX_SZ, - .iov_limit = SOCK_EP_MAX_IOV_LIMIT, - .rma_iov_limit = 0, -}; - -struct fi_rx_attr sock_dgram_rx_attr = { - .caps = SOCK_DGRAM_RX_CAPS, - .mode = SOCK_MODE, - .op_flags = SOCK_RX_OP_FLAGS, - .msg_order = SOCK_EP_MSG_ORDER, - .comp_order = SOCK_EP_COMP_ORDER, - .total_buffered_recv = SOCK_EP_MAX_BUFF_RECV, - .size = SOCK_EP_RX_SZ, - .iov_limit = SOCK_EP_MAX_IOV_LIMIT, -}; - -struct fi_tx_attr sock_stx_attr = { - .caps = SOCK_RDM_TX_CAPS | SOCK_RDM_RX_CAPS | SOCK_DOMAIN_CAPS, - .mode = SOCK_MODE, - .op_flags = FI_TRANSMIT_COMPLETE, - .msg_order = SOCK_EP_MSG_ORDER, - .inject_size = SOCK_EP_MAX_INJECT_SZ, - .size = SOCK_EP_TX_SZ, - .iov_limit = SOCK_EP_MAX_IOV_LIMIT, - .rma_iov_limit = SOCK_EP_MAX_IOV_LIMIT, -}; - -struct fi_rx_attr sock_srx_attr = { - .caps = SOCK_RDM_TX_CAPS | SOCK_RDM_RX_CAPS | SOCK_DOMAIN_CAPS, - .mode = SOCK_MODE, - .op_flags = 0, - .msg_order = SOCK_EP_MSG_ORDER, - .comp_order = SOCK_EP_COMP_ORDER, - .total_buffered_recv = 0, - .size = SOCK_EP_MAX_MSG_SZ, - .iov_limit = SOCK_EP_MAX_IOV_LIMIT, -}; - -struct fi_domain_attr sock_domain_attr = { - .name = "sockets", - .threading = FI_THREAD_SAFE, - .control_progress = FI_PROGRESS_AUTO, - .data_progress = FI_PROGRESS_AUTO, - .resource_mgmt = FI_RM_ENABLED, - /* Provider supports basic memory registration mode */ - .mr_mode = FI_MR_BASIC | FI_MR_SCALABLE, - .mr_key_size = sizeof(uint64_t), - .cq_data_size = sizeof(uint64_t), - .cq_cnt = SOCK_EP_MAX_CQ_CNT, - .ep_cnt = 
SOCK_EP_MAX_EP_CNT, - .tx_ctx_cnt = SOCK_EP_MAX_TX_CNT, - .rx_ctx_cnt = SOCK_EP_MAX_RX_CNT, - .max_ep_tx_ctx = SOCK_EP_MAX_TX_CNT, - .max_ep_rx_ctx = SOCK_EP_MAX_RX_CNT, - .max_ep_stx_ctx = SOCK_EP_MAX_EP_CNT, - .max_ep_srx_ctx = SOCK_EP_MAX_EP_CNT, - .cntr_cnt = SOCK_EP_MAX_CNTR_CNT, - .mr_iov_limit = SOCK_EP_MAX_IOV_LIMIT, - .max_err_data = SOCK_MAX_ERR_CQ_EQ_DATA_SZ, - .mr_cnt = SOCK_DOMAIN_MR_CNT, - .caps = SOCK_DOMAIN_CAPS, -}; - -struct fi_fabric_attr sock_fabric_attr = { - .name = "sockets", - .prov_version = OFI_VERSION_DEF_PROV, -}; - -struct fi_info sock_msg_info = { - .caps = SOCK_MSG_TX_CAPS | SOCK_MSG_RX_CAPS | SOCK_DOMAIN_CAPS, - .addr_format = FI_SOCKADDR, - .tx_attr = &sock_msg_tx_attr, - .rx_attr = &sock_msg_rx_attr, - .ep_attr = &sock_msg_ep_attr, - .domain_attr = &sock_domain_attr, - .fabric_attr = &sock_fabric_attr -}; - -struct fi_info sock_rdm_info = { - .next = &sock_msg_info, - .caps = SOCK_RDM_TX_CAPS | SOCK_RDM_RX_CAPS | SOCK_DOMAIN_CAPS, - .addr_format = FI_SOCKADDR, - .tx_attr = &sock_rdm_tx_attr, - .rx_attr = &sock_rdm_rx_attr, - .ep_attr = &sock_rdm_ep_attr, - .domain_attr = &sock_domain_attr, - .fabric_attr = &sock_fabric_attr -}; - -struct fi_info sock_dgram_info = { - .next = &sock_rdm_info, - .caps = SOCK_DGRAM_TX_CAPS | SOCK_DGRAM_RX_CAPS | SOCK_DOMAIN_CAPS, - .addr_format = FI_SOCKADDR, - .tx_attr = &sock_dgram_tx_attr, - .rx_attr = &sock_dgram_rx_attr, - .ep_attr = &sock_dgram_ep_attr, - .domain_attr = &sock_domain_attr, - .fabric_attr = &sock_fabric_attr -}; diff --git a/prov/sockets/src/sock_av.c b/prov/sockets/src/sock_av.c deleted file mode 100644 index 1b5c551837b..00000000000 --- a/prov/sockets/src/sock_av.c +++ /dev/null @@ -1,680 +0,0 @@ -/* - * Copyright (c) 2014 Intel Corporation, Inc. All rights reserved. - * Copyright (c) 2016, Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2017 Los Alamos National Security, LLC. - * All rights reserved. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "sock.h" -#include "sock_util.h" - -#include "ofi_osd.h" -#include "ofi_util.h" - -#define SOCK_LOG_DBG(...) _SOCK_LOG_DBG(FI_LOG_AV, __VA_ARGS__) -#define SOCK_LOG_ERROR(...) _SOCK_LOG_ERROR(FI_LOG_AV, __VA_ARGS__) - -#define SOCK_AV_TABLE_SZ(count, av_name) (sizeof(struct sock_av_table_hdr) + \ - SOCK_IS_SHARED_AV(av_name) * count * sizeof(uint64_t) + \ - count * sizeof(struct sock_av_addr)) -#define SOCK_IS_SHARED_AV(av_name) ((av_name) ? 
1 : 0) - -int sock_av_get_addr_index(struct sock_av *av, union ofi_sock_ip *addr) -{ - int i; - struct sock_av_addr *av_addr; - - ofi_mutex_lock(&av->table_lock); - for (i = 0; i < (int)av->table_hdr->size; i++) { - av_addr = &av->table[i]; - if (!av_addr->valid) - continue; - - if (ofi_equals_sockaddr((const struct sockaddr *) addr, - (const struct sockaddr *) &av_addr->addr)) { - ofi_mutex_unlock(&av->table_lock); - return i; - } - } - ofi_mutex_unlock(&av->table_lock); - SOCK_LOG_DBG("failed to get index in AV\n"); - return -1; -} - -int sock_av_compare_addr(struct sock_av *av, - fi_addr_t addr1, fi_addr_t addr2) -{ - int64_t index1, index2; - struct sock_av_addr *av_addr1, *av_addr2; - int ret; - - index1 = addr1 & av->mask; - index2 = addr2 & av->mask; - - ofi_mutex_lock(&av->table_lock); - if (index1 >= av->table_hdr->size || index1 < 0 || - index2 >= av->table_hdr->size || index2 < 0) { - SOCK_LOG_ERROR("requested rank is larger than av table\n"); - ofi_mutex_unlock(&av->table_lock); - return -1; - } - - av_addr1 = &av->table[index1]; - av_addr2 = &av->table[index2]; - - /* Return 0 if the addresses match */ - ret = !ofi_equals_sockaddr(&av_addr1->addr.sa, &av_addr2->addr.sa); - ofi_mutex_unlock(&av->table_lock); - return ret; -} - -static inline void sock_av_report_success(struct sock_av *av, void *context, - int num_done, uint64_t flags) -{ - struct fi_eq_entry eq_entry; - - if (!av->eq) - return; - - eq_entry.fid = &av->av_fid.fid; - eq_entry.context = context; - eq_entry.data = num_done; - sock_eq_report_event(av->eq, FI_AV_COMPLETE, - &eq_entry, sizeof(eq_entry), flags); -} - -static void sock_av_report_error(struct sock_av *av, fi_addr_t *fi_addr, - void *context, int index, int err, - uint64_t flags) -{ - int *sync_err; - - if (fi_addr) { - fi_addr[index] = FI_ADDR_NOTAVAIL; - } else if (flags & FI_SYNC_ERR) { - sync_err = context; - sync_err[index] = err; - } - - if (av->eq) - sock_eq_report_error(av->eq, &av->av_fid.fid, - context, index, err, -err, 
NULL, 0); -} - -static void sock_update_av_table(struct sock_av *_av, size_t count) -{ - _av->table = (struct sock_av_addr *) - ((char *)_av->table_hdr + - SOCK_IS_SHARED_AV(_av->attr.name) * count * sizeof(uint64_t) + - sizeof(struct sock_av_table_hdr)); -} - -static int sock_resize_av_table(struct sock_av *av) -{ - void *new_addr; - size_t new_count, table_sz, old_sz; - - new_count = av->table_hdr->size * 2; - table_sz = SOCK_AV_TABLE_SZ(new_count, av->attr.name); - old_sz = SOCK_AV_TABLE_SZ(av->table_hdr->size, av->attr.name); - - if (av->attr.name) { - new_addr = sock_mremap(av->table_hdr, old_sz, table_sz); - if (new_addr == MAP_FAILED) - return -1; - - av->idx_arr[av->table_hdr->stored] = av->table_hdr->stored; - } else { - new_addr = realloc(av->table_hdr, table_sz); - if (!new_addr) - return -1; - memset((char *) new_addr + old_sz, 0, table_sz - old_sz); - } - - av->table_hdr = new_addr; - av->table_hdr->size = new_count; - sock_update_av_table(av, new_count); - - return 0; -} - -static int64_t sock_av_get_next_index(struct sock_av *av) -{ - uint64_t i; - - for (i = 0; i < av->table_hdr->size; i++) { - if (!av->table[i].valid) - return i; - } - - return -1; -} - -static int sock_check_table_in(struct sock_av *_av, const void *addr, - fi_addr_t *fi_addr, int count, uint64_t flags, - void *context) -{ - int i, ret = 0; - uint64_t j; - char sa_ip[INET6_ADDRSTRLEN]; - struct sock_av_addr *av_addr; - int64_t index; - - if ((_av->attr.flags & FI_EVENT) && !_av->eq) - return -FI_ENOEQ; - - if (flags & FI_SYNC_ERR) { - if (fi_addr || !context || _av->eq) - return -FI_EBADFLAGS; - memset(context, 0, sizeof(int) * count); - } - - if (_av->attr.flags & FI_READ) { - for (i = 0; i < count; i++) { - struct sockaddr *sock_addr = (struct sockaddr *) ((char *)addr + i * _av->addrlen); - for (j = 0; j < _av->table_hdr->size; j++) { - if (_av->table[j].valid && - !ofi_valid_dest_ipaddr(sock_addr)) { - sock_av_report_error(_av, fi_addr, - context, i, FI_EINVAL, - flags); - 
continue; - } - - av_addr = &_av->table[j]; - if (memcmp(&av_addr->addr, sock_addr, - ofi_sizeofaddr(sock_addr)) == 0) { - SOCK_LOG_DBG("Found addr in shared av\n"); - if (fi_addr) - fi_addr[i] = (fi_addr_t)j; - ret++; - } - } - } - sock_av_report_success(_av, context, ret, flags); - return (_av->attr.flags & FI_EVENT) ? 0 : ret; - } - - for (i = 0, ret = 0; i < count; i++) { - struct sockaddr *sock_addr = (struct sockaddr *) ((char *)addr + i * _av->addrlen); - if (!ofi_valid_dest_ipaddr(sock_addr)) { - sock_av_report_error(_av, fi_addr, context, i, FI_EINVAL, - flags); - continue; - } - if (_av->table_hdr->stored == _av->table_hdr->size) { - index = sock_av_get_next_index(_av); - if (index < 0) { - if (sock_resize_av_table(_av)) { - sock_av_report_error(_av, fi_addr, - context, i, - FI_ENOMEM, flags); - continue; - } - index = _av->table_hdr->stored++; - } - } else { - index = _av->table_hdr->stored++; - } - - av_addr = &_av->table[index]; - inet_ntop(sock_addr->sa_family, ofi_get_ipaddr(sock_addr), - sa_ip, sizeof sa_ip); - SOCK_LOG_DBG("AV-INSERT: dst_addr family: %d, IP %s, port: %d\n", - sock_addr->sa_family, sa_ip, - ofi_addr_get_port(sock_addr)); - - memcpy(&av_addr->addr, sock_addr, ofi_sizeofaddr(sock_addr)); - if (fi_addr) - fi_addr[i] = (fi_addr_t)index; - - av_addr->valid = 1; - ret++; - } - sock_av_report_success(_av, context, ret, flags); - return (_av->attr.flags & FI_EVENT) ? 
0 : ret; -} - -static int sock_av_insert(struct fid_av *av, const void *addr, size_t count, - fi_addr_t *fi_addr, uint64_t flags, void *context) -{ - struct sock_av *_av; - int ret = 0; - - _av = container_of(av, struct sock_av, av_fid); - - ofi_mutex_lock(&_av->table_lock); - ret = sock_check_table_in(_av, addr, fi_addr, (int) count, flags, context); - ofi_mutex_unlock(&_av->table_lock); - return ret; -} - -static int sock_av_lookup(struct fid_av *av, fi_addr_t fi_addr, void *addr, - size_t *addrlen) -{ - int64_t index; - struct sock_av *_av; - struct sock_av_addr *av_addr; - - _av = container_of(av, struct sock_av, av_fid); - index = fi_addr & _av->mask; - - ofi_mutex_lock(&_av->table_lock); - if (index >= _av->table_hdr->size || index < 0) { - SOCK_LOG_ERROR("requested address not inserted\n"); - ofi_mutex_unlock(&_av->table_lock); - return -EINVAL; - } - - av_addr = &_av->table[index]; - memcpy(addr, &av_addr->addr, MIN(*addrlen, (size_t)_av->addrlen)); - ofi_mutex_unlock(&_av->table_lock); - *addrlen = _av->addrlen; - return 0; -} - -static int _sock_av_insertsvc(struct fid_av *av, const char *node, - const char *service, fi_addr_t *fi_addr, - uint64_t flags, void *context) -{ - int ret; - struct addrinfo sock_hints; - struct addrinfo *result = NULL; - struct sock_av *_av; - - _av = container_of(av, struct sock_av, av_fid); - memset(&sock_hints, 0, sizeof(struct addrinfo)); - /* Map all services to IPv4 addresses -- for compatibility */ - sock_hints.ai_family = AF_INET; - sock_hints.ai_socktype = SOCK_STREAM; - - ret = getaddrinfo(node, service, &sock_hints, &result); - if (ret) { - if (_av->eq) { - sock_av_report_error(_av, fi_addr, context, 0, - FI_EINVAL, flags); - sock_av_report_success(_av, context, 0, flags); - } - return -ret; - } - - ofi_mutex_lock(&_av->table_lock); - ret = sock_check_table_in(_av, result->ai_addr, - fi_addr, 1, flags, context); - ofi_mutex_unlock(&_av->table_lock); - freeaddrinfo(result); - return ret; -} - -static int 
sock_av_insertsvc(struct fid_av *av, const char *node, - const char *service, fi_addr_t *fi_addr, - uint64_t flags, void *context) -{ - if (!service) { - SOCK_LOG_ERROR("Port not provided\n"); - return -FI_EINVAL; - } - - return _sock_av_insertsvc(av, node, service, fi_addr, flags, context); -} - -static int sock_av_insertsym(struct fid_av *av, const char *node, size_t nodecnt, - const char *service, size_t svccnt, fi_addr_t *fi_addr, - uint64_t flags, void *context) -{ - int ret = 0, success = 0, err_code = 0, len1, len2; - int var_port, var_host; - char base_host[FI_NAME_MAX] = {0}; - char tmp_host[FI_NAME_MAX] = {0}; - char tmp_port[FI_NAME_MAX] = {0}; - int hostlen, offset = 0, fmt; - size_t i, j; - - if (!node || !service || node[0] == '\0') { - SOCK_LOG_ERROR("Node/service not provided\n"); - return -FI_EINVAL; - } - - hostlen = (int) strlen(node); - while (isdigit(*(node + hostlen - (offset + 1)))) - offset++; - - if (*(node + hostlen - offset) == '.') - fmt = 0; - else - fmt = offset; - - if (hostlen - offset >= FI_NAME_MAX) - return -FI_ETOOSMALL; - memcpy(base_host, node, hostlen - offset); - var_port = atoi(service); - var_host = atoi(node + hostlen - offset); - - for (i = 0; i < nodecnt; i++) { - for (j = 0; j < svccnt; j++) { - len1 = snprintf(tmp_host, FI_NAME_MAX, "%s%0*d", - base_host, fmt, var_host + (int)i); - len2 = snprintf(tmp_port, FI_NAME_MAX, "%d", - var_port + (int)j); - if (len1 > 0 && len1 < FI_NAME_MAX && len2 > 0 && len2 < FI_NAME_MAX) { - ret = _sock_av_insertsvc(av, tmp_host, tmp_port, fi_addr, flags, context); - if (ret == 1) - success++; - else - err_code = ret; - } else { - SOCK_LOG_ERROR("Node/service value is not valid\n"); - err_code = -FI_ETOOSMALL; - } - } - } - return success > 0 ? 
success : err_code; -} - - -static int sock_av_remove(struct fid_av *av, fi_addr_t *fi_addr, size_t count, - uint64_t flags) -{ - size_t i; - struct sock_av *_av; - struct sock_av_addr *av_addr; - struct dlist_entry *item; - struct fid_list_entry *fid_entry; - struct sock_ep *sock_ep; - struct sock_conn *conn; - uint16_t idx; - - _av = container_of(av, struct sock_av, av_fid); - ofi_mutex_lock(&_av->list_lock); - dlist_foreach(&_av->ep_list, item) { - fid_entry = container_of(item, struct fid_list_entry, entry); - sock_ep = container_of(fid_entry->fid, struct sock_ep, ep.fid); - ofi_mutex_lock(&sock_ep->attr->cmap.lock); - for (i = 0; i < count; i++) { - idx = (uint16_t)(fi_addr[i] & sock_ep->attr->av->mask); - conn = ofi_idm_lookup(&sock_ep->attr->av_idm, idx); - if (conn && conn != SOCK_CM_CONN_IN_PROGRESS) { - /* A peer may be using the connection, so leave - * it operational, just dissociate it from AV. - */ - if (conn->av_index == idx) - conn->av_index = FI_ADDR_NOTAVAIL; - ofi_idm_clear(&sock_ep->attr->av_idm, idx); - } - } - ofi_mutex_unlock(&sock_ep->attr->cmap.lock); - } - ofi_mutex_unlock(&_av->list_lock); - - ofi_mutex_lock(&_av->table_lock); - for (i = 0; i < count; i++) { - av_addr = &_av->table[fi_addr[i]]; - av_addr->valid = 0; - } - ofi_mutex_unlock(&_av->table_lock); - - return 0; -} - -static const char *sock_av_straddr(struct fid_av *av, const void *addr, - char *buf, size_t *len) -{ - const struct sockaddr *sa = addr; - char straddr[OFI_ADDRSTRLEN]; - char ipaddr[INET6_ADDRSTRLEN]; - int size; - - if (!inet_ntop(sa->sa_family, ofi_get_ipaddr(sa), ipaddr, sizeof(ipaddr))) - return NULL; - - size = snprintf(straddr, sizeof(straddr), "%s:%d", - ipaddr, ofi_addr_get_port(sa)); - snprintf(buf, *len, "%s", straddr); - *len = size + 1; - return buf; -} - -static int sock_av_bind(struct fid *fid, struct fid *bfid, uint64_t flags) -{ - struct sock_av *av; - struct sock_eq *eq; - - if (bfid->fclass != FI_CLASS_EQ) - return -FI_EINVAL; - - av = 
container_of(fid, struct sock_av, av_fid.fid); - eq = container_of(bfid, struct sock_eq, eq.fid); - av->eq = eq; - return 0; -} - -static int sock_av_close(struct fid *fid) -{ - struct sock_av *av; - int ret = 0; - av = container_of(fid, struct sock_av, av_fid.fid); - if (ofi_atomic_get32(&av->ref)) - return -FI_EBUSY; - - if (!av->shared) { - free(av->table_hdr); - } else { - ret = ofi_shm_unmap(&av->shm); - if (ret) - SOCK_LOG_ERROR("unmap failed: %s\n", - strerror(ofi_syserr())); - } - - ofi_atomic_dec32(&av->domain->ref); - ofi_mutex_destroy(&av->list_lock); - ofi_mutex_destroy(&av->table_lock); - free(av); - return 0; -} - -static struct fi_ops sock_av_fi_ops = { - .size = sizeof(struct fi_ops), - .close = sock_av_close, - .bind = sock_av_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open, -}; - -static struct fi_ops_av sock_am_ops = { - .size = sizeof(struct fi_ops_av), - .insert = sock_av_insert, - .insertsvc = sock_av_insertsvc, - .insertsym = sock_av_insertsym, - .remove = sock_av_remove, - .lookup = sock_av_lookup, - .straddr = sock_av_straddr -}; - -static struct fi_ops_av sock_at_ops = { - .size = sizeof(struct fi_ops_av), - .insert = sock_av_insert, - .insertsvc = sock_av_insertsvc, - .insertsym = sock_av_insertsym, - .remove = sock_av_remove, - .lookup = sock_av_lookup, - .straddr = sock_av_straddr -}; - -static int sock_verify_av_attr(struct fi_av_attr *attr) -{ - switch (attr->type) { - case FI_AV_MAP: - case FI_AV_TABLE: - case FI_AV_UNSPEC: - break; - default: - return -FI_EINVAL; - } - - if (attr->flags & FI_READ && !attr->name) - return -FI_EINVAL; - - if (attr->rx_ctx_bits > SOCK_EP_MAX_CTX_BITS) { - SOCK_LOG_ERROR("Invalid rx_ctx_bits\n"); - return -FI_EINVAL; - } - return 0; -} - -int sock_av_open(struct fid_domain *domain, struct fi_av_attr *attr, - struct fid_av **av, void *context) -{ - int ret = 0; - struct sock_domain *dom; - struct sock_av *_av; - size_t table_sz; - - if (!attr || sock_verify_av_attr(attr)) - return 
-FI_EINVAL; - - if (attr->type == FI_AV_UNSPEC) - attr->type = FI_AV_TABLE; - - dom = container_of(domain, struct sock_domain, dom_fid); - if (dom->attr.av_type != FI_AV_UNSPEC && - dom->attr.av_type != attr->type) - return -FI_EINVAL; - - _av = calloc(1, sizeof(*_av)); - if (!_av) - return -FI_ENOMEM; - - _av->attr = *attr; - _av->attr.count = (attr->count) ? attr->count : sock_av_def_sz; - table_sz = SOCK_AV_TABLE_SZ(_av->attr.count, attr->name); - - if (attr->name) { - ret = ofi_shm_map(&_av->shm, attr->name, table_sz, - attr->flags & FI_READ, (void**)&_av->table_hdr); - - if (ret || _av->table_hdr == MAP_FAILED) { - SOCK_LOG_ERROR("map failed\n"); - ret = -FI_EINVAL; - goto err; - } - - _av->idx_arr = (uint64_t *)(_av->table_hdr + 1); - _av->attr.map_addr = _av->idx_arr; - attr->map_addr = _av->attr.map_addr; - SOCK_LOG_DBG("Updating map_addr: %p\n", _av->attr.map_addr); - - if (attr->flags & FI_READ) { - if (_av->table_hdr->size != _av->attr.count) { - ret = -FI_EINVAL; - goto err2; - } - } else { - _av->table_hdr->size = _av->attr.count; - _av->table_hdr->stored = 0; - } - _av->shared = 1; - } else { - _av->table_hdr = calloc(1, table_sz); - if (!_av->table_hdr) { - ret = -FI_ENOMEM; - goto err; - } - _av->table_hdr->size = _av->attr.count; - } - sock_update_av_table(_av, _av->attr.count); - - _av->av_fid.fid.fclass = FI_CLASS_AV; - _av->av_fid.fid.context = context; - _av->av_fid.fid.ops = &sock_av_fi_ops; - - switch (attr->type) { - case FI_AV_MAP: - _av->av_fid.ops = &sock_am_ops; - break; - case FI_AV_TABLE: - _av->av_fid.ops = &sock_at_ops; - break; - default: - ret = -FI_EINVAL; - goto err2; - } - - ofi_atomic_initialize32(&_av->ref, 0); - ofi_atomic_inc32(&dom->ref); - _av->domain = dom; - switch (dom->info.addr_format) { - case FI_SOCKADDR_IN: - _av->addrlen = sizeof(struct sockaddr_in); - break; - case FI_SOCKADDR_IN6: - _av->addrlen = sizeof(struct sockaddr_in6); - break; - default: - SOCK_LOG_ERROR("Invalid address format: only IP supported\n"); - 
ret = -FI_EINVAL; - goto err2; - } - dlist_init(&_av->ep_list); - ofi_mutex_init(&_av->list_lock); - ofi_mutex_init(&_av->table_lock); - _av->rx_ctx_bits = attr->rx_ctx_bits; - _av->mask = attr->rx_ctx_bits ? - ((uint64_t)1 << (64 - attr->rx_ctx_bits)) - 1 : ~0; - *av = &_av->av_fid; - return 0; - -err2: - if(attr->name) { - ofi_shm_unmap(&_av->shm); - } else { - if(_av->table_hdr != MAP_FAILED) - free(_av->table_hdr); - } -err: - free(_av); - return ret; -} diff --git a/prov/sockets/src/sock_cntr.c b/prov/sockets/src/sock_cntr.c deleted file mode 100644 index a9a8956dbe6..00000000000 --- a/prov/sockets/src/sock_cntr.c +++ /dev/null @@ -1,603 +0,0 @@ -/* - * Copyright (c) 2014 Intel Corporation, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "config.h" - -#include -#include -#include -#include -#include - -#include "sock.h" -#include "sock_util.h" - -#include - -#define SOCK_LOG_DBG(...) _SOCK_LOG_DBG(FI_LOG_EP_DATA, __VA_ARGS__) -#define SOCK_LOG_ERROR(...) _SOCK_LOG_ERROR(FI_LOG_EP_DATA, __VA_ARGS__) - -const struct fi_cntr_attr sock_cntr_attr = { - .events = FI_CNTR_EVENTS_COMP, - .wait_obj = FI_WAIT_MUTEX_COND, - .wait_set = NULL, - .flags = 0, -}; - -void sock_cntr_add_tx_ctx(struct sock_cntr *cntr, struct sock_tx_ctx *tx_ctx) -{ - int ret; - struct fid *fid = &tx_ctx->fid.ctx.fid; - ret = fid_list_insert(&cntr->tx_list, &cntr->list_lock, fid); - if (ret) - SOCK_LOG_ERROR("Error in adding ctx to progress list\n"); - else - ofi_atomic_inc32(&cntr->ref); -} - -void sock_cntr_remove_tx_ctx(struct sock_cntr *cntr, struct sock_tx_ctx *tx_ctx) -{ - struct fid *fid = &tx_ctx->fid.ctx.fid; - fid_list_remove(&cntr->tx_list, &cntr->list_lock, fid); - ofi_atomic_dec32(&cntr->ref); -} - -void sock_cntr_add_rx_ctx(struct sock_cntr *cntr, struct sock_rx_ctx *rx_ctx) -{ - int ret; - struct fid *fid = &rx_ctx->ctx.fid; - ret = fid_list_insert(&cntr->rx_list, &cntr->list_lock, fid); - if (ret) - SOCK_LOG_ERROR("Error in adding ctx to progress list\n"); - else - ofi_atomic_inc32(&cntr->ref); -} - -void sock_cntr_remove_rx_ctx(struct sock_cntr *cntr, struct sock_rx_ctx *rx_ctx) -{ - struct fid *fid = &rx_ctx->ctx.fid; - fid_list_remove(&cntr->rx_list, &cntr->list_lock, fid); - ofi_atomic_dec32(&cntr->ref); -} - -int sock_cntr_progress(struct sock_cntr *cntr) -{ - struct sock_tx_ctx *tx_ctx; - struct sock_rx_ctx *rx_ctx; - struct dlist_entry *entry; - - struct fid_list_entry *fid_entry; - - if (cntr->domain->progress_mode == 
FI_PROGRESS_AUTO) - return 0; - - ofi_mutex_lock(&cntr->list_lock); - for (entry = cntr->tx_list.next; entry != &cntr->tx_list; - entry = entry->next) { - fid_entry = container_of(entry, struct fid_list_entry, entry); - tx_ctx = container_of(fid_entry->fid, struct sock_tx_ctx, fid.ctx.fid); - if (tx_ctx->use_shared) - sock_pe_progress_tx_ctx(cntr->domain->pe, tx_ctx->stx_ctx); - else - sock_pe_progress_ep_tx(cntr->domain->pe, tx_ctx->ep_attr); - } - - for (entry = cntr->rx_list.next; entry != &cntr->rx_list; - entry = entry->next) { - fid_entry = container_of(entry, struct fid_list_entry, entry); - rx_ctx = container_of(fid_entry->fid, struct sock_rx_ctx, ctx.fid); - if (rx_ctx->use_shared) - sock_pe_progress_rx_ctx(cntr->domain->pe, rx_ctx->srx_ctx); - else - sock_pe_progress_ep_rx(cntr->domain->pe, rx_ctx->ep_attr); - } - - ofi_mutex_unlock(&cntr->list_lock); - return 0; -} - -void sock_cntr_check_trigger_list(struct sock_cntr *cntr) -{ - struct fi_deferred_work *work; - struct sock_trigger *trigger; - struct dlist_entry *entry; - ssize_t ret = 0; - - ofi_mutex_lock(&cntr->trigger_lock); - for (entry = cntr->trigger_list.next; - entry != &cntr->trigger_list;) { - - trigger = container_of(entry, struct sock_trigger, entry); - entry = entry->next; - - if (ofi_atomic_get32(&cntr->value) < (int) trigger->threshold) - continue; - - switch (trigger->op_type) { - case FI_OP_SEND: - ret = sock_ep_sendmsg(trigger->ep, &trigger->op.msg.msg, - trigger->flags & ~FI_TRIGGER); - break; - case FI_OP_RECV: - ret = sock_ep_recvmsg(trigger->ep, &trigger->op.msg.msg, - trigger->flags & ~FI_TRIGGER); - break; - case FI_OP_TSEND: - ret = sock_ep_tsendmsg(trigger->ep, &trigger->op.tmsg.msg, - trigger->flags & ~FI_TRIGGER); - break; - case FI_OP_TRECV: - ret = sock_ep_trecvmsg(trigger->ep, &trigger->op.tmsg.msg, - trigger->flags & ~FI_TRIGGER); - break; - case FI_OP_WRITE: - ret = sock_ep_rma_writemsg(trigger->ep, - &trigger->op.rma.msg, - trigger->flags & ~FI_TRIGGER); - break; - case 
FI_OP_READ: - ret = sock_ep_rma_readmsg(trigger->ep, - &trigger->op.rma.msg, - trigger->flags & ~FI_TRIGGER); - break; - case FI_OP_ATOMIC: - case FI_OP_FETCH_ATOMIC: - case FI_OP_COMPARE_ATOMIC: - ret = sock_ep_tx_atomic(trigger->ep, - &trigger->op.atomic.msg, - trigger->op.atomic.comparev, - NULL, - trigger->op.atomic.compare_count, - trigger->op.atomic.resultv, - NULL, - trigger->op.atomic.result_count, - trigger->flags & ~FI_TRIGGER); - break; - case FI_OP_CNTR_SET: - work = container_of(trigger->context, - struct fi_deferred_work, context); - fi_cntr_set(work->op.cntr->cntr, work->op.cntr->value); - ret = 0; - break; - case FI_OP_CNTR_ADD: - work = container_of(trigger->context, - struct fi_deferred_work, context); - fi_cntr_add(work->op.cntr->cntr, work->op.cntr->value); - ret = 0; - break; - default: - SOCK_LOG_ERROR("unsupported op\n"); - ret = 0; - break; - } - - if (ret != -FI_EAGAIN) { - dlist_remove(&trigger->entry); - free(trigger); - } else { - break; - } - } - ofi_mutex_unlock(&cntr->trigger_lock); -} - -static uint64_t sock_cntr_read(struct fid_cntr *fid_cntr) -{ - struct sock_cntr *cntr; - cntr = container_of(fid_cntr, struct sock_cntr, cntr_fid); - sock_cntr_progress(cntr); - return ofi_atomic_get32(&cntr->value); -} - -void sock_cntr_inc(struct sock_cntr *cntr) -{ - pthread_mutex_lock(&cntr->mut); - ofi_atomic_inc32(&cntr->value); - if (ofi_atomic_get32(&cntr->num_waiting)) - pthread_cond_broadcast(&cntr->cond); - if (cntr->signal) - sock_wait_signal(cntr->waitset); - sock_cntr_check_trigger_list(cntr); - pthread_mutex_unlock(&cntr->mut); -} - -static int sock_cntr_add(struct fid_cntr *fid_cntr, uint64_t value) -{ - uint64_t new_val; - struct sock_cntr *cntr; - cntr = container_of(fid_cntr, struct sock_cntr, cntr_fid); - - pthread_mutex_lock(&cntr->mut); - new_val = ofi_atomic_add32(&cntr->value, (int32_t) value); - ofi_atomic_set32(&cntr->last_read_val, (int32_t) new_val); - if (ofi_atomic_get32(&cntr->num_waiting)) - 
pthread_cond_broadcast(&cntr->cond); - if (cntr->signal) - sock_wait_signal(cntr->waitset); - - sock_cntr_check_trigger_list(cntr); - pthread_mutex_unlock(&cntr->mut); - return 0; -} - -static int sock_cntr_set(struct fid_cntr *fid_cntr, uint64_t value) -{ - uint64_t new_val; - struct sock_cntr *cntr; - cntr = container_of(fid_cntr, struct sock_cntr, cntr_fid); - - pthread_mutex_lock(&cntr->mut); - new_val = ofi_atomic_set32(&cntr->value, (int32_t) value); - ofi_atomic_set32(&cntr->last_read_val, (int32_t) new_val); - if (ofi_atomic_get32(&cntr->num_waiting)) - pthread_cond_broadcast(&cntr->cond); - if (cntr->signal) - sock_wait_signal(cntr->waitset); - - sock_cntr_check_trigger_list(cntr); - pthread_mutex_unlock(&cntr->mut); - return 0; -} - -static int sock_cntr_adderr(struct fid_cntr *fid_cntr, uint64_t value) -{ - struct sock_cntr *cntr; - cntr = container_of(fid_cntr, struct sock_cntr, cntr_fid); - - pthread_mutex_lock(&cntr->mut); - ofi_atomic_add32(&cntr->err_cnt, (int32_t) value); - if (!cntr->err_flag) - cntr->err_flag = 1; - pthread_cond_signal(&cntr->cond); - if (cntr->signal) - sock_wait_signal(cntr->waitset); - pthread_mutex_unlock(&cntr->mut); - - return 0; -} - -static int sock_cntr_seterr(struct fid_cntr *fid_cntr, uint64_t value) -{ - struct sock_cntr *cntr; - - cntr = container_of(fid_cntr, struct sock_cntr, cntr_fid); - pthread_mutex_lock(&cntr->mut); - ofi_atomic_set32(&cntr->err_cnt, (int32_t) value); - if (!cntr->err_flag) - cntr->err_flag = 1; - pthread_cond_signal(&cntr->cond); - if (cntr->signal) - sock_wait_signal(cntr->waitset); - pthread_mutex_unlock(&cntr->mut); - - return 0; - -} - -static int sock_cntr_wait(struct fid_cntr *fid_cntr, uint64_t threshold, - int timeout) -{ - int last_read, ret = 0; - uint64_t start_ms = 0, end_ms = 0, remaining_ms = 0; - struct sock_cntr *cntr; - cntr = container_of(fid_cntr, struct sock_cntr, cntr_fid); - - pthread_mutex_lock(&cntr->mut); - if (cntr->err_flag) { - ret = -FI_EAVAIL; - goto out; - } - - 
if (ofi_atomic_get32(&cntr->value) >= (int)threshold) { - ret = 0; - goto out; - } - - ofi_atomic_inc32(&cntr->num_waiting); - - if (timeout >= 0) { - start_ms = ofi_gettime_ms(); - end_ms = start_ms + timeout; - } - - last_read = ofi_atomic_get32(&cntr->value); - remaining_ms = timeout; - - while (!ret && last_read < (int)threshold) { - if (cntr->domain->progress_mode == FI_PROGRESS_MANUAL) { - pthread_mutex_unlock(&cntr->mut); - ret = sock_cntr_progress(cntr); - pthread_mutex_lock(&cntr->mut); - } else { - ret = ofi_wait_cond(&cntr->cond, &cntr->mut, (int) remaining_ms); - } - - uint64_t curr_ms = ofi_gettime_ms(); - if (timeout >= 0) { - if (curr_ms >= end_ms) { - ret = -FI_ETIMEDOUT; - break; - } else { - remaining_ms = end_ms - curr_ms; - } - } - - last_read = ofi_atomic_get32(&cntr->value); - } - - ofi_atomic_set32(&cntr->last_read_val, last_read); - ofi_atomic_dec32(&cntr->num_waiting); - - sock_cntr_check_trigger_list(cntr); - if (cntr->err_flag) - ret = -FI_EAVAIL; - pthread_mutex_unlock(&cntr->mut); - return ret; - -out: - pthread_mutex_unlock(&cntr->mut); - return ret; -} - -static int sock_cntr_control(struct fid *fid, int command, void *arg) -{ - int ret = 0; - struct sock_cntr *cntr; - - cntr = container_of(fid, struct sock_cntr, cntr_fid); - - switch (command) { - case FI_GETWAIT: - if (cntr->domain->progress_mode == FI_PROGRESS_MANUAL) - return -FI_ENOSYS; - - switch (cntr->attr.wait_obj) { - case FI_WAIT_NONE: - case FI_WAIT_UNSPEC: - case FI_WAIT_MUTEX_COND: - memcpy(arg, &cntr->mut, sizeof(cntr->mut)); - memcpy((char *)arg + sizeof(cntr->mut), &cntr->cond, - sizeof(cntr->cond)); - break; - - case FI_WAIT_SET: - case FI_WAIT_FD: - sock_wait_get_obj(cntr->waitset, arg); - break; - - default: - ret = -FI_EINVAL; - break; - } - break; - - case FI_GETOPSFLAG: - memcpy(arg, &cntr->attr.flags, sizeof(uint64_t)); - break; - - case FI_SETOPSFLAG: - memcpy(&cntr->attr.flags, arg, sizeof(uint64_t)); - break; - - default: - ret = -FI_EINVAL; - break; - } - 
return ret; -} - -static int sock_cntr_close(struct fid *fid) -{ - struct sock_cntr *cntr; - - cntr = container_of(fid, struct sock_cntr, cntr_fid.fid); - if (ofi_atomic_get32(&cntr->ref)) - return -FI_EBUSY; - - if (cntr->signal && cntr->attr.wait_obj == FI_WAIT_FD) - sock_wait_close(&cntr->waitset->fid); - - /* An app could attempt to close the counter after a triggered op - * has updated it. In this case, a progress thread may be actively - * using the counter (e.g. calling fi_cntr_add). The thread will - * be accessing the counter while holding the mutex. So, we wait - * until we can acquire the mutex before proceeding. This ensures - * that the progress thread is no longer touching the counter. - */ - pthread_mutex_lock(&cntr->mut); - pthread_mutex_unlock(&cntr->mut); - - pthread_mutex_destroy(&cntr->mut); - ofi_mutex_destroy(&cntr->list_lock); - ofi_mutex_destroy(&cntr->trigger_lock); - - pthread_cond_destroy(&cntr->cond); - ofi_atomic_dec32(&cntr->domain->ref); - free(cntr); - return 0; -} - -static uint64_t sock_cntr_readerr(struct fid_cntr *cntr) -{ - struct sock_cntr *_cntr; - _cntr = container_of(cntr, struct sock_cntr, cntr_fid); - if (_cntr->domain->progress_mode == FI_PROGRESS_MANUAL) - sock_cntr_progress(_cntr); - if (_cntr->err_flag) - _cntr->err_flag = 0; - return ofi_atomic_get32(&_cntr->err_cnt); -} - -static struct fi_ops_cntr sock_cntr_ops = { - .size = sizeof(struct fi_ops_cntr), - .readerr = sock_cntr_readerr, - .read = sock_cntr_read, - .add = sock_cntr_add, - .set = sock_cntr_set, - .wait = sock_cntr_wait, - .adderr = sock_cntr_adderr, - .seterr = sock_cntr_seterr, -}; - -static struct fi_ops sock_cntr_fi_ops = { - .size = sizeof(struct fi_ops), - .close = sock_cntr_close, - .bind = fi_no_bind, - .control = sock_cntr_control, - .ops_open = fi_no_ops_open, -}; - -static int sock_cntr_verify_attr(struct fi_cntr_attr *attr) -{ - switch (attr->events) { - case FI_CNTR_EVENTS_COMP: - break; - default: - return -FI_ENOSYS; - } - - switch 
(attr->wait_obj) { - case FI_WAIT_NONE: - case FI_WAIT_UNSPEC: - case FI_WAIT_MUTEX_COND: - case FI_WAIT_SET: - case FI_WAIT_FD: - break; - default: - return -FI_ENOSYS; - } - if (attr->flags) - return -FI_EINVAL; - return 0; -} - -int sock_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, - struct fid_cntr **cntr, void *context) -{ - int ret; - struct sock_domain *dom; - struct sock_cntr *_cntr; - struct fi_wait_attr wait_attr; - struct sock_fid_list *list_entry; - struct sock_wait *wait; - - dom = container_of(domain, struct sock_domain, dom_fid); - if (attr && sock_cntr_verify_attr(attr)) - return -FI_ENOSYS; - - _cntr = calloc(1, sizeof(*_cntr)); - if (!_cntr) - return -FI_ENOMEM; - - ret = pthread_cond_init(&_cntr->cond, NULL); - if (ret) - goto err; - - if (attr == NULL) - memcpy(&_cntr->attr, &sock_cntr_attr, sizeof(sock_cntr_attr)); - else - memcpy(&_cntr->attr, attr, sizeof(sock_cntr_attr)); - - switch (_cntr->attr.wait_obj) { - - case FI_WAIT_NONE: - case FI_WAIT_UNSPEC: - case FI_WAIT_MUTEX_COND: - _cntr->signal = 0; - break; - - case FI_WAIT_FD: - wait_attr.flags = 0; - wait_attr.wait_obj = FI_WAIT_FD; - ret = sock_wait_open(&dom->fab->fab_fid, &wait_attr, - &_cntr->waitset); - if (ret) { - ret = FI_EINVAL; - goto err; - } - _cntr->signal = 1; - break; - - case FI_WAIT_SET: - if (!attr) { - ret = FI_EINVAL; - goto err; - } - - _cntr->waitset = attr->wait_set; - _cntr->signal = 1; - wait = container_of(attr->wait_set, struct sock_wait, wait_fid); - list_entry = calloc(1, sizeof(*list_entry)); - if (!list_entry) { - ret = FI_ENOMEM; - goto err; - } - dlist_init(&list_entry->entry); - list_entry->fid = &_cntr->cntr_fid.fid; - dlist_insert_after(&list_entry->entry, &wait->fid_list); - break; - - default: - break; - } - - pthread_mutex_init(&_cntr->mut, NULL); - ofi_mutex_init(&_cntr->list_lock); - - ofi_atomic_initialize32(&_cntr->ref, 0); - ofi_atomic_initialize32(&_cntr->err_cnt, 0); - - ofi_atomic_initialize32(&_cntr->value, 0); - 
ofi_atomic_initialize32(&_cntr->last_read_val, 0); - ofi_atomic_initialize32(&_cntr->num_waiting, 0); - - dlist_init(&_cntr->tx_list); - dlist_init(&_cntr->rx_list); - - dlist_init(&_cntr->trigger_list); - ofi_mutex_init(&_cntr->trigger_lock); - - _cntr->cntr_fid.fid.fclass = FI_CLASS_CNTR; - _cntr->cntr_fid.fid.context = context; - _cntr->cntr_fid.fid.ops = &sock_cntr_fi_ops; - _cntr->cntr_fid.ops = &sock_cntr_ops; - - ofi_atomic_inc32(&dom->ref); - _cntr->domain = dom; - *cntr = &_cntr->cntr_fid; - return 0; - -err: - free(_cntr); - return -ret; -} - diff --git a/prov/sockets/src/sock_comm.c b/prov/sockets/src/sock_comm.c deleted file mode 100644 index 52c83e32c29..00000000000 --- a/prov/sockets/src/sock_comm.c +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright (c) 2014 Intel Corporation, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "config.h" - -#include -#include -#include -#include -#include - -#include "sock.h" -#include "sock_util.h" - -#define SOCK_LOG_DBG(...) _SOCK_LOG_DBG(FI_LOG_EP_DATA, __VA_ARGS__) -#define SOCK_LOG_ERROR(...) _SOCK_LOG_ERROR(FI_LOG_EP_DATA, __VA_ARGS__) - -static ssize_t sock_comm_send_socket(struct sock_conn *conn, - const void *buf, size_t len) -{ - ssize_t ret; - - ret = ofi_send_socket(conn->sock_fd, buf, len, MSG_NOSIGNAL); - if (ret < 0) { - if (OFI_SOCK_TRY_SND_RCV_AGAIN(ofi_sockerr())) { - ret = 0; - } else if (ofi_sockerr() == EPIPE) { - conn->connected = 0; - SOCK_LOG_DBG("Disconnected port: %d\n", - ofi_addr_get_port(&conn->addr.sa)); - } else { - SOCK_LOG_DBG("write error: %s\n", - strerror(ofi_sockerr())); - } - } - if (ret > 0) - SOCK_LOG_DBG("wrote to network: %lu\n", ret); - return ret; -} - -ssize_t sock_comm_flush(struct sock_pe_entry *pe_entry) -{ - ssize_t ret1, ret2 = 0; - size_t endlen, len, xfer_len; - - len = ofi_rbused(&pe_entry->comm_buf); - endlen = pe_entry->comm_buf.size - - (pe_entry->comm_buf.rcnt & pe_entry->comm_buf.size_mask); - - xfer_len = MIN(len, endlen); - ret1 = sock_comm_send_socket(pe_entry->conn, (char*)pe_entry->comm_buf.buf + - (pe_entry->comm_buf.rcnt & pe_entry->comm_buf.size_mask), - xfer_len); - if (ret1 > 0) - pe_entry->comm_buf.rcnt += ret1; - - if ((size_t) ret1 == xfer_len && xfer_len < len) { - ret2 = sock_comm_send_socket(pe_entry->conn, (char*)pe_entry->comm_buf.buf + - (pe_entry->comm_buf.rcnt & pe_entry->comm_buf.size_mask), - len - xfer_len); - if (ret2 > 0) - pe_entry->comm_buf.rcnt += ret2; - else - ret2 = 0; - } - - return (ret1 > 0) ? 
ret1 + ret2 : 0; -} - -ssize_t sock_comm_send(struct sock_pe_entry *pe_entry, - const void *buf, size_t len) -{ - ssize_t ret, used; - - if (len > pe_entry->cache_sz) { - used = ofi_rbused(&pe_entry->comm_buf); - if (used == sock_comm_flush(pe_entry)) { - return sock_comm_send_socket(pe_entry->conn, buf, len); - } else { - return 0; - } - } - - if (ofi_rbavail(&pe_entry->comm_buf) < len) { - ret = sock_comm_flush(pe_entry); - if (ret <= 0) - return 0; - } - - ret = MIN(ofi_rbavail(&pe_entry->comm_buf), len); - ofi_rbwrite(&pe_entry->comm_buf, buf, ret); - ofi_rbcommit(&pe_entry->comm_buf); - SOCK_LOG_DBG("buffered %lu\n", ret); - return ret; -} - -int sock_comm_tx_done(struct sock_pe_entry *pe_entry) -{ - return ofi_rbempty(&pe_entry->comm_buf); -} - -static ssize_t sock_comm_recv_socket(struct sock_conn *conn, - void *buf, size_t len) -{ - ssize_t ret; - ret = ofi_recv_socket(conn->sock_fd, buf, len, 0); - if (ret == 0) { - conn->connected = 0; - SOCK_LOG_DBG("Disconnected: port %d\n", - ofi_addr_get_port(&conn->addr.sa)); - return ret; - } - - if (ret < 0) { - SOCK_LOG_DBG("read %s\n", strerror(ofi_sockerr())); - ret = 0; - } - - if (ret > 0) - SOCK_LOG_DBG("read from network: %lu\n", ret); - return ret; -} - -static void sock_comm_recv_buffer(struct sock_pe_entry *pe_entry) -{ - ssize_t ret; - size_t max_read, avail; - - avail = ofi_rbavail(&pe_entry->comm_buf); - assert(avail == pe_entry->comm_buf.size); - pe_entry->comm_buf.rcnt = - pe_entry->comm_buf.wcnt = - pe_entry->comm_buf.wpos = 0; - - max_read = pe_entry->rem ? 
pe_entry->rem : - pe_entry->total_len - pe_entry->done_len; - ret = sock_comm_recv_socket(pe_entry->conn, (char *) pe_entry->comm_buf.buf, - MIN(max_read, avail)); - pe_entry->comm_buf.wpos += ret; - ofi_rbcommit(&pe_entry->comm_buf); -} - -ssize_t sock_comm_recv(struct sock_pe_entry *pe_entry, void *buf, size_t len) -{ - ssize_t read_len; - if (ofi_rbempty(&pe_entry->comm_buf)) { - if (len <= pe_entry->cache_sz) { - sock_comm_recv_buffer(pe_entry); - } else { - return sock_comm_recv_socket(pe_entry->conn, buf, len); - } - } - - read_len = MIN(len, ofi_rbused(&pe_entry->comm_buf)); - ofi_rbread(&pe_entry->comm_buf, buf, read_len); - SOCK_LOG_DBG("read from buffer: %lu\n", read_len); - return read_len; -} - -ssize_t sock_comm_peek(struct sock_conn *conn, void *buf, size_t len) -{ - ssize_t ret; - ret = ofi_recv_socket(conn->sock_fd, buf, len, MSG_PEEK); - if (ret == 0) { - conn->connected = 0; - SOCK_LOG_DBG("Disconnected\n"); - return ret; - } - - if (ret < 0) { - SOCK_LOG_DBG("peek %s\n", strerror(ofi_sockerr())); - ret = 0; - } - - if (ret > 0) - SOCK_LOG_DBG("peek from network: %lu\n", ret); - return ret; -} - -ssize_t sock_comm_discard(struct sock_pe_entry *pe_entry, size_t len) -{ - void *buf; - ssize_t ret; - - buf = malloc(len); - if (!buf) - return 0; - - ret = sock_comm_recv(pe_entry, buf, len); - free(buf); - return ret; -} - -int sock_comm_is_disconnected(struct sock_pe_entry *pe_entry) -{ - /* If the PE entry is TX, there is no need to check that the ring buffer is - * empty */ - if (pe_entry->type == SOCK_PE_TX) - return (!pe_entry->conn->connected); - else - return (ofi_rbempty(&pe_entry->comm_buf) && !pe_entry->conn->connected); -} diff --git a/prov/sockets/src/sock_conn.c b/prov/sockets/src/sock_conn.c deleted file mode 100644 index 2b49eaac108..00000000000 --- a/prov/sockets/src/sock_conn.c +++ /dev/null @@ -1,645 +0,0 @@ -/* - * Copyright (c) 2014 Intel Corporation, Inc. All rights reserved. - * Copyright (c) 2017 DataDirect Networks, Inc. 
All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "config.h" - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "sock.h" -#include "sock_util.h" -#include "ofi_file.h" - -#define SOCK_LOG_DBG(...) _SOCK_LOG_DBG(FI_LOG_EP_CTRL, __VA_ARGS__) -#define SOCK_LOG_ERROR(...) 
_SOCK_LOG_ERROR(FI_LOG_EP_CTRL, __VA_ARGS__) - -ssize_t sock_conn_send_src_addr(struct sock_ep_attr *ep_attr, struct sock_tx_ctx *tx_ctx, - struct sock_conn *conn) -{ - int ret; - uint64_t total_len; - struct sock_op tx_op = { 0 }; - - tx_op.op = SOCK_OP_CONN_MSG; - SOCK_LOG_DBG("New conn msg on TX: %p using conn: %p\n", tx_ctx, conn); - - tx_op.src_iov_len = sizeof(union ofi_sock_ip); - total_len = tx_op.src_iov_len + sizeof(struct sock_op_send); - - sock_tx_ctx_start(tx_ctx); - if (ofi_rbavail(&tx_ctx->rb) < total_len) { - ret = -FI_EAGAIN; - goto err; - } - - sock_tx_ctx_write_op_send(tx_ctx, &tx_op, 0, (uintptr_t) NULL, 0, 0, - ep_attr, conn); - - ofi_straddr_dbg(&sock_prov, FI_LOG_EP_CTRL, "sending src addr: ", - ep_attr->src_addr); - sock_tx_ctx_write(tx_ctx, ep_attr->src_addr, sizeof(union ofi_sock_ip)); - sock_tx_ctx_commit(tx_ctx); - conn->address_published = 1; - return 0; - -err: - sock_tx_ctx_abort(tx_ctx); - return ret; -} - -int sock_conn_map_init(struct sock_ep *ep, int init_size) -{ - struct sock_conn_map *map = &ep->attr->cmap; - int ret; - - map->table = calloc(init_size, sizeof(*map->table)); - if (!map->table) - return -FI_ENOMEM; - - map->epoll_events = calloc(init_size, sizeof(*map->epoll_events)); - if (!map->epoll_events) - goto err1; - - ret = ofi_epoll_create(&map->epoll_set); - if (ret < 0) { - SOCK_LOG_ERROR("failed to create epoll set, " - "error - %d (%s)\n", ret, - strerror(ret)); - goto err2; - } - - ofi_mutex_init(&map->lock); - map->used = 0; - map->size = init_size; - map->epoll_size = init_size; - return 0; - -err2: - free(map->epoll_events); -err1: - free(map->table); - return -FI_ENOMEM; -} - -static int sock_conn_map_increase(struct sock_conn_map *map, int new_size) -{ - void *_table; - - _table = realloc(map->table, new_size * sizeof(*map->table)); - if (!_table) { - SOCK_LOG_ERROR("*** realloc failed, use FI_SOCKETS_DEF_CONN_MAP_SZ for" - "specifying conn-map-size\n"); - return -FI_ENOMEM; - } - - map->size = new_size; - 
map->table = _table; - return 0; -} - -void sock_conn_map_destroy(struct sock_ep_attr *ep_attr) -{ - int i; - struct sock_conn_map *cmap = &ep_attr->cmap; - for (i = 0; i < cmap->used; i++) { - if (cmap->table[i].sock_fd != -1) { - sock_pe_poll_del(ep_attr->domain->pe, cmap->table[i].sock_fd); - sock_conn_release_entry(cmap, &cmap->table[i]); - } - } - free(cmap->table); - cmap->table = NULL; - free(cmap->epoll_events); - cmap->epoll_events = NULL; - cmap->epoll_size = 0; - cmap->used = cmap->size = 0; - ofi_epoll_close(cmap->epoll_set); - ofi_mutex_destroy(&cmap->lock); -} - -void sock_conn_release_entry(struct sock_conn_map *map, struct sock_conn *conn) -{ - ofi_epoll_del(map->epoll_set, conn->sock_fd); - ofi_close_socket(conn->sock_fd); - - conn->address_published = 0; - conn->av_index = FI_ADDR_NOTAVAIL; - conn->connected = 0; - conn->sock_fd = -1; -} - -static int sock_conn_get_next_index(struct sock_conn_map *map) -{ - int i; - for (i = 0; i < map->size; i++) { - if (map->table[i].sock_fd == -1) - return i; - } - return -1; -} - -static struct sock_conn *sock_conn_map_insert(struct sock_ep_attr *ep_attr, - union ofi_sock_ip *addr, int conn_fd, - int addr_published) -{ - int index; - struct sock_conn_map *map = &ep_attr->cmap; - - if (map->size == map->used) { - index = sock_conn_get_next_index(map); - if (index < 0) { - if (sock_conn_map_increase(map, map->size * 2)) - return NULL; - index = map->used; - map->used++; - } - } else { - index = map->used; - map->used++; - } - - map->table[index].av_index = FI_ADDR_NOTAVAIL; - map->table[index].connected = 1; - map->table[index].addr = *addr; - map->table[index].sock_fd = conn_fd; - map->table[index].ep_attr = ep_attr; - sock_set_sockopts(conn_fd, SOCK_OPTS_NONBLOCK | - (ep_attr->ep_type == FI_EP_MSG ? 
- SOCK_OPTS_KEEPALIVE : 0)); - - if (ofi_epoll_add(map->epoll_set, conn_fd, OFI_EPOLL_IN, &map->table[index])) - SOCK_LOG_ERROR("failed to add to epoll set: %d\n", conn_fd); - - map->table[index].address_published = addr_published; - sock_pe_poll_add(ep_attr->domain->pe, conn_fd); - return &map->table[index]; -} - -int fd_set_nonblock(int fd) -{ - int ret; - - ret = fi_fd_nonblock(fd); - if (ret) - SOCK_LOG_ERROR("fi_fd_nonblock failed, errno: %d\n", - ret); - - return ret; -} - -#if !defined __APPLE__ && !defined _WIN32 -void sock_set_sockopt_keepalive(int sock) -{ - int optval; - - /* Keepalive is disabled: now leave */ - if (!sock_keepalive_enable) - return; - - optval = 1; - if (setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, &optval, sizeof(optval))) - SOCK_LOG_ERROR("setsockopt keepalive enable failed: %s\n", - strerror(errno)); - - if (sock_keepalive_time != INT_MAX) { - optval = sock_keepalive_time; - if (setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, &optval, sizeof(optval))) - SOCK_LOG_ERROR("setsockopt keepalive time failed: %s\n", - strerror(errno)); - } - - if (sock_keepalive_intvl != INT_MAX) { - optval = sock_keepalive_intvl; - if (setsockopt(sock, SOL_TCP, TCP_KEEPINTVL, &optval, sizeof(optval))) - SOCK_LOG_ERROR("setsockopt keepalive intvl failed: %s\n", - strerror(errno)); - } - - if (sock_keepalive_probes != INT_MAX) { - optval = sock_keepalive_probes; - if (setsockopt(sock, SOL_TCP, TCP_KEEPCNT, &optval, sizeof(optval))) - SOCK_LOG_ERROR("setsockopt keepalive intvl failed: %s\n", - strerror(errno)); - } -} -#else -#define sock_set_sockopt_keepalive(sock) do {} while (0) -#endif - -static void sock_set_sockopt_reuseaddr(int sock) -{ - int optval; - optval = 1; - if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, - (const void *) &optval, sizeof(optval))) - SOCK_LOG_ERROR("setsockopt reuseaddr failed\n"); -} - -static void sock_set_sockopt_bufsize(int sock) -{ - int bufsize = 0; - socklen_t len = sizeof(int); - - if (sock_buf_sz == 0) - return; - bufsize = 
sock_buf_sz; - - if (setsockopt(sock, SOL_SOCKET, SO_RCVBUF, (char *) &bufsize, len)) - SOCK_LOG_ERROR("setsockopt rcvbuf size failed\n"); - - if (setsockopt(sock, SOL_SOCKET, SO_SNDBUF, (char *) &bufsize, len)) - SOCK_LOG_ERROR("setsockopt sndbuf size failed\n"); -} - -void sock_set_sockopts(int sock, int sock_opts) -{ - int optval; - optval = 1; - - sock_set_sockopt_reuseaddr(sock); - if (sock_opts & SOCK_OPTS_KEEPALIVE) - sock_set_sockopt_keepalive(sock); - if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, - (const void *) &optval, sizeof(optval))) - SOCK_LOG_ERROR("setsockopt nodelay failed\n"); - - if (sock_opts & SOCK_OPTS_NONBLOCK) - fd_set_nonblock(sock); - - if (sock_opts & SOCK_OPTS_BUFSIZE) - sock_set_sockopt_bufsize(sock); -} - -int sock_conn_stop_listener_thread(struct sock_conn_listener *conn_listener) -{ - conn_listener->do_listen = 0; - - ofi_mutex_lock(&conn_listener->signal_lock); - fd_signal_set(&conn_listener->signal); - ofi_mutex_unlock(&conn_listener->signal_lock); - - if (conn_listener->listener_thread && - pthread_join(conn_listener->listener_thread, NULL)) { - SOCK_LOG_DBG("pthread join failed\n"); - } - - fd_signal_free(&conn_listener->signal); - ofi_epoll_close(conn_listener->epollfd); - ofi_mutex_destroy(&conn_listener->signal_lock); - - return 0; -} - -static void *sock_conn_listener_thread(void *arg) -{ - struct sock_conn_listener *conn_listener = arg; - struct sock_conn_handle *conn_handle; - struct ofi_epollfds_event events[SOCK_EPOLL_WAIT_EVENTS]; - struct sock_ep_attr *ep_attr; - int num_fds, i, conn_fd; - union ofi_sock_ip remote; - socklen_t addr_size; - - while (conn_listener->do_listen) { - num_fds = ofi_epoll_wait(conn_listener->epollfd, events, - SOCK_EPOLL_WAIT_EVENTS, -1); - if (num_fds < 0) { - SOCK_LOG_ERROR("poll failed : %s\n", strerror(errno)); - continue; - } - - ofi_mutex_lock(&conn_listener->signal_lock); - if (conn_listener->removed_from_epollfd) { - /* The epoll set changed between calling wait and wait - * 
returning. Get an updated set of events to avoid - * possible use after free error. - */ - conn_listener->removed_from_epollfd = false; - goto skip; - } - - for (i = 0; i < num_fds; i++) { - conn_handle = events[i].data.ptr; - - if (conn_handle == NULL) { /* signal event */ - fd_signal_reset(&conn_listener->signal); - continue; - } - - memset(&remote, 0, sizeof remote); - addr_size = sizeof(remote); - conn_fd = accept(conn_handle->sock, &remote.sa, - &addr_size); - SOCK_LOG_DBG("CONN: accepted conn-req: %d\n", conn_fd); - ofi_straddr_dbg(&sock_prov, FI_LOG_EP_CTRL, - "accepted peer addr: ", &remote.sa); - - if (conn_fd < 0) { - SOCK_LOG_ERROR("failed to accept: %s\n", - strerror(ofi_sockerr())); - continue; - } - - ep_attr = container_of(conn_handle, struct sock_ep_attr, conn_handle); - ofi_mutex_lock(&ep_attr->cmap.lock); - sock_conn_map_insert(ep_attr, &remote, conn_fd, 1); - ofi_mutex_unlock(&ep_attr->cmap.lock); - sock_pe_signal(ep_attr->domain->pe); - } -skip: - ofi_mutex_unlock(&conn_listener->signal_lock); - } - - return NULL; -} - -int sock_conn_start_listener_thread(struct sock_conn_listener *conn_listener) -{ - int ret; - - ofi_mutex_init(&conn_listener->signal_lock); - - ret = ofi_epoll_create(&conn_listener->epollfd); - if (ret < 0) { - SOCK_LOG_ERROR("failed to create epoll set\n"); - goto err1; - } - - ret = fd_signal_init(&conn_listener->signal); - if (ret < 0) { - SOCK_LOG_ERROR("failed to init signal\n"); - goto err2; - } - - ret = ofi_epoll_add(conn_listener->epollfd, - conn_listener->signal.fd[FI_READ_FD], - OFI_EPOLL_IN, NULL); - if (ret != 0){ - SOCK_LOG_ERROR("failed to add signal fd to epoll\n"); - goto err3; - } - - conn_listener->do_listen = 1; - conn_listener->removed_from_epollfd = false; - ret = pthread_create(&conn_listener->listener_thread, NULL, - sock_conn_listener_thread, conn_listener); - if (ret < 0) { - SOCK_LOG_ERROR("failed to create conn listener thread\n"); - goto err3; - } - return 0; - -err3: - conn_listener->do_listen = 0; 
- fd_signal_free(&conn_listener->signal); -err2: - ofi_epoll_close(conn_listener->epollfd); -err1: - ofi_mutex_destroy(&conn_listener->signal_lock); - return ret; -} - -int sock_conn_listen(struct sock_ep_attr *ep_attr) -{ - int listen_fd, ret; - socklen_t addr_size; - union ofi_sock_ip addr; - struct sock_conn_handle *conn_handle = &ep_attr->conn_handle; - - listen_fd = ofi_socket(ep_attr->src_addr->sa.sa_family, - SOCK_STREAM, IPPROTO_TCP); - if (listen_fd == INVALID_SOCKET) - return -ofi_sockerr(); - - sock_set_sockopts(listen_fd, SOCK_OPTS_NONBLOCK | SOCK_OPTS_BUFSIZE); - - addr = *ep_attr->src_addr; - if (ep_attr->ep_type == FI_EP_MSG) - ofi_addr_set_port(&addr.sa, 0); - - ret = bind(listen_fd, &addr.sa, (socklen_t) ofi_sizeofaddr(&addr.sa)); - if (ret) { - SOCK_LOG_ERROR("failed to bind listener: %s\n", - strerror(ofi_sockerr())); - ofi_straddr_log(&sock_prov, FI_LOG_WARN, FI_LOG_EP_CTRL, - "bind failed to addr: ", &addr.sa); - ret = -ofi_sockerr(); - goto err; - } - - addr_size = sizeof(addr); - ret = ofi_getsockname(listen_fd, &addr.sa, &addr_size); - if (ret) { - ret = -ofi_sockerr(); - goto err; - } - - ep_attr->msg_src_port = ofi_addr_get_port(&addr.sa); - if (!ofi_addr_get_port(&ep_attr->src_addr->sa)) - ofi_addr_set_port(&ep_attr->src_addr->sa, ep_attr->msg_src_port); - - ofi_straddr_dbg(&sock_prov, FI_LOG_EP_CTRL, "listening at addr: ", - &addr.sa); - ret = listen(listen_fd, sock_cm_def_map_sz); - if (ret) { - SOCK_LOG_ERROR("failed to listen socket: %s\n", - strerror(ofi_sockerr())); - ret = -ofi_sockerr(); - goto err; - } - - conn_handle->sock = listen_fd; - conn_handle->do_listen = 1; - - ofi_mutex_lock(&ep_attr->domain->conn_listener.signal_lock); - ret = ofi_epoll_add(ep_attr->domain->conn_listener.epollfd, - conn_handle->sock, OFI_EPOLL_IN, conn_handle); - fd_signal_set(&ep_attr->domain->conn_listener.signal); - ofi_mutex_unlock(&ep_attr->domain->conn_listener.signal_lock); - if (ret) { - SOCK_LOG_ERROR("failed to add fd to pollset: %d\n", ret); 
- goto err; - } - - return 0; -err: - if (listen_fd != INVALID_SOCKET) { - ofi_close_socket(listen_fd); - conn_handle->sock = INVALID_SOCKET; - conn_handle->do_listen = 0; - } - - return ret; -} - -int sock_ep_connect(struct sock_ep_attr *ep_attr, fi_addr_t index, - struct sock_conn **conn) -{ - int conn_fd = -1, ret; - int do_retry = sock_conn_retry; - struct sock_conn *new_conn; - union ofi_sock_ip addr; - socklen_t lon; - int valopt = 0; - struct pollfd poll_fd; - - if (ep_attr->ep_type == FI_EP_MSG) { - /* Need to check that destination address has been - passed to endpoint */ - assert(ep_attr->dest_addr); - addr = *ep_attr->dest_addr; - ofi_addr_set_port(&addr.sa, ep_attr->msg_dest_port); - } else { - ofi_mutex_lock(&ep_attr->av->table_lock); - addr = ep_attr->av->table[index].addr; - ofi_mutex_unlock(&ep_attr->av->table_lock); - } - -do_connect: - ofi_mutex_lock(&ep_attr->cmap.lock); - *conn = sock_ep_lookup_conn(ep_attr, index, &addr); - ofi_mutex_unlock(&ep_attr->cmap.lock); - - if (*conn != SOCK_CM_CONN_IN_PROGRESS) - return FI_SUCCESS; - - conn_fd = ofi_socket(addr.sa.sa_family, SOCK_STREAM, 0); - if (conn_fd == -1) { - SOCK_LOG_ERROR("failed to create conn_fd, errno: %d\n", - ofi_sockerr()); - *conn = NULL; - return -FI_EOTHER; - } - - ret = fd_set_nonblock(conn_fd); - if (ret) { - SOCK_LOG_ERROR("failed to set conn_fd nonblocking\n"); - *conn = NULL; - ofi_close_socket(conn_fd); - return -FI_EOTHER; - } - - ofi_straddr_dbg(&sock_prov, FI_LOG_EP_CTRL, "connecting to addr: ", - &addr.sa); - ret = connect(conn_fd, &addr.sa, (socklen_t) ofi_sizeofaddr(&addr.sa)); - if (ret < 0) { - if (OFI_SOCK_TRY_CONN_AGAIN(ofi_sockerr())) { - poll_fd.fd = conn_fd; - poll_fd.events = POLLOUT; - - ret = poll(&poll_fd, 1, sock_conn_timeout); - if (ret < 0) { - SOCK_LOG_DBG("poll failed\n"); - goto retry; - } - - lon = sizeof(int); - ret = getsockopt(conn_fd, SOL_SOCKET, SO_ERROR, - (void*)(&valopt), &lon); - if (ret < 0) { - SOCK_LOG_DBG("getsockopt failed: %d, %d\n", - 
ret, conn_fd); - goto retry; - } - - if (valopt) { - SOCK_LOG_DBG("Error in connection() " - "%d - %s - %d\n", - valopt, strerror(valopt), conn_fd); - goto retry; - } - goto out; - } else { - SOCK_LOG_DBG("Timeout or error() - %s: %d\n", - strerror(ofi_sockerr()), conn_fd); - goto retry; - } - } else { - goto out; - } - -retry: - do_retry--; - if (!do_retry) - goto err; - - if (conn_fd != -1) { - ofi_close_socket(conn_fd); - conn_fd = -1; - } - - SOCK_LOG_ERROR("Connect error, retrying - %s - %d\n", - strerror(ofi_sockerr()), conn_fd); - ofi_straddr_log(&sock_prov, FI_LOG_WARN, FI_LOG_EP_CTRL, - "Retry connect to peer ", &addr.sa); - goto do_connect; - -out: - ofi_mutex_lock(&ep_attr->cmap.lock); - new_conn = sock_conn_map_insert(ep_attr, &addr, conn_fd, 0); - if (!new_conn) { - ofi_mutex_unlock(&ep_attr->cmap.lock); - goto err; - } - new_conn->av_index = (ep_attr->ep_type == FI_EP_MSG) ? - FI_ADDR_NOTAVAIL : index; - *conn = ofi_idm_lookup(&ep_attr->av_idm, (int) index); - if (*conn == SOCK_CM_CONN_IN_PROGRESS) { - if (ofi_idm_set(&ep_attr->av_idm, (int) index, new_conn) < 0) - SOCK_LOG_ERROR("ofi_idm_set failed\n"); - *conn = new_conn; - } - ofi_mutex_unlock(&ep_attr->cmap.lock); - return FI_SUCCESS; - -err: - ofi_close_socket(conn_fd); - *conn = NULL; - return (OFI_SOCK_TRY_CONN_AGAIN(ofi_sockerr()) ? -FI_EAGAIN : - -ofi_sockerr()); -} diff --git a/prov/sockets/src/sock_cq.c b/prov/sockets/src/sock_cq.c deleted file mode 100644 index cd82d02ec34..00000000000 --- a/prov/sockets/src/sock_cq.c +++ /dev/null @@ -1,769 +0,0 @@ -/* - * Copyright (c) 2014 Intel Corporation, Inc. All rights reserved. - * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "sock.h" -#include "sock_util.h" - -#define SOCK_LOG_DBG(...) _SOCK_LOG_DBG(FI_LOG_CQ, __VA_ARGS__) -#define SOCK_LOG_ERROR(...) 
_SOCK_LOG_ERROR(FI_LOG_CQ, __VA_ARGS__) - -void sock_cq_add_tx_ctx(struct sock_cq *cq, struct sock_tx_ctx *tx_ctx) -{ - struct dlist_entry *entry; - struct sock_tx_ctx *curr_ctx; - pthread_mutex_lock(&cq->list_lock); - for (entry = cq->tx_list.next; entry != &cq->tx_list; - entry = entry->next) { - curr_ctx = container_of(entry, struct sock_tx_ctx, cq_entry); - if (tx_ctx == curr_ctx) - goto out; - } - dlist_insert_tail(&tx_ctx->cq_entry, &cq->tx_list); - ofi_atomic_inc32(&cq->ref); -out: - pthread_mutex_unlock(&cq->list_lock); -} - -void sock_cq_remove_tx_ctx(struct sock_cq *cq, struct sock_tx_ctx *tx_ctx) -{ - pthread_mutex_lock(&cq->list_lock); - dlist_remove(&tx_ctx->cq_entry); - ofi_atomic_dec32(&cq->ref); - pthread_mutex_unlock(&cq->list_lock); -} - -void sock_cq_add_rx_ctx(struct sock_cq *cq, struct sock_rx_ctx *rx_ctx) -{ - struct dlist_entry *entry; - struct sock_rx_ctx *curr_ctx; - pthread_mutex_lock(&cq->list_lock); - - for (entry = cq->rx_list.next; entry != &cq->rx_list; - entry = entry->next) { - curr_ctx = container_of(entry, struct sock_rx_ctx, cq_entry); - if (rx_ctx == curr_ctx) - goto out; - } - dlist_insert_tail(&rx_ctx->cq_entry, &cq->rx_list); - ofi_atomic_inc32(&cq->ref); -out: - pthread_mutex_unlock(&cq->list_lock); -} - -void sock_cq_remove_rx_ctx(struct sock_cq *cq, struct sock_rx_ctx *rx_ctx) -{ - pthread_mutex_lock(&cq->list_lock); - dlist_remove(&rx_ctx->cq_entry); - ofi_atomic_dec32(&cq->ref); - pthread_mutex_unlock(&cq->list_lock); -} - -int sock_cq_progress(struct sock_cq *cq) -{ - struct sock_tx_ctx *tx_ctx; - struct sock_rx_ctx *rx_ctx; - struct dlist_entry *entry; - - if (cq->domain->progress_mode == FI_PROGRESS_AUTO) - return 0; - - pthread_mutex_lock(&cq->list_lock); - for (entry = cq->tx_list.next; entry != &cq->tx_list; - entry = entry->next) { - tx_ctx = container_of(entry, struct sock_tx_ctx, cq_entry); - if (!tx_ctx->enabled) - continue; - - if (tx_ctx->use_shared) - sock_pe_progress_tx_ctx(cq->domain->pe, tx_ctx->stx_ctx); 
- else - sock_pe_progress_ep_tx(cq->domain->pe, tx_ctx->ep_attr); - } - - for (entry = cq->rx_list.next; entry != &cq->rx_list; - entry = entry->next) { - rx_ctx = container_of(entry, struct sock_rx_ctx, cq_entry); - if (!rx_ctx->enabled) - continue; - - if (rx_ctx->use_shared) - sock_pe_progress_rx_ctx(cq->domain->pe, rx_ctx->srx_ctx); - else - sock_pe_progress_ep_rx(cq->domain->pe, rx_ctx->ep_attr); - } - pthread_mutex_unlock(&cq->list_lock); - - return 0; -} - -static ssize_t sock_cq_entry_size(struct sock_cq *sock_cq) -{ - ssize_t size; - - switch (sock_cq->attr.format) { - case FI_CQ_FORMAT_CONTEXT: - size = sizeof(struct fi_cq_entry); - break; - - case FI_CQ_FORMAT_MSG: - size = sizeof(struct fi_cq_msg_entry); - break; - - case FI_CQ_FORMAT_DATA: - size = sizeof(struct fi_cq_data_entry); - break; - - case FI_CQ_FORMAT_TAGGED: - size = sizeof(struct fi_cq_tagged_entry); - break; - - case FI_CQ_FORMAT_UNSPEC: - default: - size = -1; - SOCK_LOG_ERROR("Invalid CQ format\n"); - break; - } - return size; -} - -static ssize_t _sock_cq_write(struct sock_cq *cq, fi_addr_t addr, - const void *buf, size_t len) -{ - ssize_t ret; - struct sock_cq_overflow_entry_t *overflow_entry; - - pthread_mutex_lock(&cq->lock); - if (ofi_rbfdavail(&cq->cq_rbfd) < len) { - SOCK_LOG_ERROR("Not enough space in CQ\n"); - overflow_entry = calloc(1, sizeof(*overflow_entry) + len); - if (!overflow_entry) { - ret = -FI_ENOSPC; - goto out; - } - - memcpy(&overflow_entry->cq_entry[0], buf, len); - overflow_entry->len = len; - overflow_entry->addr = addr; - dlist_insert_tail(&overflow_entry->entry, &cq->overflow_list); - ret = len; - goto out; - } - - - ofi_rbwrite(&cq->addr_rb, &addr, sizeof(addr)); - ofi_rbcommit(&cq->addr_rb); - - ofi_rbfdwrite(&cq->cq_rbfd, buf, len); - if (cq->domain->progress_mode == FI_PROGRESS_MANUAL) - ofi_rbcommit(&cq->cq_rbfd.rb); - else - ofi_rbfdcommit(&cq->cq_rbfd); - - ret = len; - - if (cq->signal) - sock_wait_signal(cq->waitset); -out: - 
pthread_mutex_unlock(&cq->lock); - return ret; -} - -static ssize_t sock_cq_report_context(struct sock_cq *cq, fi_addr_t addr, - struct sock_pe_entry *pe_entry) -{ - struct fi_cq_entry cq_entry; - cq_entry.op_context = (void *) (uintptr_t) pe_entry->context; - return _sock_cq_write(cq, addr, &cq_entry, sizeof(cq_entry)); -} - -static uint64_t sock_cq_sanitize_flags(uint64_t flags) -{ - return (flags & (FI_SEND | FI_RECV | FI_RMA | FI_ATOMIC | - FI_MSG | FI_TAGGED | - FI_READ | FI_WRITE | - FI_REMOTE_READ | FI_REMOTE_WRITE | - FI_REMOTE_CQ_DATA | FI_MULTI_RECV)); -} - -static ssize_t sock_cq_report_msg(struct sock_cq *cq, fi_addr_t addr, - struct sock_pe_entry *pe_entry) -{ - struct fi_cq_msg_entry cq_entry; - cq_entry.op_context = (void *) (uintptr_t) pe_entry->context; - cq_entry.flags = sock_cq_sanitize_flags(pe_entry->flags); - cq_entry.len = pe_entry->data_len; - return _sock_cq_write(cq, addr, &cq_entry, sizeof(cq_entry)); -} - -static ssize_t sock_cq_report_data(struct sock_cq *cq, fi_addr_t addr, - struct sock_pe_entry *pe_entry) -{ - struct fi_cq_data_entry cq_entry; - cq_entry.op_context = (void *) (uintptr_t) pe_entry->context; - cq_entry.flags = sock_cq_sanitize_flags(pe_entry->flags); - cq_entry.len = pe_entry->data_len; - cq_entry.buf = (void *) (uintptr_t) pe_entry->buf; - cq_entry.data = pe_entry->data; - return _sock_cq_write(cq, addr, &cq_entry, sizeof(cq_entry)); -} - -static ssize_t sock_cq_report_tagged(struct sock_cq *cq, fi_addr_t addr, - struct sock_pe_entry *pe_entry) -{ - struct fi_cq_tagged_entry cq_entry; - cq_entry.op_context = (void *) (uintptr_t) pe_entry->context; - cq_entry.flags = sock_cq_sanitize_flags(pe_entry->flags); - cq_entry.len = pe_entry->data_len; - cq_entry.buf = (void *) (uintptr_t) pe_entry->buf; - cq_entry.data = pe_entry->data; - cq_entry.tag = pe_entry->tag; - return _sock_cq_write(cq, addr, &cq_entry, sizeof(cq_entry)); -} - -static void sock_cq_set_report_fn(struct sock_cq *sock_cq) -{ - switch 
(sock_cq->attr.format) { - case FI_CQ_FORMAT_CONTEXT: - sock_cq->report_completion = &sock_cq_report_context; - break; - - case FI_CQ_FORMAT_MSG: - sock_cq->report_completion = &sock_cq_report_msg; - break; - - case FI_CQ_FORMAT_DATA: - sock_cq->report_completion = &sock_cq_report_data; - break; - - case FI_CQ_FORMAT_TAGGED: - sock_cq->report_completion = &sock_cq_report_tagged; - break; - - case FI_CQ_FORMAT_UNSPEC: - default: - SOCK_LOG_ERROR("Invalid CQ format\n"); - break; - } -} - -static inline void sock_cq_copy_overflow_list(struct sock_cq *cq, size_t count) -{ - size_t i; - struct sock_cq_overflow_entry_t *overflow_entry; - - for (i = 0; i < count && !dlist_empty(&cq->overflow_list); i++) { - overflow_entry = container_of(cq->overflow_list.next, - struct sock_cq_overflow_entry_t, - entry); - ofi_rbwrite(&cq->addr_rb, &overflow_entry->addr, sizeof(fi_addr_t)); - ofi_rbcommit(&cq->addr_rb); - - ofi_rbfdwrite(&cq->cq_rbfd, &overflow_entry->cq_entry[0], overflow_entry->len); - if (cq->domain->progress_mode == FI_PROGRESS_MANUAL) - ofi_rbcommit(&cq->cq_rbfd.rb); - else - ofi_rbfdcommit(&cq->cq_rbfd); - - dlist_remove(&overflow_entry->entry); - free(overflow_entry); - } -} - -static inline ssize_t sock_cq_rbuf_read(struct sock_cq *cq, void *buf, - size_t count, fi_addr_t *src_addr, - size_t cq_entry_len) -{ - size_t i; - fi_addr_t addr; - - ofi_rbfdread(&cq->cq_rbfd, buf, cq_entry_len * count); - for (i = 0; i < count; i++) { - ofi_rbread(&cq->addr_rb, &addr, sizeof(addr)); - if (src_addr) - src_addr[i] = addr; - } - sock_cq_copy_overflow_list(cq, count); - return count; -} - -static ssize_t sock_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count, - fi_addr_t *src_addr, const void *cond, int timeout) -{ - ssize_t ret = 0; - size_t threshold; - struct sock_cq *sock_cq; - uint64_t start_ms; - ssize_t cq_entry_len, avail; - - sock_cq = container_of(cq, struct sock_cq, cq_fid); - if (ofi_rbused(&sock_cq->cqerr_rb)) - return -FI_EAVAIL; - - cq_entry_len = 
sock_cq->cq_entry_size; - if (sock_cq->attr.wait_cond == FI_CQ_COND_THRESHOLD) - threshold = MIN((uintptr_t) cond, count); - else - threshold = count; - - start_ms = (timeout >= 0) ? ofi_gettime_ms() : 0; - - if (sock_cq->domain->progress_mode == FI_PROGRESS_MANUAL) { - while (1) { - sock_cq_progress(sock_cq); - pthread_mutex_lock(&sock_cq->lock); - avail = ofi_rbfdused(&sock_cq->cq_rbfd); - if (avail) { - ret = sock_cq_rbuf_read(sock_cq, buf, - MIN(threshold, (size_t)(avail / cq_entry_len)), - src_addr, cq_entry_len); - } - pthread_mutex_unlock(&sock_cq->lock); - if (ret) - return ret; - - if (timeout >= 0) { - timeout -= (int) (ofi_gettime_ms() - start_ms); - if (timeout <= 0) - return -FI_EAGAIN; - } - - if (ofi_atomic_get32(&sock_cq->signaled)) { - ofi_atomic_set32(&sock_cq->signaled, 0); - return -FI_ECANCELED; - } - }; - } else { - do { - pthread_mutex_lock(&sock_cq->lock); - ret = 0; - avail = ofi_rbfdused(&sock_cq->cq_rbfd); - if (avail) { - ret = sock_cq_rbuf_read(sock_cq, buf, - MIN(threshold, (size_t)(avail / cq_entry_len)), - src_addr, cq_entry_len); - } else { - ofi_rbfdreset(&sock_cq->cq_rbfd); - } - pthread_mutex_unlock(&sock_cq->lock); - if (ret && ret != -FI_EAGAIN) - return ret; - - if (timeout >= 0) { - timeout -= (int) (ofi_gettime_ms() - start_ms); - if (timeout <= 0) - return -FI_EAGAIN; - } - - if (ofi_atomic_get32(&sock_cq->signaled)) { - ofi_atomic_set32(&sock_cq->signaled, 0); - return -FI_ECANCELED; - } - ret = ofi_rbfdwait(&sock_cq->cq_rbfd, timeout); - } while (ret > 0); - } - - return (ret == 0 || ret == -FI_ETIMEDOUT || ret == -EINTR) ? 
-FI_EAGAIN : ret; -} - -static ssize_t sock_cq_sread(struct fid_cq *cq, void *buf, size_t len, - const void *cond, int timeout) -{ - return sock_cq_sreadfrom(cq, buf, len, NULL, cond, timeout); -} - -static ssize_t sock_cq_readfrom(struct fid_cq *cq, void *buf, size_t count, - fi_addr_t *src_addr) -{ - return sock_cq_sreadfrom(cq, buf, count, src_addr, NULL, 0); -} - -static ssize_t sock_cq_read(struct fid_cq *cq, void *buf, size_t count) -{ - return sock_cq_readfrom(cq, buf, count, NULL); -} - -static ssize_t sock_cq_readerr(struct fid_cq *cq, struct fi_cq_err_entry *buf, - uint64_t flags) -{ - struct sock_cq *sock_cq; - ssize_t ret; - struct fi_cq_err_entry entry; - uint32_t api_version; - size_t err_data_size = 0; - void *err_data = NULL; - - sock_cq = container_of(cq, struct sock_cq, cq_fid); - if (sock_cq->domain->progress_mode == FI_PROGRESS_MANUAL) - sock_cq_progress(sock_cq); - - pthread_mutex_lock(&sock_cq->lock); - if (ofi_rbused(&sock_cq->cqerr_rb) >= sizeof(struct fi_cq_err_entry)) { - api_version = sock_cq->domain->fab->fab_fid.api_version; - ofi_rbread(&sock_cq->cqerr_rb, &entry, sizeof(entry)); - - if ((FI_VERSION_GE(api_version, FI_VERSION(1, 5))) - && buf->err_data && buf->err_data_size) { - err_data = buf->err_data; - err_data_size = buf->err_data_size; - *buf = entry; - buf->err_data = err_data; - - /* Fill provided user's buffer */ - buf->err_data_size = MIN(entry.err_data_size, err_data_size); - memcpy(buf->err_data, entry.err_data, buf->err_data_size); - } else { - memcpy(buf, &entry, sizeof(struct fi_cq_err_entry_1_0)); - } - - ret = 1; - } else { - ret = -FI_EAGAIN; - } - pthread_mutex_unlock(&sock_cq->lock); - return ret; -} - -static const char *sock_cq_strerror(struct fid_cq *cq, int prov_errno, - const void *err_data, char *buf, size_t len) -{ - if (buf && len) - return strncpy(buf, fi_strerror(-prov_errno), len); - return fi_strerror(-prov_errno); -} - -static int sock_cq_close(struct fid *fid) -{ - struct sock_cq *cq; - - cq = 
container_of(fid, struct sock_cq, cq_fid.fid); - if (ofi_atomic_get32(&cq->ref)) - return -FI_EBUSY; - - if (cq->signal && cq->attr.wait_obj == FI_WAIT_MUTEX_COND) - sock_wait_close(&cq->waitset->fid); - - ofi_rbfree(&cq->addr_rb); - ofi_rbfree(&cq->cqerr_rb); - ofi_rbfdfree(&cq->cq_rbfd); - - pthread_mutex_destroy(&cq->lock); - pthread_mutex_destroy(&cq->list_lock); - ofi_atomic_dec32(&cq->domain->ref); - - free(cq); - return 0; -} - -static int sock_cq_signal(struct fid_cq *cq) -{ - struct sock_cq *sock_cq; - sock_cq = container_of(cq, struct sock_cq, cq_fid); - - ofi_atomic_set32(&sock_cq->signaled, 1); - pthread_mutex_lock(&sock_cq->lock); - ofi_rbfdsignal(&sock_cq->cq_rbfd); - pthread_mutex_unlock(&sock_cq->lock); - return 0; -} - -static struct fi_ops_cq sock_cq_ops = { - .size = sizeof(struct fi_ops_cq), - .read = sock_cq_read, - .readfrom = sock_cq_readfrom, - .readerr = sock_cq_readerr, - .sread = sock_cq_sread, - .sreadfrom = sock_cq_sreadfrom, - .signal = sock_cq_signal, - .strerror = sock_cq_strerror, -}; - -static int sock_cq_control(struct fid *fid, int command, void *arg) -{ - struct sock_cq *cq; - int ret = 0; - - cq = container_of(fid, struct sock_cq, cq_fid); - switch (command) { - case FI_GETWAIT: - if (cq->domain->progress_mode == FI_PROGRESS_MANUAL) - return -FI_ENOSYS; - - switch (cq->attr.wait_obj) { - case FI_WAIT_NONE: - case FI_WAIT_FD: - case FI_WAIT_UNSPEC: - memcpy(arg, &cq->cq_rbfd.fd[OFI_RB_READ_FD], sizeof(int)); - break; - - case FI_WAIT_SET: - case FI_WAIT_MUTEX_COND: - sock_wait_get_obj(cq->waitset, arg); - break; - - default: - ret = -FI_EINVAL; - break; - } - break; - - default: - ret = -FI_EINVAL; - break; - } - - return ret; -} - -static struct fi_ops sock_cq_fi_ops = { - .size = sizeof(struct fi_ops), - .close = sock_cq_close, - .bind = fi_no_bind, - .control = sock_cq_control, - .ops_open = fi_no_ops_open, -}; - -static int sock_cq_verify_attr(struct fi_cq_attr *attr) -{ - if (!attr) - return 0; - - switch (attr->format) { - 
case FI_CQ_FORMAT_CONTEXT: - case FI_CQ_FORMAT_MSG: - case FI_CQ_FORMAT_DATA: - case FI_CQ_FORMAT_TAGGED: - break; - case FI_CQ_FORMAT_UNSPEC: - attr->format = FI_CQ_FORMAT_CONTEXT; - break; - default: - return -FI_ENOSYS; - } - - switch (attr->wait_obj) { - case FI_WAIT_NONE: - case FI_WAIT_FD: - case FI_WAIT_SET: - case FI_WAIT_MUTEX_COND: - break; - case FI_WAIT_UNSPEC: - attr->wait_obj = FI_WAIT_FD; - break; - default: - return -FI_ENOSYS; - } - - return 0; -} - -static struct fi_cq_attr _sock_cq_def_attr = { - .size = SOCK_CQ_DEF_SZ, - .flags = 0, - .format = FI_CQ_FORMAT_CONTEXT, - .wait_obj = FI_WAIT_FD, - .signaling_vector = 0, - .wait_cond = FI_CQ_COND_NONE, - .wait_set = NULL, -}; - -int sock_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, - struct fid_cq **cq, void *context) -{ - struct sock_domain *sock_dom; - struct sock_cq *sock_cq; - struct fi_wait_attr wait_attr; - struct sock_fid_list *list_entry; - struct sock_wait *wait; - int ret; - - sock_dom = container_of(domain, struct sock_domain, dom_fid); - ret = sock_cq_verify_attr(attr); - if (ret) - return ret; - - sock_cq = calloc(1, sizeof(*sock_cq)); - if (!sock_cq) - return -FI_ENOMEM; - - ofi_atomic_initialize32(&sock_cq->ref, 0); - ofi_atomic_initialize32(&sock_cq->signaled, 0); - sock_cq->cq_fid.fid.fclass = FI_CLASS_CQ; - sock_cq->cq_fid.fid.context = context; - sock_cq->cq_fid.fid.ops = &sock_cq_fi_ops; - sock_cq->cq_fid.ops = &sock_cq_ops; - - if (attr == NULL) { - sock_cq->attr = _sock_cq_def_attr; - } else { - sock_cq->attr = *attr; - if (attr->size == 0) - sock_cq->attr.size = _sock_cq_def_attr.size; - } - - sock_cq->domain = sock_dom; - sock_cq->cq_entry_size = sock_cq_entry_size(sock_cq); - sock_cq_set_report_fn(sock_cq); - - dlist_init(&sock_cq->tx_list); - dlist_init(&sock_cq->rx_list); - dlist_init(&sock_cq->ep_list); - dlist_init(&sock_cq->overflow_list); - - ret = ofi_rbfdinit(&sock_cq->cq_rbfd, sock_cq->attr.size * - sock_cq->cq_entry_size); - if (ret) - goto err1; - - 
ret = ofi_rbinit(&sock_cq->addr_rb, - sock_cq->attr.size * sizeof(fi_addr_t)); - if (ret) - goto err2; - - ret = ofi_rbinit(&sock_cq->cqerr_rb, sock_cq->attr.size * - sizeof(struct fi_cq_err_entry)); - if (ret) - goto err3; - - pthread_mutex_init(&sock_cq->lock, NULL); - - switch (sock_cq->attr.wait_obj) { - case FI_WAIT_NONE: - case FI_WAIT_UNSPEC: - case FI_WAIT_FD: - break; - - case FI_WAIT_MUTEX_COND: - wait_attr.flags = 0; - wait_attr.wait_obj = FI_WAIT_MUTEX_COND; - ret = sock_wait_open(&sock_dom->fab->fab_fid, &wait_attr, - &sock_cq->waitset); - if (ret) { - ret = -FI_EINVAL; - goto err4; - } - sock_cq->signal = 1; - break; - - case FI_WAIT_SET: - if (!attr) { - ret = -FI_EINVAL; - goto err4; - } - - sock_cq->waitset = attr->wait_set; - sock_cq->signal = 1; - wait = container_of(attr->wait_set, struct sock_wait, wait_fid); - list_entry = calloc(1, sizeof(*list_entry)); - if (!list_entry) { - ret = -FI_ENOMEM; - goto err4; - } - dlist_init(&list_entry->entry); - list_entry->fid = &sock_cq->cq_fid.fid; - dlist_insert_after(&list_entry->entry, &wait->fid_list); - break; - - default: - break; - } - - *cq = &sock_cq->cq_fid; - ofi_atomic_inc32(&sock_dom->ref); - pthread_mutex_init(&sock_cq->list_lock, NULL); - - return 0; - -err4: - ofi_rbfree(&sock_cq->cqerr_rb); -err3: - ofi_rbfree(&sock_cq->addr_rb); -err2: - ofi_rbfdfree(&sock_cq->cq_rbfd); -err1: - free(sock_cq); - return ret; -} - -int sock_cq_report_error(struct sock_cq *cq, struct sock_pe_entry *entry, - size_t olen, int err, int prov_errno, void *err_data, - size_t err_data_size) -{ - int ret; - struct fi_cq_err_entry err_entry; - - pthread_mutex_lock(&cq->lock); - if (ofi_rbavail(&cq->cqerr_rb) < sizeof(err_entry)) { - ret = -FI_ENOSPC; - goto out; - } - - err_entry.err = err; - err_entry.olen = olen; - err_entry.err_data = err_data; - err_entry.err_data_size = err_data_size; - err_entry.len = entry->data_len; - err_entry.prov_errno = prov_errno; - err_entry.flags = entry->flags; - err_entry.data = 
entry->data; - err_entry.tag = entry->tag; - err_entry.op_context = (void *) (uintptr_t) entry->context; - - if (entry->type == SOCK_PE_RX) - err_entry.buf = (void *) (uintptr_t) entry->pe.rx.rx_iov[0].iov.addr; - else - err_entry.buf = (void *) (uintptr_t) entry->pe.tx.tx_iov[0].src.iov.addr; - - ofi_rbwrite(&cq->cqerr_rb, &err_entry, sizeof(err_entry)); - ofi_rbcommit(&cq->cqerr_rb); - ret = 0; - - ofi_rbfdsignal(&cq->cq_rbfd); - -out: - pthread_mutex_unlock(&cq->lock); - return ret; -} diff --git a/prov/sockets/src/sock_ctx.c b/prov/sockets/src/sock_ctx.c deleted file mode 100644 index 9222380737d..00000000000 --- a/prov/sockets/src/sock_ctx.c +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Copyright (c) 2014 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenFabrics.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "config.h" - -#include -#include - -#include "sock.h" -#include "sock_util.h" - -#define SOCK_LOG_DBG(...) _SOCK_LOG_DBG(FI_LOG_EP_CTRL, __VA_ARGS__) -#define SOCK_LOG_ERROR(...) _SOCK_LOG_ERROR(FI_LOG_EP_CTRL, __VA_ARGS__) - -struct sock_rx_ctx *sock_rx_ctx_alloc(const struct fi_rx_attr *attr, - void *context, int use_shared) -{ - struct sock_rx_ctx *rx_ctx; - rx_ctx = calloc(1, sizeof(*rx_ctx)); - if (!rx_ctx) - return NULL; - - dlist_init(&rx_ctx->cq_entry); - dlist_init(&rx_ctx->pe_entry); - - dlist_init(&rx_ctx->pe_entry_list); - dlist_init(&rx_ctx->rx_entry_list); - dlist_init(&rx_ctx->rx_buffered_list); - dlist_init(&rx_ctx->ep_list); - - rx_ctx->progress_start = &rx_ctx->rx_buffered_list; - - ofi_mutex_init(&rx_ctx->lock); - - rx_ctx->ctx.fid.fclass = FI_CLASS_RX_CTX; - rx_ctx->ctx.fid.context = context; - rx_ctx->num_left = sock_get_tx_size(attr->size); - rx_ctx->attr = *attr; - rx_ctx->use_shared = use_shared; - return rx_ctx; -} - -void sock_rx_ctx_free(struct sock_rx_ctx *rx_ctx) -{ - struct sock_rx_entry *rx_buffered; - - /* free any remaining buffered entries */ - while (!dlist_empty(&rx_ctx->rx_buffered_list)) { - dlist_pop_front(&rx_ctx->rx_buffered_list, - struct sock_rx_entry, rx_buffered, entry); - free(rx_buffered); - } - - ofi_mutex_destroy(&rx_ctx->lock); - free(rx_ctx->rx_entry_pool); - free(rx_ctx); -} - -static struct sock_tx_ctx *sock_tx_context_alloc(const struct fi_tx_attr *attr, - void *context, int use_shared, - size_t fclass) -{ - struct sock_tx_ctx *tx_ctx; - struct fi_rx_attr rx_attr = {0}; - - tx_ctx = calloc(sizeof(*tx_ctx), 1); - if (!tx_ctx) - return NULL; - - if (!use_shared && ofi_rbinit(&tx_ctx->rb, - (attr->size) ? 
attr->size * SOCK_EP_TX_ENTRY_SZ : - SOCK_EP_TX_SZ * SOCK_EP_TX_ENTRY_SZ)) - goto err; - - dlist_init(&tx_ctx->cq_entry); - dlist_init(&tx_ctx->pe_entry); - - dlist_init(&tx_ctx->pe_entry_list); - dlist_init(&tx_ctx->ep_list); - - ofi_mutex_init(&tx_ctx->rb_lock); - ofi_mutex_init(&tx_ctx->lock); - - switch (fclass) { - case FI_CLASS_TX_CTX: - tx_ctx->fid.ctx.fid.fclass = FI_CLASS_TX_CTX; - tx_ctx->fid.ctx.fid.context = context; - tx_ctx->fclass = FI_CLASS_TX_CTX; - tx_ctx->use_shared = use_shared; - break; - case FI_CLASS_STX_CTX: - tx_ctx->fid.stx.fid.fclass = FI_CLASS_STX_CTX; - tx_ctx->fid.stx.fid.context = context; - tx_ctx->fclass = FI_CLASS_STX_CTX; - break; - default: - goto err; - } - tx_ctx->attr = *attr; - tx_ctx->attr.op_flags |= FI_TRANSMIT_COMPLETE; - - if (!use_shared) { - tx_ctx->rx_ctrl_ctx = sock_rx_ctx_alloc(&rx_attr, NULL, 0); - if (!tx_ctx->rx_ctrl_ctx) - goto err; - tx_ctx->rx_ctrl_ctx->is_ctrl_ctx = 1; - } - return tx_ctx; - -err: - free(tx_ctx); - return NULL; -} - - -struct sock_tx_ctx *sock_tx_ctx_alloc(const struct fi_tx_attr *attr, - void *context, int use_shared) -{ - return sock_tx_context_alloc(attr, context, use_shared, FI_CLASS_TX_CTX); -} - -struct sock_tx_ctx *sock_stx_ctx_alloc(const struct fi_tx_attr *attr, - void *context) -{ - return sock_tx_context_alloc(attr, context, 0, FI_CLASS_STX_CTX); -} - -void sock_tx_ctx_free(struct sock_tx_ctx *tx_ctx) -{ - ofi_mutex_destroy(&tx_ctx->rb_lock); - ofi_mutex_destroy(&tx_ctx->lock); - - if (!tx_ctx->use_shared) { - ofi_rbfree(&tx_ctx->rb); - sock_rx_ctx_free(tx_ctx->rx_ctrl_ctx); - } - free(tx_ctx); -} - -void sock_tx_ctx_start(struct sock_tx_ctx *tx_ctx) -{ - ofi_mutex_lock(&tx_ctx->rb_lock); -} - -void sock_tx_ctx_write(struct sock_tx_ctx *tx_ctx, const void *buf, size_t len) -{ - ofi_rbwrite(&tx_ctx->rb, buf, len); -} - -void sock_tx_ctx_commit(struct sock_tx_ctx *tx_ctx) -{ - ofi_rbcommit(&tx_ctx->rb); - sock_pe_signal(tx_ctx->domain->pe); - ofi_mutex_unlock(&tx_ctx->rb_lock); -} - 
-void sock_tx_ctx_abort(struct sock_tx_ctx *tx_ctx) -{ - ofi_rbabort(&tx_ctx->rb); - ofi_mutex_unlock(&tx_ctx->rb_lock); -} - -void sock_tx_ctx_write_op_send(struct sock_tx_ctx *tx_ctx, - struct sock_op *op, uint64_t flags, uint64_t context, - uint64_t dest_addr, uint64_t buf, struct sock_ep_attr *ep_attr, - struct sock_conn *conn) -{ - sock_tx_ctx_write(tx_ctx, op, sizeof(*op)); - sock_tx_ctx_write(tx_ctx, &flags, sizeof(flags)); - sock_tx_ctx_write(tx_ctx, &context, sizeof(context)); - sock_tx_ctx_write(tx_ctx, &dest_addr, sizeof(dest_addr)); - sock_tx_ctx_write(tx_ctx, &buf, sizeof(buf)); - sock_tx_ctx_write(tx_ctx, &ep_attr, sizeof(ep_attr)); - sock_tx_ctx_write(tx_ctx, &conn, sizeof(conn)); -} - -void sock_tx_ctx_write_op_tsend(struct sock_tx_ctx *tx_ctx, - struct sock_op *op, uint64_t flags, uint64_t context, - uint64_t dest_addr, uint64_t buf, struct sock_ep_attr *ep_attr, - struct sock_conn *conn, uint64_t tag) -{ - sock_tx_ctx_write_op_send(tx_ctx, op, flags, context, dest_addr, - buf, ep_attr, conn); - sock_tx_ctx_write(tx_ctx, &tag, sizeof(tag)); -} - -void sock_tx_ctx_read_op_send(struct sock_tx_ctx *tx_ctx, - struct sock_op *op, uint64_t *flags, uint64_t *context, - uint64_t *dest_addr, uint64_t *buf, struct sock_ep_attr **ep_attr, - struct sock_conn **conn) -{ - ofi_rbread(&tx_ctx->rb, op, sizeof(*op)); - ofi_rbread(&tx_ctx->rb, flags, sizeof(*flags)); - ofi_rbread(&tx_ctx->rb, context, sizeof(*context)); - ofi_rbread(&tx_ctx->rb, dest_addr, sizeof(*dest_addr)); - ofi_rbread(&tx_ctx->rb, buf, sizeof(*buf)); - ofi_rbread(&tx_ctx->rb, ep_attr, sizeof(*ep_attr)); - ofi_rbread(&tx_ctx->rb, conn, sizeof(*conn)); -} diff --git a/prov/sockets/src/sock_dom.c b/prov/sockets/src/sock_dom.c deleted file mode 100644 index 5b847e81f5b..00000000000 --- a/prov/sockets/src/sock_dom.c +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Copyright (c) 2014 Intel Corporation, Inc. All rights reserved. - * Copyright (c) 2017 DataDirect Networks, Inc. All rights reserved. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "config.h" - -#include -#include - -#include - -#include "sock.h" -#include "sock_util.h" - -#define SOCK_LOG_DBG(...) _SOCK_LOG_DBG(FI_LOG_DOMAIN, __VA_ARGS__) -#define SOCK_LOG_ERROR(...) 
_SOCK_LOG_ERROR(FI_LOG_DOMAIN, __VA_ARGS__) - -extern struct fi_ops_mr sock_dom_mr_ops; - - -static int sock_dom_close(struct fid *fid) -{ - struct sock_domain *dom; - dom = container_of(fid, struct sock_domain, dom_fid.fid); - if (ofi_atomic_get32(&dom->ref)) - return -FI_EBUSY; - - sock_conn_stop_listener_thread(&dom->conn_listener); - sock_ep_cm_stop_thread(&dom->cm_head); - - sock_pe_finalize(dom->pe); - ofi_mutex_destroy(&dom->lock); - ofi_mr_map_close(&dom->mr_map); - sock_dom_remove_from_list(dom); - free(dom); - return 0; -} - -static int sock_dom_bind(struct fid *fid, struct fid *bfid, uint64_t flags) -{ - struct sock_domain *dom; - struct sock_eq *eq; - - dom = container_of(fid, struct sock_domain, dom_fid.fid); - eq = container_of(bfid, struct sock_eq, eq.fid); - - if (dom->eq) - return -FI_EINVAL; - - dom->eq = eq; - if (flags & FI_REG_MR) - dom->mr_eq = eq; - - return 0; -} - -static int sock_dom_ctrl(struct fid *fid, int command, void *arg) -{ - struct sock_domain *dom; - - dom = container_of(fid, struct sock_domain, dom_fid.fid); - switch (command) { - case FI_QUEUE_WORK: - return (int) sock_queue_work(dom, arg); - default: - return -FI_ENOSYS; - } -} - -static int sock_endpoint(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **ep, void *context) -{ - switch (info->ep_attr->type) { - case FI_EP_RDM: - return sock_rdm_ep(domain, info, ep, context); - case FI_EP_DGRAM: - return sock_dgram_ep(domain, info, ep, context); - case FI_EP_MSG: - return sock_msg_ep(domain, info, ep, context); - default: - return -FI_ENOPROTOOPT; - } -} - -static int sock_scalable_ep(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **sep, void *context) -{ - switch (info->ep_attr->type) { - case FI_EP_RDM: - return sock_rdm_sep(domain, info, sep, context); - case FI_EP_DGRAM: - return sock_dgram_sep(domain, info, sep, context); - case FI_EP_MSG: - return sock_msg_sep(domain, info, sep, context); - default: - return -FI_ENOPROTOOPT; - } -} - 
-static struct fi_ops sock_dom_fi_ops = { - .size = sizeof(struct fi_ops), - .close = sock_dom_close, - .bind = sock_dom_bind, - .control = sock_dom_ctrl, - .ops_open = fi_no_ops_open, -}; - -static struct fi_ops_domain sock_dom_ops = { - .size = sizeof(struct fi_ops_domain), - .av_open = sock_av_open, - .cq_open = sock_cq_open, - .endpoint = sock_endpoint, - .scalable_ep = sock_scalable_ep, - .cntr_open = sock_cntr_open, - .poll_open = sock_poll_open, - .stx_ctx = sock_stx_ctx, - .srx_ctx = sock_srx_ctx, - .query_atomic = sock_query_atomic, - .query_collective = fi_no_query_collective, -}; - -int sock_domain(struct fid_fabric *fabric, struct fi_info *info, - struct fid_domain **dom, void *context) -{ - struct sock_domain *sock_domain; - struct sock_fabric *fab; - int ret; - - assert(info && info->domain_attr); - fab = container_of(fabric, struct sock_fabric, fab_fid); - - sock_domain = calloc(1, sizeof(*sock_domain)); - if (!sock_domain) - return -FI_ENOMEM; - - ofi_mutex_init(&sock_domain->lock); - ofi_atomic_initialize32(&sock_domain->ref, 0); - - sock_domain->info = *info; - sock_domain->info.domain_attr = NULL; - - sock_domain->dom_fid.fid.fclass = FI_CLASS_DOMAIN; - sock_domain->dom_fid.fid.context = context; - sock_domain->dom_fid.fid.ops = &sock_dom_fi_ops; - sock_domain->dom_fid.ops = &sock_dom_ops; - sock_domain->dom_fid.mr = &sock_dom_mr_ops; - - if (info->domain_attr->data_progress == FI_PROGRESS_UNSPEC) - sock_domain->progress_mode = FI_PROGRESS_AUTO; - else - sock_domain->progress_mode = info->domain_attr->data_progress; - - sock_domain->pe = sock_pe_init(sock_domain); - if (!sock_domain->pe) { - SOCK_LOG_ERROR("Failed to init PE\n"); - goto err1; - } - - sock_domain->fab = fab; - *dom = &sock_domain->dom_fid; - - sock_domain->attr = *(info->domain_attr); - - ret = ofi_mr_map_init(&sock_prov, sock_domain->attr.mr_mode, - &sock_domain->mr_map); - if (ret) - goto err2; - - ret = sock_conn_start_listener_thread(&sock_domain->conn_listener); - if (ret) - 
goto err2; - - ret = sock_ep_cm_start_thread(&sock_domain->cm_head); - if (ret) - goto err3; - - sock_dom_add_to_list(sock_domain); - return 0; - -err3: - sock_conn_stop_listener_thread(&sock_domain->conn_listener); -err2: - sock_pe_finalize(sock_domain->pe); -err1: - ofi_mutex_destroy(&sock_domain->lock); - free(sock_domain); - return -FI_EINVAL; -} diff --git a/prov/sockets/src/sock_ep.c b/prov/sockets/src/sock_ep.c deleted file mode 100644 index 2ae0f46beed..00000000000 --- a/prov/sockets/src/sock_ep.c +++ /dev/null @@ -1,1887 +0,0 @@ -/* - * Copyright (c) 2013-2014 Intel Corporation. All rights reserved. - * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2017 DataDirect Networks, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenFabrics.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include - -#if HAVE_GETIFADDRS -#include -#include -#endif - -#include "ofi_util.h" -#include "sock.h" -#include "sock_util.h" - -#define SOCK_LOG_DBG(...) _SOCK_LOG_DBG(FI_LOG_EP_CTRL, __VA_ARGS__) -#define SOCK_LOG_ERROR(...) _SOCK_LOG_ERROR(FI_LOG_EP_CTRL, __VA_ARGS__) - -extern struct fi_ops_rma sock_ep_rma; -extern struct fi_ops_msg sock_ep_msg_ops; -extern struct fi_ops_tagged sock_ep_tagged; -extern struct fi_ops_atomic sock_ep_atomic; - -extern struct fi_ops_cm sock_ep_cm_ops; -extern struct fi_ops_ep sock_ep_ops; -extern struct fi_ops sock_ep_fi_ops; -extern struct fi_ops_ep sock_ctx_ep_ops; - -static void sock_tx_ctx_close(struct sock_tx_ctx *tx_ctx) -{ - if (tx_ctx->comp.send_cq) - sock_cq_remove_tx_ctx(tx_ctx->comp.send_cq, tx_ctx); - - if (tx_ctx->comp.send_cntr) - sock_cntr_remove_tx_ctx(tx_ctx->comp.send_cntr, tx_ctx); - - if (tx_ctx->comp.read_cntr) - sock_cntr_remove_tx_ctx(tx_ctx->comp.read_cntr, tx_ctx); - - if (tx_ctx->comp.write_cntr) - sock_cntr_remove_tx_ctx(tx_ctx->comp.write_cntr, tx_ctx); -} - -static void sock_rx_ctx_close(struct sock_rx_ctx *rx_ctx) -{ - if (rx_ctx->comp.recv_cq) - sock_cq_remove_rx_ctx(rx_ctx->comp.recv_cq, rx_ctx); - - if (rx_ctx->comp.recv_cntr) - sock_cntr_remove_rx_ctx(rx_ctx->comp.recv_cntr, rx_ctx); - - if (rx_ctx->comp.rem_read_cntr) - sock_cntr_remove_rx_ctx(rx_ctx->comp.rem_read_cntr, rx_ctx); - - if (rx_ctx->comp.rem_write_cntr) - sock_cntr_remove_rx_ctx(rx_ctx->comp.rem_write_cntr, rx_ctx); -} - -static int sock_ctx_close(struct fid *fid) -{ - struct sock_tx_ctx *tx_ctx; - struct sock_rx_ctx *rx_ctx; - - switch (fid->fclass) { - case 
FI_CLASS_TX_CTX: - tx_ctx = container_of(fid, struct sock_tx_ctx, fid.ctx.fid); - sock_pe_remove_tx_ctx(tx_ctx); - ofi_atomic_dec32(&tx_ctx->ep_attr->num_tx_ctx); - ofi_atomic_dec32(&tx_ctx->domain->ref); - sock_tx_ctx_close(tx_ctx); - sock_tx_ctx_free(tx_ctx); - break; - - case FI_CLASS_RX_CTX: - rx_ctx = container_of(fid, struct sock_rx_ctx, ctx.fid); - sock_pe_remove_rx_ctx(rx_ctx); - ofi_atomic_dec32(&rx_ctx->ep_attr->num_rx_ctx); - ofi_atomic_dec32(&rx_ctx->domain->ref); - sock_rx_ctx_close(rx_ctx); - sock_rx_ctx_free(rx_ctx); - break; - - case FI_CLASS_STX_CTX: - tx_ctx = container_of(fid, struct sock_tx_ctx, fid.stx.fid); - ofi_atomic_dec32(&tx_ctx->domain->ref); - sock_pe_remove_tx_ctx(tx_ctx); - sock_tx_ctx_free(tx_ctx); - break; - - case FI_CLASS_SRX_CTX: - rx_ctx = container_of(fid, struct sock_rx_ctx, ctx.fid); - ofi_atomic_dec32(&rx_ctx->domain->ref); - sock_pe_remove_rx_ctx(rx_ctx); - sock_rx_ctx_free(rx_ctx); - break; - - default: - SOCK_LOG_ERROR("Invalid fid\n"); - return -FI_EINVAL; - } - return 0; -} - -static int sock_ctx_bind_cq(struct fid *fid, struct fid *bfid, uint64_t flags) -{ - struct sock_cq *sock_cq; - struct sock_tx_ctx *tx_ctx; - struct sock_rx_ctx *rx_ctx; - - if ((flags | SOCK_EP_CQ_FLAGS) != SOCK_EP_CQ_FLAGS) { - SOCK_LOG_ERROR("Invalid cq flag\n"); - return -FI_EINVAL; - } - sock_cq = container_of(bfid, struct sock_cq, cq_fid.fid); - switch (fid->fclass) { - case FI_CLASS_TX_CTX: - tx_ctx = container_of(fid, struct sock_tx_ctx, fid.ctx); - if (flags & FI_SEND) { - tx_ctx->comp.send_cq = sock_cq; - if (flags & FI_SELECTIVE_COMPLETION) - tx_ctx->comp.send_cq_event = 1; - } - - sock_cq_add_tx_ctx(sock_cq, tx_ctx); - break; - - case FI_CLASS_RX_CTX: - rx_ctx = container_of(fid, struct sock_rx_ctx, ctx.fid); - if (flags & FI_RECV) { - rx_ctx->comp.recv_cq = sock_cq; - if (flags & FI_SELECTIVE_COMPLETION) - rx_ctx->comp.recv_cq_event = 1; - } - - sock_cq_add_rx_ctx(sock_cq, rx_ctx); - break; - - default: - SOCK_LOG_ERROR("Invalid 
fid\n"); - return -FI_EINVAL; - } - return 0; -} - -static int sock_ctx_bind_cntr(struct fid *fid, struct fid *bfid, uint64_t flags) -{ - struct sock_cntr *cntr; - struct sock_tx_ctx *tx_ctx; - struct sock_rx_ctx *rx_ctx; - - if ((flags | SOCK_EP_CNTR_FLAGS) != SOCK_EP_CNTR_FLAGS) { - SOCK_LOG_ERROR("Invalid cntr flag\n"); - return -FI_EINVAL; - } - - cntr = container_of(bfid, struct sock_cntr, cntr_fid.fid); - switch (fid->fclass) { - case FI_CLASS_TX_CTX: - tx_ctx = container_of(fid, struct sock_tx_ctx, fid.ctx.fid); - if (flags & FI_SEND) { - tx_ctx->comp.send_cntr = cntr; - sock_cntr_add_tx_ctx(cntr, tx_ctx); - } - - if (flags & FI_READ) { - tx_ctx->comp.read_cntr = cntr; - sock_cntr_add_tx_ctx(cntr, tx_ctx); - } - - if (flags & FI_WRITE) { - tx_ctx->comp.write_cntr = cntr; - sock_cntr_add_tx_ctx(cntr, tx_ctx); - } - break; - - case FI_CLASS_RX_CTX: - rx_ctx = container_of(fid, struct sock_rx_ctx, ctx.fid); - if (flags & FI_RECV) { - rx_ctx->comp.recv_cntr = cntr; - sock_cntr_add_rx_ctx(cntr, rx_ctx); - } - - if (flags & FI_REMOTE_READ) { - rx_ctx->comp.rem_read_cntr = cntr; - sock_cntr_add_rx_ctx(cntr, rx_ctx); - } - - if (flags & FI_REMOTE_WRITE) { - rx_ctx->comp.rem_write_cntr = cntr; - sock_cntr_add_rx_ctx(cntr, rx_ctx); - } - break; - - default: - SOCK_LOG_ERROR("Invalid fid\n"); - return -FI_EINVAL; - } - return 0; -} - -static int sock_ctx_bind(struct fid *fid, struct fid *bfid, uint64_t flags) -{ - switch (bfid->fclass) { - case FI_CLASS_CQ: - return sock_ctx_bind_cq(fid, bfid, flags); - - case FI_CLASS_CNTR: - return sock_ctx_bind_cntr(fid, bfid, flags); - - case FI_CLASS_MR: - return 0; - - default: - SOCK_LOG_ERROR("Invalid bind()\n"); - return -FI_EINVAL; - } - -} - -static int sock_ctx_enable(struct fid_ep *ep) -{ - struct sock_tx_ctx *tx_ctx; - struct sock_rx_ctx *rx_ctx; - - switch (ep->fid.fclass) { - case FI_CLASS_RX_CTX: - rx_ctx = container_of(ep, struct sock_rx_ctx, ctx.fid); - sock_pe_add_rx_ctx(rx_ctx->domain->pe, rx_ctx); - - if 
(!rx_ctx->ep_attr->conn_handle.do_listen && - sock_conn_listen(rx_ctx->ep_attr)) { - SOCK_LOG_ERROR("failed to create listener\n"); - } - rx_ctx->enabled = 1; - return 0; - - case FI_CLASS_TX_CTX: - tx_ctx = container_of(ep, struct sock_tx_ctx, fid.ctx.fid); - sock_pe_add_tx_ctx(tx_ctx->domain->pe, tx_ctx); - - if (!tx_ctx->ep_attr->conn_handle.do_listen && - sock_conn_listen(tx_ctx->ep_attr)) { - SOCK_LOG_ERROR("failed to create listener\n"); - } - tx_ctx->enabled = 1; - return 0; - - default: - SOCK_LOG_ERROR("Invalid CTX\n"); - break; - } - return -FI_EINVAL; -} - -int sock_getopflags(struct fi_tx_attr *tx_attr, struct fi_rx_attr *rx_attr, - uint64_t *flags) -{ - if ((*flags & FI_TRANSMIT) && (*flags & FI_RECV)) { - SOCK_LOG_ERROR("Both Tx/Rx flags cannot be specified\n"); - return -FI_EINVAL; - } else if (tx_attr && (*flags & FI_TRANSMIT)) { - *flags = tx_attr->op_flags; - } else if (rx_attr && (*flags & FI_RECV)) { - *flags = rx_attr->op_flags; - } else { - SOCK_LOG_ERROR("Tx/Rx flags not specified\n"); - return -FI_EINVAL; - } - return 0; -} - -int sock_setopflags(struct fi_tx_attr *tx_attr, struct fi_rx_attr *rx_attr, - uint64_t flags) -{ - if ((flags & FI_TRANSMIT) && (flags & FI_RECV)) { - SOCK_LOG_ERROR("Both Tx/Rx flags cannot be specified\n"); - return -FI_EINVAL; - } else if (tx_attr && (flags & FI_TRANSMIT)) { - tx_attr->op_flags = flags; - tx_attr->op_flags &= ~FI_TRANSMIT; - if (!(flags & (FI_INJECT_COMPLETE | FI_TRANSMIT_COMPLETE | - FI_DELIVERY_COMPLETE))) - tx_attr->op_flags |= FI_TRANSMIT_COMPLETE; - } else if (rx_attr && (flags & FI_RECV)) { - rx_attr->op_flags = flags; - rx_attr->op_flags &= ~FI_RECV; - } else { - SOCK_LOG_ERROR("Tx/Rx flags not specified\n"); - return -FI_EINVAL; - } - return 0; -} - -static int sock_ctx_control(struct fid *fid, int command, void *arg) -{ - struct fid_ep *ep; - struct sock_tx_ctx *tx_ctx; - struct sock_rx_ctx *rx_ctx; - int ret; - - switch (fid->fclass) { - case FI_CLASS_TX_CTX: - tx_ctx = container_of(fid, 
struct sock_tx_ctx, fid.ctx.fid); - switch (command) { - case FI_GETOPSFLAG: - ret = sock_getopflags(&tx_ctx->attr, NULL, (uint64_t *) arg); - if (ret) - return -EINVAL; - break; - case FI_SETOPSFLAG: - ret = sock_setopflags(&tx_ctx->attr, NULL, *(uint64_t *) arg); - if (ret) - return -EINVAL; - break; - case FI_ENABLE: - ep = container_of(fid, struct fid_ep, fid); - return sock_ctx_enable(ep); - break; - default: - return -FI_ENOSYS; - } - break; - - case FI_CLASS_RX_CTX: - case FI_CLASS_SRX_CTX: - rx_ctx = container_of(fid, struct sock_rx_ctx, ctx.fid); - switch (command) { - case FI_GETOPSFLAG: - ret = sock_getopflags(NULL, &rx_ctx->attr, (uint64_t *) arg); - if (ret) - return -EINVAL; - break; - case FI_SETOPSFLAG: - ret = sock_setopflags(NULL, &rx_ctx->attr, *(uint64_t *) arg); - if (ret) - return -EINVAL; - break; - case FI_ENABLE: - ep = container_of(fid, struct fid_ep, fid); - return sock_ctx_enable(ep); - break; - default: - return -FI_ENOSYS; - } - break; - - default: - return -FI_ENOSYS; - } - - return 0; -} - -static struct fi_ops sock_ctx_ops = { - .size = sizeof(struct fi_ops), - .close = sock_ctx_close, - .bind = sock_ctx_bind, - .control = sock_ctx_control, - .ops_open = fi_no_ops_open, -}; - -static int sock_ctx_getopt(fid_t fid, int level, int optname, - void *optval, size_t *optlen) -{ - struct sock_rx_ctx *rx_ctx; - rx_ctx = container_of(fid, struct sock_rx_ctx, ctx.fid); - - if (level != FI_OPT_ENDPOINT) - return -ENOPROTOOPT; - - switch (optname) { - case FI_OPT_MIN_MULTI_RECV: - if (*optlen < sizeof(size_t)) - return -FI_ETOOSMALL; - *(size_t *)optval = rx_ctx->min_multi_recv; - *optlen = sizeof(size_t); - break; - case FI_OPT_CM_DATA_SIZE: - if (*optlen < sizeof(size_t)) - return -FI_ETOOSMALL; - *((size_t *) optval) = SOCK_EP_MAX_CM_DATA_SZ; - *optlen = sizeof(size_t); - break; - default: - return -FI_ENOPROTOOPT; - } - return 0; -} - -static int sock_ctx_setopt(fid_t fid, int level, int optname, - const void *optval, size_t optlen) -{ - 
struct sock_rx_ctx *rx_ctx; - rx_ctx = container_of(fid, struct sock_rx_ctx, ctx.fid); - - if (level != FI_OPT_ENDPOINT) - return -ENOPROTOOPT; - - switch (optname) { - case FI_OPT_MIN_MULTI_RECV: - rx_ctx->min_multi_recv = *(size_t *)optval; - break; - - default: - return -ENOPROTOOPT; - } - return 0; -} - -static ssize_t sock_rx_ctx_cancel(struct sock_rx_ctx *rx_ctx, void *context) -{ - struct dlist_entry *entry; - ssize_t ret = -FI_ENOENT; - struct sock_rx_entry *rx_entry; - struct sock_pe_entry pe_entry; - - ofi_mutex_lock(&rx_ctx->lock); - for (entry = rx_ctx->rx_entry_list.next; - entry != &rx_ctx->rx_entry_list; entry = entry->next) { - - rx_entry = container_of(entry, struct sock_rx_entry, entry); - if (rx_entry->is_busy) - continue; - - if ((uintptr_t) context == rx_entry->context) { - if (rx_ctx->comp.recv_cq) { - memset(&pe_entry, 0, sizeof(pe_entry)); - pe_entry.comp = &rx_ctx->comp; - pe_entry.tag = rx_entry->tag; - pe_entry.context = rx_entry->context; - pe_entry.flags = (FI_MSG | FI_RECV); - if (rx_entry->is_tagged) - pe_entry.flags |= FI_TAGGED; - - if (sock_cq_report_error(pe_entry.comp->recv_cq, - &pe_entry, 0, FI_ECANCELED, - -FI_ECANCELED, NULL, 0)) { - SOCK_LOG_ERROR("failed to report error\n"); - } - } - - if (rx_ctx->comp.recv_cntr) - fi_cntr_adderr(&rx_ctx->comp.recv_cntr->cntr_fid, 1); - - dlist_remove(&rx_entry->entry); - sock_rx_release_entry(rx_entry); - ret = 0; - break; - } - } - ofi_mutex_unlock(&rx_ctx->lock); - return ret; -} - -static ssize_t sock_ep_cancel(fid_t fid, void *context) -{ - struct sock_rx_ctx *rx_ctx = NULL; - struct sock_ep *sock_ep; - - switch (fid->fclass) { - case FI_CLASS_EP: - sock_ep = container_of(fid, struct sock_ep, ep.fid); - rx_ctx = sock_ep->attr->rx_ctx; - break; - - case FI_CLASS_RX_CTX: - case FI_CLASS_SRX_CTX: - rx_ctx = container_of(fid, struct sock_rx_ctx, ctx.fid); - break; - - case FI_CLASS_TX_CTX: - case FI_CLASS_STX_CTX: - return -FI_ENOENT; - - default: - SOCK_LOG_ERROR("Invalid ep type\n"); - 
return -FI_EINVAL; - } - - return sock_rx_ctx_cancel(rx_ctx, context); -} - -size_t sock_get_tx_size(size_t size) -{ - return roundup_power_of_two(size * SOCK_EP_TX_ENTRY_SZ) / - SOCK_EP_TX_ENTRY_SZ; -} - -static ssize_t sock_rx_size_left(struct fid_ep *ep) -{ - struct sock_rx_ctx *rx_ctx; - struct sock_ep *sock_ep; - - switch (ep->fid.fclass) { - case FI_CLASS_EP: - sock_ep = container_of(ep, struct sock_ep, ep); - rx_ctx = sock_ep->attr->rx_ctx; - break; - - case FI_CLASS_RX_CTX: - case FI_CLASS_SRX_CTX: - rx_ctx = container_of(ep, struct sock_rx_ctx, ctx); - break; - - default: - SOCK_LOG_ERROR("Invalid ep type\n"); - return -FI_EINVAL; - } - - return rx_ctx->enabled ? rx_ctx->num_left : -FI_EOPBADSTATE; -} - -static ssize_t sock_tx_size_left(struct fid_ep *ep) -{ - struct sock_ep *sock_ep; - struct sock_tx_ctx *tx_ctx; - ssize_t num_left = 0; - - switch (ep->fid.fclass) { - case FI_CLASS_EP: - sock_ep = container_of(ep, struct sock_ep, ep); - tx_ctx = sock_ep->attr->tx_ctx; - break; - - case FI_CLASS_TX_CTX: - tx_ctx = container_of(ep, struct sock_tx_ctx, fid.ctx); - break; - - default: - SOCK_LOG_ERROR("Invalid EP type\n"); - return -FI_EINVAL; - } - - if (!tx_ctx->enabled) - return -FI_EOPBADSTATE; - - ofi_mutex_lock(&tx_ctx->rb_lock); - num_left = ofi_rbavail(&tx_ctx->rb)/SOCK_EP_TX_ENTRY_SZ; - ofi_mutex_unlock(&tx_ctx->rb_lock); - return num_left; -} - -struct fi_ops_ep sock_ctx_ep_ops = { - .size = sizeof(struct fi_ops_ep), - .cancel = sock_ep_cancel, - .getopt = sock_ctx_getopt, - .setopt = sock_ctx_setopt, - .tx_ctx = fi_no_tx_ctx, - .rx_ctx = fi_no_rx_ctx, - .rx_size_left = sock_rx_size_left, - .tx_size_left = sock_tx_size_left, -}; - -static int sock_eq_fid_match(struct dlist_entry *entry, const void *arg) -{ - struct sock_eq_entry *sock_eq_entry; - struct fi_eq_entry *eq_entry; - fid_t fid = (fid_t)arg; - - sock_eq_entry = container_of(entry, struct sock_eq_entry, entry); - /* fi_eq_entry, fi_eq_cm_entry and fi_eq_err_entry all - * have fid_t as first 
member */ - eq_entry = (struct fi_eq_entry *)sock_eq_entry->event; - return (fid == eq_entry->fid); -} - -static void sock_ep_clear_eq_list(struct dlistfd_head *list, - struct fid_ep *ep_fid) -{ - struct dlist_entry *entry; - - while (!dlistfd_empty(list)) { - entry = dlist_remove_first_match(&list->list, sock_eq_fid_match, - ep_fid); - if (!entry) - break; - dlistfd_reset(list); - free(container_of(entry, struct sock_eq_entry, entry)); - } -} - -static int sock_ep_close(struct fid *fid) -{ - struct sock_conn_req_handle *handle; - struct sock_ep *sock_ep; - - switch (fid->fclass) { - case FI_CLASS_EP: - sock_ep = container_of(fid, struct sock_ep, ep.fid); - break; - - case FI_CLASS_SEP: - sock_ep = container_of(fid, struct sock_ep, ep.fid); - break; - - default: - return -FI_EINVAL; - } - - if (sock_ep->is_alias) { - ofi_atomic_dec32(&sock_ep->attr->ref); - return 0; - } - if (ofi_atomic_get32(&sock_ep->attr->ref) || - ofi_atomic_get32(&sock_ep->attr->num_rx_ctx) || - ofi_atomic_get32(&sock_ep->attr->num_tx_ctx)) - return -FI_EBUSY; - - if (sock_ep->attr->ep_type == FI_EP_MSG) { - if (sock_ep->attr->info.handle) { - handle = container_of(sock_ep->attr->info.handle, - struct sock_conn_req_handle, handle); - sock_ep_cm_wait_handle_finalized(&sock_ep->attr->domain->cm_head, - handle); - free(handle->req); - free(handle); - } - } else { - if (sock_ep->attr->av) - ofi_atomic_dec32(&sock_ep->attr->av->ref); - } - if (sock_ep->attr->av) { - ofi_mutex_lock(&sock_ep->attr->av->list_lock); - fid_list_remove(&sock_ep->attr->av->ep_list, - &sock_ep->attr->lock, &sock_ep->ep.fid); - ofi_mutex_unlock(&sock_ep->attr->av->list_lock); - } - - pthread_mutex_lock(&sock_ep->attr->domain->pe->list_lock); - if (sock_ep->attr->tx_shared) { - ofi_mutex_lock(&sock_ep->attr->tx_ctx->lock); - dlist_remove(&sock_ep->attr->tx_ctx_entry); - ofi_mutex_unlock(&sock_ep->attr->tx_ctx->lock); - } - - if (sock_ep->attr->rx_shared) { - ofi_mutex_lock(&sock_ep->attr->rx_ctx->lock); - 
dlist_remove(&sock_ep->attr->rx_ctx_entry); - ofi_mutex_unlock(&sock_ep->attr->rx_ctx->lock); - } - pthread_mutex_unlock(&sock_ep->attr->domain->pe->list_lock); - - if (sock_ep->attr->conn_handle.do_listen) { - ofi_mutex_lock(&sock_ep->attr->domain->conn_listener.signal_lock); - ofi_epoll_del(sock_ep->attr->domain->conn_listener.epollfd, - sock_ep->attr->conn_handle.sock); - sock_ep->attr->domain->conn_listener.removed_from_epollfd = true; - ofi_mutex_unlock(&sock_ep->attr->domain->conn_listener.signal_lock); - ofi_close_socket(sock_ep->attr->conn_handle.sock); - sock_ep->attr->conn_handle.do_listen = 0; - } - - ofi_mutex_destroy(&sock_ep->attr->cm.lock); - - if (sock_ep->attr->eq) { - ofi_mutex_lock(&sock_ep->attr->eq->lock); - sock_ep_clear_eq_list(&sock_ep->attr->eq->list, - &sock_ep->ep); - /* Any err_data if present would be freed by - * sock_eq_clean_err_data_list when EQ is closed */ - sock_ep_clear_eq_list(&sock_ep->attr->eq->err_list, - &sock_ep->ep); - ofi_mutex_unlock(&sock_ep->attr->eq->lock); - } - - if (sock_ep->attr->fclass != FI_CLASS_SEP) { - if (!sock_ep->attr->tx_shared) - sock_pe_remove_tx_ctx(sock_ep->attr->tx_array[0]); - - sock_tx_ctx_close(sock_ep->attr->tx_array[0]); - sock_tx_ctx_free(sock_ep->attr->tx_array[0]); - } - - if (sock_ep->attr->fclass != FI_CLASS_SEP) { - if (!sock_ep->attr->rx_shared) - sock_pe_remove_rx_ctx(sock_ep->attr->rx_array[0]); - - sock_rx_ctx_close(sock_ep->attr->rx_array[0]); - sock_rx_ctx_free(sock_ep->attr->rx_array[0]); - } - - free(sock_ep->attr->tx_array); - free(sock_ep->attr->rx_array); - - if (sock_ep->attr->src_addr) - free(sock_ep->attr->src_addr); - if (sock_ep->attr->dest_addr) - free(sock_ep->attr->dest_addr); - - ofi_mutex_lock(&sock_ep->attr->domain->pe->lock); - ofi_idm_reset(&sock_ep->attr->av_idm, NULL); - sock_conn_map_destroy(sock_ep->attr); - ofi_mutex_unlock(&sock_ep->attr->domain->pe->lock); - - ofi_atomic_dec32(&sock_ep->attr->domain->ref); - ofi_mutex_destroy(&sock_ep->attr->lock); - 
free(sock_ep->attr); - free(sock_ep); - return 0; -} - -static int sock_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags) -{ - int ret; - size_t i; - struct sock_ep *ep; - struct sock_eq *eq; - struct sock_cq *cq; - struct sock_av *av; - struct sock_cntr *cntr; - struct sock_tx_ctx *tx_ctx; - struct sock_rx_ctx *rx_ctx; - - ret = ofi_ep_bind_valid(&sock_prov, bfid, flags); - if (ret) - return ret; - - switch (fid->fclass) { - case FI_CLASS_EP: - ep = container_of(fid, struct sock_ep, ep.fid); - break; - - case FI_CLASS_SEP: - ep = container_of(fid, struct sock_ep, ep.fid); - break; - - default: - return -FI_EINVAL; - } - - switch (bfid->fclass) { - case FI_CLASS_EQ: - eq = container_of(bfid, struct sock_eq, eq.fid); - ep->attr->eq = eq; - break; - - case FI_CLASS_MR: - return 0; - - case FI_CLASS_CQ: - cq = container_of(bfid, struct sock_cq, cq_fid.fid); - if (ep->attr->domain != cq->domain) - return -FI_EINVAL; - - if (flags & FI_SEND) { - for (i = 0; i < ep->attr->ep_attr.tx_ctx_cnt; i++) { - tx_ctx = ep->attr->tx_array[i]; - - if (!tx_ctx) - continue; - - ret = sock_ctx_bind_cq(&tx_ctx->fid.ctx.fid, bfid, flags); - if (ret) - return ret; - } - } - - if (flags & FI_RECV) { - for (i = 0; i < ep->attr->ep_attr.rx_ctx_cnt; i++) { - rx_ctx = ep->attr->rx_array[i]; - - if (!rx_ctx) - continue; - - ret = sock_ctx_bind_cq(&rx_ctx->ctx.fid, bfid, flags); - if (ret) - return ret; - } - } - break; - - case FI_CLASS_CNTR: - cntr = container_of(bfid, struct sock_cntr, cntr_fid.fid); - if (ep->attr->domain != cntr->domain) - return -FI_EINVAL; - - if (flags & FI_SEND || flags & FI_WRITE || flags & FI_READ) { - for (i = 0; i < ep->attr->ep_attr.tx_ctx_cnt; i++) { - tx_ctx = ep->attr->tx_array[i]; - - if (!tx_ctx) - continue; - - ret = sock_ctx_bind_cntr(&tx_ctx->fid.ctx.fid, bfid, flags); - if (ret) - return ret; - } - } - - if (flags & FI_RECV || flags & FI_REMOTE_READ || - flags & FI_REMOTE_WRITE) { - for (i = 0; i < ep->attr->ep_attr.rx_ctx_cnt; i++) { - rx_ctx = 
ep->attr->rx_array[i]; - - if (!rx_ctx) - continue; - - ret = sock_ctx_bind_cntr(&rx_ctx->ctx.fid, bfid, flags); - if (ret) - return ret; - } - } - break; - - case FI_CLASS_AV: - av = container_of(bfid, struct sock_av, av_fid.fid); - if (ep->attr->domain != av->domain) - return -FI_EINVAL; - - ep->attr->av = av; - ofi_atomic_inc32(&av->ref); - - if (ep->attr->tx_ctx && - ep->attr->tx_ctx->fid.ctx.fid.fclass == FI_CLASS_TX_CTX) { - ep->attr->tx_ctx->av = av; - } - - if (ep->attr->rx_ctx && - ep->attr->rx_ctx->ctx.fid.fclass == FI_CLASS_RX_CTX) - ep->attr->rx_ctx->av = av; - - for (i = 0; i < ep->attr->ep_attr.tx_ctx_cnt; i++) { - if (ep->attr->tx_array[i]) - ep->attr->tx_array[i]->av = av; - } - - for (i = 0; i < ep->attr->ep_attr.rx_ctx_cnt; i++) { - if (ep->attr->rx_array[i]) - ep->attr->rx_array[i]->av = av; - } - ofi_mutex_lock(&av->list_lock); - ret = fid_list_insert(&av->ep_list, &ep->attr->lock, &ep->ep.fid); - if (ret) { - SOCK_LOG_ERROR("Error in adding fid in the EP list\n"); - ofi_mutex_unlock(&av->list_lock); - return ret; - } - ofi_mutex_unlock(&av->list_lock); - break; - - case FI_CLASS_STX_CTX: - tx_ctx = container_of(bfid, struct sock_tx_ctx, fid.stx.fid); - ofi_mutex_lock(&tx_ctx->lock); - dlist_insert_tail(&ep->attr->tx_ctx_entry, &tx_ctx->ep_list); - ofi_mutex_unlock(&tx_ctx->lock); - - ep->attr->tx_ctx->use_shared = 1; - ep->attr->tx_ctx->stx_ctx = tx_ctx; - break; - - case FI_CLASS_SRX_CTX: - rx_ctx = container_of(bfid, struct sock_rx_ctx, ctx); - ofi_mutex_lock(&rx_ctx->lock); - dlist_insert_tail(&ep->attr->rx_ctx_entry, &rx_ctx->ep_list); - ofi_mutex_unlock(&rx_ctx->lock); - - ep->attr->rx_ctx->use_shared = 1; - ep->attr->rx_ctx->srx_ctx = rx_ctx; - break; - - default: - return -ENOSYS; - } - - return 0; -} - -static int sock_ep_control(struct fid *fid, int command, void *arg) -{ - int ret; - struct fid_ep *ep_fid; - struct fi_alias *alias; - struct sock_ep *sock_ep, *new_ep; - - switch (fid->fclass) { - case FI_CLASS_EP: - sock_ep = 
container_of(fid, struct sock_ep, ep.fid); - break; - - case FI_CLASS_SEP: - sock_ep = container_of(fid, struct sock_ep, ep.fid); - break; - - default: - return -FI_EINVAL; - } - - switch (command) { - case FI_ALIAS: - alias = (struct fi_alias *)arg; - new_ep = calloc(1, sizeof(*new_ep)); - if (!new_ep) - return -FI_ENOMEM; - - memcpy(&new_ep->tx_attr, &sock_ep->tx_attr, sizeof(struct fi_tx_attr)); - memcpy(&new_ep->rx_attr, &sock_ep->rx_attr, sizeof(struct fi_rx_attr)); - ret = sock_setopflags(&new_ep->tx_attr, &new_ep->rx_attr, - alias->flags); - if (ret) { - free(new_ep); - return -FI_EINVAL; - } - new_ep->attr = sock_ep->attr; - new_ep->is_alias = 1; - memcpy(&new_ep->ep, &sock_ep->ep, sizeof(struct fid_ep)); - *alias->fid = &new_ep->ep.fid; - ofi_atomic_inc32(&new_ep->attr->ref); - break; - case FI_GETOPSFLAG: - ret = sock_getopflags(&sock_ep->tx_attr, &sock_ep->rx_attr, (uint64_t *) arg); - if (ret) - return -EINVAL; - break; - case FI_SETOPSFLAG: - ret = sock_setopflags(&sock_ep->tx_attr, &sock_ep->rx_attr, *(uint64_t *) arg); - if (ret) - return -FI_EINVAL; - break; - break; - case FI_ENABLE: - ep_fid = container_of(fid, struct fid_ep, fid); - return sock_ep_enable(ep_fid); - - default: - return -FI_EINVAL; - } - return 0; -} - - -struct fi_ops sock_ep_fi_ops = { - .size = sizeof(struct fi_ops), - .close = sock_ep_close, - .bind = sock_ep_bind, - .control = sock_ep_control, - .ops_open = fi_no_ops_open, -}; - -int sock_ep_enable(struct fid_ep *ep) -{ - size_t i; - struct sock_ep *sock_ep; - struct sock_tx_ctx *tx_ctx; - struct sock_rx_ctx *rx_ctx; - - sock_ep = container_of(ep, struct sock_ep, ep); - for (i = 0; i < sock_ep->attr->ep_attr.tx_ctx_cnt; i++) { - tx_ctx = sock_ep->attr->tx_array[i]; - if (tx_ctx) { - tx_ctx->enabled = 1; - if (tx_ctx->use_shared) { - if (tx_ctx->stx_ctx) { - sock_pe_add_tx_ctx(tx_ctx->domain->pe, tx_ctx->stx_ctx); - tx_ctx->stx_ctx->enabled = 1; - } - } else { - sock_pe_add_tx_ctx(tx_ctx->domain->pe, tx_ctx); - } - } - } - - 
for (i = 0; i < sock_ep->attr->ep_attr.rx_ctx_cnt; i++) { - rx_ctx = sock_ep->attr->rx_array[i]; - if (rx_ctx) { - rx_ctx->enabled = 1; - if (rx_ctx->use_shared) { - if (rx_ctx->srx_ctx) { - sock_pe_add_rx_ctx(rx_ctx->domain->pe, rx_ctx->srx_ctx); - rx_ctx->srx_ctx->enabled = 1; - } - } else { - sock_pe_add_rx_ctx(rx_ctx->domain->pe, rx_ctx); - } - } - } - - if (sock_ep->attr->ep_type != FI_EP_MSG && !sock_ep->attr->conn_handle.do_listen) { - int ret = sock_conn_listen(sock_ep->attr); - if (ret) { - SOCK_LOG_ERROR("cannot start connection thread\n"); - return ret; - } - } - sock_ep->attr->is_enabled = 1; - return 0; -} - -int sock_ep_disable(struct fid_ep *ep) -{ - size_t i; - struct sock_ep *sock_ep; - - sock_ep = container_of(ep, struct sock_ep, ep); - - if (sock_ep->attr->tx_ctx && - sock_ep->attr->tx_ctx->fid.ctx.fid.fclass == FI_CLASS_TX_CTX) { - sock_ep->attr->tx_ctx->enabled = 0; - } - - if (sock_ep->attr->rx_ctx && - sock_ep->attr->rx_ctx->ctx.fid.fclass == FI_CLASS_RX_CTX) { - sock_ep->attr->rx_ctx->enabled = 0; - } - - for (i = 0; i < sock_ep->attr->ep_attr.tx_ctx_cnt; i++) { - if (sock_ep->attr->tx_array[i]) - sock_ep->attr->tx_array[i]->enabled = 0; - } - - for (i = 0; i < sock_ep->attr->ep_attr.rx_ctx_cnt; i++) { - if (sock_ep->attr->rx_array[i]) - sock_ep->attr->rx_array[i]->enabled = 0; - } - sock_ep->attr->is_enabled = 0; - return 0; -} - -static int sock_ep_getopt(fid_t fid, int level, int optname, - void *optval, size_t *optlen) -{ - struct sock_ep *sock_ep; - sock_ep = container_of(fid, struct sock_ep, ep.fid); - - if (level != FI_OPT_ENDPOINT) - return -ENOPROTOOPT; - - switch (optname) { - case FI_OPT_MIN_MULTI_RECV: - *(size_t *)optval = sock_ep->attr->min_multi_recv; - *optlen = sizeof(size_t); - break; - - case FI_OPT_CM_DATA_SIZE: - if (*optlen < sizeof(size_t)) { - *optlen = sizeof(size_t); - return -FI_ETOOSMALL; - } - *((size_t *) optval) = SOCK_EP_MAX_CM_DATA_SZ; - *optlen = sizeof(size_t); - break; - - default: - return 
-FI_ENOPROTOOPT; - } - return 0; -} - -static int sock_ep_setopt(fid_t fid, int level, int optname, - const void *optval, size_t optlen) -{ - size_t i; - struct sock_ep *sock_ep; - sock_ep = container_of(fid, struct sock_ep, ep.fid); - - if (level != FI_OPT_ENDPOINT) - return -ENOPROTOOPT; - - switch (optname) { - case FI_OPT_MIN_MULTI_RECV: - - sock_ep->attr->min_multi_recv = *(size_t *)optval; - for (i = 0; i < sock_ep->attr->ep_attr.rx_ctx_cnt; i++) { - if (sock_ep->attr->rx_array[i] != NULL) { - sock_ep->attr->rx_array[i]->min_multi_recv = - sock_ep->attr->min_multi_recv; - } - } - break; - - default: - return -ENOPROTOOPT; - } - return 0; -} - -static int sock_ep_tx_ctx(struct fid_ep *ep, int index, struct fi_tx_attr *attr, - struct fid_ep **tx_ep, void *context) -{ - struct sock_ep *sock_ep; - struct sock_tx_ctx *tx_ctx; - - sock_ep = container_of(ep, struct sock_ep, ep); - if (sock_ep->attr->fclass != FI_CLASS_SEP || - index >= (int)sock_ep->attr->ep_attr.tx_ctx_cnt) - return -FI_EINVAL; - - if (attr) { - if (ofi_check_tx_attr(&sock_prov, sock_ep->attr->info.tx_attr, - attr, 0) || - ofi_check_attr_subset(&sock_prov, - sock_ep->attr->info.tx_attr->caps, - attr->caps & ~OFI_IGNORED_TX_CAPS)) { - return -FI_ENODATA; - } - tx_ctx = sock_tx_ctx_alloc(attr, context, 0); - } else { - tx_ctx = sock_tx_ctx_alloc(&sock_ep->tx_attr, context, 0); - } - if (!tx_ctx) - return -FI_ENOMEM; - - tx_ctx->tx_id = (uint16_t) index; - tx_ctx->ep_attr = sock_ep->attr; - tx_ctx->domain = sock_ep->attr->domain; - if (tx_ctx->rx_ctrl_ctx && tx_ctx->rx_ctrl_ctx->is_ctrl_ctx) - tx_ctx->rx_ctrl_ctx->domain = sock_ep->attr->domain; - tx_ctx->av = sock_ep->attr->av; - dlist_insert_tail(&sock_ep->attr->tx_ctx_entry, &tx_ctx->ep_list); - - tx_ctx->fid.ctx.fid.ops = &sock_ctx_ops; - tx_ctx->fid.ctx.ops = &sock_ctx_ep_ops; - tx_ctx->fid.ctx.msg = &sock_ep_msg_ops; - tx_ctx->fid.ctx.tagged = &sock_ep_tagged; - tx_ctx->fid.ctx.rma = &sock_ep_rma; - tx_ctx->fid.ctx.atomic = &sock_ep_atomic; - - 
*tx_ep = &tx_ctx->fid.ctx; - sock_ep->attr->tx_array[index] = tx_ctx; - ofi_atomic_inc32(&sock_ep->attr->num_tx_ctx); - ofi_atomic_inc32(&sock_ep->attr->domain->ref); - return 0; -} - -static int sock_ep_rx_ctx(struct fid_ep *ep, int index, struct fi_rx_attr *attr, - struct fid_ep **rx_ep, void *context) -{ - struct sock_ep *sock_ep; - struct sock_rx_ctx *rx_ctx; - - sock_ep = container_of(ep, struct sock_ep, ep); - if (sock_ep->attr->fclass != FI_CLASS_SEP || - index >= (int)sock_ep->attr->ep_attr.rx_ctx_cnt) - return -FI_EINVAL; - - if (attr) { - if (ofi_check_rx_attr(&sock_prov, &sock_ep->attr->info, attr, 0) || - ofi_check_attr_subset(&sock_prov, sock_ep->attr->info.rx_attr->caps, - attr->caps & ~OFI_IGNORED_RX_CAPS)) { - return -FI_ENODATA; - } - rx_ctx = sock_rx_ctx_alloc(attr, context, 0); - } else { - rx_ctx = sock_rx_ctx_alloc(&sock_ep->rx_attr, context, 0); - } - if (!rx_ctx) - return -FI_ENOMEM; - - rx_ctx->rx_id = (uint16_t) index; - rx_ctx->ep_attr = sock_ep->attr; - rx_ctx->domain = sock_ep->attr->domain; - rx_ctx->av = sock_ep->attr->av; - dlist_insert_tail(&sock_ep->attr->rx_ctx_entry, &rx_ctx->ep_list); - - rx_ctx->ctx.fid.ops = &sock_ctx_ops; - rx_ctx->ctx.ops = &sock_ctx_ep_ops; - rx_ctx->ctx.msg = &sock_ep_msg_ops; - rx_ctx->ctx.tagged = &sock_ep_tagged; - - rx_ctx->min_multi_recv = sock_ep->attr->min_multi_recv; - *rx_ep = &rx_ctx->ctx; - sock_ep->attr->rx_array[index] = rx_ctx; - ofi_atomic_inc32(&sock_ep->attr->num_rx_ctx); - ofi_atomic_inc32(&sock_ep->attr->domain->ref); - return 0; -} - -struct fi_ops_ep sock_ep_ops = { - .size = sizeof(struct fi_ops_ep), - .cancel = sock_ep_cancel, - .getopt = sock_ep_getopt, - .setopt = sock_ep_setopt, - .tx_ctx = sock_ep_tx_ctx, - .rx_ctx = sock_ep_rx_ctx, - .rx_size_left = sock_rx_size_left, - .tx_size_left = sock_tx_size_left, -}; - -static int sock_verify_tx_attr(const struct fi_tx_attr *attr) -{ - if (!attr) - return 0; - - if (attr->inject_size > SOCK_EP_MAX_INJECT_SZ) - return -FI_ENODATA; - - if 
(sock_get_tx_size(attr->size) > sock_get_tx_size(SOCK_EP_TX_SZ)) - return -FI_ENODATA; - - if (attr->iov_limit > SOCK_EP_MAX_IOV_LIMIT) - return -FI_ENODATA; - - if (attr->rma_iov_limit > SOCK_EP_MAX_IOV_LIMIT) - return -FI_ENODATA; - - return 0; -} - -int sock_stx_ctx(struct fid_domain *domain, - struct fi_tx_attr *attr, struct fid_stx **stx, void *context) -{ - struct sock_domain *dom; - struct sock_tx_ctx *tx_ctx; - - if (attr && sock_verify_tx_attr(attr)) - return -FI_EINVAL; - - dom = container_of(domain, struct sock_domain, dom_fid); - - tx_ctx = sock_stx_ctx_alloc(attr ? attr : &sock_stx_attr, context); - if (!tx_ctx) - return -FI_ENOMEM; - - tx_ctx->domain = dom; - if (tx_ctx->rx_ctrl_ctx && tx_ctx->rx_ctrl_ctx->is_ctrl_ctx) - tx_ctx->rx_ctrl_ctx->domain = dom; - - tx_ctx->fid.stx.fid.ops = &sock_ctx_ops; - tx_ctx->fid.stx.ops = &sock_ep_ops; - ofi_atomic_inc32(&dom->ref); - - *stx = &tx_ctx->fid.stx; - return 0; -} - -static int sock_verify_rx_attr(const struct fi_rx_attr *attr) -{ - if (!attr) - return 0; - - if ((attr->msg_order | SOCK_EP_MSG_ORDER) != SOCK_EP_MSG_ORDER) - return -FI_ENODATA; - - if ((attr->comp_order | SOCK_EP_COMP_ORDER) != SOCK_EP_COMP_ORDER) - return -FI_ENODATA; - - if (attr->total_buffered_recv > SOCK_EP_MAX_BUFF_RECV) - return -FI_ENODATA; - - if (sock_get_tx_size(attr->size) > sock_get_tx_size(SOCK_EP_TX_SZ)) - return -FI_ENODATA; - - if (attr->iov_limit > SOCK_EP_MAX_IOV_LIMIT) - return -FI_ENODATA; - - return 0; -} - -int sock_srx_ctx(struct fid_domain *domain, - struct fi_rx_attr *attr, struct fid_ep **srx, void *context) -{ - struct sock_domain *dom; - struct sock_rx_ctx *rx_ctx; - - if (attr && sock_verify_rx_attr(attr)) - return -FI_EINVAL; - - dom = container_of(domain, struct sock_domain, dom_fid); - rx_ctx = sock_rx_ctx_alloc(attr ? 
attr : &sock_srx_attr, context, 0); - if (!rx_ctx) - return -FI_ENOMEM; - - rx_ctx->domain = dom; - rx_ctx->ctx.fid.fclass = FI_CLASS_SRX_CTX; - - rx_ctx->ctx.fid.ops = &sock_ctx_ops; - rx_ctx->ctx.ops = &sock_ctx_ep_ops; - rx_ctx->ctx.msg = &sock_ep_msg_ops; - rx_ctx->ctx.tagged = &sock_ep_tagged; - rx_ctx->enabled = 1; - - /* default config */ - rx_ctx->min_multi_recv = SOCK_EP_MIN_MULTI_RECV; - *srx = &rx_ctx->ctx; - ofi_atomic_inc32(&dom->ref); - return 0; -} - -#if HAVE_GETIFADDRS -static char *sock_get_fabric_name(struct sockaddr *src_addr) -{ - int ret; - struct ifaddrs *ifaddrs, *ifa; - char *fabric_name = NULL; - union ofi_sock_ip net_in_addr; - char netbuf[OFI_ADDRSTRLEN]; - int prefix_len; - - ret = ofi_getifaddrs(&ifaddrs); - if (ret) - return NULL; - - for (ifa = ifaddrs; ifa != NULL; ifa = ifa->ifa_next) { - if (ifa->ifa_addr == NULL || !(ifa->ifa_flags & IFF_UP)) - continue; - - if (ofi_equals_ipaddr(ifa->ifa_addr, src_addr)) { - prefix_len = (int) ofi_mask_addr(&net_in_addr.sa, - ifa->ifa_addr, - ifa->ifa_netmask); - - switch (net_in_addr.sa.sa_family) { - case AF_INET: - inet_ntop(AF_INET, - &((struct sockaddr_in *)&net_in_addr)->sin_addr, - netbuf, sizeof(netbuf)); - break; - case AF_INET6: - inet_ntop(AF_INET6, - &((struct sockaddr_in6 *)&net_in_addr)->sin6_addr, - netbuf, sizeof(netbuf)); - break; - default: - snprintf(netbuf, sizeof(netbuf), "%s", ""); - netbuf[sizeof(netbuf)-1] = '\0'; - break; - } - snprintf(netbuf + strlen(netbuf), sizeof(netbuf) - strlen(netbuf), - "%s%d", "/", prefix_len); - netbuf[sizeof(netbuf)-1] = '\0'; - fabric_name = strdup(netbuf); - goto out; - } - } -out: - freeifaddrs(ifaddrs); - return fabric_name; -} - -char *sock_get_domain_name(struct sockaddr *src_addr) -{ - int ret; - struct ifaddrs *ifaddrs, *ifa; - char *domain_name = NULL; - - ret = ofi_getifaddrs(&ifaddrs); - if (ret) - return NULL; - - for (ifa = ifaddrs; ifa != NULL; ifa = ifa->ifa_next) { - if (ifa->ifa_addr == NULL || !(ifa->ifa_flags & IFF_UP)) - 
continue; - - if (ofi_equals_ipaddr(ifa->ifa_addr, src_addr)) { - domain_name = strdup(ifa->ifa_name); - goto out; - } - } -out: - freeifaddrs(ifaddrs); - return domain_name; -} -#else -static char *sock_get_fabric_name(struct sockaddr *src_addr) -{ - return NULL; -} - -char *sock_get_domain_name(struct sockaddr *src_addr) -{ - return NULL; -} -#endif - -static void sock_set_fabric_attr(void *src_addr, const struct fi_fabric_attr *hint_attr, - struct fi_fabric_attr *attr) -{ - struct sock_fabric *fabric; - - *attr = sock_fabric_attr; - if (hint_attr && hint_attr->fabric) { - attr->fabric = hint_attr->fabric; - } else { - fabric = sock_fab_list_head(); - attr->fabric = fabric ? &fabric->fab_fid : NULL; - } - - /* reverse lookup network address from node and assign it as fabric name */ - attr->name = sock_get_fabric_name(src_addr); - if (!attr->name) - attr->name = strdup(sock_fab_name); - - attr->prov_name = strdup(hint_attr->prov_name); - attr->api_version = hint_attr->api_version; -} - -static void sock_set_domain_attr(uint32_t api_version, void *src_addr, - const struct fi_domain_attr *hint_attr, - struct fi_domain_attr *attr) -{ - struct sock_domain *domain; - - domain = sock_dom_list_head(); - attr->domain = domain ? 
&domain->dom_fid : NULL; - if (!hint_attr) { - *attr = sock_domain_attr; - - if (FI_VERSION_LT(api_version, FI_VERSION(1, 5))) - attr->mr_mode = FI_MR_SCALABLE; - goto out; - } - - if (hint_attr->domain) { - domain = container_of(hint_attr->domain, - struct sock_domain, dom_fid); - *attr = domain->attr; - attr->domain = hint_attr->domain; - goto out; - } - - *attr = *hint_attr; - if (attr->threading == FI_THREAD_UNSPEC) - attr->threading = sock_domain_attr.threading; - if (attr->control_progress == FI_PROGRESS_UNSPEC) - attr->control_progress = sock_domain_attr.control_progress; - if (attr->data_progress == FI_PROGRESS_UNSPEC) - attr->data_progress = sock_domain_attr.data_progress; - if (FI_VERSION_LT(api_version, FI_VERSION(1, 5))) { - if (attr->mr_mode == FI_MR_UNSPEC) - attr->mr_mode = FI_MR_SCALABLE; - } else { - if ((attr->mr_mode != FI_MR_BASIC) && - (attr->mr_mode != FI_MR_SCALABLE)) - attr->mr_mode = 0; - } - - if (attr->cq_cnt == 0) - attr->cq_cnt = sock_domain_attr.cq_cnt; - if (attr->ep_cnt == 0) - attr->ep_cnt = sock_domain_attr.ep_cnt; - if (attr->tx_ctx_cnt == 0) - attr->tx_ctx_cnt = sock_domain_attr.tx_ctx_cnt; - if (attr->rx_ctx_cnt == 0) - attr->rx_ctx_cnt = sock_domain_attr.rx_ctx_cnt; - if (attr->max_ep_tx_ctx == 0) - attr->max_ep_tx_ctx = sock_domain_attr.max_ep_tx_ctx; - if (attr->max_ep_rx_ctx == 0) - attr->max_ep_rx_ctx = sock_domain_attr.max_ep_rx_ctx; - if (attr->max_ep_stx_ctx == 0) - attr->max_ep_stx_ctx = sock_domain_attr.max_ep_stx_ctx; - if (attr->max_ep_srx_ctx == 0) - attr->max_ep_srx_ctx = sock_domain_attr.max_ep_srx_ctx; - if (attr->cntr_cnt == 0) - attr->cntr_cnt = sock_domain_attr.cntr_cnt; - if (attr->mr_iov_limit == 0) - attr->mr_iov_limit = sock_domain_attr.mr_iov_limit; - - attr->mr_key_size = sock_domain_attr.mr_key_size; - attr->cq_data_size = sock_domain_attr.cq_data_size; - attr->resource_mgmt = sock_domain_attr.resource_mgmt; -out: - /* reverse lookup interface from node and assign it as domain name */ - attr->name = 
sock_get_domain_name(src_addr); - if (!attr->name) - attr->name = strdup(sock_fab_name); -} - - -struct fi_info *sock_fi_info(uint32_t version, enum fi_ep_type ep_type, - const struct fi_info *hints, void *src_addr, - void *dest_addr) -{ - struct fi_info *info; - - info = fi_dupinfo(hints); - if (!info) - return NULL; - free(info->src_addr); - free(info->dest_addr); - info->src_addr = NULL; - info->dest_addr = NULL; - info->src_addrlen = 0; - info->dest_addrlen = 0; - - info->src_addr = calloc(1, ofi_sizeofip(src_addr)); - if (!info->src_addr) - goto err; - - info->mode = SOCK_MODE; - - if (src_addr) { - memcpy(info->src_addr, src_addr, ofi_sizeofaddr(src_addr)); - } else { - sock_get_src_addr_from_hostname(info->src_addr, NULL, - dest_addr ? ((struct sockaddr *) dest_addr)->sa_family : - ofi_get_sa_family(hints)); - } - - info->src_addrlen = ofi_sizeofaddr(info->src_addr); - if (info->src_addrlen == sizeof(struct sockaddr_in6)) - info->addr_format = FI_SOCKADDR_IN6; - else - info->addr_format = FI_SOCKADDR_IN; - - if (dest_addr) { - info->dest_addr = calloc(1, ofi_sizeofip(dest_addr)); - if (!info->dest_addr) - goto err; - info->dest_addrlen = ofi_sizeofaddr(dest_addr); - memcpy(info->dest_addr, dest_addr, info->dest_addrlen); - } - - if (hints) { - sock_set_domain_attr(version, info->src_addr, hints->domain_attr, - info->domain_attr); - sock_set_fabric_attr(info->src_addr, hints->fabric_attr, info->fabric_attr); - } else { - sock_set_domain_attr(version, info->src_addr, NULL, - info->domain_attr); - sock_set_fabric_attr(info->src_addr, NULL, info->fabric_attr); - } - - info->ep_attr->type = ep_type; - return info; -err: - fi_freeinfo(info); - return NULL; -} - -int sock_get_src_addr_from_hostname(union ofi_sock_ip *src_addr, - const char *service, uint16_t sa_family) -{ - int ret; - struct addrinfo ai, *rai = NULL; - char hostname[HOST_NAME_MAX]; - - memset(&ai, 0, sizeof(ai)); - ai.ai_family = sa_family; - ai.ai_socktype = SOCK_STREAM; - - 
ofi_getnodename(sa_family, hostname, sizeof(hostname)); - ret = getaddrinfo(hostname, service, &ai, &rai); - if (ret) { - SOCK_LOG_DBG("getaddrinfo failed!\n"); - return -FI_EINVAL; - } - memcpy(src_addr, rai->ai_addr, rai->ai_addrlen); - freeaddrinfo(rai); - return 0; -} - -static int sock_ep_assign_src_addr(struct sock_ep *sock_ep, struct fi_info *info) -{ - sock_ep->attr->src_addr = calloc(1, sizeof(union ofi_sock_ip)); - if (!sock_ep->attr->src_addr) - return -FI_ENOMEM; - - if (info && info->dest_addr) - return sock_get_src_addr(info->dest_addr, sock_ep->attr->src_addr); - else - return sock_get_src_addr_from_hostname(sock_ep->attr->src_addr, - NULL, 0); -} - -int sock_alloc_endpoint(struct fid_domain *domain, struct fi_info *info, - struct sock_ep **ep, void *context, size_t fclass) -{ - int ret; - struct sock_ep *sock_ep; - struct sock_tx_ctx *tx_ctx; - struct sock_rx_ctx *rx_ctx; - struct sock_domain *sock_dom; - - assert(info); - sock_dom = container_of(domain, struct sock_domain, dom_fid); - - sock_ep = calloc(1, sizeof(*sock_ep)); - if (!sock_ep) - return -FI_ENOMEM; - - switch (fclass) { - case FI_CLASS_EP: - sock_ep->ep.fid.fclass = FI_CLASS_EP; - sock_ep->ep.fid.context = context; - sock_ep->ep.fid.ops = &sock_ep_fi_ops; - - sock_ep->ep.ops = &sock_ep_ops; - sock_ep->ep.cm = &sock_ep_cm_ops; - sock_ep->ep.msg = &sock_ep_msg_ops; - sock_ep->ep.rma = &sock_ep_rma; - sock_ep->ep.tagged = &sock_ep_tagged; - sock_ep->ep.atomic = &sock_ep_atomic; - break; - - case FI_CLASS_SEP: - sock_ep->ep.fid.fclass = FI_CLASS_SEP; - sock_ep->ep.fid.context = context; - sock_ep->ep.fid.ops = &sock_ep_fi_ops; - - sock_ep->ep.ops = &sock_ep_ops; - sock_ep->ep.cm = &sock_ep_cm_ops; - break; - - default: - ret = -FI_EINVAL; - goto err1; - } - - sock_ep->attr = (struct sock_ep_attr *) calloc(1, sizeof(struct sock_ep_attr)); - if (!sock_ep->attr) { - ret = -FI_ENOMEM; - goto err1; - } - sock_ep->attr->fclass = fclass; - *ep = sock_ep; - - sock_ep->attr->info.caps = info->caps; 
- sock_ep->attr->info.addr_format = info->addr_format; - - if (info->ep_attr) { - sock_ep->attr->ep_type = info->ep_attr->type; - sock_ep->attr->ep_attr.tx_ctx_cnt = info->ep_attr->tx_ctx_cnt; - sock_ep->attr->ep_attr.rx_ctx_cnt = info->ep_attr->rx_ctx_cnt; - } - - if (info->src_addr) { - sock_ep->attr->src_addr = calloc(1, sizeof(*sock_ep-> - attr->src_addr)); - if (!sock_ep->attr->src_addr) { - ret = -FI_ENOMEM; - goto err2; - } - memcpy(sock_ep->attr->src_addr, info->src_addr, - info->src_addrlen); - } - - if (info->dest_addr) { - sock_ep->attr->dest_addr = calloc(1, sizeof(*sock_ep-> - attr->dest_addr)); - if (!sock_ep->attr->dest_addr) { - ret = -FI_ENOMEM; - goto err2; - } - memcpy(sock_ep->attr->dest_addr, info->dest_addr, - info->dest_addrlen); - } - - if (info->tx_attr) { - sock_ep->tx_attr = *info->tx_attr; - if (!(sock_ep->tx_attr.op_flags & (FI_INJECT_COMPLETE | - FI_TRANSMIT_COMPLETE | FI_DELIVERY_COMPLETE))) - sock_ep->tx_attr.op_flags |= FI_TRANSMIT_COMPLETE; - sock_ep->tx_attr.size = sock_ep->tx_attr.size ? 
- sock_ep->tx_attr.size : SOCK_EP_TX_SZ; - } - - if (info->rx_attr) - sock_ep->rx_attr = *info->rx_attr; - sock_ep->attr->info.handle = info->handle; - - if (!sock_ep->attr->src_addr && sock_ep_assign_src_addr(sock_ep, info)) { - SOCK_LOG_ERROR("failed to get src_address\n"); - ret = -FI_EINVAL; - goto err2; - } - - ofi_atomic_initialize32(&sock_ep->attr->ref, 0); - ofi_atomic_initialize32(&sock_ep->attr->num_tx_ctx, 0); - ofi_atomic_initialize32(&sock_ep->attr->num_rx_ctx, 0); - ofi_mutex_init(&sock_ep->attr->lock); - - if (sock_ep->attr->ep_attr.tx_ctx_cnt == FI_SHARED_CONTEXT) - sock_ep->attr->tx_shared = 1; - if (sock_ep->attr->ep_attr.rx_ctx_cnt == FI_SHARED_CONTEXT) - sock_ep->attr->rx_shared = 1; - - if (sock_ep->attr->fclass != FI_CLASS_SEP) { - sock_ep->attr->ep_attr.tx_ctx_cnt = 1; - sock_ep->attr->ep_attr.rx_ctx_cnt = 1; - } - - sock_ep->attr->tx_array = calloc(sock_ep->attr->ep_attr.tx_ctx_cnt, - sizeof(struct sock_tx_ctx *)); - if (!sock_ep->attr->tx_array) { - ret = -FI_ENOMEM; - goto err2; - } - - sock_ep->attr->rx_array = calloc(sock_ep->attr->ep_attr.rx_ctx_cnt, - sizeof(struct sock_rx_ctx *)); - if (!sock_ep->attr->rx_array) { - ret = -FI_ENOMEM; - goto err2; - } - - if (sock_ep->attr->fclass != FI_CLASS_SEP) { - /* default tx ctx */ - tx_ctx = sock_tx_ctx_alloc(&sock_ep->tx_attr, context, - sock_ep->attr->tx_shared); - if (!tx_ctx) { - ret = -FI_ENOMEM; - goto err2; - } - tx_ctx->ep_attr = sock_ep->attr; - tx_ctx->domain = sock_dom; - if (tx_ctx->rx_ctrl_ctx && tx_ctx->rx_ctrl_ctx->is_ctrl_ctx) - tx_ctx->rx_ctrl_ctx->domain = sock_dom; - tx_ctx->tx_id = 0; - dlist_insert_tail(&sock_ep->attr->tx_ctx_entry, &tx_ctx->ep_list); - sock_ep->attr->tx_array[0] = tx_ctx; - sock_ep->attr->tx_ctx = tx_ctx; - - /* default rx_ctx */ - rx_ctx = sock_rx_ctx_alloc(&sock_ep->rx_attr, context, - sock_ep->attr->rx_shared); - if (!rx_ctx) { - ret = -FI_ENOMEM; - goto err2; - } - rx_ctx->ep_attr = sock_ep->attr; - rx_ctx->domain = sock_dom; - rx_ctx->rx_id = 0; - 
dlist_insert_tail(&sock_ep->attr->rx_ctx_entry, &rx_ctx->ep_list); - sock_ep->attr->rx_array[0] = rx_ctx; - sock_ep->attr->rx_ctx = rx_ctx; - } - - /* default config */ - sock_ep->attr->min_multi_recv = SOCK_EP_MIN_MULTI_RECV; - - memcpy(&sock_ep->attr->info, info, sizeof(struct fi_info)); - - sock_ep->attr->domain = sock_dom; - ofi_mutex_init(&sock_ep->attr->cm.lock); - - if (sock_conn_map_init(sock_ep, sock_cm_def_map_sz)) { - SOCK_LOG_ERROR("failed to init connection map\n"); - ret = -FI_EINVAL; - goto err2; - } - - ofi_atomic_inc32(&sock_dom->ref); - return 0; - -err2: - if (sock_ep->attr) { - free(sock_ep->attr->src_addr); - free(sock_ep->attr->dest_addr); - free(sock_ep->attr); - } -err1: - free(sock_ep); - return ret; -} - -void sock_ep_remove_conn(struct sock_ep_attr *attr, struct sock_conn *conn) -{ - if (attr->cmap.used <= 0 || conn->sock_fd == -1) - return; - sock_pe_poll_del(attr->domain->pe, conn->sock_fd); - sock_conn_release_entry(&attr->cmap, conn); -} - -struct sock_conn *sock_ep_lookup_conn(struct sock_ep_attr *attr, fi_addr_t index, - union ofi_sock_ip *addr) -{ - int i; - uint64_t idx; - char buf[8]; - struct sock_conn *conn; - - idx = (attr->ep_type == FI_EP_MSG) ? index : index & attr->av->mask; - - conn = ofi_idm_lookup(&attr->av_idm, (int) idx); - if (conn && conn != SOCK_CM_CONN_IN_PROGRESS) { - /* Verify that the existing connection is still usable, and - * that the peer didn't restart. 
- */ - if (conn->connected == 0 || - (sock_comm_peek(conn, buf, 8) == 0 && conn->connected == 0)) { - sock_ep_remove_conn(attr, conn); - ofi_straddr_log(&sock_prov, FI_LOG_WARN, FI_LOG_EP_CTRL, - "Peer disconnected", &addr->sa); - return NULL; - } - if (conn->av_index != FI_ADDR_NOTAVAIL) - assert(conn->av_index == idx); - else - conn->av_index = idx; - return conn; - } - - for (i = 0; i < attr->cmap.used; i++) { - if (!attr->cmap.table[i].connected) - continue; - - if (ofi_equals_sockaddr(&attr->cmap.table[i].addr.sa, &addr->sa) && - attr->cmap.table[i].av_index == idx) { - conn = &attr->cmap.table[i]; - break; - } - } - if (conn && conn != SOCK_CM_CONN_IN_PROGRESS) { - if (conn->connected == 0 || - (sock_comm_peek(conn, buf, 8) == 0 && conn->connected == 0)) { - sock_ep_remove_conn(attr, conn); - ofi_straddr_log(&sock_prov, FI_LOG_WARN, FI_LOG_EP_CTRL, - "Peer disconnected", &addr->sa); - return NULL; - } - if (conn->av_index != FI_ADDR_NOTAVAIL) - assert(conn->av_index == idx); - else - conn->av_index = idx; - } - return conn; -} - -int sock_ep_get_conn(struct sock_ep_attr *attr, struct sock_tx_ctx *tx_ctx, - fi_addr_t index, struct sock_conn **pconn) -{ - struct sock_conn *conn; - uint64_t av_index = (attr->ep_type == FI_EP_MSG) ? - 0 : (index & attr->av->mask); - union ofi_sock_ip *addr; - int ret = FI_SUCCESS; - - if (attr->ep_type == FI_EP_MSG) - addr = attr->dest_addr; - else { - ofi_mutex_lock(&attr->av->table_lock); - addr = &attr->av->table[av_index].addr; - ofi_mutex_unlock(&attr->av->table_lock); - } - - ofi_mutex_lock(&attr->cmap.lock); - conn = sock_ep_lookup_conn(attr, av_index, addr); - if (!conn) { - conn = SOCK_CM_CONN_IN_PROGRESS; - if (ofi_idm_set(&attr->av_idm, (int) av_index, conn) < 0) - SOCK_LOG_ERROR("ofi_idm_set failed\n"); - } - ofi_mutex_unlock(&attr->cmap.lock); - - if (conn == SOCK_CM_CONN_IN_PROGRESS) - ret = sock_ep_connect(attr, av_index, &conn); - - if (!conn) { - SOCK_LOG_ERROR("Unable to find connection entry. 
" - "Error in connecting: %s\n", - fi_strerror(-ret)); - ofi_straddr_log(&sock_prov, FI_LOG_WARN, FI_LOG_EP_CTRL, - "Unable to connect to", &addr->sa); - return -FI_ENOENT; - } - - *pconn = conn; - return conn->address_published ? - 0 : (int) sock_conn_send_src_addr(attr, tx_ctx, conn); -} diff --git a/prov/sockets/src/sock_ep_dgram.c b/prov/sockets/src/sock_ep_dgram.c deleted file mode 100644 index 7a18532be90..00000000000 --- a/prov/sockets/src/sock_ep_dgram.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2014 Intel Corporation, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "config.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "sock_util.h" -#include "sock.h" - -#define SOCK_LOG_DBG(...) _SOCK_LOG_DBG(FI_LOG_EP_CTRL, __VA_ARGS__) -#define SOCK_LOG_ERROR(...) _SOCK_LOG_ERROR(FI_LOG_EP_CTRL, __VA_ARGS__) - - -int sock_dgram_ep(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **ep, void *context) -{ - int ret; - struct sock_ep *endpoint; - - ret = sock_alloc_endpoint(domain, info, &endpoint, context, FI_CLASS_EP); - if (ret) - return ret; - - *ep = &endpoint->ep; - return 0; -} - -int sock_dgram_sep(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **sep, void *context) -{ - int ret; - struct sock_ep *endpoint; - - ret = sock_alloc_endpoint(domain, info, &endpoint, context, FI_CLASS_SEP); - if (ret) - return ret; - - *sep = &endpoint->ep; - return 0; -} diff --git a/prov/sockets/src/sock_ep_msg.c b/prov/sockets/src/sock_ep_msg.c deleted file mode 100644 index 19c37aa9cbf..00000000000 --- a/prov/sockets/src/sock_ep_msg.c +++ /dev/null @@ -1,1304 +0,0 @@ -/* - * Copyright (c) 2014 Intel Corporation, Inc. All rights reserved. - * Copyright (c) 2017 DataDirect Networks, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "sock.h" -#include "sock_util.h" - -#define SOCK_LOG_DBG(...) _SOCK_LOG_DBG(FI_LOG_EP_CTRL, __VA_ARGS__) -#define SOCK_LOG_ERROR(...) 
_SOCK_LOG_ERROR(FI_LOG_EP_CTRL, __VA_ARGS__) - - -static int sock_ep_cm_getname(fid_t fid, void *addr, size_t *addrlen) -{ - struct sock_ep *sock_ep = NULL; - struct sock_pep *sock_pep = NULL; - size_t len; - - switch (fid->fclass) { - case FI_CLASS_EP: - case FI_CLASS_SEP: - sock_ep = container_of(fid, struct sock_ep, ep.fid); - if (sock_ep->attr->is_enabled == 0) - return -FI_EOPBADSTATE; - - len = MIN(*addrlen, ofi_sizeofaddr(&sock_ep->attr->src_addr->sa)); - memcpy(addr, sock_ep->attr->src_addr, len); - *addrlen = ofi_sizeofaddr(&sock_ep->attr->src_addr->sa); - break; - case FI_CLASS_PEP: - sock_pep = container_of(fid, struct sock_pep, pep.fid); - if (!sock_pep->name_set) - return -FI_EOPBADSTATE; - - len = MIN(*addrlen, ofi_sizeofaddr(&sock_pep->src_addr.sa)); - memcpy(addr, &sock_pep->src_addr, len); - *addrlen = ofi_sizeofaddr(&sock_pep->src_addr.sa); - break; - default: - SOCK_LOG_ERROR("Invalid argument\n"); - return -FI_EINVAL; - } - - return (len == *addrlen) ? 0 : -FI_ETOOSMALL; -} - -static int sock_pep_create_listener(struct sock_pep *pep) -{ - int ret; - socklen_t addr_size; - - pep->cm.sock = ofi_socket(pep->src_addr.sa.sa_family, - SOCK_STREAM, IPPROTO_TCP); - if (pep->cm.sock == INVALID_SOCKET) - return -ofi_sockerr(); - - sock_set_sockopts(pep->cm.sock, SOCK_OPTS_NONBLOCK); - - ret = bind(pep->cm.sock, &pep->src_addr.sa, - (socklen_t) ofi_sizeofaddr(&pep->src_addr.sa)); - if (ret) { - SOCK_LOG_ERROR("failed to bind listener: %s\n", - strerror(ofi_sockerr())); - ret = -ofi_sockerr(); - goto err; - } - - addr_size = sizeof(pep->src_addr); - if (ofi_getsockname(pep->cm.sock, &pep->src_addr.sa, &addr_size) == - SOCKET_ERROR) { - ret = -ofi_sockerr(); - goto err; - } - - if (listen(pep->cm.sock, sock_cm_def_map_sz)) { - SOCK_LOG_ERROR("failed to listen socket: %s\n", - strerror(ofi_sockerr())); - ret = -ofi_sockerr(); - goto err; - } - - pep->cm.do_listen = 1; - pep->name_set = 1; - return 0; -err: - if (pep->cm.sock) { - 
ofi_close_socket(pep->cm.sock); - pep->cm.sock = INVALID_SOCKET; - } - - return ret; -} - -static int sock_ep_cm_setname(fid_t fid, void *addr, size_t addrlen) -{ - struct sock_ep *sock_ep = NULL; - struct sock_pep *sock_pep = NULL; - - if (!addrlen || addrlen != ofi_sizeofaddr(addr)) - return -FI_EINVAL; - - switch (fid->fclass) { - case FI_CLASS_EP: - case FI_CLASS_SEP: - sock_ep = container_of(fid, struct sock_ep, ep.fid); - if (sock_ep->attr->conn_handle.do_listen) - return -FI_EINVAL; - memcpy(sock_ep->attr->src_addr, addr, addrlen); - return sock_conn_listen(sock_ep->attr); - case FI_CLASS_PEP: - sock_pep = container_of(fid, struct sock_pep, pep.fid); - if (sock_pep->cm.listener_thread) - return -FI_EINVAL; - memcpy(&sock_pep->src_addr, addr, addrlen); - return sock_pep_create_listener(sock_pep); - default: - SOCK_LOG_ERROR("Invalid argument\n"); - return -FI_EINVAL; - } -} - -static int sock_ep_cm_getpeer(struct fid_ep *ep, void *addr, size_t *addrlen) -{ - struct sock_ep *sock_ep; - size_t len; - - sock_ep = container_of(ep, struct sock_ep, ep); - len = MIN(*addrlen, ofi_sizeofaddr(&sock_ep->attr->dest_addr->sa)); - memcpy(addr, sock_ep->attr->dest_addr, len); - *addrlen = ofi_sizeofaddr(&sock_ep->attr->dest_addr->sa); - return (len == *addrlen) ? 
0 : -FI_ETOOSMALL; -} - -static int sock_cm_send(int fd, const void *buf, size_t len) -{ - ssize_t ret, done = 0; - - while ((size_t) done != len) { - ret = ofi_send_socket(fd, (const char*) buf + done, - len - done, MSG_NOSIGNAL); - if (ret < 0) { - if (OFI_SOCK_TRY_SND_RCV_AGAIN(ofi_sockerr())) - continue; - SOCK_LOG_ERROR("failed to write to fd: %s\n", - strerror(ofi_sockerr())); - return -FI_EIO; - } - done += ret; - } - return 0; -} - -static int sock_cm_recv(int fd, void *buf, size_t len) -{ - ssize_t ret, done = 0; - while ((size_t) done != len) { - ret = ofi_recv_socket(fd, (char*) buf + done, len - done, 0); - if (ret <= 0) { - if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) - continue; - SOCK_LOG_ERROR("failed to read from fd: %s\n", - strerror(ofi_sockerr())); - return -FI_EIO; - } - done += ret; - } - return 0; -} - -static void sock_ep_cm_monitor_handle(struct sock_ep_cm_head *cm_head, - struct sock_conn_req_handle *handle, - uint32_t events) -{ - int ret; - - pthread_mutex_lock(&cm_head->signal_lock); - if (handle->monitored) - goto unlock; - - /* Mark the handle as monitored before adding it to the pollset */ - handle->monitored = 1; - ret = ofi_epoll_add(cm_head->epollfd, handle->sock_fd, - events, handle); - if (ret) { - SOCK_LOG_ERROR("failed to monitor fd %d: %d\n", - handle->sock_fd, ret); - handle->monitored = 0; - } else { - fd_signal_set(&cm_head->signal); - } -unlock: - pthread_mutex_unlock(&cm_head->signal_lock); -} - -static void -sock_ep_cm_unmonitor_handle_locked(struct sock_ep_cm_head *cm_head, - struct sock_conn_req_handle *handle, - int close_socket) -{ - int ret; - - if (handle->monitored) { - ret = ofi_epoll_del(cm_head->epollfd, handle->sock_fd); - if (ret) - SOCK_LOG_ERROR("failed to unmonitor fd %d: %d\n", - handle->sock_fd, ret); - handle->monitored = 0; - cm_head->removed_from_epollfd = true; - } - - /* Multiple threads might call sock_ep_cm_unmonitor_handle() at the - * same time. 
Some caution is required to prevent a socket from being - * close concurrently, which could cause an unexpected socket to be - * closed by mistake. */ - if (close_socket && handle->sock_fd != INVALID_SOCKET) { - ofi_close_socket(handle->sock_fd); - handle->sock_fd = INVALID_SOCKET; - } -} - -static void sock_ep_cm_unmonitor_handle(struct sock_ep_cm_head *cm_head, - struct sock_conn_req_handle *handle, - int close_socket) -{ - pthread_mutex_lock(&cm_head->signal_lock); - sock_ep_cm_unmonitor_handle_locked(cm_head, handle, close_socket); - pthread_mutex_unlock(&cm_head->signal_lock); -} - -static void sock_ep_cm_shutdown_report(struct sock_ep *ep, int send_shutdown) -{ - struct fi_eq_cm_entry cm_entry = {0}; - struct sock_conn_hdr msg = {0}; - enum sock_cm_state old_state; - - ofi_mutex_lock(&ep->attr->cm.lock); - old_state = ep->attr->cm.state; - switch (ep->attr->cm.state) { - case SOCK_CM_STATE_REQUESTED: - /* fallthrough */ - case SOCK_CM_STATE_CONNECTED: - ep->attr->cm.state = SOCK_CM_STATE_DISCONNECTED; - break; - case SOCK_CM_STATE_DISCONNECTED: - /* Nothing to do, already disconnected */ - break; - default: - assert(0); - break; - } - ofi_mutex_unlock(&ep->attr->cm.lock); - - switch (old_state) { - case SOCK_CM_STATE_CONNECTED: - if (send_shutdown) { - msg.type = SOCK_CONN_SHUTDOWN; - if (sock_cm_send(ep->attr->cm.sock, &msg, sizeof(msg))) - SOCK_LOG_DBG("failed to send shutdown msg\n"); - } - - cm_entry.fid = &ep->ep.fid; - SOCK_LOG_DBG("reporting FI_SHUTDOWN\n"); - if (sock_eq_report_event(ep->attr->eq, FI_SHUTDOWN, - &cm_entry, sizeof(cm_entry), 0)) - SOCK_LOG_ERROR("Error in writing to EQ\n"); - break; - case SOCK_CM_STATE_REQUESTED: - SOCK_LOG_DBG("reporting FI_REJECT\n"); - if (sock_eq_report_error(ep->attr->eq, &ep->ep.fid, NULL, 0, - FI_ECONNREFUSED, -FI_ECONNREFUSED, - NULL, 0)) - SOCK_LOG_ERROR("Error in writing to EQ\n"); - break; - - case SOCK_CM_STATE_DISCONNECTED: - /* Nothing to do, already disconnected */ - break; - default: - assert(0); - 
break; - } -} - -static void sock_ep_cm_shutdown_handler(struct sock_ep_cm_head *cm_head, - struct sock_conn_hdr *hdr, - struct sock_conn_req_handle *handle) -{ - struct sock_ep *ep = handle->ep; - assert(ep); - - assert(hdr->type == SOCK_CONN_SHUTDOWN); - sock_ep_cm_shutdown_report(ep, 0); - sock_ep_cm_unmonitor_handle_locked(cm_head, handle, 1); -} - -static void sock_ep_cm_report_connect_fail(struct sock_ep *ep, - void *param, size_t paramlen) -{ - int do_report = 0; - - ofi_mutex_lock(&ep->attr->cm.lock); - if (ep->attr->cm.state == SOCK_CM_STATE_REQUESTED) { - do_report = 1; - ep->attr->cm.state = SOCK_CM_STATE_DISCONNECTED; - } - ofi_mutex_unlock(&ep->attr->cm.lock); - - if (do_report) { - SOCK_LOG_DBG("reporting FI_REJECT\n"); - if (sock_eq_report_error(ep->attr->eq, &ep->ep.fid, NULL, 0, - FI_ECONNREFUSED, -FI_ECONNREFUSED, - param, paramlen)) - SOCK_LOG_ERROR("Error in writing to EQ\n"); - } -} - -/* Caller must hold `cm_head::signal_lock` */ -static void sock_ep_cm_add_to_msg_list(struct sock_ep_cm_head *cm_head, - struct sock_conn_req_handle *handle) -{ - dlist_insert_tail(&handle->entry, &cm_head->msg_list); - fd_signal_set(&cm_head->signal); -} - -static void sock_ep_cm_connect_handler(struct sock_ep_cm_head *cm_head, - struct sock_conn_hdr *hdr, - struct sock_conn_req_handle *handle) -{ - int sock_fd = handle->sock_fd; - struct sock_ep *ep = handle->ep; - void *param = NULL; - struct fi_eq_cm_entry *cm_entry = NULL; - int cm_data_sz, response_port; - - assert(hdr->type == SOCK_CONN_ACCEPT || - hdr->type == SOCK_CONN_REJECT); - - cm_data_sz = ntohs(hdr->cm_data_sz); - if (cm_data_sz > SOCK_EP_MAX_CM_DATA_SZ) { - SOCK_LOG_ERROR("CM data size too large\n"); - goto err; - } - - response_port = ntohs(hdr->port); - if (cm_data_sz) { - param = calloc(1, cm_data_sz); - if (!param) - goto err; - - if (sock_cm_recv(sock_fd, param, cm_data_sz)) - goto err; - } - - if (hdr->type == SOCK_CONN_REJECT) { - sock_ep_cm_report_connect_fail(handle->ep, param, 
cm_data_sz); - sock_ep_cm_unmonitor_handle_locked(cm_head, handle, 1); - } else { - cm_entry = calloc(1, sizeof(*cm_entry) + SOCK_EP_MAX_CM_DATA_SZ); - if (!cm_entry) - goto err; - - cm_entry->fid = &ep->ep.fid; - if (cm_data_sz) - memcpy(&cm_entry->data, param, cm_data_sz); - ep->attr->cm.state = SOCK_CM_STATE_CONNECTED; - ep->attr->cm.sock = sock_fd; - ep->attr->msg_dest_port = (uint16_t) response_port; - SOCK_LOG_DBG("got accept - port: %d\n", response_port); - - SOCK_LOG_DBG("Reporting FI_CONNECTED\n"); - if (sock_eq_report_event(ep->attr->eq, FI_CONNECTED, cm_entry, - sizeof(*cm_entry) + cm_data_sz, 0)) - SOCK_LOG_ERROR("Error in writing to EQ\n"); - } - goto out; -err: - SOCK_LOG_ERROR("io failed : %s\n", strerror(ofi_sockerr())); - sock_ep_cm_report_connect_fail(handle->ep, NULL, 0); - sock_ep_cm_unmonitor_handle_locked(cm_head, handle, 1); - handle->ep->attr->info.handle = NULL; - /* Register handle for later deletion */ - handle->state = SOCK_CONN_HANDLE_DELETED; - /* `cm_head::signal_lock` has already been held - * in `sock_ep_cm_thread` function */ - sock_ep_cm_add_to_msg_list(cm_head, handle); -out: - free(param); - free(cm_entry); -} - -static struct sock_conn_req_handle *sock_ep_cm_new_handle(void) -{ - struct sock_conn_req_handle *handle; - - handle = calloc(1, sizeof(*handle)); - if (handle) { - pthread_mutex_init(&handle->finalized_mutex, NULL); - pthread_cond_init(&handle->finalized_cond, NULL); - handle->state = SOCK_CONN_HANDLE_ACTIVE; - } - return handle; -} - -static int sock_ep_cm_connect(struct fid_ep *ep, const void *addr, - const void *param, size_t paramlen) -{ - struct sock_conn_req *req = NULL; - struct sock_ep_cm_head *cm_head = NULL; - struct sock_conn_req_handle *handle = NULL; - int sock_fd, ret; - struct sock_ep *_ep; - struct sock_eq *_eq; - - _ep = container_of(ep, struct sock_ep, ep); - _eq = _ep->attr->eq; - if (!_eq || !addr || (paramlen > SOCK_EP_MAX_CM_DATA_SZ)) - return -FI_EINVAL; - - if (!_ep->attr->conn_handle.do_listen 
&& sock_conn_listen(_ep->attr)) - return -FI_EINVAL; - - if (!_ep->attr->dest_addr) { - _ep->attr->dest_addr = calloc(1, sizeof(*_ep->attr->dest_addr)); - if (!_ep->attr->dest_addr) - return -FI_ENOMEM; - } - memcpy(_ep->attr->dest_addr, addr, ofi_sizeofaddr(addr)); - - req = calloc(1, sizeof(*req)); - if (!req) - return -FI_ENOMEM; - - handle = sock_ep_cm_new_handle(); - if (!handle) { - ret = -FI_ENOMEM; - goto err; - } - - req->hdr.type = SOCK_CONN_REQ; - req->hdr.port = htons(_ep->attr->msg_src_port); - req->hdr.cm_data_sz = htons((uint16_t) paramlen); - req->caps = _ep->attr->info.caps; - memcpy(&req->src_addr, _ep->attr->src_addr, - ofi_sizeofaddr(&_ep->attr->src_addr->sa)); - memcpy(&handle->dest_addr, addr, ofi_sizeofaddr(addr)); - - cm_head = &_ep->attr->domain->cm_head; - _ep->attr->info.handle = (void*) handle; - handle->ep = _ep; - handle->req = req; - if (paramlen) { - handle->paramlen = paramlen; - memcpy(handle->cm_data, param, paramlen); - } - - sock_fd = ofi_socket(handle->dest_addr.sa.sa_family, SOCK_STREAM, 0); - if (sock_fd < 0) { - SOCK_LOG_ERROR("no socket\n"); - ret = -ofi_sockerr(); - goto err; - } - - ofi_straddr_dbg(&sock_prov, FI_LOG_EP_CTRL, "Connecting to address", - &handle->dest_addr); - sock_set_sockopts(sock_fd, SOCK_OPTS_KEEPALIVE); - ret = connect(sock_fd, &handle->dest_addr.sa, - (socklen_t) ofi_sizeofaddr(&handle->dest_addr.sa)); - if (ret < 0) { - SOCK_LOG_ERROR("connect failed : %s\n", - strerror(ofi_sockerr())); - ret = -ofi_sockerr(); - goto close_socket; - } - - ret = sock_cm_send(sock_fd, req, sizeof(*req)); - if (ret) - goto close_socket; - - if (handle->paramlen) { - ret = sock_cm_send(sock_fd, handle->cm_data, handle->paramlen); - if (ret) - goto close_socket; - } - - /* Monitor the connection */ - _ep->attr->cm.state = SOCK_CM_STATE_REQUESTED; - handle->sock_fd = sock_fd; - sock_ep_cm_monitor_handle(cm_head, handle, OFI_EPOLL_IN); - - return 0; -close_socket: - SOCK_LOG_ERROR("io failed : %s\n", strerror(errno)); - 
ofi_close_socket(sock_fd); -err: - _ep->attr->info.handle = NULL; - free(req); - free(handle); - return ret; -} - -static int sock_ep_cm_accept(struct fid_ep *ep, const void *param, size_t paramlen) -{ - int ret; - struct sock_ep_cm_head *cm_head = NULL; - struct sock_conn_req_handle *handle; - struct sock_ep_attr *ep_attr; - struct fi_eq_cm_entry cm_entry; - struct sock_conn_hdr reply = {0}; - struct sock_ep *_ep; - - _ep = container_of(ep, struct sock_ep, ep); - if (!_ep->attr->eq || paramlen > SOCK_EP_MAX_CM_DATA_SZ) - return -FI_EINVAL; - - if (!_ep->attr->conn_handle.do_listen && sock_conn_listen(_ep->attr)) - return -FI_EINVAL; - - handle = container_of(_ep->attr->info.handle, - struct sock_conn_req_handle, handle); - if (!handle || handle->handle.fclass != FI_CLASS_CONNREQ) { - SOCK_LOG_ERROR("invalid handle for cm_accept\n"); - return -FI_EINVAL; - } - - handle->ep = _ep; - handle->paramlen = 0; - handle->state = SOCK_CONN_HANDLE_ACCEPTED; - if (paramlen) { - handle->paramlen = paramlen; - memcpy(handle->cm_data, param, paramlen); - } - cm_head = &_ep->attr->domain->cm_head; - ep_attr = handle->ep->attr; - ep_attr->msg_dest_port = ntohs(handle->req->hdr.port); - - reply.type = SOCK_CONN_ACCEPT; - reply.port = htons(ep_attr->msg_src_port); - reply.cm_data_sz = htons((uint16_t) handle->paramlen); - ret = sock_cm_send(handle->sock_fd, &reply, sizeof(reply)); - if (ret) { - SOCK_LOG_ERROR("failed to reply\n"); - return ret; - } - - if (handle->paramlen) { - ret = sock_cm_send(handle->sock_fd, handle->cm_data, handle->paramlen); - if (ret) { - SOCK_LOG_ERROR("failed to send userdata\n"); - return ret; - } - } - /* Monitor the handle prior to report the event */ - sock_ep_cm_monitor_handle(cm_head, handle, OFI_EPOLL_IN); - sock_ep_enable(ep); - - memset(&cm_entry, 0, sizeof(cm_entry)); - cm_entry.fid = &handle->ep->ep.fid; - SOCK_LOG_DBG("reporting FI_CONNECTED\n"); - if (sock_eq_report_event(ep_attr->eq, FI_CONNECTED, &cm_entry, - sizeof(cm_entry), 0)) - 
SOCK_LOG_ERROR("Error in writing to EQ\n"); - ep_attr->cm.state = SOCK_CM_STATE_CONNECTED; - ep_attr->cm.sock = handle->sock_fd; - - return 0; -} - -static int sock_ep_cm_shutdown(struct fid_ep *ep, uint64_t flags) -{ - struct sock_ep *_ep; - - _ep = container_of(ep, struct sock_ep, ep); - sock_ep_cm_shutdown_report(_ep, 1); - - ofi_close_socket(_ep->attr->cm.sock); - _ep->attr->cm.sock = INVALID_SOCKET; - sock_ep_disable(ep); - return 0; -} - -struct fi_ops_cm sock_ep_cm_ops = { - .size = sizeof(struct fi_ops_cm), - .setname = sock_ep_cm_setname, - .getname = sock_ep_cm_getname, - .getpeer = sock_ep_cm_getpeer, - .connect = sock_ep_cm_connect, - .listen = fi_no_listen, - .accept = sock_ep_cm_accept, - .reject = fi_no_reject, - .shutdown = sock_ep_cm_shutdown, - .join = fi_no_join, -}; - -int sock_msg_ep(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **ep, void *context) -{ - struct sock_ep *endpoint; - struct sock_pep *pep; - int ret; - - ret = sock_alloc_endpoint(domain, info, &endpoint, context, FI_CLASS_EP); - if (ret) - return ret; - - if (info && info->handle && info->handle->fclass == FI_CLASS_PEP) { - pep = container_of(info->handle, struct sock_pep, pep.fid); - *endpoint->attr->src_addr = pep->src_addr; - } - - *ep = &endpoint->ep; - return 0; -} - -static int sock_pep_fi_bind(fid_t fid, struct fid *bfid, uint64_t flags) -{ - struct sock_pep *pep; - struct sock_eq *eq; - - pep = container_of(fid, struct sock_pep, pep.fid); - - if (bfid->fclass != FI_CLASS_EQ) - return -FI_EINVAL; - - eq = container_of(bfid, struct sock_eq, eq.fid); - if (pep->sock_fab != eq->sock_fab) { - SOCK_LOG_ERROR("Cannot bind Passive EP and EQ on different fabric\n"); - return -FI_EINVAL; - } - pep->eq = eq; - return 0; -} - -static int sock_pep_fi_close(fid_t fid) -{ - ssize_t ret; - char c = 0; - struct sock_pep *pep; - - pep = container_of(fid, struct sock_pep, pep.fid); - pep->cm.do_listen = 0; - ret = ofi_write_socket(pep->cm.signal_fds[0], &c, 1); - if (ret 
!= 1) - SOCK_LOG_DBG("Failed to signal\n"); - - if (pep->cm.listener_thread && - pthread_join(pep->cm.listener_thread, NULL)) { - SOCK_LOG_DBG("pthread join failed\n"); - } - - sock_ep_cm_stop_thread(&pep->cm_head); - - ofi_close_socket(pep->cm.signal_fds[0]); - ofi_close_socket(pep->cm.signal_fds[1]); - - free(pep); - return 0; -} - -static struct fi_ops sock_pep_fi_ops = { - .size = sizeof(struct fi_ops), - .close = sock_pep_fi_close, - .bind = sock_pep_fi_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open, -}; - -static struct fi_info *sock_ep_msg_get_info(struct sock_pep *pep, - struct sock_conn_req *req) -{ - struct fi_info hints; - uint64_t requested, supported; - - requested = req->caps & sock_msg_info.caps; - supported = pep->info.caps & sock_msg_info.caps; - supported = (supported & FI_RMA) ? - (supported | FI_REMOTE_READ | FI_REMOTE_WRITE) : supported; - if ((requested | supported) != supported) - return NULL; - - hints = pep->info; - hints.caps = req->caps; - return sock_fi_info(pep->sock_fab->fab_fid.api_version, FI_EP_MSG, - &hints, &pep->src_addr, &req->src_addr); -} - -void sock_ep_cm_signal(struct sock_ep_cm_head *cm_head) -{ - pthread_mutex_lock(&cm_head->signal_lock); - fd_signal_set(&cm_head->signal); - pthread_mutex_unlock(&cm_head->signal_lock); -} - -static void sock_ep_cm_process_rejected(struct sock_ep_cm_head *cm_head, - struct sock_conn_req_handle *hreq) -{ - struct sock_conn_hdr reply; - - reply.type = SOCK_CONN_REJECT; - reply.cm_data_sz = htons((uint16_t) hreq->paramlen); - - SOCK_LOG_DBG("sending reject message\n"); - if (sock_cm_send(hreq->sock_fd, &reply, sizeof(reply))) { - SOCK_LOG_ERROR("failed to reply\n"); - goto free_handle; - } - - if (hreq->paramlen && sock_cm_send(hreq->sock_fd, hreq->cm_data, - hreq->paramlen)) { - SOCK_LOG_ERROR("failed to send userdata\n"); - goto free_handle; - } - -free_handle: - sock_ep_cm_unmonitor_handle(cm_head, hreq, 1); - free(hreq->req); - free(hreq); -} - -static void 
sock_ep_cm_process_deleted(struct sock_ep_cm_head *cm_head, - struct sock_conn_req_handle *hreq) -{ - free(hreq->req); - free(hreq); -} - -static void sock_ep_cm_process_finalizing(struct sock_ep_cm_head *cm_head, - struct sock_conn_req_handle *hreq) -{ - sock_ep_cm_unmonitor_handle(cm_head, hreq, 1); - - pthread_mutex_lock(&hreq->finalized_mutex); - hreq->state = SOCK_CONN_HANDLE_FINALIZED; - pthread_cond_signal(&hreq->finalized_cond); - pthread_mutex_unlock(&hreq->finalized_mutex); -} - -static struct sock_conn_req_handle * -sock_ep_cm_pop_from_msg_list(struct sock_ep_cm_head *cm_head) -{ - struct dlist_entry *entry; - struct sock_conn_req_handle *hreq = NULL; - - pthread_mutex_lock(&cm_head->signal_lock); - if (!dlist_empty(&cm_head->msg_list)) { - entry = cm_head->msg_list.next; - dlist_remove(entry); - hreq = container_of(entry, struct sock_conn_req_handle, entry); - } - pthread_mutex_unlock(&cm_head->signal_lock); - return hreq; -} - -static void -sock_ep_cm_check_closing_rejected_list(struct sock_ep_cm_head *cm_head) -{ - struct sock_conn_req_handle *hreq; - - while ((hreq = sock_ep_cm_pop_from_msg_list(cm_head)) != NULL) { - switch (hreq->state) { - case SOCK_CONN_HANDLE_REJECTED: - sock_ep_cm_process_rejected(cm_head, hreq); - break; - case SOCK_CONN_HANDLE_FINALIZING: - sock_ep_cm_process_finalizing(cm_head, hreq); - break; - case SOCK_CONN_HANDLE_DELETED: - sock_ep_cm_process_deleted(cm_head, hreq); - break; - default: - assert(0); - break; - } - } -} - -static void sock_pep_req_handler(struct sock_ep_cm_head *cm_head, - struct sock_conn_hdr *hdr, - struct sock_conn_req_handle *handle) -{ - int ret, entry_sz; - struct fi_info *info; - struct sock_conn_req *conn_req = NULL; - struct fi_eq_cm_entry *cm_entry = NULL; - int req_cm_data_sz; - - assert(hdr->type == SOCK_CONN_REQ); - - conn_req = calloc(1, sizeof(*conn_req) + SOCK_EP_MAX_CM_DATA_SZ); - if (!conn_req) { - SOCK_LOG_ERROR("cannot allocate memory\n"); - goto err; - } - - memcpy(&conn_req->hdr, hdr, 
sizeof(*hdr)); - - ret = sock_cm_recv(handle->sock_fd, - &conn_req->src_addr, - sizeof(*conn_req) - sizeof(struct sock_conn_hdr)); - if (ret) { - SOCK_LOG_ERROR("IO failed\n"); - goto err; - } - - req_cm_data_sz = ntohs(conn_req->hdr.cm_data_sz); - if (req_cm_data_sz > SOCK_EP_MAX_CM_DATA_SZ) { - SOCK_LOG_ERROR("CM data size is too large\n"); - goto err; - } - - if (req_cm_data_sz) { - ret = sock_cm_recv(handle->sock_fd, conn_req->cm_data, - req_cm_data_sz); - if (ret) { - SOCK_LOG_ERROR("IO failed for cm-data\n"); - goto err; - } - } - - info = sock_ep_msg_get_info(handle->pep, conn_req); - if (!info) { - handle->paramlen = 0; - handle->state = SOCK_CONN_HANDLE_REJECTED; - /* `cm_head::signal_lock` has already been held - * in `sock_ep_cm_thread` function */ - sock_ep_cm_add_to_msg_list(cm_head, handle); - - free(conn_req); - return; - } - - cm_entry = calloc(1, sizeof(*cm_entry) + req_cm_data_sz); - if (!cm_entry) { - SOCK_LOG_ERROR("cannot allocate memory\n"); - goto err; - } - - handle->handle.fclass = FI_CLASS_CONNREQ; - handle->req = conn_req; - - entry_sz = sizeof(*cm_entry) + req_cm_data_sz; - cm_entry->fid = &handle->pep->pep.fid; - cm_entry->info = info; - cm_entry->info->handle = &handle->handle; - memcpy(cm_entry->data, conn_req->cm_data, req_cm_data_sz); - - sock_ep_cm_unmonitor_handle_locked(cm_head, handle, 0); - - SOCK_LOG_DBG("reporting conn-req to EQ\n"); - if (sock_eq_report_event(handle->pep->eq, FI_CONNREQ, cm_entry, entry_sz, 0)) - SOCK_LOG_ERROR("Error in writing to EQ\n"); - - free(cm_entry); - return; -err: - ofi_close_socket(handle->sock_fd); - free(cm_entry); - free(conn_req); - free(handle); -} - -static void *sock_pep_listener_thread(void *data) -{ - struct sock_pep *pep = (struct sock_pep *) data; - struct sock_conn_req_handle *handle = NULL; - struct pollfd poll_fds[2]; - - ssize_t ret = 0, conn_fd; - char tmp = 0; - - SOCK_LOG_DBG("Starting listener thread for PEP: %p\n", pep); - poll_fds[0].fd = pep->cm.sock; - poll_fds[1].fd = 
pep->cm.signal_fds[1]; - poll_fds[0].events = poll_fds[1].events = POLLIN; - while (*((volatile int *) &pep->cm.do_listen)) { - ret = poll(poll_fds, 2, -1); - if (ret > 0) { - if (poll_fds[1].revents & POLLIN) { - ret = ofi_read_socket(pep->cm.signal_fds[1], &tmp, 1); - if (ret != 1) - SOCK_LOG_DBG("Invalid signal\n"); - continue; - } - } else { - break; - } - - conn_fd = accept(pep->cm.sock, NULL, 0); - if (conn_fd < 0) { - SOCK_LOG_ERROR("failed to accept: %d\n", ofi_sockerr()); - continue; - } - - sock_set_sockopts(conn_fd, SOCK_OPTS_KEEPALIVE); - handle = sock_ep_cm_new_handle(); - if (!handle) { - SOCK_LOG_ERROR("cannot allocate memory\n"); - ofi_close_socket(conn_fd); - break; - } - - handle->sock_fd = conn_fd; - handle->pep = pep; - - /* Monitor the connection */ - sock_ep_cm_monitor_handle(&pep->cm_head, handle, OFI_EPOLL_IN); - } - - SOCK_LOG_DBG("PEP listener thread exiting\n"); - ofi_close_socket(pep->cm.sock); - return NULL; -} - -static int sock_pep_start_listener_thread(struct sock_pep *pep) -{ - if (pthread_create(&pep->cm.listener_thread, NULL, - sock_pep_listener_thread, (void *)pep)) { - SOCK_LOG_ERROR("Couldn't create listener thread\n"); - return -FI_EINVAL; - } - return 0; -} - -static int sock_pep_listen(struct fid_pep *pep) -{ - struct sock_pep *_pep; - _pep = container_of(pep, struct sock_pep, pep); - if (_pep->cm.listener_thread) - return 0; - - if (sock_ep_cm_start_thread(&_pep->cm_head)) { - SOCK_LOG_ERROR("Couldn't create listener thread\n"); - return -FI_EINVAL; - } - - if (!_pep->cm.do_listen && sock_pep_create_listener(_pep)) { - SOCK_LOG_ERROR("Failed to create pep thread\n"); - return -FI_EINVAL; - } - - return sock_pep_start_listener_thread(_pep); -} - -static int sock_pep_reject(struct fid_pep *pep, fid_t handle, - const void *param, size_t paramlen) -{ - struct sock_conn_req_handle *hreq; - struct sock_conn_req *req; - struct sock_pep *_pep; - struct sock_ep_cm_head *cm_head; - - _pep = container_of(pep, struct sock_pep, pep); - 
hreq = container_of(handle, struct sock_conn_req_handle, handle); - req = hreq->req; - if (!req || hreq->handle.fclass != FI_CLASS_CONNREQ || - hreq->state == SOCK_CONN_HANDLE_ACCEPTED) - return -FI_EINVAL; - - hreq->paramlen = 0; - if (paramlen) { - memcpy(hreq->cm_data, param, paramlen); - hreq->paramlen = paramlen; - } - - cm_head = &_pep->cm_head; - hreq->state = SOCK_CONN_HANDLE_REJECTED; - pthread_mutex_lock(&cm_head->signal_lock); - sock_ep_cm_add_to_msg_list(cm_head, hreq); - pthread_mutex_unlock(&cm_head->signal_lock); - return 0; -} - -static struct fi_ops_cm sock_pep_cm_ops = { - .size = sizeof(struct fi_ops_cm), - .setname = sock_ep_cm_setname, - .getname = sock_ep_cm_getname, - .getpeer = fi_no_getpeer, - .connect = fi_no_connect, - .listen = sock_pep_listen, - .accept = fi_no_accept, - .reject = sock_pep_reject, - .shutdown = fi_no_shutdown, - .join = fi_no_join, -}; - - -int sock_pep_getopt(fid_t fid, int level, int optname, - void *optval, size_t *optlen) -{ - if (level != FI_OPT_ENDPOINT || optname != FI_OPT_CM_DATA_SIZE) - return -FI_ENOPROTOOPT; - - if (*optlen < sizeof(size_t)) { - *optlen = sizeof(size_t); - return -FI_ETOOSMALL; - } - *((size_t *) optval) = SOCK_EP_MAX_CM_DATA_SZ; - *optlen = sizeof(size_t); - return 0; -} - -static struct fi_ops_ep sock_pep_ops = { - .size = sizeof(struct fi_ops_ep), - .getopt = sock_pep_getopt, - .setopt = fi_no_setopt, - .tx_ctx = fi_no_tx_ctx, - .rx_ctx = fi_no_rx_ctx, - .rx_size_left = fi_no_rx_size_left, - .tx_size_left = fi_no_tx_size_left, -}; - -int sock_msg_sep(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **sep, void *context) -{ - int ret; - struct sock_ep *endpoint; - - ret = sock_alloc_endpoint(domain, info, &endpoint, context, FI_CLASS_SEP); - if (ret) - return ret; - - *sep = &endpoint->ep; - return 0; -} - -int sock_msg_passive_ep(struct fid_fabric *fabric, struct fi_info *info, - struct fid_pep **pep, void *context) -{ - int ret = 0; - struct sock_pep *_pep; - struct 
addrinfo hints, *result; - - assert(info); - _pep = calloc(1, sizeof(*_pep)); - if (!_pep) - return -FI_ENOMEM; - - if (info->src_addr) { - memcpy(&_pep->src_addr, info->src_addr, - info->src_addrlen); - } else { - memset(&hints, 0, sizeof(hints)); - hints.ai_socktype = SOCK_STREAM; - hints.ai_family = ofi_get_sa_family(info); - if (!hints.ai_family) - hints.ai_family = AF_INET; - - if (hints.ai_family == AF_INET) { - ret = getaddrinfo("127.0.0.1", NULL, &hints, - &result); - } else if (hints.ai_family == AF_INET6) { - ret = getaddrinfo("::1", NULL, &hints, &result); - } else { - ret = getaddrinfo("localhost", NULL, &hints, - &result); - } - if (ret) { - ret = -FI_EINVAL; - SOCK_LOG_DBG("getaddrinfo failed!\n"); - goto err; - } - memcpy(&_pep->src_addr, result->ai_addr, - result->ai_addrlen); - freeaddrinfo(result); - } - _pep->info = *info; - - ret = socketpair(AF_UNIX, SOCK_STREAM, 0, _pep->cm.signal_fds); - if (ret) { - ret = -ofi_sockerr(); - goto err; - } - - fd_set_nonblock(_pep->cm.signal_fds[1]); - - _pep->pep.fid.fclass = FI_CLASS_PEP; - _pep->pep.fid.context = context; - _pep->pep.fid.ops = &sock_pep_fi_ops; - _pep->pep.cm = &sock_pep_cm_ops; - _pep->pep.ops = &sock_pep_ops; - - _pep->sock_fab = container_of(fabric, struct sock_fabric, fab_fid); - *pep = &_pep->pep; - return 0; -err: - free(_pep); - return ret; -} - -static void sock_ep_cm_handle_rx(struct sock_ep_cm_head *cm_head, - struct sock_conn_req_handle *handle) -{ - struct sock_conn_hdr hdr; - - if (sock_cm_recv(handle->sock_fd, &hdr, sizeof(hdr))) { - SOCK_LOG_ERROR("io failed for fd %d\n", handle->sock_fd); - if (handle->ep) { - sock_ep_cm_shutdown_report(handle->ep, 0); - } - - sock_ep_cm_unmonitor_handle_locked(cm_head, handle, 1); - return; - } - - switch(hdr.type) { - case SOCK_CONN_REQ: - sock_pep_req_handler(cm_head, &hdr, handle); - break; - case SOCK_CONN_ACCEPT: - case SOCK_CONN_REJECT: - sock_ep_cm_connect_handler(cm_head, &hdr, handle); - break; - case SOCK_CONN_SHUTDOWN: - 
sock_ep_cm_shutdown_handler(cm_head, &hdr, handle); - break; - default: - SOCK_LOG_ERROR("Unexpected message type %d\n", hdr.type); - break; - } -} - -static void *sock_ep_cm_thread(void *arg) -{ - int num_fds, i; - struct sock_ep_cm_head *cm_head = arg; - struct ofi_epollfds_event events[SOCK_EPOLL_WAIT_EVENTS]; - struct sock_conn_req_handle *handle; - - while (cm_head->do_listen) { - sock_ep_cm_check_closing_rejected_list(cm_head); - - num_fds = ofi_epoll_wait(cm_head->epollfd, events, - SOCK_EPOLL_WAIT_EVENTS, -1); - if (num_fds < 0) { - SOCK_LOG_ERROR("poll failed : %s\n", strerror(errno)); - continue; - } - - pthread_mutex_lock(&cm_head->signal_lock); - if (cm_head->removed_from_epollfd) { - /* If we removed a socket from the epollfd after - * ofi_epoll_wait returned, we can hit a use after - * free error. If a change was made, we skip processing - * and recheck for events. - */ - cm_head->removed_from_epollfd = false; - goto skip; - } - for (i = 0; i < num_fds; i++) { - handle = events[i].data.ptr; - - if (handle == NULL) { /* Signal event */ - fd_signal_reset(&cm_head->signal); - continue; - } - - /* ep_contexts[] may report multiple events for the same handle. 
- * Suppose we received 2 elements for 1 handle: the first will - * unmonitor the handle, then the second event will have - * handle->monitored set to 0 - */ - if (!handle->monitored) { - assert(handle->sock_fd == INVALID_SOCKET); - continue; - } - - assert(handle->sock_fd != INVALID_SOCKET); - sock_ep_cm_handle_rx(cm_head, handle); - } -skip: - pthread_mutex_unlock(&cm_head->signal_lock); - } - return NULL; -} - - -int sock_ep_cm_start_thread(struct sock_ep_cm_head *cm_head) -{ - assert(cm_head->do_listen == 0); - - pthread_mutex_init(&cm_head->signal_lock, NULL); - dlist_init(&cm_head->msg_list); - - int ret = ofi_epoll_create(&cm_head->epollfd); - if (ret < 0) { - SOCK_LOG_ERROR("failed to create epoll set\n"); - goto err1; - } - - ret = fd_signal_init(&cm_head->signal); - if (ret < 0) { - ret = -errno; - SOCK_LOG_ERROR("failed to init signal\n"); - goto err2; - } - - ret = ofi_epoll_add(cm_head->epollfd, - cm_head->signal.fd[FI_READ_FD], - OFI_EPOLL_IN, NULL); - if (ret != 0){ - SOCK_LOG_ERROR("failed to add signal fd to epoll\n"); - goto err3; - } - - cm_head->do_listen = 1; - cm_head->removed_from_epollfd = false; - ret = pthread_create(&cm_head->listener_thread, 0, - sock_ep_cm_thread, cm_head); - if (ret) { - SOCK_LOG_ERROR("failed to create conn listener thread\n"); - goto err3; - } - return 0; - -err3: - cm_head->do_listen = 0; - fd_signal_free(&cm_head->signal); -err2: - ofi_epoll_close(cm_head->epollfd); -err1: - return ret; -} - -void sock_ep_cm_wait_handle_finalized(struct sock_ep_cm_head *cm_head, - struct sock_conn_req_handle *handle) -{ - handle->state = SOCK_CONN_HANDLE_FINALIZING; - pthread_mutex_lock(&cm_head->signal_lock); - sock_ep_cm_add_to_msg_list(cm_head, handle); - pthread_mutex_unlock(&cm_head->signal_lock); - - pthread_mutex_lock(&handle->finalized_mutex); - while (handle->state != SOCK_CONN_HANDLE_FINALIZED) - ofi_wait_cond(&handle->finalized_cond, - &handle->finalized_mutex, -1); - pthread_mutex_unlock(&handle->finalized_mutex); -} - 
-void sock_ep_cm_stop_thread(struct sock_ep_cm_head *cm_head) -{ - if (cm_head->do_listen == 0) - return; - - cm_head->do_listen = 0; - - sock_ep_cm_signal(cm_head); - - if (cm_head->listener_thread && - pthread_join(cm_head->listener_thread, NULL)) { - SOCK_LOG_DBG("pthread join failed\n"); - } - ofi_epoll_close(cm_head->epollfd); - fd_signal_free(&cm_head->signal); - pthread_mutex_destroy(&cm_head->signal_lock); -} diff --git a/prov/sockets/src/sock_ep_rdm.c b/prov/sockets/src/sock_ep_rdm.c deleted file mode 100644 index ede4bba67eb..00000000000 --- a/prov/sockets/src/sock_ep_rdm.c +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2014 Intel Corporation, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "sock.h" -#include "sock_util.h" - -#define SOCK_LOG_DBG(...) _SOCK_LOG_DBG(FI_LOG_EP_CTRL, __VA_ARGS__) -#define SOCK_LOG_ERROR(...) _SOCK_LOG_ERROR(FI_LOG_EP_CTRL, __VA_ARGS__) - - -int sock_rdm_ep(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **ep, void *context) -{ - int ret; - struct sock_ep *endpoint; - - ret = sock_alloc_endpoint(domain, info, &endpoint, context, FI_CLASS_EP); - if (ret) - return ret; - - *ep = &endpoint->ep; - return 0; -} - -int sock_rdm_sep(struct fid_domain *domain, struct fi_info *info, - struct fid_ep **sep, void *context) -{ - int ret; - struct sock_ep *endpoint; - - ret = sock_alloc_endpoint(domain, info, &endpoint, context, FI_CLASS_SEP); - if (ret) - return ret; - - *sep = &endpoint->ep; - return 0; -} diff --git a/prov/sockets/src/sock_eq.c b/prov/sockets/src/sock_eq.c deleted file mode 100644 index 86c560f0169..00000000000 --- a/prov/sockets/src/sock_eq.c +++ /dev/null @@ -1,447 +0,0 @@ -/* - * Copyright (c) 2014 Intel Corporation, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "sock.h" -#include "sock_util.h" - -#define SOCK_LOG_DBG(...) _SOCK_LOG_DBG(FI_LOG_EQ, __VA_ARGS__) -#define SOCK_LOG_ERROR(...) 
_SOCK_LOG_ERROR(FI_LOG_EQ, __VA_ARGS__) - -static void sock_eq_clean_err_data_list(struct sock_eq *eq, int free_all) -{ - struct dlist_entry *entry, *next_entry; - struct sock_eq_err_data_entry *err_data_entry; - - for (entry = eq->err_data_list.next; entry != &eq->err_data_list;) { - next_entry = entry->next; - err_data_entry = container_of( - entry, struct sock_eq_err_data_entry, entry); - if (free_all || err_data_entry->do_free) { - dlist_remove(entry); - free(err_data_entry); - } - entry = next_entry; - } -} - -static ssize_t sock_eq_sread(struct fid_eq *eq, uint32_t *event, void *buf, - size_t len, int timeout, uint64_t flags) -{ - ssize_t ret; - struct sock_eq *sock_eq; - struct dlist_entry *list; - struct sock_eq_entry *entry; - - sock_eq = container_of(eq, struct sock_eq, eq); - sock_eq_clean_err_data_list(sock_eq, 0); - if (!dlistfd_empty(&sock_eq->err_list)) - return -FI_EAVAIL; - - if (dlistfd_empty(&sock_eq->list)) { - if (!timeout) { - SOCK_LOG_DBG("Nothing to read from eq!\n"); - return -FI_EAGAIN; - } - ret = dlistfd_wait_avail(&sock_eq->list, timeout); - if (!dlistfd_empty(&sock_eq->err_list)) - return -FI_EAVAIL; - - if (ret <= 0) - return (ret == 0 || ret == -FI_ETIMEDOUT) ? - -FI_EAGAIN : ret; - } - - ofi_mutex_lock(&sock_eq->lock); - list = sock_eq->list.list.next; - entry = container_of(list, struct sock_eq_entry, entry); - - if (entry->len > len) { - ret = -FI_ETOOSMALL; - goto out; - } - - ret = entry->len; - *event = entry->type; - memcpy(buf, entry->event, entry->len); - - if (!(flags & FI_PEEK)) { - dlistfd_remove(list, &sock_eq->list); - free(entry); - } - -out: - ofi_mutex_unlock(&sock_eq->lock); - return (ret == 0 || ret == -FI_ETIMEDOUT) ? 
-FI_EAGAIN : ret; -} - - -static ssize_t sock_eq_read(struct fid_eq *eq, uint32_t *event, void *buf, - size_t len, uint64_t flags) -{ - return sock_eq_sread(eq, event, buf, len, 0, flags); -} - -static ssize_t sock_eq_readerr(struct fid_eq *eq, struct fi_eq_err_entry *buf, - uint64_t flags) -{ - ssize_t ret; - struct sock_eq *sock_eq; - struct dlist_entry *list; - struct sock_eq_entry *entry; - struct fi_eq_err_entry *err_entry; - struct sock_eq_err_data_entry *err_data_entry; - void *err_data = NULL; - size_t err_data_size = 0; - uint32_t api_version; - - sock_eq = container_of(eq, struct sock_eq, eq); - ofi_mutex_lock(&sock_eq->lock); - if (dlistfd_empty(&sock_eq->err_list)) { - ret = -FI_EAGAIN; - goto out; - } - - api_version = sock_eq->sock_fab->fab_fid.api_version; - - list = sock_eq->err_list.list.next; - entry = container_of(list, struct sock_eq_entry, entry); - err_entry = (struct fi_eq_err_entry *) entry->event; - - ret = entry->len; - - if ((FI_VERSION_GE(api_version, FI_VERSION(1, 5))) - && buf->err_data && buf->err_data_size) { - err_data = buf->err_data; - err_data_size = buf->err_data_size; - *buf = *err_entry; - buf->err_data = err_data; - - /* Fill provided user's buffer */ - buf->err_data_size = MIN(err_entry->err_data_size, err_data_size); - memcpy(buf->err_data, err_entry->err_data, buf->err_data_size); - } else { - *buf = *err_entry; - } - - if (!(flags & FI_PEEK)) { - if (err_entry->err_data) { - err_data_entry = container_of( - err_entry->err_data, - struct sock_eq_err_data_entry, err_data); - err_data_entry->do_free = 1; - } - - dlistfd_remove(list, &sock_eq->err_list); - dlistfd_reset(&sock_eq->list); - free(entry); - } - -out: - ofi_mutex_unlock(&sock_eq->lock); - return (ret == 0) ? 
-FI_EAGAIN : ret; -} - -int sock_eq_report_event(struct sock_eq *sock_eq, uint32_t event, - const void *buf, size_t len, uint64_t flags) -{ - struct sock_eq_entry *entry; - - entry = calloc(1, len + sizeof(*entry)); - if (!entry) - return -FI_ENOMEM; - - entry->type = event; - entry->len = len; - entry->flags = flags; - memcpy(entry->event, buf, len); - - ofi_mutex_lock(&sock_eq->lock); - dlistfd_insert_tail(&entry->entry, &sock_eq->list); - if (sock_eq->signal) - sock_wait_signal(sock_eq->waitset); - ofi_mutex_unlock(&sock_eq->lock); - return 0; -} - -int sock_eq_report_error(struct sock_eq *sock_eq, fid_t fid, void *context, - uint64_t data, int err, int prov_errno, - void *err_data, size_t err_data_size) -{ - struct fi_eq_err_entry *err_entry; - struct sock_eq_entry *entry; - struct sock_eq_err_data_entry *err_data_entry; - - entry = calloc(1, sizeof(*err_entry) + sizeof(*entry)); - if (!entry) - return -FI_ENOMEM; - - err_entry = (struct fi_eq_err_entry *) entry->event; - err_entry->fid = fid; - err_entry->context = context; - err_entry->data = data; - err_entry->err = err; - err_entry->prov_errno = prov_errno; - err_entry->err_data = err_data; - err_entry->err_data_size = err_data_size; - entry->len = sizeof(*err_entry); - - if (err_data) { - err_data_entry = (struct sock_eq_err_data_entry *) - calloc(1, sizeof(*err_data_entry) + err_data_size); - if (!err_data_entry) { - free(entry); - return -FI_ENOMEM; - } - - err_data_entry->do_free = 0; - memcpy(err_data_entry->err_data, err_data, err_data_size); - err_entry->err_data = err_data_entry->err_data; - dlist_insert_tail(&err_data_entry->entry, - &sock_eq->err_data_list); - } - - ofi_mutex_lock(&sock_eq->lock); - dlistfd_insert_tail(&entry->entry, &sock_eq->err_list); - dlistfd_signal(&sock_eq->list); - if (sock_eq->signal) - sock_wait_signal(sock_eq->waitset); - ofi_mutex_unlock(&sock_eq->lock); - return 0; -} - -static ssize_t sock_eq_write(struct fid_eq *eq, uint32_t event, - const void *buf, size_t len, 
uint64_t flags) -{ - struct sock_eq *sock_eq; - int ret; - - sock_eq = container_of(eq, struct sock_eq, eq); - if (!(sock_eq->attr.flags & FI_WRITE)) - return -FI_EINVAL; - - ret = sock_eq_report_event(sock_eq, event, buf, len, flags); - return ret ? ret : len; - -} - -static const char *sock_eq_strerror(struct fid_eq *eq, int prov_errno, - const void *err_data, char *buf, size_t len) -{ - if (buf && len) - return strncpy(buf, fi_strerror(-prov_errno), len); - return fi_strerror(-prov_errno); -} - -static struct fi_ops_eq sock_eq_ops = { - .size = sizeof(struct fi_ops_eq), - .read = sock_eq_read, - .readerr = sock_eq_readerr, - .write = sock_eq_write, - .sread = sock_eq_sread, - .strerror = sock_eq_strerror, -}; - -static int sock_eq_fi_close(struct fid *fid) -{ - struct sock_eq *sock_eq; - - sock_eq = container_of(fid, struct sock_eq, eq); - sock_eq_clean_err_data_list(sock_eq, 1); - - dlistfd_head_free(&sock_eq->list); - dlistfd_head_free(&sock_eq->err_list); - ofi_mutex_destroy(&sock_eq->lock); - ofi_atomic_dec32(&sock_eq->sock_fab->ref); - - if (sock_eq->signal && sock_eq->attr.wait_obj == FI_WAIT_MUTEX_COND) - sock_wait_close(&sock_eq->waitset->fid); - - free(sock_eq); - return 0; -} - -static int sock_eq_control(struct fid *fid, int command, void *arg) -{ - int ret = 0; - struct sock_eq *eq; - - eq = container_of(fid, struct sock_eq, eq.fid); - switch (command) { - case FI_GETWAIT: - switch (eq->attr.wait_obj) { - case FI_WAIT_NONE: - case FI_WAIT_UNSPEC: - case FI_WAIT_FD: - memcpy(arg, &eq->list.signal.fd[FI_READ_FD], sizeof(int)); - break; - case FI_WAIT_SET: - case FI_WAIT_MUTEX_COND: - sock_wait_get_obj(eq->waitset, arg); - break; - default: - ret = -FI_EINVAL; - break; - } - break; - default: - ret = -FI_EINVAL; - break; - } - return ret; -} - -static struct fi_ops sock_eq_fi_ops = { - .size = sizeof(struct fi_ops), - .close = sock_eq_fi_close, - .bind = fi_no_bind, - .control = sock_eq_control, - .ops_open = fi_no_ops_open, -}; - -static int 
_sock_eq_verify_attr(struct fi_eq_attr *attr) -{ - if (!attr) - return 0; - - switch (attr->wait_obj) { - case FI_WAIT_NONE: - case FI_WAIT_FD: - case FI_WAIT_SET: - case FI_WAIT_MUTEX_COND: - break; - case FI_WAIT_UNSPEC: - attr->wait_obj = FI_WAIT_FD; - break; - default: - return -FI_ENOSYS; - } - - return 0; -} - -static struct fi_eq_attr _sock_eq_def_attr = { - .size = SOCK_EQ_DEF_SZ, - .flags = 0, - .wait_obj = FI_WAIT_FD, - .signaling_vector = 0, - .wait_set = NULL, -}; - -int sock_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr, - struct fid_eq **eq, void *context) -{ - int ret; - struct sock_eq *sock_eq; - struct fi_wait_attr wait_attr; - - ret = _sock_eq_verify_attr(attr); - if (ret) - return ret; - - sock_eq = calloc(1, sizeof(*sock_eq)); - if (!sock_eq) - return -FI_ENOMEM; - - sock_eq->sock_fab = container_of(fabric, struct sock_fabric, fab_fid); - sock_eq->eq.fid.fclass = FI_CLASS_EQ; - sock_eq->eq.fid.context = context; - sock_eq->eq.fid.ops = &sock_eq_fi_ops; - sock_eq->eq.ops = &sock_eq_ops; - - if (attr == NULL) - memcpy(&sock_eq->attr, &_sock_eq_def_attr, - sizeof(struct fi_eq_attr)); - else - memcpy(&sock_eq->attr, attr, sizeof(struct fi_eq_attr)); - - dlist_init(&sock_eq->err_data_list); - ret = dlistfd_head_init(&sock_eq->list); - if (ret) - goto err1; - - ret = dlistfd_head_init(&sock_eq->err_list); - if (ret) - goto err2; - - ofi_mutex_init(&sock_eq->lock); - ofi_atomic_inc32(&sock_eq->sock_fab->ref); - - switch (sock_eq->attr.wait_obj) { - case FI_WAIT_NONE: - case FI_WAIT_UNSPEC: - sock_eq->signal = 0; - break; - case FI_WAIT_FD: - sock_eq->signal = 0; - break; - case FI_WAIT_MUTEX_COND: - wait_attr.flags = 0; - wait_attr.wait_obj = FI_WAIT_MUTEX_COND; - ret = sock_wait_open(fabric, &wait_attr, &sock_eq->waitset); - if (ret) - goto err2; - sock_eq->signal = 1; - break; - case FI_WAIT_SET: - if (!attr) { - ret = -FI_EINVAL; - goto err2; - } - sock_eq->waitset = attr->wait_set; - sock_eq->signal = 1; - break; - default: - break; - 
} - - sock_eq->wait_fd = -1; - *eq = &sock_eq->eq; - return 0; - -err2: - dlistfd_head_free(&sock_eq->list); -err1: - free(sock_eq); - return ret; -} diff --git a/prov/sockets/src/sock_fabric.c b/prov/sockets/src/sock_fabric.c deleted file mode 100644 index d0260b045f7..00000000000 --- a/prov/sockets/src/sock_fabric.c +++ /dev/null @@ -1,398 +0,0 @@ -/* - * Copyright (c) 2014 Intel Corporation, Inc. All rights reserved. - * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2017 DataDirect Networks, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "config.h" - -#include -#include -#include -#include - -#include "ofi_prov.h" -#include "ofi_net.h" - -#include "sock.h" -#include "sock_util.h" - -#define SOCK_LOG_DBG(...) _SOCK_LOG_DBG(FI_LOG_FABRIC, __VA_ARGS__) -#define SOCK_LOG_ERROR(...) _SOCK_LOG_ERROR(FI_LOG_FABRIC, __VA_ARGS__) - -int sock_pe_waittime = SOCK_PE_WAITTIME; -const char sock_fab_name[] = "IP"; -const char sock_dom_name[] = "sockets"; -const char sock_prov_name[] = "sockets"; -int sock_conn_timeout = SOCK_CM_DEF_TIMEOUT; -int sock_conn_retry = SOCK_CM_DEF_RETRY; -int sock_cm_def_map_sz = SOCK_CMAP_DEF_SZ; -int sock_av_def_sz = SOCK_AV_DEF_SZ; -int sock_cq_def_sz = SOCK_CQ_DEF_SZ; -int sock_eq_def_sz = SOCK_EQ_DEF_SZ; -#if ENABLE_DEBUG -int sock_dgram_drop_rate = 0; -#endif -int sock_keepalive_enable; -int sock_keepalive_time = INT_MAX; -int sock_keepalive_intvl = INT_MAX; -int sock_keepalive_probes = INT_MAX; -int sock_buf_sz = 0; - -static struct dlist_entry sock_fab_list; -static struct dlist_entry sock_dom_list; -static ofi_mutex_t sock_list_lock; -static int read_default_params; - -void sock_dom_add_to_list(struct sock_domain *domain) -{ - ofi_mutex_lock(&sock_list_lock); - dlist_insert_tail(&domain->dom_list_entry, &sock_dom_list); - ofi_mutex_unlock(&sock_list_lock); -} - -static inline int sock_dom_check_list_internal(struct sock_domain *domain) -{ - struct dlist_entry *entry; - struct sock_domain *dom_entry; - for (entry = sock_dom_list.next; entry != &sock_dom_list; - entry = entry->next) { - dom_entry = container_of(entry, struct sock_domain, - dom_list_entry); - if (dom_entry == domain) - return 1; - } - return 0; -} - -int sock_dom_check_list(struct sock_domain *domain) -{ - int found; - ofi_mutex_lock(&sock_list_lock); - found = sock_dom_check_list_internal(domain); - ofi_mutex_unlock(&sock_list_lock); - return found; -} - -void sock_dom_remove_from_list(struct sock_domain *domain) -{ - ofi_mutex_lock(&sock_list_lock); - if (sock_dom_check_list_internal(domain)) - 
dlist_remove(&domain->dom_list_entry); - - ofi_mutex_unlock(&sock_list_lock); -} - -struct sock_domain *sock_dom_list_head(void) -{ - struct sock_domain *domain; - ofi_mutex_lock(&sock_list_lock); - if (dlist_empty(&sock_dom_list)) { - domain = NULL; - } else { - domain = container_of(sock_dom_list.next, - struct sock_domain, dom_list_entry); - } - ofi_mutex_unlock(&sock_list_lock); - return domain; -} - -int sock_dom_check_manual_progress(struct sock_fabric *fabric) -{ - struct dlist_entry *entry; - struct sock_domain *dom_entry; - for (entry = sock_dom_list.next; entry != &sock_dom_list; - entry = entry->next) { - dom_entry = container_of(entry, struct sock_domain, - dom_list_entry); - if (dom_entry->fab == fabric && - dom_entry->progress_mode == FI_PROGRESS_MANUAL) - return 1; - } - return 0; -} - -void sock_fab_add_to_list(struct sock_fabric *fabric) -{ - ofi_mutex_lock(&sock_list_lock); - dlist_insert_tail(&fabric->fab_list_entry, &sock_fab_list); - ofi_mutex_unlock(&sock_list_lock); -} - -static inline int sock_fab_check_list_internal(struct sock_fabric *fabric) -{ - struct dlist_entry *entry; - struct sock_fabric *fab_entry; - for (entry = sock_fab_list.next; entry != &sock_fab_list; - entry = entry->next) { - fab_entry = container_of(entry, struct sock_fabric, - fab_list_entry); - if (fab_entry == fabric) - return 1; - } - return 0; -} - -int sock_fab_check_list(struct sock_fabric *fabric) -{ - int found; - ofi_mutex_lock(&sock_list_lock); - found = sock_fab_check_list_internal(fabric); - ofi_mutex_unlock(&sock_list_lock); - return found; -} - -void sock_fab_remove_from_list(struct sock_fabric *fabric) -{ - ofi_mutex_lock(&sock_list_lock); - if (sock_fab_check_list_internal(fabric)) - dlist_remove(&fabric->fab_list_entry); - - ofi_mutex_unlock(&sock_list_lock); -} - -struct sock_fabric *sock_fab_list_head(void) -{ - struct sock_fabric *fabric; - ofi_mutex_lock(&sock_list_lock); - if (dlist_empty(&sock_fab_list)) { - fabric = NULL; - } else { - fabric = 
container_of(sock_fab_list.next, - struct sock_fabric, fab_list_entry); - } - ofi_mutex_unlock(&sock_list_lock); - return fabric; -} - -static int sock_trywait(struct fid_fabric *fabric, struct fid **fids, int count) -{ - /* we're always ready to wait! */ - return 0; -} - -static struct fi_ops_fabric sock_fab_ops = { - .size = sizeof(struct fi_ops_fabric), - .domain = sock_domain, - .passive_ep = sock_msg_passive_ep, - .eq_open = sock_eq_open, - .wait_open = sock_wait_open, - .trywait = sock_trywait -}; - -static int sock_fabric_close(fid_t fid) -{ - struct sock_fabric *fab; - fab = container_of(fid, struct sock_fabric, fab_fid); - if (ofi_atomic_get32(&fab->ref)) - return -FI_EBUSY; - - sock_fab_remove_from_list(fab); - ofi_mutex_destroy(&fab->lock); - free(fab); - return 0; -} - -static struct fi_ops sock_fab_fi_ops = { - .size = sizeof(struct fi_ops), - .close = sock_fabric_close, - .bind = fi_no_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open, -}; - -static void sock_read_default_params() -{ - if (!read_default_params) { - fi_param_get_int(&sock_prov, "pe_waittime", &sock_pe_waittime); - fi_param_get_int(&sock_prov, "conn_timeout", &sock_conn_timeout); - fi_param_get_int(&sock_prov, "max_conn_retry", &sock_conn_retry); - fi_param_get_int(&sock_prov, "def_conn_map_sz", &sock_cm_def_map_sz); - fi_param_get_int(&sock_prov, "def_av_sz", &sock_av_def_sz); - fi_param_get_int(&sock_prov, "def_cq_sz", &sock_cq_def_sz); - fi_param_get_int(&sock_prov, "def_eq_sz", &sock_eq_def_sz); -#if ENABLE_DEBUG - fi_param_get_int(&sock_prov, "dgram_drop_rate", &sock_dgram_drop_rate); -#endif - fi_param_get_bool(&sock_prov, "keepalive_enable", &sock_keepalive_enable); - fi_param_get_int(&sock_prov, "keepalive_time", &sock_keepalive_time); - fi_param_get_int(&sock_prov, "keepalive_intvl", &sock_keepalive_intvl); - fi_param_get_int(&sock_prov, "keepalive_probes", &sock_keepalive_probes); - fi_param_get_int(&sock_prov, "max_buf_sz", &sock_buf_sz); - - read_default_params 
= 1; - } -} - -static int sock_fabric(struct fi_fabric_attr *attr, - struct fid_fabric **fabric, void *context) -{ - struct sock_fabric *fab; - - fab = calloc(1, sizeof(*fab)); - if (!fab) - return -FI_ENOMEM; - - sock_read_default_params(); - - ofi_mutex_init(&fab->lock); - dlist_init(&fab->service_list); - - fab->fab_fid.fid.fclass = FI_CLASS_FABRIC; - fab->fab_fid.fid.context = context; - fab->fab_fid.fid.ops = &sock_fab_fi_ops; - fab->fab_fid.ops = &sock_fab_ops; - *fabric = &fab->fab_fid; - ofi_atomic_initialize32(&fab->ref, 0); -#if ENABLE_DEBUG - fab->num_send_msg = 0; -#endif - sock_fab_add_to_list(fab); - return 0; -} - -int sock_get_src_addr(union ofi_sock_ip *dest_addr, - union ofi_sock_ip *src_addr) -{ - int sock, ret; - socklen_t len; - - sock = ofi_socket(dest_addr->sa.sa_family, SOCK_DGRAM, 0); - if (sock < 0) - return -ofi_sockerr(); - - len = (socklen_t) ofi_sizeofaddr(&dest_addr->sa); - ret = connect(sock, &dest_addr->sa, len); - if (ret) { - SOCK_LOG_DBG("Failed to connect udp socket\n"); - ret = sock_get_src_addr_from_hostname(src_addr, NULL, - dest_addr->sa.sa_family); - goto out; - } - - ret = getsockname(sock, &src_addr->sa, &len); - ofi_addr_set_port(&src_addr->sa, 0); - if (ret) { - SOCK_LOG_DBG("getsockname failed\n"); - ret = -ofi_sockerr(); - } - -out: - ofi_close_socket(sock); - return ret; -} - -static int sock_getinfo(uint32_t version, const char *node, const char *service, - uint64_t flags, const struct fi_info *hints, - struct fi_info **info) -{ - return ofi_ip_getinfo(&sock_util_prov, version, node, service, flags, - hints, info); -} - -static void fi_sockets_fini(void) -{ - ofi_mutex_destroy(&sock_list_lock); -} - -struct fi_provider sock_prov = { - .name = sock_prov_name, - .version = OFI_VERSION_DEF_PROV, - .fi_version = OFI_VERSION_LATEST, - .getinfo = sock_getinfo, - .fabric = sock_fabric, - .cleanup = fi_sockets_fini -}; - -struct util_prov sock_util_prov = { - .prov = &sock_prov, - .info = &sock_dgram_info, - .flags = 0, -}; 
- -SOCKETS_INI -{ -#if HAVE_SOCKETS_DL - ofi_pmem_init(); -#endif - - fi_param_define(&sock_prov, "pe_waittime", FI_PARAM_INT, - "How many milliseconds to spin while waiting for progress"); - - fi_param_define(&sock_prov, "conn_timeout", FI_PARAM_INT, - "How many milliseconds to wait for one connection establishment"); - - fi_param_define(&sock_prov, "max_conn_retry", FI_PARAM_INT, - "Number of connection retries before reporting as failure"); - - fi_param_define(&sock_prov, "def_conn_map_sz", FI_PARAM_INT, - "Default connection map size"); - - fi_param_define(&sock_prov, "def_av_sz", FI_PARAM_INT, - "Default address vector size"); - - fi_param_define(&sock_prov, "def_cq_sz", FI_PARAM_INT, - "Default completion queue size"); - - fi_param_define(&sock_prov, "def_eq_sz", FI_PARAM_INT, - "Default event queue size"); - - fi_param_define(&sock_prov, "pe_affinity", FI_PARAM_STRING, - "If specified, bind the progress thread to the indicated range(s) of Linux virtual processor ID(s). " - "This option is currently not supported on OS X and Windows. Usage: id_start[-id_end[:stride]][,]"); - - fi_param_define(&sock_prov, "keepalive_enable", FI_PARAM_BOOL, - "Enable keepalive support"); - - fi_param_define(&sock_prov, "keepalive_time", FI_PARAM_INT, - "Idle time in seconds before sending the first keepalive probe"); - - fi_param_define(&sock_prov, "keepalive_intvl", FI_PARAM_INT, - "Time in seconds between individual keepalive probes"); - - fi_param_define(&sock_prov, "keepalive_probes", FI_PARAM_INT, - "Maximum number of keepalive probes sent before dropping the connection"); - - fi_param_define(&sock_prov, "iface", FI_PARAM_STRING, - "Specify interface name"); - - fi_param_define(&sock_prov, "max_buf_sz", FI_PARAM_INT, - "Maximum socket send and recv buffer in bytes (i.e. 
SO_RCVBUF, SO_SNDBUF)"); - - ofi_mutex_init(&sock_list_lock); - dlist_init(&sock_fab_list); - dlist_init(&sock_dom_list); -#if ENABLE_DEBUG - fi_param_define(&sock_prov, "dgram_drop_rate", FI_PARAM_INT, - "Drop every Nth dgram frame (debug only)"); -#endif - return &sock_prov; -} diff --git a/prov/sockets/src/sock_mr.c b/prov/sockets/src/sock_mr.c deleted file mode 100644 index abf35949ea9..00000000000 --- a/prov/sockets/src/sock_mr.c +++ /dev/null @@ -1,225 +0,0 @@ -/* -* Copyright (c) 2017 Intel Corporation, Inc. All rights reserved. -* -* This software is available to you under a choice of one of two -* licenses. You may choose to be licensed under the terms of the GNU -* General Public License (GPL) Version 2, available from the file -* COPYING in the main directory of this source tree, or the -* BSD license below: -* -* Redistribution and use in source and binary forms, with or -* without modification, are permitted provided that the following -* conditions are met: -* -* - Redistributions of source code must retain the above -* copyright notice, this list of conditions and the following -* disclaimer. -* -* - Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following -* disclaimer in the documentation and/or other materials -* provided with the distribution. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -*/ - -#include "config.h" - -#include -#include - -#include - -#include "sock.h" -#include "sock_util.h" - -#define SOCK_LOG_DBG(...) 
_SOCK_LOG_DBG(FI_LOG_MR, __VA_ARGS__) -#define SOCK_LOG_ERROR(...) _SOCK_LOG_ERROR(FI_LOG_MR, __VA_ARGS__) - -static int sock_mr_close(struct fid *fid) -{ - struct sock_domain *dom; - struct sock_mr *mr; - int err = 0; - - mr = container_of(fid, struct sock_mr, mr_fid.fid); - dom = mr->domain; - - ofi_mutex_lock(&dom->lock); - err = ofi_mr_map_remove(&dom->mr_map, mr->key); - if (err != 0) - SOCK_LOG_ERROR("MR Erase error %d \n", err); - - ofi_mutex_unlock(&dom->lock); - ofi_atomic_dec32(&dom->ref); - free(mr); - return 0; -} - -static int sock_mr_bind(struct fid *fid, struct fid *bfid, uint64_t flags) -{ - struct sock_cntr *cntr; - struct sock_cq *cq; - struct sock_mr *mr; - - mr = container_of(fid, struct sock_mr, mr_fid.fid); - switch (bfid->fclass) { - case FI_CLASS_CQ: - cq = container_of(bfid, struct sock_cq, cq_fid.fid); - if (mr->domain != cq->domain) - return -FI_EINVAL; - - if (flags & FI_REMOTE_WRITE) - mr->cq = cq; - break; - - case FI_CLASS_CNTR: - cntr = container_of(bfid, struct sock_cntr, cntr_fid.fid); - if (mr->domain != cntr->domain) - return -FI_EINVAL; - - if (flags & FI_REMOTE_WRITE) - mr->cntr = cntr; - break; - - default: - return -FI_EINVAL; - } - return 0; -} - -static struct fi_ops sock_mr_fi_ops = { - .size = sizeof(struct fi_ops), - .close = sock_mr_close, - .bind = sock_mr_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open, -}; - -struct sock_mr *sock_mr_verify_key(struct sock_domain *domain, uint64_t key, - uintptr_t *buf, size_t len, uint64_t access) -{ - int err = 0; - struct sock_mr *mr; - - ofi_mutex_lock(&domain->lock); - - err = ofi_mr_map_verify(&domain->mr_map, buf, len, key, access, (void **)&mr); - if (err != 0) { - SOCK_LOG_ERROR("MR check failed\n"); - mr = NULL; - } - - ofi_mutex_unlock(&domain->lock); - return mr; -} - -struct sock_mr *sock_mr_verify_desc(struct sock_domain *domain, void *desc, - void *buf, size_t len, uint64_t access) -{ - uint64_t key = (uintptr_t)desc; - return sock_mr_verify_key(domain, 
key, buf, len, access); -} - -static int sock_regattr(struct fid *fid, const struct fi_mr_attr *attr, - uint64_t flags, struct fid_mr **mr) -{ - struct fi_eq_entry eq_entry; - struct sock_domain *dom; - struct fi_mr_attr cur_abi_attr; - struct sock_mr *_mr; - uint64_t key; - struct fid_domain *domain; - int ret = 0; - - if (fid->fclass != FI_CLASS_DOMAIN || !attr || attr->iov_count <= 0) { - return -FI_EINVAL; - } - - domain = container_of(fid, struct fid_domain, fid); - dom = container_of(domain, struct sock_domain, dom_fid); - - _mr = calloc(1, sizeof(*_mr)); - if (!_mr) - return -FI_ENOMEM; - - ofi_mr_update_attr(dom->fab->fab_fid.api_version, dom->info.caps, - attr, &cur_abi_attr); - ofi_mutex_lock(&dom->lock); - - _mr->mr_fid.fid.fclass = FI_CLASS_MR; - _mr->mr_fid.fid.context = attr->context; - _mr->mr_fid.fid.ops = &sock_mr_fi_ops; - - _mr->domain = dom; - _mr->flags = flags; - - ret = ofi_mr_map_insert(&dom->mr_map, &cur_abi_attr, &key, _mr); - if (ret != 0) - goto err; - - _mr->mr_fid.key = _mr->key = key; - _mr->mr_fid.mem_desc = (void *) (uintptr_t) key; - ofi_mutex_unlock(&dom->lock); - - *mr = &_mr->mr_fid; - ofi_atomic_inc32(&dom->ref); - - if (dom->mr_eq) { - eq_entry.fid = &domain->fid; - eq_entry.context = attr->context; - return sock_eq_report_event(dom->mr_eq, FI_MR_COMPLETE, - &eq_entry, sizeof(eq_entry), 0); - } - - return 0; - -err: - ofi_mutex_unlock(&dom->lock); - free(_mr); - return ret; -} - -static int sock_regv(struct fid *fid, const struct iovec *iov, - size_t count, uint64_t access, - uint64_t offset, uint64_t requested_key, - uint64_t flags, struct fid_mr **mr, void *context) -{ - struct fi_mr_attr attr; - - attr.mr_iov = iov; - attr.iov_count = count; - attr.access = access; - attr.offset = offset; - attr.requested_key = requested_key; - attr.context = context; - attr.auth_key_size = 0; - attr.auth_key = NULL; - return sock_regattr(fid, &attr, flags, mr); -} - -static int sock_reg(struct fid *fid, const void *buf, size_t len, - 
uint64_t access, uint64_t offset, uint64_t requested_key, - uint64_t flags, struct fid_mr **mr, void *context) -{ - struct iovec iov; - - iov.iov_base = (void *)buf; - iov.iov_len = len; - return sock_regv(fid, &iov, 1, access, offset, requested_key, - flags, mr, context); -} - -struct fi_ops_mr sock_dom_mr_ops = { - .size = sizeof(struct fi_ops_mr), - .reg = sock_reg, - .regv = sock_regv, - .regattr = sock_regattr, -}; diff --git a/prov/sockets/src/sock_msg.c b/prov/sockets/src/sock_msg.c deleted file mode 100644 index 2ce58a4b2f3..00000000000 --- a/prov/sockets/src/sock_msg.c +++ /dev/null @@ -1,760 +0,0 @@ -/* - * Copyright (c) 2014-2015 Intel Corporation, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "sock.h" -#include "sock_util.h" - -#define SOCK_LOG_DBG(...) _SOCK_LOG_DBG(FI_LOG_EP_DATA, __VA_ARGS__) -#define SOCK_LOG_ERROR(...) _SOCK_LOG_ERROR(FI_LOG_EP_DATA, __VA_ARGS__) - -ssize_t sock_ep_recvmsg(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags) -{ - ssize_t ret; - size_t i; - struct sock_rx_ctx *rx_ctx; - struct sock_rx_entry *rx_entry; - struct sock_ep *sock_ep; - uint64_t op_flags; - - switch (ep->fid.fclass) { - case FI_CLASS_EP: - sock_ep = container_of(ep, struct sock_ep, ep); - rx_ctx = sock_ep->attr->rx_ctx; - op_flags = sock_ep->rx_attr.op_flags; - break; - case FI_CLASS_RX_CTX: - case FI_CLASS_SRX_CTX: - rx_ctx = container_of(ep, struct sock_rx_ctx, ctx); - op_flags = rx_ctx->attr.op_flags; - break; - default: - SOCK_LOG_ERROR("Invalid ep type\n"); - return -FI_EINVAL; - } - -#if ENABLE_DEBUG - if (msg->iov_count > SOCK_EP_MAX_IOV_LIMIT) - return -FI_EINVAL; -#endif - - if (!rx_ctx->enabled) - return -FI_EOPBADSTATE; - - if (flags & SOCK_USE_OP_FLAGS) - flags |= op_flags; - - if (flags & FI_TRIGGER) { - ret = sock_queue_msg_op(ep, msg, flags, FI_OP_RECV); - if (ret != 1) - return ret; - } - - if (flags & FI_PEEK) { - return sock_rx_peek_recv(rx_ctx, msg->addr, 0L, ~0ULL, - msg->context, flags, 0); - } else if (flags & FI_CLAIM) { - return sock_rx_claim_recv(rx_ctx, msg->context, flags, - 0L, ~0ULL, 0, - msg->msg_iov, msg->iov_count); - } - - ofi_mutex_lock(&rx_ctx->lock); - rx_entry = sock_rx_new_entry(rx_ctx); - ofi_mutex_unlock(&rx_ctx->lock); 
- if (!rx_entry) - return -FI_ENOMEM; - - rx_entry->rx_op.op = SOCK_OP_RECV; - rx_entry->rx_op.dest_iov_len = (uint8_t) msg->iov_count; - - rx_entry->flags = flags; - rx_entry->context = (uintptr_t) msg->context; - rx_entry->addr = (rx_ctx->attr.caps & FI_DIRECTED_RECV) ? - msg->addr : FI_ADDR_UNSPEC; - rx_entry->data = msg->data; - rx_entry->ignore = ~0ULL; - rx_entry->is_tagged = 0; - - for (i = 0; i < msg->iov_count; i++) { - rx_entry->iov[i].iov.addr = (uintptr_t) msg->msg_iov[i].iov_base; - rx_entry->iov[i].iov.len = msg->msg_iov[i].iov_len; - rx_entry->total_len += rx_entry->iov[i].iov.len; - } - - SOCK_LOG_DBG("New rx_entry: %p (ctx: %p)\n", rx_entry, rx_ctx); - ofi_mutex_lock(&rx_ctx->lock); - dlist_insert_tail(&rx_entry->entry, &rx_ctx->rx_entry_list); - rx_ctx->progress_start = &rx_ctx->rx_buffered_list; - ofi_mutex_unlock(&rx_ctx->lock); - return 0; -} - -static ssize_t sock_ep_recv(struct fid_ep *ep, void *buf, size_t len, - void *desc, fi_addr_t src_addr, void *context) -{ - struct iovec msg_iov = { - .iov_base = buf, - .iov_len = len, - }; - struct fi_msg msg = { - .msg_iov = &msg_iov, - .desc = &desc, - .iov_count = 1, - .addr = src_addr, - .context = context, - .data = 0, - }; - - return sock_ep_recvmsg(ep, &msg, SOCK_USE_OP_FLAGS); -} - -static ssize_t sock_ep_recvv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t src_addr, - void *context) -{ - struct fi_msg msg = { - .msg_iov = iov, - .desc = desc, - .iov_count = count, - .addr = src_addr, - .context = context, - .data = 0, - }; - - return sock_ep_recvmsg(ep, &msg, SOCK_USE_OP_FLAGS); -} - -ssize_t sock_ep_sendmsg(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags) -{ - ssize_t ret; - size_t i; - uint64_t total_len, op_flags; - struct sock_op tx_op; - union sock_iov tx_iov; - struct sock_conn *conn; - struct sock_tx_ctx *tx_ctx; - struct sock_ep *sock_ep; - struct sock_ep_attr *ep_attr; - - switch (ep->fid.fclass) { - case FI_CLASS_EP: - sock_ep = 
container_of(ep, struct sock_ep, ep); - ep_attr = sock_ep->attr; - tx_ctx = sock_ep->attr->tx_ctx->use_shared ? - sock_ep->attr->tx_ctx->stx_ctx : sock_ep->attr->tx_ctx; - op_flags = sock_ep->tx_attr.op_flags; - break; - case FI_CLASS_TX_CTX: - tx_ctx = container_of(ep, struct sock_tx_ctx, fid.ctx); - ep_attr = tx_ctx->ep_attr; - op_flags = tx_ctx->attr.op_flags; - break; - default: - SOCK_LOG_ERROR("Invalid EP type\n"); - return -FI_EINVAL; - } - -#if ENABLE_DEBUG - if (msg->iov_count > SOCK_EP_MAX_IOV_LIMIT) - return -FI_EINVAL; -#endif - - if (!tx_ctx->enabled) - return -FI_EOPBADSTATE; - - if (sock_drop_packet(ep_attr)) - return 0; - - ret = sock_ep_get_conn(ep_attr, tx_ctx, msg->addr, &conn); - if (ret) - return ret; - - SOCK_LOG_DBG("New sendmsg on TX: %p using conn: %p\n", - tx_ctx, conn); - - SOCK_EP_SET_TX_OP_FLAGS(flags); - if (flags & SOCK_USE_OP_FLAGS) - flags |= op_flags; - - if (flags & FI_TRIGGER) { - ret = sock_queue_msg_op(ep, msg, flags, FI_OP_SEND); - if (ret != 1) - return ret; - } - - memset(&tx_op, 0, sizeof(struct sock_op)); - tx_op.op = SOCK_OP_SEND; - - total_len = 0; - if (flags & FI_INJECT) { - for (i = 0; i < msg->iov_count; i++) - total_len += msg->msg_iov[i].iov_len; - - if (total_len > SOCK_EP_MAX_INJECT_SZ) - return -FI_EINVAL; - - tx_op.src_iov_len = (uint8_t) total_len; - } else { - tx_op.src_iov_len = (uint8_t) msg->iov_count; - total_len = msg->iov_count * sizeof(union sock_iov); - } - - total_len += sizeof(struct sock_op_send); - - if (flags & FI_REMOTE_CQ_DATA) - total_len += sizeof(uint64_t); - - sock_tx_ctx_start(tx_ctx); - if (ofi_rbavail(&tx_ctx->rb) < total_len) { - ret = -FI_EAGAIN; - goto err; - } - - sock_tx_ctx_write_op_send(tx_ctx, &tx_op, flags, (uintptr_t) msg->context, - msg->addr, (uintptr_t) ((msg->iov_count > 0) ? 
- msg->msg_iov[0].iov_base : NULL), - ep_attr, conn); - - if (flags & FI_REMOTE_CQ_DATA) - sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(msg->data)); - - if (flags & FI_INJECT) { - for (i = 0; i < msg->iov_count; i++) { - sock_tx_ctx_write(tx_ctx, msg->msg_iov[i].iov_base, - msg->msg_iov[i].iov_len); - } - } else { - for (i = 0; i < msg->iov_count; i++) { - tx_iov.iov.addr = (uintptr_t) msg->msg_iov[i].iov_base; - tx_iov.iov.len = msg->msg_iov[i].iov_len; - sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov)); - } - } - - sock_tx_ctx_commit(tx_ctx); - return 0; - -err: - sock_tx_ctx_abort(tx_ctx); - return ret; -} - -static ssize_t sock_ep_send(struct fid_ep *ep, const void *buf, size_t len, - void *desc, fi_addr_t dest_addr, void *context) -{ - struct iovec msg_iov = { - .iov_base = (void *)buf, - .iov_len = len, - }; - struct fi_msg msg = { - .msg_iov = &msg_iov, - .desc = &desc, - .iov_count = 1, - .addr = dest_addr, - .context = context, - .data = 0, - }; - - return sock_ep_sendmsg(ep, &msg, SOCK_USE_OP_FLAGS); -} - -static ssize_t sock_ep_sendv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t dest_addr, - void *context) -{ - struct fi_msg msg = { - .msg_iov = iov, - .desc = desc, - .iov_count = count, - .addr = dest_addr, - .context = context, - .data = 0, - }; - - return sock_ep_sendmsg(ep, &msg, SOCK_USE_OP_FLAGS); -} - -static ssize_t sock_ep_senddata(struct fid_ep *ep, const void *buf, size_t len, - void *desc, uint64_t data, fi_addr_t dest_addr, - void *context) -{ - struct iovec msg_iov = { - .iov_base = (void *)buf, - .iov_len = len, - }; - struct fi_msg msg = { - .msg_iov = &msg_iov, - .desc = desc, - .iov_count = 1, - .addr = dest_addr, - .context = context, - .data = data, - }; - - return sock_ep_sendmsg(ep, &msg, FI_REMOTE_CQ_DATA | SOCK_USE_OP_FLAGS); -} - -static ssize_t sock_ep_inject(struct fid_ep *ep, const void *buf, size_t len, - fi_addr_t dest_addr) -{ - struct iovec msg_iov = { - .iov_base = (void *)buf, - 
.iov_len = len, - }; - struct fi_msg msg = { - .msg_iov = &msg_iov, - .desc = NULL, - .iov_count = 1, - .addr = dest_addr, - .context = NULL, - .data = 0, - }; - - return sock_ep_sendmsg(ep, &msg, FI_INJECT | - SOCK_NO_COMPLETION | SOCK_USE_OP_FLAGS); -} - -static ssize_t sock_ep_injectdata(struct fid_ep *ep, const void *buf, - size_t len, uint64_t data, fi_addr_t dest_addr) -{ - struct iovec msg_iov = { - .iov_base = (void *)buf, - .iov_len = len, - }; - struct fi_msg msg = { - .msg_iov = &msg_iov, - .desc = NULL, - .iov_count = 1, - .addr = dest_addr, - .context = NULL, - .data = data, - }; - - return sock_ep_sendmsg(ep, &msg, FI_REMOTE_CQ_DATA | FI_INJECT | - SOCK_NO_COMPLETION | SOCK_USE_OP_FLAGS); -} - -struct fi_ops_msg sock_ep_msg_ops = { - .size = sizeof(struct fi_ops_msg), - .recv = sock_ep_recv, - .recvv = sock_ep_recvv, - .recvmsg = sock_ep_recvmsg, - .send = sock_ep_send, - .sendv = sock_ep_sendv, - .sendmsg = sock_ep_sendmsg, - .inject = sock_ep_inject, - .senddata = sock_ep_senddata, - .injectdata = sock_ep_injectdata -}; - -ssize_t sock_ep_trecvmsg(struct fid_ep *ep, - const struct fi_msg_tagged *msg, uint64_t flags) -{ - ssize_t ret; - size_t i; - struct sock_rx_ctx *rx_ctx; - struct sock_rx_entry *rx_entry; - struct sock_ep *sock_ep; - uint64_t op_flags; - - switch (ep->fid.fclass) { - case FI_CLASS_EP: - sock_ep = container_of(ep, struct sock_ep, ep); - rx_ctx = sock_ep->attr->rx_ctx; - op_flags = sock_ep->rx_attr.op_flags; - break; - case FI_CLASS_RX_CTX: - case FI_CLASS_SRX_CTX: - rx_ctx = container_of(ep, struct sock_rx_ctx, ctx); - op_flags = rx_ctx->attr.op_flags; - break; - default: - SOCK_LOG_ERROR("Invalid ep type\n"); - return -FI_EINVAL; - } - -#if ENABLE_DEBUG - if (msg->iov_count > SOCK_EP_MAX_IOV_LIMIT) - return -FI_EINVAL; -#endif - - if (!rx_ctx->enabled) - return -FI_EOPBADSTATE; - - if (flags & SOCK_USE_OP_FLAGS) - flags |= op_flags; - flags &= ~FI_MULTI_RECV; - - if (flags & FI_TRIGGER) { - ret = sock_queue_tmsg_op(ep, msg, 
flags, FI_OP_TRECV); - if (ret != 1) - return ret; - } - - if (flags & FI_PEEK) { - return sock_rx_peek_recv(rx_ctx, msg->addr, - msg->tag, msg->ignore, - msg->context, flags, 1); - } else if (flags & FI_CLAIM) { - return sock_rx_claim_recv(rx_ctx, msg->context, flags, - msg->tag, msg->ignore, 1, - msg->msg_iov, msg->iov_count); - } - - ofi_mutex_lock(&rx_ctx->lock); - rx_entry = sock_rx_new_entry(rx_ctx); - ofi_mutex_unlock(&rx_ctx->lock); - if (!rx_entry) - return -FI_ENOMEM; - - rx_entry->rx_op.op = SOCK_OP_TRECV; - rx_entry->rx_op.dest_iov_len = (uint8_t) msg->iov_count; - - rx_entry->flags = flags; - rx_entry->context = (uintptr_t) msg->context; - rx_entry->addr = (rx_ctx->attr.caps & FI_DIRECTED_RECV) ? - msg->addr : FI_ADDR_UNSPEC; - rx_entry->data = msg->data; - rx_entry->tag = msg->tag; - rx_entry->ignore = msg->ignore; - rx_entry->is_tagged = 1; - - for (i = 0; i < msg->iov_count; i++) { - rx_entry->iov[i].iov.addr = (uintptr_t) msg->msg_iov[i].iov_base; - rx_entry->iov[i].iov.len = msg->msg_iov[i].iov_len; - rx_entry->total_len += rx_entry->iov[i].iov.len; - } - - ofi_mutex_lock(&rx_ctx->lock); - SOCK_LOG_DBG("New rx_entry: %p (ctx: %p)\n", rx_entry, rx_ctx); - dlist_insert_tail(&rx_entry->entry, &rx_ctx->rx_entry_list); - rx_ctx->progress_start = &rx_ctx->rx_buffered_list; - ofi_mutex_unlock(&rx_ctx->lock); - return 0; -} - -static ssize_t sock_ep_trecv(struct fid_ep *ep, void *buf, size_t len, - void *desc, fi_addr_t src_addr, uint64_t tag, - uint64_t ignore, void *context) -{ - struct iovec msg_iov = { - .iov_base = buf, - .iov_len = len, - }; - struct fi_msg_tagged msg = { - .msg_iov = &msg_iov, - .desc = &desc, - .iov_count = 1, - .addr = src_addr, - .context = context, - .tag = tag, - .ignore = ignore, - .data = 0, - }; - - return sock_ep_trecvmsg(ep, &msg, SOCK_USE_OP_FLAGS); -} - -static ssize_t sock_ep_trecvv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t src_addr, - uint64_t tag, uint64_t ignore, void 
*context) -{ - struct fi_msg_tagged msg = { - .msg_iov = iov, - .desc = desc, - .iov_count = count, - .addr = src_addr, - .context = context, - .tag = tag, - .ignore = ignore, - .data = 0, - }; - - return sock_ep_trecvmsg(ep, &msg, SOCK_USE_OP_FLAGS); -} - -ssize_t sock_ep_tsendmsg(struct fid_ep *ep, - const struct fi_msg_tagged *msg, uint64_t flags) -{ - ssize_t ret; - size_t i; - uint64_t total_len, op_flags; - struct sock_op tx_op; - union sock_iov tx_iov; - struct sock_conn *conn; - struct sock_tx_ctx *tx_ctx; - struct sock_ep *sock_ep; - struct sock_ep_attr *ep_attr; - - switch (ep->fid.fclass) { - case FI_CLASS_EP: - sock_ep = container_of(ep, struct sock_ep, ep); - tx_ctx = sock_ep->attr->tx_ctx->use_shared ? - sock_ep->attr->tx_ctx->stx_ctx : sock_ep->attr->tx_ctx; - ep_attr = sock_ep->attr; - op_flags = sock_ep->tx_attr.op_flags; - break; - case FI_CLASS_TX_CTX: - tx_ctx = container_of(ep, struct sock_tx_ctx, fid.ctx); - ep_attr = tx_ctx->ep_attr; - op_flags = tx_ctx->attr.op_flags; - break; - default: - SOCK_LOG_ERROR("Invalid EP type\n"); - return -FI_EINVAL; - } - -#if ENABLE_DEBUG - if (msg->iov_count > SOCK_EP_MAX_IOV_LIMIT) - return -FI_EINVAL; -#endif - - if (!tx_ctx->enabled) - return -FI_EOPBADSTATE; - - if (sock_drop_packet(ep_attr)) - return 0; - - ret = sock_ep_get_conn(ep_attr, tx_ctx, msg->addr, &conn); - if (ret) - return ret; - - SOCK_EP_SET_TX_OP_FLAGS(flags); - if (flags & SOCK_USE_OP_FLAGS) - flags |= op_flags; - - if (flags & FI_TRIGGER) { - ret = sock_queue_tmsg_op(ep, msg, flags, FI_OP_TSEND); - if (ret != 1) - return ret; - } - - memset(&tx_op, 0, sizeof(tx_op)); - tx_op.op = SOCK_OP_TSEND; - - total_len = 0; - if (flags & FI_INJECT) { - for (i = 0; i < msg->iov_count; i++) - total_len += msg->msg_iov[i].iov_len; - - tx_op.src_iov_len = (uint8_t) total_len; - if (total_len > SOCK_EP_MAX_INJECT_SZ) - return -FI_EINVAL; - } else { - total_len = msg->iov_count * sizeof(union sock_iov); - tx_op.src_iov_len = (uint8_t) msg->iov_count; - } 
- - total_len += sizeof(struct sock_op_tsend); - if (flags & FI_REMOTE_CQ_DATA) - total_len += sizeof(uint64_t); - - sock_tx_ctx_start(tx_ctx); - if (ofi_rbavail(&tx_ctx->rb) < total_len) { - ret = -FI_EAGAIN; - goto err; - } - - sock_tx_ctx_write_op_tsend(tx_ctx, &tx_op, flags, - (uintptr_t) msg->context, msg->addr, - (uintptr_t) ((msg->iov_count > 0) ? - msg->msg_iov[0].iov_base : NULL), - ep_attr, conn, msg->tag); - - if (flags & FI_REMOTE_CQ_DATA) - sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(msg->data)); - - if (flags & FI_INJECT) { - for (i = 0; i < msg->iov_count; i++) { - sock_tx_ctx_write(tx_ctx, msg->msg_iov[i].iov_base, - msg->msg_iov[i].iov_len); - } - } else { - for (i = 0; i < msg->iov_count; i++) { - tx_iov.iov.addr = (uintptr_t) msg->msg_iov[i].iov_base; - tx_iov.iov.len = msg->msg_iov[i].iov_len; - sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov)); - } - } - - sock_tx_ctx_commit(tx_ctx); - return 0; - -err: - sock_tx_ctx_abort(tx_ctx); - return ret; -} - -static ssize_t sock_ep_tsend(struct fid_ep *ep, const void *buf, size_t len, - void *desc, fi_addr_t dest_addr, uint64_t tag, - void *context) -{ - struct iovec msg_iov = { - .iov_base = (void *)buf, - .iov_len = len, - }; - struct fi_msg_tagged msg = { - .msg_iov = &msg_iov, - .desc = &desc, - .iov_count = 1, - .addr = dest_addr, - .tag = tag, - .ignore = 0, - .context = context, - .data = 0, - }; - - return sock_ep_tsendmsg(ep, &msg, SOCK_USE_OP_FLAGS); -} - -static ssize_t sock_ep_tsendv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t dest_addr, - uint64_t tag, void *context) -{ - struct fi_msg_tagged msg = { - .msg_iov = iov, - .desc = desc, - .iov_count = count, - .addr = dest_addr, - .tag = tag, - .ignore = 0, - .context = context, - .data = 0, - }; - - return sock_ep_tsendmsg(ep, &msg, SOCK_USE_OP_FLAGS); -} - -static ssize_t sock_ep_tsenddata(struct fid_ep *ep, const void *buf, size_t len, - void *desc, uint64_t data, fi_addr_t dest_addr, - uint64_t 
tag, void *context) -{ - struct iovec msg_iov = { - .iov_base = (void *)buf, - .iov_len = len, - }; - struct fi_msg_tagged msg = { - .msg_iov = &msg_iov, - .desc = desc, - .iov_count = 1, - .addr = dest_addr, - .tag = tag, - .ignore = 0, - .context = context, - .data = data, - }; - - return sock_ep_tsendmsg(ep, &msg, FI_REMOTE_CQ_DATA | SOCK_USE_OP_FLAGS); -} - -static ssize_t sock_ep_tinject(struct fid_ep *ep, const void *buf, size_t len, - fi_addr_t dest_addr, uint64_t tag) -{ - struct iovec msg_iov = { - .iov_base = (void *)buf, - .iov_len = len, - }; - struct fi_msg_tagged msg = { - .msg_iov = &msg_iov, - .desc = NULL, - .iov_count = 1, - .addr = dest_addr, - .tag = tag, - .ignore = 0, - .context = NULL, - .data = 0, - }; - - return sock_ep_tsendmsg(ep, &msg, FI_INJECT | - SOCK_NO_COMPLETION | SOCK_USE_OP_FLAGS); -} - -static ssize_t sock_ep_tinjectdata(struct fid_ep *ep, const void *buf, - size_t len, uint64_t data, fi_addr_t dest_addr, - uint64_t tag) -{ - struct iovec msg_iov = { - .iov_base = (void *)buf, - .iov_len = len, - }; - struct fi_msg_tagged msg = { - .msg_iov = &msg_iov, - .desc = NULL, - .iov_count = 1, - .addr = dest_addr, - .tag = tag, - .ignore = 0, - .context = NULL, - .data = data, - }; - - return sock_ep_tsendmsg(ep, &msg, FI_REMOTE_CQ_DATA | FI_INJECT | - SOCK_NO_COMPLETION | SOCK_USE_OP_FLAGS); -} - - -struct fi_ops_tagged sock_ep_tagged = { - .size = sizeof(struct fi_ops_tagged), - .recv = sock_ep_trecv, - .recvv = sock_ep_trecvv, - .recvmsg = sock_ep_trecvmsg, - .send = sock_ep_tsend, - .sendv = sock_ep_tsendv, - .sendmsg = sock_ep_tsendmsg, - .inject = sock_ep_tinject, - .senddata = sock_ep_tsenddata, - .injectdata = sock_ep_tinjectdata, -}; - diff --git a/prov/sockets/src/sock_poll.c b/prov/sockets/src/sock_poll.c deleted file mode 100644 index 26db17f0978..00000000000 --- a/prov/sockets/src/sock_poll.c +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Copyright (c) 2014 Intel Corporation, Inc. All rights reserved. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "config.h" - -#include -#include - -#include "sock.h" -#include "sock_util.h" - -#define SOCK_LOG_DBG(...) _SOCK_LOG_DBG(FI_LOG_CORE, __VA_ARGS__) -#define SOCK_LOG_ERROR(...) 
_SOCK_LOG_ERROR(FI_LOG_CORE, __VA_ARGS__) - -static int sock_poll_add(struct fid_poll *pollset, struct fid *event_fid, - uint64_t flags) -{ - struct sock_poll *poll; - struct sock_fid_list *list_item; - struct sock_cq *cq; - struct sock_cntr *cntr; - - poll = container_of(pollset, struct sock_poll, poll_fid.fid); - list_item = calloc(1, sizeof(*list_item)); - if (!list_item) - return -FI_ENOMEM; - - list_item->fid = event_fid; - dlist_init(&list_item->entry); - dlist_insert_after(&list_item->entry, &poll->fid_list); - - switch (list_item->fid->fclass) { - case FI_CLASS_CQ: - cq = container_of(list_item->fid, struct sock_cq, cq_fid); - ofi_atomic_inc32(&cq->ref); - break; - case FI_CLASS_CNTR: - cntr = container_of(list_item->fid, struct sock_cntr, cntr_fid); - ofi_atomic_inc32(&cntr->ref); - break; - default: - SOCK_LOG_ERROR("Invalid fid class\n"); - return -FI_EINVAL; - } - return 0; -} - -static int sock_poll_del(struct fid_poll *pollset, struct fid *event_fid, - uint64_t flags) -{ - struct sock_poll *poll; - struct sock_fid_list *list_item; - struct dlist_entry *p, *head; - struct sock_cq *cq; - struct sock_cntr *cntr; - - poll = container_of(pollset, struct sock_poll, poll_fid.fid); - head = &poll->fid_list; - for (p = head->next; p != head; p = p->next) { - list_item = container_of(p, struct sock_fid_list, entry); - if (list_item->fid == event_fid) { - dlist_remove(p); - switch (list_item->fid->fclass) { - case FI_CLASS_CQ: - cq = container_of(list_item->fid, struct sock_cq, cq_fid); - ofi_atomic_dec32(&cq->ref); - break; - case FI_CLASS_CNTR: - cntr = container_of(list_item->fid, struct sock_cntr, cntr_fid); - ofi_atomic_dec32(&cntr->ref); - break; - default: - SOCK_LOG_ERROR("Invalid fid class\n"); - break; - } - free(list_item); - break; - } - } - return 0; -} - -static int sock_poll_poll(struct fid_poll *pollset, void **context, int count) -{ - struct sock_poll *poll; - struct sock_cq *cq; - struct sock_eq *eq; - struct sock_cntr *cntr; - struct 
sock_fid_list *list_item; - struct dlist_entry *p, *head; - int ret_count = 0; - - poll = container_of(pollset, struct sock_poll, poll_fid.fid); - head = &poll->fid_list; - - for (p = head->next; p != head && ret_count < count; p = p->next) { - list_item = container_of(p, struct sock_fid_list, entry); - switch (list_item->fid->fclass) { - case FI_CLASS_CQ: - cq = container_of(list_item->fid, struct sock_cq, - cq_fid); - sock_cq_progress(cq); - pthread_mutex_lock(&cq->lock); - if (ofi_rbfdused(&cq->cq_rbfd) || ofi_rbused(&cq->cqerr_rb)) { - *context++ = cq->cq_fid.fid.context; - ret_count++; - } - pthread_mutex_unlock(&cq->lock); - break; - - case FI_CLASS_CNTR: - cntr = container_of(list_item->fid, struct sock_cntr, - cntr_fid); - sock_cntr_progress(cntr); - pthread_mutex_lock(&cntr->mut); - if (ofi_atomic_get32(&cntr->value) != - ofi_atomic_get32(&cntr->last_read_val)) { - ofi_atomic_set32(&cntr->last_read_val, - ofi_atomic_get32(&cntr->value)); - *context++ = cntr->cntr_fid.fid.context; - ret_count++; - } - pthread_mutex_unlock(&cntr->mut); - break; - - case FI_CLASS_EQ: - eq = container_of(list_item->fid, struct sock_eq, eq); - ofi_mutex_lock(&eq->lock); - if (!dlistfd_empty(&eq->list) || - !dlistfd_empty(&eq->err_list)) { - *context++ = eq->eq.fid.context; - ret_count++; - } - ofi_mutex_unlock(&eq->lock); - break; - - default: - break; - } - } - - return ret_count; -} - -static int sock_poll_close(fid_t fid) -{ - struct sock_poll *poll; - struct sock_fid_list *list_item; - struct dlist_entry *p, *head; - - poll = container_of(fid, struct sock_poll, poll_fid.fid); - - head = &poll->fid_list; - while (!dlist_empty(head)) { - p = head->next; - list_item = container_of(p, struct sock_fid_list, entry); - sock_poll_del(&poll->poll_fid, list_item->fid, 0); - } - - ofi_atomic_dec32(&poll->domain->ref); - free(poll); - return 0; -} - -static struct fi_ops sock_poll_fi_ops = { - .size = sizeof(struct fi_ops), - .close = sock_poll_close, - .bind = fi_no_bind, - .control = 
fi_no_control, - .ops_open = fi_no_ops_open, -}; - -static struct fi_ops_poll sock_poll_ops = { - .size = sizeof(struct fi_ops_poll), - .poll = sock_poll_poll, - .poll_add = sock_poll_add, - .poll_del = sock_poll_del, -}; - -static int sock_poll_verify_attr(struct fi_poll_attr *attr) -{ - if (attr->flags) - return -FI_ENODATA; - return 0; -} - -int sock_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr, - struct fid_poll **pollset) -{ - struct sock_domain *dom; - struct sock_poll *poll; - - if (attr && sock_poll_verify_attr(attr)) - return -FI_EINVAL; - - dom = container_of(domain, struct sock_domain, dom_fid); - poll = calloc(1, sizeof(*poll)); - if (!poll) - return -FI_ENOMEM; - - dlist_init(&poll->fid_list); - poll->poll_fid.fid.fclass = FI_CLASS_POLL; - poll->poll_fid.fid.context = 0; - poll->poll_fid.fid.ops = &sock_poll_fi_ops; - poll->poll_fid.ops = &sock_poll_ops; - poll->domain = dom; - ofi_atomic_inc32(&dom->ref); - - *pollset = &poll->poll_fid; - return 0; -} diff --git a/prov/sockets/src/sock_progress.c b/prov/sockets/src/sock_progress.c deleted file mode 100644 index cf607415552..00000000000 --- a/prov/sockets/src/sock_progress.c +++ /dev/null @@ -1,2826 +0,0 @@ -/* - * Copyright (c) 2014 Intel Corporation, Inc. All rights reserved. - * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include "sock.h" -#include "sock_util.h" - -#define SOCK_LOG_DBG(...) _SOCK_LOG_DBG(FI_LOG_EP_DATA, __VA_ARGS__) -#define SOCK_LOG_ERROR(...) _SOCK_LOG_ERROR(FI_LOG_EP_DATA, __VA_ARGS__) - -#define PE_INDEX(_pe, _e) (_e - &_pe->pe_table[0]) -#define SOCK_GET_RX_ID(_addr, _bits) (((_bits) == 0) ? 
0 : \ - (((uint64_t)_addr) >> (64 - _bits))) - - -#define SOCK_EP_MAX_PROGRESS_CNT 10 -static int sock_pe_progress_buffered_rx(struct sock_rx_ctx *rx_ctx, - bool shallow); - -static inline int sock_pe_is_data_msg(int msg_id) -{ - switch (msg_id) { - case SOCK_OP_SEND: - case SOCK_OP_TSEND: - case SOCK_OP_WRITE: - case SOCK_OP_READ: - case SOCK_OP_ATOMIC: - return 1; - default: - return 0; - } -} - -static inline ssize_t sock_pe_send_field(struct sock_pe_entry *pe_entry, - void *field, size_t field_len, - size_t start_offset) -{ - ssize_t ret; - size_t offset, data_len; - - if (pe_entry->done_len >= start_offset + field_len) - return 0; - - offset = pe_entry->done_len - start_offset; - data_len = field_len - offset; - ret = sock_comm_send(pe_entry, (char *) field + offset, data_len); - - if (ret <= 0) - return -1; - - pe_entry->done_len += ret; - return ((size_t) ret == data_len) ? 0 : -1; -} - -static inline ssize_t sock_pe_recv_field(struct sock_pe_entry *pe_entry, - void *field, size_t field_len, - size_t start_offset) -{ - ssize_t ret; - size_t offset, data_len; - - if (pe_entry->done_len >= start_offset + field_len) - return 0; - - offset = pe_entry->done_len - start_offset; - data_len = field_len - offset; - ret = sock_comm_recv(pe_entry, (char *) field + offset, data_len); - if (ret <= 0) - return -1; - - pe_entry->done_len += ret; - return ((size_t) ret == data_len) ? 
0 : -1; -} - -static inline void sock_pe_discard_field(struct sock_pe_entry *pe_entry) -{ - size_t ret; - if (!pe_entry->rem) - goto out; - - SOCK_LOG_DBG("Remaining for %p: %" PRId64 "\n", pe_entry, pe_entry->rem); - ret = sock_comm_discard(pe_entry, pe_entry->rem); - SOCK_LOG_DBG("Discarded %ld\n", ret); - - pe_entry->rem -= ret; - if (pe_entry->rem == 0) - pe_entry->conn->rx_pe_entry = NULL; - - out: - if (pe_entry->done_len == pe_entry->total_len && !pe_entry->rem) { - SOCK_LOG_DBG("Discard complete for %p\n", pe_entry); - pe_entry->is_complete = 1; - } -} - -static void sock_pe_release_entry(struct sock_pe *pe, - struct sock_pe_entry *pe_entry) -{ - assert((pe_entry->type != SOCK_PE_RX) || - ofi_rbempty(&pe_entry->comm_buf)); - dlist_remove(&pe_entry->ctx_entry); - - if (pe_entry->conn->tx_pe_entry == pe_entry) - pe_entry->conn->tx_pe_entry = NULL; - if (pe_entry->conn->rx_pe_entry == pe_entry) - pe_entry->conn->rx_pe_entry = NULL; - - if (pe_entry->type == SOCK_PE_RX && pe_entry->pe.rx.atomic_cmp) { - ofi_buf_free(pe_entry->pe.rx.atomic_cmp); - ofi_buf_free(pe_entry->pe.rx.atomic_src); - } - - if (pe_entry->is_pool_entry) { - ofi_rbfree(&pe_entry->comm_buf); - dlist_remove(&pe_entry->entry); - ofi_buf_free(pe_entry); - return; - } - - if (pe_entry->type == SOCK_PE_TX) - ofi_rbreset(&pe_entry->comm_buf); - - pe->num_free_entries++; - pe_entry->conn = NULL; - - memset(&pe_entry->pe.rx, 0, sizeof(pe_entry->pe.rx)); - memset(&pe_entry->pe.tx, 0, sizeof(pe_entry->pe.tx)); - memset(&pe_entry->msg_hdr, 0, sizeof(pe_entry->msg_hdr)); - memset(&pe_entry->response, 0, sizeof(pe_entry->response)); - - pe_entry->type = 0; - pe_entry->is_complete = 0; - pe_entry->is_error = 0; - pe_entry->done_len = 0; - pe_entry->total_len = 0; - pe_entry->data_len = 0; - pe_entry->buf = 0; - pe_entry->flags = 0; - pe_entry->context = 0L; - pe_entry->mr_checked = 0; - pe_entry->completion_reported = 0; - - dlist_remove(&pe_entry->entry); - dlist_insert_head(&pe_entry->entry, 
&pe->free_list); - SOCK_LOG_DBG("progress entry %p released\n", pe_entry); -} - -static struct sock_pe_entry *sock_pe_acquire_entry(struct sock_pe *pe) -{ - struct dlist_entry *entry; - struct sock_pe_entry *pe_entry; - - if (dlist_empty(&pe->free_list)) { - pe_entry = ofi_buf_alloc(pe->pe_rx_pool); - SOCK_LOG_DBG("Getting rx pool entry\n"); - if (pe_entry) { - memset(pe_entry, 0, sizeof(*pe_entry)); - pe_entry->is_pool_entry = 1; - if (ofi_rbinit(&pe_entry->comm_buf, SOCK_PE_OVERFLOW_COMM_BUFF_SZ)) - SOCK_LOG_ERROR("failed to init comm-cache\n"); - pe_entry->cache_sz = SOCK_PE_OVERFLOW_COMM_BUFF_SZ; - dlist_insert_tail(&pe_entry->entry, &pe->pool_list); - } - } else { - pe->num_free_entries--; - entry = pe->free_list.next; - pe_entry = container_of(entry, struct sock_pe_entry, entry); - - assert(ofi_rbempty(&pe_entry->comm_buf)); - dlist_remove(&pe_entry->entry); - dlist_insert_tail(&pe_entry->entry, &pe->busy_list); - SOCK_LOG_DBG("progress entry %p acquired : %lu\n", pe_entry, - PE_INDEX(pe, pe_entry)); - } - return pe_entry; -} - -static void sock_pe_report_send_cq_completion(struct sock_pe_entry *pe_entry) -{ - ssize_t ret = 0; - if (!(pe_entry->flags & SOCK_NO_COMPLETION)) { - if (pe_entry->comp->send_cq && - (!pe_entry->comp->send_cq_event || - (pe_entry->msg_hdr.flags & FI_COMPLETION))) - ret = pe_entry->comp->send_cq->report_completion( - pe_entry->comp->send_cq, pe_entry->addr, pe_entry); - } - - if (ret < 0) { - SOCK_LOG_ERROR("Failed to report completion %p\n", - pe_entry); - if (pe_entry->comp->eq) { - sock_eq_report_error( - pe_entry->comp->eq, - &pe_entry->comp->send_cq->cq_fid.fid, - pe_entry->comp->send_cq->cq_fid.fid.context, - 0, FI_ENOSPC, -FI_ENOSPC, NULL, 0); - } - } -} - -static void sock_pe_report_send_completion(struct sock_pe_entry *pe_entry) -{ - struct sock_triggered_context *trigger_context; - - if (pe_entry->completion_reported) - return; - - if (!(pe_entry->flags & SOCK_TRIGGERED_OP)) { - sock_pe_report_send_cq_completion(pe_entry); - 
if (pe_entry->comp->send_cntr) - sock_cntr_inc(pe_entry->comp->send_cntr); - } else { - trigger_context = (void *) (uintptr_t) pe_entry->context; - fi_cntr_add(trigger_context->trigger.work.completion_cntr, 1); - } - pe_entry->completion_reported = 1; -} - -static void sock_pe_report_recv_cq_completion(struct sock_pe_entry *pe_entry) -{ - ssize_t ret = 0; - if (pe_entry->comp->recv_cq && - (!pe_entry->comp->recv_cq_event || - (pe_entry->flags & FI_COMPLETION))) - ret = pe_entry->comp->recv_cq->report_completion( - pe_entry->comp->recv_cq, pe_entry->addr, - pe_entry); - - if (ret < 0) { - SOCK_LOG_ERROR("Failed to report completion %p\n", pe_entry); - if (pe_entry->comp->eq) { - sock_eq_report_error( - pe_entry->comp->eq, - &pe_entry->comp->recv_cq->cq_fid.fid, - pe_entry->comp->recv_cq->cq_fid.fid.context, - 0, FI_ENOSPC, -FI_ENOSPC, NULL, 0); - } - } -} - -static void sock_pe_report_recv_completion(struct sock_pe_entry *pe_entry) -{ - struct sock_triggered_context *trigger_context; - - if (pe_entry->completion_reported) - return; - - if (!(pe_entry->flags & SOCK_TRIGGERED_OP)) { - sock_pe_report_recv_cq_completion(pe_entry); - if (pe_entry->comp->recv_cntr) - sock_cntr_inc(pe_entry->comp->recv_cntr); - } else { - trigger_context = (void *) (uintptr_t) pe_entry->context; - fi_cntr_add(trigger_context->trigger.work.completion_cntr, 1); - } - pe_entry->completion_reported = 1; -} - -static void sock_pe_report_mr_completion(struct sock_domain *domain, - struct sock_pe_entry *pe_entry) -{ - int i; - struct sock_mr *mr; - - for (i = 0; i < pe_entry->msg_hdr.dest_iov_len; i++) { - ofi_mutex_lock(&domain->lock); - mr = ofi_mr_map_get(&domain->mr_map, - pe_entry->pe.rx.rx_iov[i].iov.key); - ofi_mutex_unlock(&domain->lock); - if (!mr || (!mr->cq && !mr->cntr)) - continue; - - pe_entry->buf = pe_entry->pe.rx.rx_iov[i].iov.addr; - pe_entry->data_len = pe_entry->pe.rx.rx_iov[i].iov.len; - - if (mr->cntr) - sock_cntr_inc(mr->cntr); - } -} - -static void 
sock_pe_report_remote_write(struct sock_rx_ctx *rx_ctx, - struct sock_pe_entry *pe_entry) -{ - pe_entry->buf = pe_entry->pe.rx.rx_iov[0].iov.addr; - pe_entry->data_len = pe_entry->pe.rx.rx_iov[0].iov.len; - - if (pe_entry->flags & FI_REMOTE_CQ_DATA) { - sock_pe_report_recv_cq_completion(pe_entry); - } - - if ((!pe_entry->comp->rem_write_cntr && - !(pe_entry->msg_hdr.flags & FI_REMOTE_WRITE))) - return; - - if (pe_entry->comp->rem_write_cntr) - sock_cntr_inc(pe_entry->comp->rem_write_cntr); -} - -static void sock_pe_report_write_completion(struct sock_pe_entry *pe_entry) -{ - struct sock_triggered_context *trigger_context; - - if (pe_entry->completion_reported) - return; - - if (!(pe_entry->flags & SOCK_NO_COMPLETION)) - sock_pe_report_send_cq_completion(pe_entry); - - if (!(pe_entry->flags & SOCK_TRIGGERED_OP)) { - if (pe_entry->comp->write_cntr) - sock_cntr_inc(pe_entry->comp->write_cntr); - } else { - trigger_context = (void *) (uintptr_t) pe_entry->context; - fi_cntr_add(trigger_context->trigger.work.completion_cntr, 1); - } - pe_entry->completion_reported = 1; -} - -static void sock_pe_report_remote_read(struct sock_rx_ctx *rx_ctx, - struct sock_pe_entry *pe_entry) -{ - pe_entry->buf = pe_entry->pe.rx.rx_iov[0].iov.addr; - pe_entry->data_len = pe_entry->pe.rx.rx_iov[0].iov.len; - - if ((!pe_entry->comp->rem_read_cntr && - !(pe_entry->msg_hdr.flags & FI_REMOTE_READ))) - return; - - if (pe_entry->comp->rem_read_cntr) - sock_cntr_inc(pe_entry->comp->rem_read_cntr); -} - -static void sock_pe_report_read_completion(struct sock_pe_entry *pe_entry) -{ - struct sock_triggered_context *trigger_context; - - if (pe_entry->completion_reported) - return; - - if (!(pe_entry->flags & SOCK_NO_COMPLETION)) - sock_pe_report_send_cq_completion(pe_entry); - - if (!(pe_entry->flags & SOCK_TRIGGERED_OP)) { - if (pe_entry->comp->read_cntr) - sock_cntr_inc(pe_entry->comp->read_cntr); - } else { - trigger_context = (void *) (uintptr_t) pe_entry->context; - 
fi_cntr_add(trigger_context->trigger.work.completion_cntr, 1); - } - pe_entry->completion_reported = 1; -} - -static void sock_pe_report_rx_error(struct sock_pe_entry *pe_entry, int rem, int err) -{ - if (pe_entry->completion_reported) - return; - - if (pe_entry->comp->recv_cntr) - fi_cntr_adderr(&pe_entry->comp->recv_cntr->cntr_fid, 1); - if (pe_entry->comp->recv_cq) - sock_cq_report_error(pe_entry->comp->recv_cq, pe_entry, rem, - err, -err, NULL, 0); - pe_entry->completion_reported = 1; -} - -static void sock_pe_report_tx_error(struct sock_pe_entry *pe_entry, int rem, int err) -{ - if (pe_entry->completion_reported) - return; - - if (pe_entry->comp->send_cntr) - fi_cntr_adderr(&pe_entry->comp->send_cntr->cntr_fid, 1); - if (pe_entry->comp->send_cq) - sock_cq_report_error(pe_entry->comp->send_cq, pe_entry, rem, - err, -err, NULL, 0); - pe_entry->completion_reported = 1; -} - -static void sock_pe_report_tx_rma_read_err(struct sock_pe_entry *pe_entry, - int err) -{ - if (pe_entry->completion_reported) - return; - - if (pe_entry->comp->read_cntr) - fi_cntr_adderr(&pe_entry->comp->read_cntr->cntr_fid, 1); - if (pe_entry->comp->send_cq) - sock_cq_report_error(pe_entry->comp->send_cq, pe_entry, 0, - err, -err, NULL, 0); - pe_entry->completion_reported = 1; -} - -static void sock_pe_report_tx_rma_write_err(struct sock_pe_entry *pe_entry, - int err) -{ - if (pe_entry->completion_reported) - return; - - if (pe_entry->comp->write_cntr) - fi_cntr_adderr(&pe_entry->comp->write_cntr->cntr_fid, 1); - if (pe_entry->comp->send_cq) - sock_cq_report_error(pe_entry->comp->send_cq, pe_entry, 0, - err, -err, NULL, 0); - pe_entry->completion_reported = 1; -} - -static void sock_pe_progress_pending_ack(struct sock_pe *pe, - struct sock_pe_entry *pe_entry) -{ - size_t len, data_len, i; - struct sock_conn *conn = pe_entry->conn; - - if (!conn || pe_entry->rem) - return; - - if (conn->tx_pe_entry != NULL && conn->tx_pe_entry != pe_entry) { - SOCK_LOG_DBG("Cannot progress %p as conn %p is 
being used by %p\n", - pe_entry, conn, conn->tx_pe_entry); - return; - } - - if (conn->tx_pe_entry == NULL) { - SOCK_LOG_DBG("Connection %p grabbed by %p\n", conn, pe_entry); - conn->tx_pe_entry = pe_entry; - } - - if (sock_pe_send_field(pe_entry, &pe_entry->response, - sizeof(pe_entry->response), 0)) - return; - len = sizeof(struct sock_msg_response); - - switch (pe_entry->response.msg_hdr.op_type) { - case SOCK_OP_READ_COMPLETE: - for (i = 0; i < pe_entry->msg_hdr.dest_iov_len; i++) { - if (sock_pe_send_field( - pe_entry, - (char *) (uintptr_t) pe_entry->pe.rx.rx_iov[i].iov.addr, - pe_entry->pe.rx.rx_iov[i].iov.len, len)) - return; - len += pe_entry->pe.rx.rx_iov[i].iov.len; - } - break; - - case SOCK_OP_ATOMIC_COMPLETE: - data_len = pe_entry->total_len - len; - if (data_len) { - if (sock_pe_send_field(pe_entry, - pe_entry->pe.rx.atomic_cmp, - data_len, len)) - return; - len += data_len; - } - break; - - default: - break; - } - - if (pe_entry->total_len == pe_entry->done_len && !pe_entry->rem) { - sock_comm_flush(pe_entry); - if (!sock_comm_tx_done(pe_entry)) - return; - pe_entry->is_complete = 1; - pe_entry->pe.rx.pending_send = 0; - pe_entry->conn->tx_pe_entry = NULL; - } -} - -static void sock_pe_send_response(struct sock_pe *pe, - struct sock_rx_ctx *rx_ctx, - struct sock_pe_entry *pe_entry, - size_t data_len, uint8_t op_type, int err) -{ - struct sock_msg_response *response = &pe_entry->response; - memset(response, 0, sizeof(struct sock_msg_response)); - - response->pe_entry_id = htons(pe_entry->msg_hdr.pe_entry_id); - response->err = htonl(err); - response->msg_hdr.dest_iov_len = 0; - response->msg_hdr.flags = 0; - response->msg_hdr.msg_len = sizeof(*response) + data_len; - response->msg_hdr.version = SOCK_WIRE_PROTO_VERSION; - response->msg_hdr.op_type = op_type; - response->msg_hdr.msg_len = htonll(response->msg_hdr.msg_len); - response->msg_hdr.rx_id = pe_entry->msg_hdr.rx_id; - - pe->pe_atomic = NULL; - pe_entry->done_len = 0; - 
pe_entry->pe.rx.pending_send = 1; - if (pe_entry->rem == 0) - pe_entry->conn->rx_pe_entry = NULL; - pe_entry->total_len = sizeof(*response) + data_len; - - sock_pe_progress_pending_ack(pe, pe_entry); -} - -static inline ssize_t sock_pe_read_response(struct sock_pe_entry *pe_entry) -{ - ssize_t ret; - size_t len, data_len; - - if (pe_entry->done_len >= sizeof(struct sock_msg_response)) - return 0; - - len = sizeof(struct sock_msg_hdr); - data_len = sizeof(struct sock_msg_response) - len; - ret = sock_pe_recv_field(pe_entry, &pe_entry->response.pe_entry_id, - data_len, len); - if (ret) - return ret; - pe_entry->response.pe_entry_id = ntohs(pe_entry->response.pe_entry_id); - pe_entry->response.err = ntohl(pe_entry->response.err); - return 0; -} - -static int sock_pe_handle_ack(struct sock_pe *pe, - struct sock_pe_entry *pe_entry) -{ - struct sock_pe_entry *waiting_entry; - struct sock_msg_response *response; - - if (sock_pe_read_response(pe_entry)) - return 0; - - response = &pe_entry->response; - assert(response->pe_entry_id <= SOCK_PE_MAX_ENTRIES); - waiting_entry = &pe->pe_table[response->pe_entry_id]; - SOCK_LOG_DBG("Received ack for PE entry %p (index: %d)\n", - waiting_entry, response->pe_entry_id); - - assert(waiting_entry->type == SOCK_PE_TX); - sock_pe_report_send_completion(waiting_entry); - waiting_entry->is_complete = 1; - pe_entry->is_complete = 1; - return 0; -} - -static int sock_pe_handle_error(struct sock_pe *pe, - struct sock_pe_entry *pe_entry) -{ - struct sock_pe_entry *waiting_entry; - struct sock_msg_response *response; - - if (sock_pe_read_response(pe_entry)) - return 0; - - response = &pe_entry->response; - assert(response->pe_entry_id <= SOCK_PE_MAX_ENTRIES); - waiting_entry = &pe->pe_table[response->pe_entry_id]; - SOCK_LOG_ERROR("Received error for PE entry %p (index: %d)\n", - waiting_entry, response->pe_entry_id); - - assert(waiting_entry->type == SOCK_PE_TX); - - switch (pe_entry->msg_hdr.op_type) { - case SOCK_OP_READ_ERROR: - 
sock_pe_report_tx_rma_read_err(waiting_entry, - pe_entry->response.err); - break; - case SOCK_OP_WRITE_ERROR: - case SOCK_OP_ATOMIC_ERROR: - sock_pe_report_tx_rma_write_err(waiting_entry, - pe_entry->response.err); - break; - default: - SOCK_LOG_ERROR("Invalid op type\n"); - } - waiting_entry->is_complete = 1; - pe_entry->is_complete = 1; - return 0; -} - -static int sock_pe_handle_read_complete(struct sock_pe *pe, - struct sock_pe_entry *pe_entry) -{ - struct sock_pe_entry *waiting_entry; - struct sock_msg_response *response; - size_t len, i; - - if (sock_pe_read_response(pe_entry)) - return 0; - - response = &pe_entry->response; - assert(response->pe_entry_id <= SOCK_PE_MAX_ENTRIES); - waiting_entry = &pe->pe_table[response->pe_entry_id]; - SOCK_LOG_DBG("Received read complete for PE entry %p (index: %d)\n", - waiting_entry, response->pe_entry_id); - - waiting_entry = &pe->pe_table[response->pe_entry_id]; - assert(waiting_entry->type == SOCK_PE_TX); - - len = sizeof(struct sock_msg_response); - for (i = 0; i < waiting_entry->pe.tx.tx_op.dest_iov_len; i++) { - if (sock_pe_recv_field( - pe_entry, - (char *) (uintptr_t) waiting_entry->pe.tx.tx_iov[i].dst.iov.addr, - waiting_entry->pe.tx.tx_iov[i].dst.iov.len, len)) - return 0; - len += waiting_entry->pe.tx.tx_iov[i].dst.iov.len; - } - - sock_pe_report_read_completion(waiting_entry); - waiting_entry->is_complete = 1; - pe_entry->is_complete = 1; - return 0; -} - -static int sock_pe_handle_write_complete(struct sock_pe *pe, - struct sock_pe_entry *pe_entry) -{ - struct sock_pe_entry *waiting_entry; - struct sock_msg_response *response; - - if (sock_pe_read_response(pe_entry)) - return 0; - - response = &pe_entry->response; - assert(response->pe_entry_id <= SOCK_PE_MAX_ENTRIES); - waiting_entry = &pe->pe_table[response->pe_entry_id]; - SOCK_LOG_DBG("Received ack for PE entry %p (index: %d)\n", - waiting_entry, response->pe_entry_id); - - assert(waiting_entry->type == SOCK_PE_TX); - 
sock_pe_report_write_completion(waiting_entry); - waiting_entry->is_complete = 1; - pe_entry->is_complete = 1; - return 0; -} - -static int sock_pe_handle_atomic_complete(struct sock_pe *pe, - struct sock_pe_entry *pe_entry) -{ - size_t datatype_sz; - struct sock_pe_entry *waiting_entry; - struct sock_msg_response *response; - size_t len, i; - - if (sock_pe_read_response(pe_entry)) - return 0; - - response = &pe_entry->response; - assert(response->pe_entry_id <= SOCK_PE_MAX_ENTRIES); - waiting_entry = &pe->pe_table[response->pe_entry_id]; - SOCK_LOG_DBG("Received atomic complete for PE entry %p (index: %d)\n", - waiting_entry, response->pe_entry_id); - - waiting_entry = &pe->pe_table[response->pe_entry_id]; - assert(waiting_entry->type == SOCK_PE_TX); - - len = sizeof(struct sock_msg_response); - datatype_sz = ofi_datatype_size(waiting_entry->pe.tx.tx_op.atomic.datatype); - for (i = 0; i < waiting_entry->pe.tx.tx_op.atomic.res_iov_len; i++) { - if (sock_pe_recv_field( - pe_entry, - (char *) (uintptr_t) waiting_entry->pe.tx.tx_iov[i].res.ioc.addr, - waiting_entry->pe.tx.tx_iov[i].res.ioc.count * datatype_sz, - len)) - return 0; - len += waiting_entry->pe.tx.tx_iov[i].res.ioc.count * datatype_sz; - } - - if (waiting_entry->pe.rx.rx_op.atomic.res_iov_len) - sock_pe_report_read_completion(waiting_entry); - else - sock_pe_report_write_completion(waiting_entry); - - waiting_entry->is_complete = 1; - pe_entry->is_complete = 1; - return 0; -} - -static int sock_pe_process_rx_read(struct sock_pe *pe, - struct sock_rx_ctx *rx_ctx, - struct sock_pe_entry *pe_entry) -{ - int i; - struct sock_mr *mr; - uint64_t len, entry_len, data_len; - - len = sizeof(struct sock_msg_hdr); - entry_len = sizeof(union sock_iov) * pe_entry->msg_hdr.dest_iov_len; - if (sock_pe_recv_field(pe_entry, &pe_entry->pe.rx.rx_iov[0], - entry_len, len)) - return 0; - len += entry_len; - - /* verify mr */ - data_len = 0; - for (i = 0; i < pe_entry->msg_hdr.dest_iov_len && !pe_entry->mr_checked; i++) { - - 
mr = sock_mr_verify_key(rx_ctx->domain, - pe_entry->pe.rx.rx_iov[i].iov.key, - (uintptr_t *) &pe_entry->pe.rx.rx_iov[i].iov.addr, - pe_entry->pe.rx.rx_iov[i].iov.len, - FI_REMOTE_READ); - if (!mr) { - SOCK_LOG_ERROR("Remote memory access error: %p, %zu, %" PRIu64 "\n", - (void *) (uintptr_t) pe_entry->pe.rx.rx_iov[i].iov.addr, - pe_entry->pe.rx.rx_iov[i].iov.len, - pe_entry->pe.rx.rx_iov[i].iov.key); - pe_entry->is_error = 1; - pe_entry->rem = pe_entry->total_len - pe_entry->done_len; - sock_pe_send_response(pe, rx_ctx, pe_entry, 0, - SOCK_OP_READ_ERROR, FI_EACCES); - return 0; - } - - data_len += pe_entry->pe.rx.rx_iov[i].iov.len; - } - pe_entry->mr_checked = 1; - pe_entry->buf = pe_entry->pe.rx.rx_iov[0].iov.addr; - pe_entry->data_len = data_len; - pe_entry->flags |= (FI_RMA | FI_REMOTE_READ); - if (!pe_entry->completion_reported) { - sock_pe_report_remote_read(rx_ctx, pe_entry); - pe_entry->completion_reported = 1; - } - sock_pe_send_response(pe, rx_ctx, pe_entry, data_len, - SOCK_OP_READ_COMPLETE, 0); - return 0; -} - -static int sock_pe_process_rx_write(struct sock_pe *pe, - struct sock_rx_ctx *rx_ctx, - struct sock_pe_entry *pe_entry) -{ - int i, ret = 0; - struct sock_mr *mr; - uint64_t rem, len, entry_len; - - len = sizeof(struct sock_msg_hdr); - if (pe_entry->msg_hdr.flags & FI_REMOTE_CQ_DATA) { - if (sock_pe_recv_field(pe_entry, &pe_entry->data, - SOCK_CQ_DATA_SIZE, len)) - return 0; - len += SOCK_CQ_DATA_SIZE; - } - - entry_len = sizeof(union sock_iov) * pe_entry->msg_hdr.dest_iov_len; - if (sock_pe_recv_field(pe_entry, &pe_entry->pe.rx.rx_iov[0], entry_len, len)) - return 0; - len += entry_len; - - for (i = 0; i < pe_entry->msg_hdr.dest_iov_len && !pe_entry->mr_checked; i++) { - mr = sock_mr_verify_key(rx_ctx->domain, - pe_entry->pe.rx.rx_iov[i].iov.key, - (uintptr_t *) &pe_entry->pe.rx.rx_iov[i].iov.addr, - pe_entry->pe.rx.rx_iov[i].iov.len, - FI_REMOTE_WRITE); - if (!mr) { - SOCK_LOG_ERROR("Remote memory access error: %p, %zu, %" PRIu64 "\n", - (void 
*) (uintptr_t) pe_entry->pe.rx.rx_iov[i].iov.addr, - pe_entry->pe.rx.rx_iov[i].iov.len, - pe_entry->pe.rx.rx_iov[i].iov.key); - pe_entry->is_error = 1; - pe_entry->rem = pe_entry->total_len - pe_entry->done_len; - sock_pe_send_response(pe, rx_ctx, pe_entry, 0, - SOCK_OP_WRITE_ERROR, FI_EACCES); - return 0; - } - } - pe_entry->mr_checked = 1; - - rem = pe_entry->msg_hdr.msg_len - len; - for (i = 0; rem > 0 && i < pe_entry->msg_hdr.dest_iov_len; i++) { - if (sock_pe_recv_field(pe_entry, - (void *) (uintptr_t) pe_entry->pe.rx.rx_iov[i].iov.addr, - pe_entry->pe.rx.rx_iov[i].iov.len, len)) - return 0; - len += pe_entry->pe.rx.rx_iov[i].iov.len; - rem -= pe_entry->pe.rx.rx_iov[i].iov.len; - } - pe_entry->buf = pe_entry->pe.rx.rx_iov[0].iov.addr; - pe_entry->data_len = 0; - for (i = 0; i < pe_entry->msg_hdr.dest_iov_len; i++) { - pe_entry->data_len += pe_entry->pe.rx.rx_iov[i].iov.len; - if ((pe_entry->msg_hdr.flags & FI_COMMIT_COMPLETE) && - ofi_pmem_commit) { - (*ofi_pmem_commit)((const void *) (uintptr_t) - pe_entry->pe.rx.rx_iov[i].iov.addr, - pe_entry->pe.rx.rx_iov[i].iov.len); - } - } - - /* report error, if any */ - if (rem) { - sock_pe_report_rx_error(pe_entry, (int) rem, FI_ETRUNC); - goto out; - } - -out: - pe_entry->flags |= (FI_RMA | FI_REMOTE_WRITE); - if (!pe_entry->completion_reported) { - sock_pe_report_remote_write(rx_ctx, pe_entry); - sock_pe_report_mr_completion(rx_ctx->domain, pe_entry); - pe_entry->completion_reported = 1; - } - sock_pe_send_response(pe, rx_ctx, pe_entry, 0, - SOCK_OP_WRITE_COMPLETE, 0); - return ret; -} - -/* - * Provider re-uses compare buffer to return result. This can be optimized - * in the future to have a separate buffer. 
- */ -static void sock_pe_do_atomic(void *cmp, void *dst, void *src, - enum fi_datatype datatype, enum fi_op op, - size_t cnt, int fetch) -{ - char tmp_result[SOCK_EP_MAX_ATOMIC_SZ]; - - if (ofi_atomic_isswap_op(op)) { - ofi_atomic_swap_handler(op, datatype, dst, src, cmp, - tmp_result, cnt); - if (cmp != NULL) - memcpy(cmp, tmp_result, ofi_datatype_size(datatype) * cnt); - } else if (fetch && ofi_atomic_isreadwrite_op(op)) { - ofi_atomic_readwrite_handler(op, datatype, dst, src, cmp, cnt); - } else if (ofi_atomic_iswrite_op(op)) { - ofi_atomic_write_handler(op, datatype, dst, src, cnt); - } -} - -static int sock_pe_recv_atomic_hdrs(struct sock_pe *pe, - struct sock_pe_entry *pe_entry, - size_t *datatype_sz, uint64_t *entry_len) -{ - uint64_t len; - int i; - - if (!pe_entry->pe.rx.atomic_cmp) { - pe_entry->pe.rx.atomic_cmp = ofi_buf_alloc(pe->atomic_rx_pool); - pe_entry->pe.rx.atomic_src = ofi_buf_alloc(pe->atomic_rx_pool); - if (!pe_entry->pe.rx.atomic_cmp || !pe_entry->pe.rx.atomic_src) - return -FI_ENOMEM; - } - - len = sizeof(struct sock_msg_hdr); - if (sock_pe_recv_field(pe_entry, &pe_entry->pe.rx.rx_op, - sizeof(struct sock_op), len)) - return -FI_EAGAIN; - len += sizeof(struct sock_op); - - if (pe_entry->msg_hdr.flags & FI_REMOTE_CQ_DATA) { - if (sock_pe_recv_field(pe_entry, &pe_entry->data, - SOCK_CQ_DATA_SIZE, len)) - return -FI_EAGAIN; - len += SOCK_CQ_DATA_SIZE; - } - - /* dst iocs */ - *entry_len = sizeof(union sock_iov) * pe_entry->pe.rx.rx_op.dest_iov_len; - if (sock_pe_recv_field(pe_entry, &pe_entry->pe.rx.rx_iov[0], - *entry_len, len)) - return -FI_EAGAIN; - len += *entry_len; - - *entry_len = 0; - *datatype_sz = ofi_datatype_size(pe_entry->pe.rx.rx_op.atomic.datatype); - for (i = 0; i < pe_entry->pe.rx.rx_op.dest_iov_len; i++) { - *entry_len += pe_entry->pe.rx.rx_iov[i].ioc.count; - } - *entry_len *= *datatype_sz; - - /* cmp data */ - if (pe_entry->pe.rx.rx_op.atomic.cmp_iov_len) { - if (sock_pe_recv_field(pe_entry, pe_entry->pe.rx.atomic_cmp, - 
*entry_len, len)) - return -FI_EAGAIN; - len += *entry_len; - } - - /* src data */ - if (pe_entry->pe.rx.rx_op.atomic.op != FI_ATOMIC_READ && - pe_entry->pe.rx.rx_op.src_iov_len) { - if (sock_pe_recv_field(pe_entry, pe_entry->pe.rx.atomic_src, - *entry_len, len)) - return -FI_EAGAIN; - len += *entry_len; - } - - return 0; -} - -static int sock_pe_process_rx_atomic(struct sock_pe *pe, - struct sock_rx_ctx *rx_ctx, - struct sock_pe_entry *pe_entry) -{ - int i, ret = 0; - size_t datatype_sz; - struct sock_mr *mr; - uint64_t offset, entry_len; - - ret = sock_pe_recv_atomic_hdrs(pe, pe_entry, &datatype_sz, &entry_len); - if (ret) - return ret == -FI_EAGAIN ? 0 : ret; - - for (i = 0; i < pe_entry->pe.rx.rx_op.dest_iov_len && !pe_entry->mr_checked; i++) { - mr = sock_mr_verify_key(rx_ctx->domain, - pe_entry->pe.rx.rx_iov[i].ioc.key, - (uintptr_t *) &pe_entry->pe.rx.rx_iov[i].ioc.addr, - pe_entry->pe.rx.rx_iov[i].ioc.count * datatype_sz, - FI_REMOTE_WRITE); - if (!mr) { - SOCK_LOG_ERROR("Remote memory access error: %p, %zu, %" PRIu64 "\n", - (void *) (uintptr_t) pe_entry->pe.rx.rx_iov[i].ioc.addr, - pe_entry->pe.rx.rx_iov[i].ioc.count * datatype_sz, - pe_entry->pe.rx.rx_iov[i].ioc.key); - pe_entry->is_error = 1; - pe_entry->rem = pe_entry->total_len - pe_entry->done_len; - sock_pe_send_response(pe, rx_ctx, pe_entry, 0, - SOCK_OP_ATOMIC_ERROR, FI_EACCES); - return 0; - } - } - pe_entry->mr_checked = 1; - - if (pe->pe_atomic) { - if (pe->pe_atomic != pe_entry) - return 0; - } else { - pe->pe_atomic = pe_entry; - } - - offset = 0; - for (i = 0; i < pe_entry->pe.rx.rx_op.dest_iov_len; i++) { - sock_pe_do_atomic(pe_entry->pe.rx.atomic_cmp + offset, - (char *) (uintptr_t) pe_entry->pe.rx.rx_iov[i].ioc.addr, - pe_entry->pe.rx.atomic_src + offset, - pe_entry->pe.rx.rx_op.atomic.datatype, - pe_entry->pe.rx.rx_op.atomic.op, - pe_entry->pe.rx.rx_iov[i].ioc.count, - pe_entry->pe.rx.rx_op.atomic.res_iov_len); - offset += datatype_sz * pe_entry->pe.rx.rx_iov[i].ioc.count; - } - - 
pe_entry->buf = pe_entry->pe.rx.rx_iov[0].iov.addr; - pe_entry->data_len = offset; - - pe_entry->flags |= FI_ATOMIC; - if (pe_entry->pe.rx.rx_op.atomic.op == FI_ATOMIC_READ) - pe_entry->flags |= FI_REMOTE_READ; - else - pe_entry->flags |= FI_REMOTE_WRITE; - - if (!pe_entry->completion_reported) { - sock_pe_report_remote_write(rx_ctx, pe_entry); - sock_pe_report_mr_completion(rx_ctx->domain, pe_entry); - pe_entry->completion_reported = 1; - } - sock_pe_send_response(pe, rx_ctx, pe_entry, - pe_entry->pe.rx.rx_op.atomic.res_iov_len ? - entry_len : 0, SOCK_OP_ATOMIC_COMPLETE, 0); - return ret; -} - -/* - * For simplicity, we treat all tagged atomics as buffered. This would need - * to change if we wanted to report back possible error data or handle - * atomic fetch operations. - */ -static int -sock_pe_process_rx_tatomic(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx, - struct sock_pe_entry *pe_entry) -{ - int ret = 0; - size_t datatype_sz; - uint64_t entry_len; - struct sock_rx_entry *rx_entry; - - ret = sock_pe_recv_atomic_hdrs(pe, pe_entry, &datatype_sz, &entry_len); - if (ret) - return ret == -FI_EAGAIN ? 
0 : ret; - - assert(pe_entry->pe.rx.rx_iov[0].ioc.addr == 0); - assert(pe_entry->pe.rx.rx_op.dest_iov_len == 1); - assert(pe_entry->pe.rx.rx_op.atomic.cmp_iov_len == 0); - - pe_entry->tag = pe_entry->pe.rx.rx_iov[0].ioc.key; - pe_entry->data_len = entry_len; - - ofi_mutex_lock(&rx_ctx->lock); - rx_entry = sock_rx_new_buffered_entry(rx_ctx, entry_len); - if (!rx_entry) { - ofi_mutex_unlock(&rx_ctx->lock); - return -FI_ENOMEM; - } - - rx_entry->rx_op = pe_entry->pe.rx.rx_op; - memcpy((void *) (uintptr_t) rx_entry->iov[0].ioc.addr, - pe_entry->pe.rx.atomic_src, entry_len); - rx_entry->addr = pe_entry->addr; - rx_entry->tag = pe_entry->tag; - rx_entry->data = pe_entry->data; - rx_entry->ignore = 0; - rx_entry->comp = pe_entry->comp; - rx_entry->is_complete = 1; - - if (pe_entry->msg_hdr.flags & FI_REMOTE_CQ_DATA) - rx_entry->flags |= FI_REMOTE_CQ_DATA; - rx_entry->flags |= FI_TAGGED | FI_ATOMIC; - rx_entry->is_tagged = 1; - - pe_entry->pe.rx.rx_entry = rx_entry; - - sock_pe_progress_buffered_rx(rx_ctx, true); - ofi_mutex_unlock(&rx_ctx->lock); - - pe_entry->is_complete = 1; - - sock_pe_send_response(pe, rx_ctx, pe_entry, 0, - SOCK_OP_ATOMIC_COMPLETE, 0); - return ret; -} - -ssize_t sock_rx_peek_recv(struct sock_rx_ctx *rx_ctx, fi_addr_t addr, - uint64_t tag, uint64_t ignore, void *context, - uint64_t flags, uint8_t is_tagged) -{ - struct sock_rx_entry *rx_buffered; - struct sock_pe_entry pe_entry; - - ofi_mutex_lock(&rx_ctx->lock); - rx_buffered = sock_rx_get_buffered_entry(rx_ctx, - (rx_ctx->attr.caps & FI_DIRECTED_RECV) ? 
- addr : FI_ADDR_UNSPEC, - tag, ignore, is_tagged); - - memset(&pe_entry, 0, sizeof(pe_entry)); - pe_entry.comp = &rx_ctx->comp; - pe_entry.context = (uintptr_t)context; - pe_entry.flags = (flags | FI_MSG | FI_RECV); - if (is_tagged) - pe_entry.flags |= FI_TAGGED; - - if (rx_buffered) { - pe_entry.data_len = rx_buffered->total_len; - pe_entry.tag = rx_buffered->tag; - pe_entry.data = rx_buffered->data; - rx_buffered->context = (uintptr_t)context; - if (flags & FI_CLAIM) - rx_buffered->is_claimed = 1; - - if (flags & FI_DISCARD) { - dlist_remove(&rx_buffered->entry); - sock_rx_release_entry(rx_buffered); - } - sock_pe_report_recv_completion(&pe_entry); - } else { - sock_cq_report_error(rx_ctx->comp.recv_cq, &pe_entry, 0, - FI_ENOMSG, -FI_ENOMSG, NULL, 0); - } - ofi_mutex_unlock(&rx_ctx->lock); - return 0; -} - -ssize_t sock_rx_claim_recv(struct sock_rx_ctx *rx_ctx, void *context, - uint64_t flags, uint64_t tag, uint64_t ignore, - uint8_t is_tagged, const struct iovec *msg_iov, - size_t iov_count) -{ - ssize_t ret = 0; - size_t rem = 0, i, offset, len; - struct dlist_entry *entry; - struct sock_pe_entry pe_entry; - struct sock_rx_entry *rx_buffered = NULL; - - ofi_mutex_lock(&rx_ctx->lock); - for (entry = rx_ctx->rx_buffered_list.next; - entry != &rx_ctx->rx_buffered_list; entry = entry->next) { - rx_buffered = container_of(entry, struct sock_rx_entry, entry); - if (rx_buffered->is_claimed && - (uintptr_t)rx_buffered->context == (uintptr_t)context && - is_tagged == rx_buffered->is_tagged && - (tag & ~ignore) == (rx_buffered->tag & ~ignore)) - break; - else - rx_buffered = NULL; - } - - if (rx_buffered) { - memset(&pe_entry, 0, sizeof(pe_entry)); - pe_entry.comp = &rx_ctx->comp; - pe_entry.data_len = rx_buffered->total_len; - pe_entry.tag = rx_buffered->tag; - pe_entry.data = rx_buffered->data; - pe_entry.context = rx_buffered->context; - pe_entry.flags = (flags | FI_MSG | FI_RECV); - pe_entry.addr = rx_buffered->addr; - if (is_tagged) - pe_entry.flags |= FI_TAGGED; - 
- if (!(flags & FI_DISCARD)) { - pe_entry.buf = (uintptr_t)msg_iov[0].iov_base; - offset = 0; - rem = rx_buffered->total_len; - for (i = 0; i < iov_count && rem > 0; i++) { - len = MIN(msg_iov[i].iov_len, rem); - memcpy(msg_iov[i].iov_base, - (char *) (uintptr_t) - rx_buffered->iov[0].iov.addr + offset, len); - rem -= len; - offset += len; - } - } - - if (rem) { - SOCK_LOG_DBG("Not enough space in posted recv buffer\n"); - sock_pe_report_rx_error(&pe_entry, (int) rem, FI_ETRUNC); - } else { - sock_pe_report_recv_completion(&pe_entry); - } - - dlist_remove(&rx_buffered->entry); - sock_rx_release_entry(rx_buffered); - if (rx_ctx->progress_start == entry) - rx_ctx->progress_start = &rx_ctx->rx_buffered_list; - } else { - ret = -FI_ENOMSG; - } - - ofi_mutex_unlock(&rx_ctx->lock); - return ret; -} - -/* Check buffered msg list against posted list. If shallow is true, - * we only check SOCK_EP_MAX_PROGRESS_CNT messages to prevent progress - * test taking too long */ -static int sock_pe_progress_buffered_rx(struct sock_rx_ctx *rx_ctx, - bool shallow) -{ - struct dlist_entry *entry; - struct sock_pe_entry pe_entry; - struct sock_rx_entry *rx_buffered, *rx_posted; - size_t i, rem = 0, offset, len, used_len, dst_offset, datatype_sz; - size_t max_cnt; - char *src, *dst; - - if (dlist_empty(&rx_ctx->rx_entry_list) || - dlist_empty(&rx_ctx->rx_buffered_list)) - return 0; - - if (!shallow) { - /* ignoring rx_ctx->progress_start */ - entry = rx_ctx->rx_buffered_list.next; - max_cnt = SIZE_MAX; - } else { - /* continue where last time left off */ - entry = rx_ctx->progress_start; - if (entry == &rx_ctx->rx_buffered_list) { - entry = entry->next; - } - max_cnt = SOCK_EP_MAX_PROGRESS_CNT; - } - for (i = 0; i < max_cnt && entry != &rx_ctx->rx_buffered_list; i++) { - rx_buffered = container_of(entry, struct sock_rx_entry, entry); - entry = entry->next; - - if (!rx_buffered->is_complete || rx_buffered->is_claimed) - continue; - - rx_posted = sock_rx_get_entry(rx_ctx, rx_buffered->addr, 
- rx_buffered->tag, - rx_buffered->is_tagged); - if (!rx_posted) - continue; - - SOCK_LOG_DBG("Consuming buffered entry: %p, ctx: %p\n", - rx_buffered, rx_ctx); - SOCK_LOG_DBG("Consuming posted entry: %p, ctx: %p\n", - rx_posted, rx_ctx); - - datatype_sz = (rx_buffered->flags & FI_ATOMIC) ? - ofi_datatype_size(rx_buffered->rx_op.atomic.datatype) : 0; - offset = 0; - rem = rx_buffered->iov[0].iov.len; - rx_ctx->buffered_len -= rem; - used_len = rx_posted->used; - memset(&pe_entry, 0, sizeof(pe_entry)); - for (i = 0; i < rx_posted->rx_op.dest_iov_len && rem > 0; i++) { - /* Try to find the first iovec entry where the data - * has not been consumed. In the common case, there - * is only one iovec, i.e. a single buffer */ - if (used_len >= rx_posted->iov[i].iov.len) { - used_len -= rx_posted->iov[i].iov.len; - continue; - } - - dst_offset = used_len; - len = MIN(rx_posted->iov[i].iov.len, rem); - pe_entry.buf = rx_posted->iov[i].iov.addr + dst_offset; - - src = (char *) (uintptr_t) - rx_buffered->iov[0].iov.addr + offset; - dst = (char *) (uintptr_t) - rx_posted->iov[i].iov.addr + dst_offset; - - if (datatype_sz) { - sock_pe_do_atomic(NULL, dst, src, - rx_buffered->rx_op.atomic.datatype, - rx_buffered->rx_op.atomic.op, - len / datatype_sz, 0); - } else { - memcpy(dst, src, len); - } - offset += len; - rem -= len; - dst_offset = used_len = 0; - rx_posted->used += len; - pe_entry.data_len = rx_buffered->used; - } - - pe_entry.done_len = offset; - pe_entry.data = rx_buffered->data; - pe_entry.tag = rx_buffered->tag; - pe_entry.context = (uint64_t)rx_posted->context; - pe_entry.pe.rx.rx_iov[0].iov.addr = rx_posted->iov[0].iov.addr; - pe_entry.type = SOCK_PE_RX; - pe_entry.comp = rx_buffered->comp; - pe_entry.flags = rx_posted->flags; - pe_entry.flags |= (FI_MSG | FI_RECV); - pe_entry.addr = rx_buffered->addr; - if (rx_buffered->is_tagged) - pe_entry.flags |= FI_TAGGED; - pe_entry.flags &= ~FI_MULTI_RECV; - - if (rx_posted->flags & FI_MULTI_RECV) { - if 
(sock_rx_avail_len(rx_posted) < rx_ctx->min_multi_recv) { - pe_entry.flags |= FI_MULTI_RECV; - dlist_remove(&rx_posted->entry); - } - } else { - dlist_remove(&rx_posted->entry); - } - - if (rem) { - SOCK_LOG_DBG("Not enough space in posted recv buffer\n"); - sock_pe_report_rx_error(&pe_entry, (int) rem, FI_ETRUNC); - } else { - sock_pe_report_recv_completion(&pe_entry); - } - - /* Mark that we are done processing the posted recv buff. - * This allows another thread to grab it when calling - * sock_rx_get_entry() */ - rx_posted->is_busy = 0; - - dlist_remove(&rx_buffered->entry); - sock_rx_release_entry(rx_buffered); - - if ((!(rx_posted->flags & FI_MULTI_RECV) || - (pe_entry.flags & FI_MULTI_RECV))) { - sock_rx_release_entry(rx_posted); - rx_ctx->num_left++; - } - } - /* remember where we left off for next shallow progress */ - rx_ctx->progress_start = entry; - return 0; -} - -static int sock_pe_process_rx_send(struct sock_pe *pe, - struct sock_rx_ctx *rx_ctx, - struct sock_pe_entry *pe_entry) -{ - ssize_t i, ret = 0; - struct sock_rx_entry *rx_entry; - uint64_t len, rem, offset, data_len, done_data, used; - - offset = 0; - len = sizeof(struct sock_msg_hdr); - - if (pe_entry->addr == FI_ADDR_NOTAVAIL && - pe_entry->ep_attr->ep_type == FI_EP_RDM && pe_entry->ep_attr->av) { - if (pe_entry->conn->av_index == FI_ADDR_NOTAVAIL) { - /* this may happen when connection message comes in - * before fi_av_insert. Let's try setting - * conn->av_index now. 
*/ - pe_entry->conn->av_index = - sock_av_get_addr_index(pe_entry->ep_attr->av, - &(pe_entry->conn->addr)); - } - pe_entry->addr = pe_entry->conn->av_index; - } - - if (pe_entry->msg_hdr.op_type == SOCK_OP_TSEND) { - if (sock_pe_recv_field(pe_entry, &pe_entry->tag, - SOCK_TAG_SIZE, len)) - return 0; - len += SOCK_TAG_SIZE; - } - - if (pe_entry->msg_hdr.flags & FI_REMOTE_CQ_DATA) { - if (sock_pe_recv_field(pe_entry, &pe_entry->data, - SOCK_CQ_DATA_SIZE, len)) - return 0; - len += SOCK_CQ_DATA_SIZE; - } - - data_len = pe_entry->msg_hdr.msg_len - len; - if (pe_entry->done_len == len && !pe_entry->pe.rx.rx_entry) { - ofi_mutex_lock(&rx_ctx->lock); - rx_ctx->progress_start = &rx_ctx->rx_buffered_list; - sock_pe_progress_buffered_rx(rx_ctx, false); - - rx_entry = sock_rx_get_entry(rx_ctx, pe_entry->addr, pe_entry->tag, - pe_entry->msg_hdr.op_type == SOCK_OP_TSEND ? 1 : 0); - SOCK_LOG_DBG("Consuming posted entry: %p\n", rx_entry); - - if (!rx_entry) { - SOCK_LOG_DBG("%p: No matching recv, buffering recv (len = %llu)\n", - pe_entry, (long long unsigned int)data_len); - - rx_entry = sock_rx_new_buffered_entry(rx_ctx, data_len); - if (!rx_entry) { - ofi_mutex_unlock(&rx_ctx->lock); - return -FI_ENOMEM; - } - - rx_entry->addr = pe_entry->addr; - rx_entry->tag = pe_entry->tag; - rx_entry->data = pe_entry->data; - rx_entry->ignore = 0; - rx_entry->comp = pe_entry->comp; - - if (pe_entry->msg_hdr.flags & FI_REMOTE_CQ_DATA) - rx_entry->flags |= FI_REMOTE_CQ_DATA; - - if (pe_entry->msg_hdr.op_type == SOCK_OP_TSEND) - rx_entry->is_tagged = 1; - } - ofi_mutex_unlock(&rx_ctx->lock); - pe_entry->context = rx_entry->context; - pe_entry->pe.rx.rx_entry = rx_entry; - } - - rx_entry = pe_entry->pe.rx.rx_entry; - done_data = pe_entry->done_len - len; - pe_entry->data_len = data_len; - rem = pe_entry->data_len - done_data; - used = rx_entry->used; - - for (i = 0; rem > 0 && i < rx_entry->rx_op.dest_iov_len; i++) { - - /* skip used contents in rx_entry */ - if (used >= 
rx_entry->iov[i].iov.len) { - used -= rx_entry->iov[i].iov.len; - continue; - } - - offset = used; - data_len = MIN(rx_entry->iov[i].iov.len - used, rem); - ret = sock_comm_recv(pe_entry, - (char *) (uintptr_t) rx_entry->iov[i].iov.addr + offset, - data_len); - if (ret <= 0) - return (int) ret; - - if (!pe_entry->buf) - pe_entry->buf = rx_entry->iov[i].iov.addr + offset; - rem -= ret; - used = 0; - pe_entry->done_len += ret; - rx_entry->used += ret; - if ((size_t) ret != data_len) - return 0; - } - - pe_entry->is_complete = 1; - rx_entry->is_complete = 1; - - pe_entry->flags = rx_entry->flags; - if (pe_entry->msg_hdr.op_type == SOCK_OP_TSEND) - pe_entry->flags |= FI_TAGGED; - pe_entry->flags |= (FI_MSG | FI_RECV); - - if (pe_entry->msg_hdr.flags & FI_REMOTE_CQ_DATA) - pe_entry->flags |= FI_REMOTE_CQ_DATA; - pe_entry->flags &= ~FI_MULTI_RECV; - - ofi_mutex_lock(&rx_ctx->lock); - if (rx_entry->flags & FI_MULTI_RECV) { - if (sock_rx_avail_len(rx_entry) < rx_ctx->min_multi_recv) { - pe_entry->flags |= FI_MULTI_RECV; - dlist_remove(&rx_entry->entry); - } - } else { - if (!rx_entry->is_buffered) - dlist_remove(&rx_entry->entry); - } - rx_entry->is_busy = 0; - ofi_mutex_unlock(&rx_ctx->lock); - - /* report error, if any */ - if (rem) { - SOCK_LOG_ERROR("Not enough space in posted recv buffer\n"); - sock_pe_report_rx_error(pe_entry, (int) rem, FI_ETRUNC); - pe_entry->is_error = 1; - pe_entry->rem = pe_entry->total_len - pe_entry->done_len; - goto out; - } else { - if (!rx_entry->is_buffered) - sock_pe_report_recv_completion(pe_entry); - } - -out: - if (pe_entry->msg_hdr.flags & FI_TRANSMIT_COMPLETE) { - sock_pe_send_response(pe, rx_ctx, pe_entry, 0, - SOCK_OP_SEND_COMPLETE, 0); - } - - if (!rx_entry->is_buffered && - (!(rx_entry->flags & FI_MULTI_RECV) || - (pe_entry->flags & FI_MULTI_RECV))) { - ofi_mutex_lock(&rx_ctx->lock); - sock_rx_release_entry(rx_entry); - rx_ctx->num_left++; - ofi_mutex_unlock(&rx_ctx->lock); - } - return (int) ret; -} - -static int 
sock_pe_process_rx_conn_msg(struct sock_pe *pe, - struct sock_rx_ctx *rx_ctx, - struct sock_pe_entry *pe_entry) -{ - uint64_t len, data_len; - struct sock_ep_attr *ep_attr; - struct sock_conn_map *map; - union ofi_sock_ip *addr; - struct sock_conn *conn; - uint64_t index; - - if (!pe_entry->comm_addr) { - pe_entry->comm_addr = calloc(1, sizeof(union ofi_sock_ip)); - if (!pe_entry->comm_addr) - return -FI_ENOMEM; - } - - len = sizeof(struct sock_msg_hdr); - data_len = sizeof(union ofi_sock_ip); - if (sock_pe_recv_field(pe_entry, pe_entry->comm_addr, data_len, len)) { - return 0; - } - - ep_attr = pe_entry->conn->ep_attr; - map = &ep_attr->cmap; - addr = pe_entry->comm_addr; - pe_entry->conn->addr = *addr; - - index = (ep_attr->ep_type == FI_EP_MSG) ? 0 : sock_av_get_addr_index(ep_attr->av, addr); - if (index != -1) { - ofi_mutex_lock(&map->lock); - conn = sock_ep_lookup_conn(ep_attr, index, addr); - if (conn == NULL || conn == SOCK_CM_CONN_IN_PROGRESS) { - if (ofi_idm_set(&ep_attr->av_idm, (int) index, pe_entry->conn) < 0) - SOCK_LOG_ERROR("ofi_idm_set failed\n"); - } - ofi_mutex_unlock(&map->lock); - } - pe_entry->conn->av_index = (ep_attr->ep_type == FI_EP_MSG || index == -1) ? 
- FI_ADDR_NOTAVAIL : index; - - pe_entry->is_complete = 1; - pe_entry->pe.rx.pending_send = 0; - free(pe_entry->comm_addr); - pe_entry->comm_addr = NULL; - return 0; -} - -static int sock_pe_process_recv(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx, - struct sock_pe_entry *pe_entry) -{ - int ret; - struct sock_msg_hdr *msg_hdr; - - msg_hdr = &pe_entry->msg_hdr; - if (msg_hdr->version != SOCK_WIRE_PROTO_VERSION) { - SOCK_LOG_ERROR("Invalid wire protocol\n"); - ret = -FI_EINVAL; - goto out; - } - - switch (pe_entry->msg_hdr.op_type) { - case SOCK_OP_SEND: - case SOCK_OP_TSEND: - ret = sock_pe_process_rx_send(pe, rx_ctx, pe_entry); - break; - case SOCK_OP_WRITE: - ret = sock_pe_process_rx_write(pe, rx_ctx, pe_entry); - break; - case SOCK_OP_READ: - ret = sock_pe_process_rx_read(pe, rx_ctx, pe_entry); - break; - case SOCK_OP_ATOMIC: - if (msg_hdr->flags & FI_TAGGED) - ret = sock_pe_process_rx_tatomic(pe, rx_ctx, pe_entry); - else - ret = sock_pe_process_rx_atomic(pe, rx_ctx, pe_entry); - break; - case SOCK_OP_SEND_COMPLETE: - ret = sock_pe_handle_ack(pe, pe_entry); - break; - case SOCK_OP_WRITE_COMPLETE: - ret = sock_pe_handle_write_complete(pe, pe_entry); - break; - case SOCK_OP_READ_COMPLETE: - ret = sock_pe_handle_read_complete(pe, pe_entry); - break; - case SOCK_OP_ATOMIC_COMPLETE: - ret = sock_pe_handle_atomic_complete(pe, pe_entry); - break; - case SOCK_OP_WRITE_ERROR: - case SOCK_OP_READ_ERROR: - case SOCK_OP_ATOMIC_ERROR: - ret = sock_pe_handle_error(pe, pe_entry); - break; - case SOCK_OP_CONN_MSG: - ret = sock_pe_process_rx_conn_msg(pe, rx_ctx, pe_entry); - break; - default: - ret = -FI_ENOSYS; - SOCK_LOG_ERROR("Operation not supported\n"); - break; - } - -out: - return ret; -} - -static int sock_pe_peek_hdr(struct sock_pe *pe, - struct sock_pe_entry *pe_entry) -{ - int len; - struct sock_msg_hdr *msg_hdr; - struct sock_conn *conn = pe_entry->conn; - - if (conn->rx_pe_entry != NULL && conn->rx_pe_entry != pe_entry) - return -1; - - if (conn->rx_pe_entry == 
NULL) { - conn->rx_pe_entry = pe_entry; - } - - len = sizeof(struct sock_msg_hdr); - msg_hdr = &pe_entry->msg_hdr; - if (sock_comm_peek(pe_entry->conn, (void *) msg_hdr, len) != len) - return -1; - - msg_hdr->msg_len = ntohll(msg_hdr->msg_len); - msg_hdr->flags = ntohll(msg_hdr->flags); - msg_hdr->pe_entry_id = ntohs(msg_hdr->pe_entry_id); - pe_entry->total_len = msg_hdr->msg_len; - - SOCK_LOG_DBG("PE RX (Hdr peek): MsgLen: %" PRIu64 ", TX-ID: %d, Type: %d\n", - msg_hdr->msg_len, msg_hdr->rx_id, msg_hdr->op_type); - return 0; -} - -static int sock_pe_read_hdr(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx, - struct sock_pe_entry *pe_entry) -{ - struct sock_msg_hdr *msg_hdr; - struct sock_conn *conn = pe_entry->conn; - - if (conn->rx_pe_entry != NULL && conn->rx_pe_entry != pe_entry) - return 0; - - if (conn->rx_pe_entry == NULL) - conn->rx_pe_entry = pe_entry; - - msg_hdr = &pe_entry->msg_hdr; - if (sock_pe_peek_hdr(pe, pe_entry)) - return -1; - - if (rx_ctx->is_ctrl_ctx && sock_pe_is_data_msg(msg_hdr->op_type)) - return -1; - - if (sock_pe_is_data_msg(msg_hdr->op_type) && - msg_hdr->rx_id != rx_ctx->rx_id) - return -1; - - if (sock_pe_recv_field(pe_entry, (void *) msg_hdr, - sizeof(struct sock_msg_hdr), 0)) { - SOCK_LOG_ERROR("Failed to recv header\n"); - return -1; - } - - msg_hdr->msg_len = ntohll(msg_hdr->msg_len); - msg_hdr->flags = ntohll(msg_hdr->flags); - msg_hdr->pe_entry_id = ntohs(msg_hdr->pe_entry_id); - pe_entry->pe.rx.header_read = 1; - pe_entry->flags = msg_hdr->flags; - pe_entry->total_len = msg_hdr->msg_len; - - SOCK_LOG_DBG("PE RX (Hdr read): MsgLen: %" PRIu64 ", TX-ID: %d, Type: %d\n", - msg_hdr->msg_len, msg_hdr->rx_id, msg_hdr->op_type); - return 0; -} - -static int sock_pe_progress_tx_atomic(struct sock_pe *pe, - struct sock_pe_entry *pe_entry, - struct sock_conn *conn) -{ - size_t datatype_sz; - union sock_iov iov[SOCK_EP_MAX_IOV_LIMIT]; - ssize_t len, i, entry_len; - - if (pe_entry->pe.tx.send_done) - return 0; - - len = sizeof(struct 
sock_msg_hdr); - entry_len = sizeof(struct sock_atomic_req) - sizeof(struct sock_msg_hdr); - if (sock_pe_send_field(pe_entry, &pe_entry->pe.tx.tx_op, entry_len, len)) - return 0; - len += entry_len; - - if (pe_entry->flags & FI_REMOTE_CQ_DATA) { - if (sock_pe_send_field(pe_entry, &pe_entry->data, - SOCK_CQ_DATA_SIZE, len)) - return 0; - len += SOCK_CQ_DATA_SIZE; - } - - /* dest iocs */ - entry_len = sizeof(union sock_iov) * pe_entry->pe.tx.tx_op.dest_iov_len; - for (i = 0; i < pe_entry->pe.tx.tx_op.dest_iov_len; i++) { - iov[i].ioc.addr = pe_entry->pe.tx.tx_iov[i].dst.ioc.addr; - iov[i].ioc.count = pe_entry->pe.tx.tx_iov[i].dst.ioc.count; - iov[i].ioc.key = pe_entry->pe.tx.tx_iov[i].dst.ioc.key; - } - - if (sock_pe_send_field(pe_entry, &iov[0], entry_len, len)) - return 0; - len += entry_len; - - datatype_sz = ofi_datatype_size(pe_entry->pe.tx.tx_op.atomic.datatype); - if (pe_entry->flags & FI_INJECT) { - /* cmp data */ - if (sock_pe_send_field(pe_entry, - &pe_entry->pe.tx.inject[0] + pe_entry->pe.tx.tx_op.src_iov_len, - pe_entry->pe.tx.tx_op.atomic.cmp_iov_len, len)) - return 0; - len += pe_entry->pe.tx.tx_op.atomic.cmp_iov_len; - /* data */ - if (sock_pe_send_field(pe_entry, - &pe_entry->pe.tx.inject[0], - pe_entry->pe.tx.tx_op.src_iov_len, len)) - return 0; - len += pe_entry->pe.tx.tx_op.src_iov_len; - } else { - /* cmp data */ - for (i = 0; i < pe_entry->pe.tx.tx_op.atomic.cmp_iov_len; i++) { - if (sock_pe_send_field(pe_entry, - (void *) (uintptr_t) pe_entry->pe.tx.tx_iov[i].cmp.ioc.addr, - pe_entry->pe.tx.tx_iov[i].cmp.ioc.count * - datatype_sz, len)) - return 0; - len += (pe_entry->pe.tx.tx_iov[i].cmp.ioc.count * datatype_sz); - } - /* data */ - for (i = 0; i < pe_entry->pe.tx.tx_op.src_iov_len; i++) { - if (pe_entry->pe.tx.tx_op.atomic.op != FI_ATOMIC_READ) { - if (sock_pe_send_field(pe_entry, - (void *) (uintptr_t) pe_entry->pe.tx.tx_iov[i].src.ioc.addr, - pe_entry->pe.tx.tx_iov[i].src.ioc.count * - datatype_sz, len)) - return 0; - len += 
(pe_entry->pe.tx.tx_iov[i].src.ioc.count * datatype_sz); - } - } - } - - sock_comm_flush(pe_entry); - if (!sock_comm_tx_done(pe_entry)) - return 0; - - if (pe_entry->done_len == pe_entry->total_len) { - pe_entry->pe.tx.send_done = 1; - pe_entry->conn->tx_pe_entry = NULL; - SOCK_LOG_DBG("Send complete\n"); - } - - pe_entry->flags |= FI_ATOMIC; - if (pe_entry->pe.tx.tx_op.atomic.op == FI_ATOMIC_READ) - pe_entry->flags |= FI_READ; - else - pe_entry->flags |= FI_WRITE; - pe_entry->msg_hdr.flags = pe_entry->flags; - return 0; -} - -static int sock_pe_progress_tx_write(struct sock_pe *pe, - struct sock_pe_entry *pe_entry, - struct sock_conn *conn) -{ - union sock_iov dest_iov[SOCK_EP_MAX_IOV_LIMIT]; - ssize_t len, i, dest_iov_len; - - if (pe_entry->pe.tx.send_done) - return 0; - - len = sizeof(struct sock_msg_hdr); - if (pe_entry->flags & FI_REMOTE_CQ_DATA) { - if (sock_pe_send_field(pe_entry, &pe_entry->data, - SOCK_CQ_DATA_SIZE, len)) - return 0; - len += SOCK_CQ_DATA_SIZE; - } - - /* dest iovs */ - dest_iov_len = sizeof(union sock_iov) * pe_entry->pe.tx.tx_op.dest_iov_len; - for (i = 0; i < pe_entry->pe.tx.tx_op.dest_iov_len; i++) { - dest_iov[i].iov.addr = pe_entry->pe.tx.tx_iov[i].dst.iov.addr; - dest_iov[i].iov.len = pe_entry->pe.tx.tx_iov[i].dst.iov.len; - dest_iov[i].iov.key = pe_entry->pe.tx.tx_iov[i].dst.iov.key; - } - if (sock_pe_send_field(pe_entry, &dest_iov[0], dest_iov_len, len)) - return 0; - len += dest_iov_len; - - /* data */ - if (pe_entry->flags & FI_INJECT) { - if (sock_pe_send_field(pe_entry, &pe_entry->pe.tx.inject[0], - pe_entry->pe.tx.tx_op.src_iov_len, len)) - return 0; - len += pe_entry->pe.tx.tx_op.src_iov_len; - pe_entry->data_len = pe_entry->pe.tx.tx_op.src_iov_len; - } else { - pe_entry->data_len = 0; - for (i = 0; i < pe_entry->pe.tx.tx_op.src_iov_len; i++) { - if (sock_pe_send_field( - pe_entry, - (void *) (uintptr_t) pe_entry->pe.tx.tx_iov[i].src.iov.addr, - pe_entry->pe.tx.tx_iov[i].src.iov.len, len)) - return 0; - len += 
pe_entry->pe.tx.tx_iov[i].src.iov.len; - pe_entry->data_len += pe_entry->pe.tx.tx_iov[i].src.iov.len; - } - } - - sock_comm_flush(pe_entry); - if (!sock_comm_tx_done(pe_entry)) - return 0; - - if (pe_entry->done_len == pe_entry->total_len) { - pe_entry->pe.tx.send_done = 1; - pe_entry->conn->tx_pe_entry = NULL; - SOCK_LOG_DBG("Send complete\n"); - } - pe_entry->flags |= (FI_RMA | FI_WRITE); - pe_entry->msg_hdr.flags = pe_entry->flags; - return 0; -} - -static int sock_pe_progress_tx_read(struct sock_pe *pe, - struct sock_pe_entry *pe_entry, - struct sock_conn *conn) -{ - union sock_iov src_iov[SOCK_EP_MAX_IOV_LIMIT]; - ssize_t len, i, src_iov_len; - - if (pe_entry->pe.tx.send_done) - return 0; - - len = sizeof(struct sock_msg_hdr); - - /* src iovs */ - src_iov_len = sizeof(union sock_iov) * pe_entry->pe.tx.tx_op.src_iov_len; - pe_entry->data_len = 0; - for (i = 0; i < pe_entry->pe.tx.tx_op.src_iov_len; i++) { - src_iov[i].iov.addr = pe_entry->pe.tx.tx_iov[i].src.iov.addr; - src_iov[i].iov.len = pe_entry->pe.tx.tx_iov[i].src.iov.len; - src_iov[i].iov.key = pe_entry->pe.tx.tx_iov[i].src.iov.key; - pe_entry->data_len += pe_entry->pe.tx.tx_iov[i].src.iov.len; - } - - if (sock_pe_send_field(pe_entry, &src_iov[0], src_iov_len, len)) - return 0; - len += src_iov_len; - - sock_comm_flush(pe_entry); - if (!sock_comm_tx_done(pe_entry)) - return 0; - - if (pe_entry->done_len == pe_entry->total_len) { - pe_entry->pe.tx.send_done = 1; - pe_entry->conn->tx_pe_entry = NULL; - SOCK_LOG_DBG("Send complete\n"); - } - pe_entry->flags |= (FI_RMA | FI_READ); - pe_entry->msg_hdr.flags = pe_entry->flags; - return 0; -} - - -static int sock_pe_progress_tx_send(struct sock_pe *pe, - struct sock_pe_entry *pe_entry, - struct sock_conn *conn) -{ - size_t len, i; - if (pe_entry->pe.tx.send_done) - return 0; - - len = sizeof(struct sock_msg_hdr); - if (pe_entry->pe.tx.tx_op.op == SOCK_OP_TSEND) { - if (sock_pe_send_field(pe_entry, &pe_entry->tag, - SOCK_TAG_SIZE, len)) - return 0; - len += 
SOCK_TAG_SIZE; - } - - if (pe_entry->flags & FI_REMOTE_CQ_DATA) { - if (sock_pe_send_field(pe_entry, &pe_entry->data, - SOCK_CQ_DATA_SIZE, len)) - return 0; - len += SOCK_CQ_DATA_SIZE; - } - - if (pe_entry->flags & FI_INJECT) { - if (sock_pe_send_field(pe_entry, pe_entry->pe.tx.inject, - pe_entry->pe.tx.tx_op.src_iov_len, len)) - return 0; - len += pe_entry->pe.tx.tx_op.src_iov_len; - pe_entry->data_len = pe_entry->pe.tx.tx_op.src_iov_len; - } else { - pe_entry->data_len = 0; - for (i = 0; i < pe_entry->pe.tx.tx_op.src_iov_len; i++) { - if (sock_pe_send_field(pe_entry, - (void *) (uintptr_t) pe_entry->pe.tx.tx_iov[i].src.iov.addr, - pe_entry->pe.tx.tx_iov[i].src.iov.len, len)) - return 0; - len += pe_entry->pe.tx.tx_iov[i].src.iov.len; - pe_entry->data_len += pe_entry->pe.tx.tx_iov[i].src.iov.len; - } - } - - sock_comm_flush(pe_entry); - if (!sock_comm_tx_done(pe_entry)) - return 0; - - pe_entry->tag = 0; - if (pe_entry->pe.tx.tx_op.op == SOCK_OP_TSEND) - pe_entry->flags |= FI_TAGGED; - pe_entry->flags |= (FI_MSG | FI_SEND); - - pe_entry->msg_hdr.flags = pe_entry->flags; - if (pe_entry->done_len == pe_entry->total_len) { - pe_entry->pe.tx.send_done = 1; - pe_entry->conn->tx_pe_entry = NULL; - SOCK_LOG_DBG("Send complete\n"); - - if (pe_entry->flags & FI_INJECT_COMPLETE) { - sock_pe_report_send_completion(pe_entry); - pe_entry->is_complete = 1; - } - } - - return 0; -} - -static int sock_pe_progress_tx_conn_msg(struct sock_pe *pe, - struct sock_pe_entry *pe_entry, - struct sock_conn *conn) -{ - size_t len; - if (pe_entry->pe.tx.send_done) - return 0; - - len = sizeof(struct sock_msg_hdr); - - if (sock_pe_send_field(pe_entry, pe_entry->pe.tx.inject, - pe_entry->pe.tx.tx_op.src_iov_len, len)) - return 0; - len += pe_entry->pe.tx.tx_op.src_iov_len; - pe_entry->data_len = pe_entry->pe.tx.tx_op.src_iov_len; - - sock_comm_flush(pe_entry); - if (!sock_comm_tx_done(pe_entry)) - return 0; - - if (pe_entry->done_len == pe_entry->total_len) { - pe_entry->pe.tx.send_done = 1; - 
pe_entry->conn->tx_pe_entry = NULL; - SOCK_LOG_DBG("Send complete\n"); - pe_entry->is_complete = 1; - } - return 0; -} - -static int sock_pe_progress_tx_entry(struct sock_pe *pe, - struct sock_tx_ctx *tx_ctx, - struct sock_pe_entry *pe_entry) -{ - int ret = 0; - struct sock_conn *conn = pe_entry->conn; - - if (pe_entry->is_complete || !conn) - goto out; - - if (sock_comm_is_disconnected(pe_entry)) { - ofi_straddr_log(&sock_prov, FI_LOG_WARN, FI_LOG_EP_DATA, - "Peer disconnected: removing fd from pollset", - &pe_entry->conn->addr.sa); - ofi_mutex_lock(&pe_entry->ep_attr->cmap.lock); - sock_ep_remove_conn(pe_entry->ep_attr, pe_entry->conn); - ofi_mutex_unlock(&pe_entry->ep_attr->cmap.lock); - - sock_pe_report_tx_error(pe_entry, 0, FI_EIO); - pe_entry->is_complete = 1; - - goto out; - } - - if (pe_entry->pe.tx.send_done) - goto out; - - if (conn->tx_pe_entry != NULL && conn->tx_pe_entry != pe_entry) { - SOCK_LOG_DBG("Cannot progress %p as conn %p is being used by %p\n", - pe_entry, conn, conn->tx_pe_entry); - goto out; - } - - if (conn->tx_pe_entry == NULL) { - SOCK_LOG_DBG("Connection %p grabbed by %p\n", conn, pe_entry); - conn->tx_pe_entry = pe_entry; - } - - if ((pe_entry->flags & FI_FENCE) && - (tx_ctx->pe_entry_list.next != &pe_entry->ctx_entry)) { - SOCK_LOG_DBG("Waiting for FI_FENCE\n"); - goto out; - } - - if (!pe_entry->pe.tx.header_sent) { - if (sock_pe_send_field(pe_entry, &pe_entry->msg_hdr, - sizeof(struct sock_msg_hdr), 0)) - goto out; - pe_entry->pe.tx.header_sent = 1; - } - - switch (pe_entry->msg_hdr.op_type) { - case SOCK_OP_SEND: - case SOCK_OP_TSEND: - ret = sock_pe_progress_tx_send(pe, pe_entry, conn); - break; - case SOCK_OP_WRITE: - ret = sock_pe_progress_tx_write(pe, pe_entry, conn); - break; - case SOCK_OP_READ: - ret = sock_pe_progress_tx_read(pe, pe_entry, conn); - break; - case SOCK_OP_ATOMIC: - ret = sock_pe_progress_tx_atomic(pe, pe_entry, conn); - break; - case SOCK_OP_CONN_MSG: - ret = sock_pe_progress_tx_conn_msg(pe, pe_entry, conn); 
- break; - default: - ret = -FI_ENOSYS; - SOCK_LOG_ERROR("Operation not supported\n"); - break; - } - -out: - if (pe_entry->is_complete) { - sock_pe_release_entry(pe, pe_entry); - SOCK_LOG_DBG("[%p] TX done\n", pe_entry); - } - return ret; -} - -static int sock_pe_progress_rx_pe_entry(struct sock_pe *pe, - struct sock_pe_entry *pe_entry, - struct sock_rx_ctx *rx_ctx) -{ - int ret; - - if (sock_comm_is_disconnected(pe_entry)) { - ofi_straddr_log(&sock_prov, FI_LOG_WARN, FI_LOG_EP_DATA, - "Peer disconnected: removing fd from pollset", - &pe_entry->conn->addr.sa); - ofi_mutex_lock(&pe_entry->ep_attr->cmap.lock); - sock_ep_remove_conn(pe_entry->ep_attr, pe_entry->conn); - ofi_mutex_unlock(&pe_entry->ep_attr->cmap.lock); - - if (pe_entry->pe.rx.header_read) - sock_pe_report_rx_error(pe_entry, 0, FI_EIO); - - sock_pe_release_entry(pe, pe_entry); - return 0; - } - - if (pe_entry->pe.rx.pending_send) { - sock_pe_progress_pending_ack(pe, pe_entry); - goto out; - } - - if (pe_entry->is_error) - goto out; - - if (!pe_entry->pe.rx.header_read) { - if (sock_pe_read_hdr(pe, rx_ctx, pe_entry) == -1) { - sock_pe_release_entry(pe, pe_entry); - return 0; - } - } - - if (pe_entry->pe.rx.header_read) { - ret = sock_pe_process_recv(pe, rx_ctx, pe_entry); - if (ret < 0) - return ret; - } - -out: - if (pe_entry->is_error) - sock_pe_discard_field(pe_entry); - - if (pe_entry->is_complete && !pe_entry->pe.rx.pending_send) { - sock_pe_release_entry(pe, pe_entry); - SOCK_LOG_DBG("[%p] RX done\n", pe_entry); - } - return 0; -} - -static void sock_pe_new_rx_entry(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx, - struct sock_ep_attr *ep_attr, struct sock_conn *conn) -{ - struct sock_pe_entry *pe_entry; - - pe_entry = sock_pe_acquire_entry(pe); - if (!pe_entry) - return; - memset(&pe_entry->pe.rx, 0, sizeof(pe_entry->pe.rx)); - - pe_entry->conn = conn; - pe_entry->type = SOCK_PE_RX; - pe_entry->ep_attr = ep_attr; - pe_entry->is_complete = 0; - pe_entry->done_len = 0; - 
pe_entry->completion_reported = 0; - - if (ep_attr->ep_type == FI_EP_MSG || !ep_attr->av) - pe_entry->addr = FI_ADDR_NOTAVAIL; - else - pe_entry->addr = conn->av_index; - - if (rx_ctx->ctx.fid.fclass == FI_CLASS_SRX_CTX) - pe_entry->comp = &ep_attr->rx_ctx->comp; - else - pe_entry->comp = &rx_ctx->comp; - - SOCK_LOG_DBG("New RX on PE entry %p (%ld)\n", - pe_entry, PE_INDEX(pe, pe_entry)); - - SOCK_LOG_DBG("Inserting rx_entry to PE entry %p, conn: %p\n", - pe_entry, pe_entry->conn); - - dlist_insert_tail(&pe_entry->ctx_entry, &rx_ctx->pe_entry_list); -} - -static int sock_pe_new_tx_entry(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx) -{ - int i; - size_t datatype_sz; - struct sock_msg_hdr *msg_hdr; - struct sock_pe_entry *pe_entry; - struct sock_ep_attr *ep_attr; - - pe_entry = sock_pe_acquire_entry(pe); - memset(&pe_entry->pe.tx, 0, sizeof(pe_entry->pe.tx)); - memset(&pe_entry->msg_hdr, 0, sizeof(pe_entry->msg_hdr)); - - pe_entry->type = SOCK_PE_TX; - pe_entry->is_complete = 0; - pe_entry->done_len = 0; - pe_entry->conn = NULL; - pe_entry->ep_attr = tx_ctx->ep_attr; - pe_entry->pe.tx.tx_ctx = tx_ctx; - pe_entry->completion_reported = 0; - - dlist_insert_tail(&pe_entry->ctx_entry, &tx_ctx->pe_entry_list); - - /* fill in PE tx entry */ - msg_hdr = &pe_entry->msg_hdr; - msg_hdr->msg_len = sizeof(*msg_hdr); - - msg_hdr->pe_entry_id = (uint16_t) PE_INDEX(pe, pe_entry); - SOCK_LOG_DBG("New TX on PE entry %p (%d)\n", - pe_entry, msg_hdr->pe_entry_id); - - sock_tx_ctx_read_op_send(tx_ctx, &pe_entry->pe.tx.tx_op, - &pe_entry->flags, &pe_entry->context, &pe_entry->addr, - &pe_entry->buf, &ep_attr, &pe_entry->conn); - - if (pe_entry->pe.tx.tx_op.op == SOCK_OP_TSEND) { - ofi_rbread(&tx_ctx->rb, &pe_entry->tag, sizeof(pe_entry->tag)); - msg_hdr->msg_len += sizeof(pe_entry->tag); - } - - if (ep_attr && tx_ctx->fclass == FI_CLASS_STX_CTX) - pe_entry->comp = &ep_attr->tx_ctx->comp; - else - pe_entry->comp = &tx_ctx->comp; - - if (pe_entry->flags & FI_REMOTE_CQ_DATA) { - 
ofi_rbread(&tx_ctx->rb, &pe_entry->data, sizeof(pe_entry->data)); - msg_hdr->msg_len += sizeof(pe_entry->data); - } - - msg_hdr->op_type = pe_entry->pe.tx.tx_op.op; - switch (pe_entry->pe.tx.tx_op.op) { - case SOCK_OP_SEND: - case SOCK_OP_TSEND: - if (pe_entry->flags & FI_INJECT) { - ofi_rbread(&tx_ctx->rb, &pe_entry->pe.tx.inject[0], - pe_entry->pe.tx.tx_op.src_iov_len); - msg_hdr->msg_len += pe_entry->pe.tx.tx_op.src_iov_len; - } else { - for (i = 0; i < pe_entry->pe.tx.tx_op.src_iov_len; i++) { - ofi_rbread(&tx_ctx->rb, &pe_entry->pe.tx.tx_iov[i].src, - sizeof(pe_entry->pe.tx.tx_iov[i].src)); - msg_hdr->msg_len += pe_entry->pe.tx.tx_iov[i].src.iov.len; - } - } - msg_hdr->dest_iov_len = pe_entry->pe.tx.tx_op.dest_iov_len; - if (pe_entry->flags & SOCK_NO_COMPLETION) - pe_entry->flags |= FI_INJECT_COMPLETE; - break; - case SOCK_OP_WRITE: - if (pe_entry->flags & FI_INJECT) { - ofi_rbread(&tx_ctx->rb, &pe_entry->pe.tx.inject[0], - pe_entry->pe.tx.tx_op.src_iov_len); - msg_hdr->msg_len += pe_entry->pe.tx.tx_op.src_iov_len; - } else { - for (i = 0; i < pe_entry->pe.tx.tx_op.src_iov_len; i++) { - ofi_rbread(&tx_ctx->rb, &pe_entry->pe.tx.tx_iov[i].src, - sizeof(pe_entry->pe.tx.tx_iov[i].src)); - msg_hdr->msg_len += pe_entry->pe.tx.tx_iov[i].src.iov.len; - } - } - - for (i = 0; i < pe_entry->pe.tx.tx_op.dest_iov_len; i++) { - ofi_rbread(&tx_ctx->rb, &pe_entry->pe.tx.tx_iov[i].dst, - sizeof(pe_entry->pe.tx.tx_iov[i].dst)); - } - msg_hdr->msg_len += sizeof(union sock_iov) * i; - msg_hdr->dest_iov_len = pe_entry->pe.tx.tx_op.dest_iov_len; - break; - case SOCK_OP_READ: - for (i = 0; i < pe_entry->pe.tx.tx_op.src_iov_len; i++) { - ofi_rbread(&tx_ctx->rb, &pe_entry->pe.tx.tx_iov[i].src, - sizeof(pe_entry->pe.tx.tx_iov[i].src)); - } - msg_hdr->msg_len += sizeof(union sock_iov) * i; - - for (i = 0; i < pe_entry->pe.tx.tx_op.dest_iov_len; i++) { - ofi_rbread(&tx_ctx->rb, &pe_entry->pe.tx.tx_iov[i].dst, - sizeof(pe_entry->pe.tx.tx_iov[i].dst)); - } - msg_hdr->dest_iov_len = 
pe_entry->pe.tx.tx_op.src_iov_len; - break; - case SOCK_OP_ATOMIC: - msg_hdr->msg_len += sizeof(struct sock_op); - datatype_sz = ofi_datatype_size(pe_entry->pe.tx.tx_op.atomic.datatype); - if (pe_entry->flags & FI_INJECT) { - ofi_rbread(&tx_ctx->rb, &pe_entry->pe.tx.inject[0], - pe_entry->pe.tx.tx_op.src_iov_len); - ofi_rbread(&tx_ctx->rb, &pe_entry->pe.tx.inject[0] + - pe_entry->pe.tx.tx_op.src_iov_len, - pe_entry->pe.tx.tx_op.atomic.cmp_iov_len); - msg_hdr->msg_len += pe_entry->pe.tx.tx_op.src_iov_len + - pe_entry->pe.tx.tx_op.atomic.cmp_iov_len; - } else { - for (i = 0; i < pe_entry->pe.tx.tx_op.src_iov_len; i++) { - ofi_rbread(&tx_ctx->rb, &pe_entry->pe.tx.tx_iov[i].src, - sizeof(pe_entry->pe.tx.tx_iov[i].src)); - - if (pe_entry->pe.tx.tx_op.atomic.op != FI_ATOMIC_READ) - msg_hdr->msg_len += datatype_sz * - pe_entry->pe.tx.tx_iov[i].src.ioc.count; - } - for (i = 0; i < pe_entry->pe.tx.tx_op.atomic.cmp_iov_len; i++) { - ofi_rbread(&tx_ctx->rb, &pe_entry->pe.tx.tx_iov[i].cmp, - sizeof(pe_entry->pe.tx.tx_iov[i].cmp)); - msg_hdr->msg_len += datatype_sz * - pe_entry->pe.tx.tx_iov[i].cmp.ioc.count; - } - } - - for (i = 0; i < pe_entry->pe.tx.tx_op.dest_iov_len; i++) { - ofi_rbread(&tx_ctx->rb, &pe_entry->pe.tx.tx_iov[i].dst, - sizeof(pe_entry->pe.tx.tx_iov[i].dst)); - } - msg_hdr->msg_len += sizeof(union sock_iov) * i; - - for (i = 0; i < pe_entry->pe.tx.tx_op.atomic.res_iov_len; i++) { - ofi_rbread(&tx_ctx->rb, &pe_entry->pe.tx.tx_iov[i].res, - sizeof(pe_entry->pe.tx.tx_iov[i].res)); - } - - msg_hdr->dest_iov_len = pe_entry->pe.tx.tx_op.dest_iov_len; - break; - case SOCK_OP_CONN_MSG: - ofi_rbread(&tx_ctx->rb, &pe_entry->pe.tx.inject[0], - pe_entry->pe.tx.tx_op.src_iov_len); - msg_hdr->msg_len += pe_entry->pe.tx.tx_op.src_iov_len; - break; - default: - SOCK_LOG_ERROR("Invalid operation type\n"); - return -FI_EINVAL; - } - SOCK_LOG_DBG("Inserting TX-entry to PE entry %p, conn: %p\n", - pe_entry, pe_entry->conn); - - /* prepare message header */ - msg_hdr->version = 
SOCK_WIRE_PROTO_VERSION; - - if (tx_ctx->av) - msg_hdr->rx_id = (uint8_t) SOCK_GET_RX_ID(pe_entry->addr, - tx_ctx->av->rx_ctx_bits); - else - msg_hdr->rx_id = 0; - - if (pe_entry->flags & FI_INJECT_COMPLETE) - pe_entry->flags &= ~FI_TRANSMIT_COMPLETE; - - msg_hdr->flags = htonll(pe_entry->flags); - pe_entry->total_len = msg_hdr->msg_len; - msg_hdr->msg_len = htonll(msg_hdr->msg_len); - msg_hdr->pe_entry_id = htons(msg_hdr->pe_entry_id); - - return sock_pe_progress_tx_entry(pe, tx_ctx, pe_entry); -} - -void sock_pe_signal(struct sock_pe *pe) -{ - char c = 0; - if (pe->domain->progress_mode != FI_PROGRESS_AUTO) - return; - - ofi_mutex_lock(&pe->signal_lock); - if (pe->wcnt == pe->rcnt) { - if (ofi_write_socket(pe->signal_fds[SOCK_SIGNAL_WR_FD], &c, 1) != 1) - SOCK_LOG_ERROR("Failed to signal\n"); - else - pe->wcnt++; - } - ofi_mutex_unlock(&pe->signal_lock); -} - -void sock_pe_poll_add(struct sock_pe *pe, int fd) -{ - ofi_mutex_lock(&pe->signal_lock); - if (ofi_epoll_add(pe->epoll_set, fd, OFI_EPOLL_IN, NULL)) - SOCK_LOG_ERROR("failed to add to epoll set: %d\n", fd); - ofi_mutex_unlock(&pe->signal_lock); -} - -void sock_pe_poll_del(struct sock_pe *pe, int fd) -{ - ofi_mutex_lock(&pe->signal_lock); - if (ofi_epoll_del(pe->epoll_set, fd)) - SOCK_LOG_DBG("failed to del from epoll set: %d\n", fd); - ofi_mutex_unlock(&pe->signal_lock); -} - -void sock_pe_add_tx_ctx(struct sock_pe *pe, struct sock_tx_ctx *ctx) -{ - struct dlist_entry *entry; - struct sock_tx_ctx *curr_ctx; - pthread_mutex_lock(&pe->list_lock); - for (entry = pe->tx_list.next; entry != &pe->tx_list; - entry = entry->next) { - curr_ctx = container_of(entry, struct sock_tx_ctx, pe_entry); - if (curr_ctx == ctx) - goto out; - } - - dlist_insert_tail(&ctx->pe_entry, &pe->tx_list); - sock_pe_signal(pe); -out: - pthread_mutex_unlock(&pe->list_lock); - SOCK_LOG_DBG("TX ctx added to PE\n"); -} - -void sock_pe_add_rx_ctx(struct sock_pe *pe, struct sock_rx_ctx *ctx) -{ - struct dlist_entry *entry; - struct 
sock_rx_ctx *curr_ctx; - pthread_mutex_lock(&pe->list_lock); - for (entry = pe->rx_list.next; entry != &pe->rx_list; - entry = entry->next) { - curr_ctx = container_of(entry, struct sock_rx_ctx, pe_entry); - if (curr_ctx == ctx) - goto out; - } - dlist_insert_tail(&ctx->pe_entry, &pe->rx_list); - sock_pe_signal(pe); -out: - pthread_mutex_unlock(&pe->list_lock); - SOCK_LOG_DBG("RX ctx added to PE\n"); -} - -void sock_pe_remove_tx_ctx(struct sock_tx_ctx *tx_ctx) -{ - pthread_mutex_lock(&tx_ctx->domain->pe->list_lock); - dlist_remove(&tx_ctx->pe_entry); - pthread_mutex_unlock(&tx_ctx->domain->pe->list_lock); -} - -void sock_pe_remove_rx_ctx(struct sock_rx_ctx *rx_ctx) -{ - pthread_mutex_lock(&rx_ctx->domain->pe->list_lock); - dlist_remove(&rx_ctx->pe_entry); - pthread_mutex_unlock(&rx_ctx->domain->pe->list_lock); -} - -static int sock_pe_progress_rx_ep(struct sock_pe *pe, - struct sock_ep_attr *ep_attr, - struct sock_rx_ctx *rx_ctx) -{ - int i, num_fds; - struct sock_conn *conn; - struct sock_conn_map *map; - - map = &ep_attr->cmap; - - if (!map->used) - return 0; - - if (map->epoll_size < map->used) { - int new_size = map->used * 2; - struct ofi_epollfds_event *events; - - events = realloc(map->epoll_events, - sizeof(*map->epoll_events) * new_size); - if (events) { - map->epoll_events = events; - map->epoll_size = new_size; - } - } - - num_fds = ofi_epoll_wait(map->epoll_set, map->epoll_events, - MIN(map->used, map->epoll_size), 0); - if (num_fds < 0 || num_fds == 0) { - if (num_fds < 0) - SOCK_LOG_ERROR("epoll failed: %d\n", num_fds); - return num_fds; - } - - ofi_mutex_lock(&map->lock); - for (i = 0; i < num_fds; i++) { - conn = map->epoll_events[i].data.ptr; - if (!conn) - SOCK_LOG_ERROR("ofi_idm_lookup failed\n"); - - if (!conn || conn->rx_pe_entry) - continue; - - sock_pe_new_rx_entry(pe, rx_ctx, ep_attr, conn); - } - ofi_mutex_unlock(&map->lock); - - return 0; -} - -int sock_pe_progress_rx_ctx(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx) -{ - int ret = 0; - 
struct sock_ep_attr *ep_attr; - struct dlist_entry *entry; - struct sock_pe_entry *pe_entry; - - ofi_mutex_lock(&pe->lock); - - ofi_mutex_lock(&rx_ctx->lock); - sock_pe_progress_buffered_rx(rx_ctx, true); - ofi_mutex_unlock(&rx_ctx->lock); - - /* check for incoming data */ - if (rx_ctx->ctx.fid.fclass == FI_CLASS_SRX_CTX) { - for (entry = rx_ctx->ep_list.next; - entry != &rx_ctx->ep_list;) { - ep_attr = container_of(entry, struct sock_ep_attr, rx_ctx_entry); - entry = entry->next; - ret = sock_pe_progress_rx_ep(pe, ep_attr, rx_ctx); - if (ret < 0) - goto out; - } - } else { - ep_attr = rx_ctx->ep_attr; - ret = sock_pe_progress_rx_ep(pe, ep_attr, rx_ctx); - if (ret < 0) - goto out; - } - - for (entry = rx_ctx->pe_entry_list.next; - entry != &rx_ctx->pe_entry_list;) { - pe_entry = container_of(entry, struct sock_pe_entry, ctx_entry); - entry = entry->next; - ret = sock_pe_progress_rx_pe_entry(pe, pe_entry, rx_ctx); - if (ret < 0) - goto out; - } -out: - if (ret < 0) - SOCK_LOG_ERROR("failed to progress RX ctx\n"); - ofi_mutex_unlock(&pe->lock); - return ret; -} - -int sock_pe_progress_ep_rx(struct sock_pe *pe, struct sock_ep_attr *ep_attr) -{ - struct sock_rx_ctx *rx_ctx; - int ret, i; - - for (i = 0; i < ep_attr->ep_attr.rx_ctx_cnt; i++) { - rx_ctx = ep_attr->rx_array[i]; - if (!rx_ctx) - continue; - - ret = sock_pe_progress_rx_ctx(pe, rx_ctx); - if (ret < 0) - return ret; - } - return 0; -} - -int sock_pe_progress_ep_tx(struct sock_pe *pe, struct sock_ep_attr *ep_attr) -{ - struct sock_tx_ctx *tx_ctx; - int ret, i; - - for (i = 0; i < ep_attr->ep_attr.tx_ctx_cnt; i++) { - tx_ctx = ep_attr->tx_array[i]; - if (!tx_ctx) - continue; - - ret = sock_pe_progress_tx_ctx(pe, tx_ctx); - if (ret < 0) - return ret; - } - return 0; -} - -void sock_pe_progress_rx_ctrl_ctx(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx, - struct sock_tx_ctx *tx_ctx) -{ - struct sock_ep_attr *ep_attr; - struct dlist_entry *entry; - struct sock_pe_entry *pe_entry; - - /* check for incoming data */ 
- if (tx_ctx->fclass == FI_CLASS_STX_CTX) { - for (entry = tx_ctx->ep_list.next; entry != &tx_ctx->ep_list;) { - ep_attr = container_of(entry, struct sock_ep_attr, tx_ctx_entry); - entry = entry->next; - sock_pe_progress_rx_ep(pe, ep_attr, tx_ctx->rx_ctrl_ctx); - } - } else { - sock_pe_progress_rx_ep(pe, tx_ctx->ep_attr, tx_ctx->rx_ctrl_ctx); - } - - for (entry = rx_ctx->pe_entry_list.next; - entry != &rx_ctx->pe_entry_list;) { - pe_entry = container_of(entry, struct sock_pe_entry, ctx_entry); - entry = entry->next; - sock_pe_progress_rx_pe_entry(pe, pe_entry, rx_ctx); - } -} - -int sock_pe_progress_tx_ctx(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx) -{ - int ret = 0; - struct dlist_entry *entry; - struct sock_pe_entry *pe_entry; - - ofi_mutex_lock(&pe->lock); - - /* progress tx_ctx in PE table */ - for (entry = tx_ctx->pe_entry_list.next; - entry != &tx_ctx->pe_entry_list;) { - pe_entry = container_of(entry, struct sock_pe_entry, ctx_entry); - entry = entry->next; - - ret = sock_pe_progress_tx_entry(pe, tx_ctx, pe_entry); - if (ret < 0) { - SOCK_LOG_ERROR("Error in progressing %p\n", pe_entry); - goto out; - } - } - - ofi_mutex_lock(&tx_ctx->rb_lock); - if (!ofi_rbempty(&tx_ctx->rb) && !dlist_empty(&pe->free_list)) { - ret = sock_pe_new_tx_entry(pe, tx_ctx); - } - ofi_mutex_unlock(&tx_ctx->rb_lock); - if (ret < 0) - goto out; - - sock_pe_progress_rx_ctrl_ctx(pe, tx_ctx->rx_ctrl_ctx, tx_ctx); -out: - if (ret < 0) - SOCK_LOG_ERROR("failed to progress TX ctx\n"); - ofi_mutex_unlock(&pe->lock); - return ret; -} - -static int sock_pe_wait_ok(struct sock_pe *pe) -{ - struct dlist_entry *entry; - struct sock_tx_ctx *tx_ctx; - struct sock_rx_ctx *rx_ctx; - - if (pe->waittime && ((ofi_gettime_ms() - pe->waittime) < (uint64_t)sock_pe_waittime)) - return 0; - - if (dlist_empty(&pe->tx_list) && dlist_empty(&pe->rx_list)) - return 1; - - if (!dlist_empty(&pe->tx_list)) { - for (entry = pe->tx_list.next; - entry != &pe->tx_list; entry = entry->next) { - tx_ctx = 
container_of(entry, struct sock_tx_ctx, - pe_entry); - if (!ofi_rbempty(&tx_ctx->rb) || - !dlist_empty(&tx_ctx->pe_entry_list)) { - return 0; - } - } - } - - if (!dlist_empty(&pe->rx_list)) { - for (entry = pe->rx_list.next; - entry != &pe->rx_list; entry = entry->next) { - rx_ctx = container_of(entry, struct sock_rx_ctx, - pe_entry); - if (!dlist_empty(&rx_ctx->rx_buffered_list) || - !dlist_empty(&rx_ctx->pe_entry_list)) { - return 0; - } - } - } - - return 1; -} - -static void sock_pe_wait(struct sock_pe *pe) -{ - char tmp; - int ret; - struct ofi_epollfds_event event; - - ret = ofi_epoll_wait(pe->epoll_set, &event, 1, -1); - if (ret < 0) - SOCK_LOG_ERROR("poll failed : %s\n", strerror(ofi_sockerr())); - - ofi_mutex_lock(&pe->signal_lock); - if (pe->rcnt != pe->wcnt) { - if (ofi_read_socket(pe->signal_fds[SOCK_SIGNAL_RD_FD], &tmp, 1) == 1) - pe->rcnt++; - else - SOCK_LOG_ERROR("Invalid signal\n"); - } - ofi_mutex_unlock(&pe->signal_lock); - pe->waittime = ofi_gettime_ms(); -} - -static void sock_pe_set_affinity(void) -{ - char *sock_pe_affinity_str; - if (fi_param_get_str(&sock_prov, "pe_affinity", &sock_pe_affinity_str) != FI_SUCCESS) - return; - - if (sock_pe_affinity_str == NULL) - return; - - if (ofi_set_thread_affinity(sock_pe_affinity_str) == -FI_ENOSYS) - SOCK_LOG_ERROR("FI_SOCKETS_PE_AFFINITY is not supported on OS X and Windows\n"); -} - -static void *sock_pe_progress_thread(void *data) -{ - int ret; - struct dlist_entry *entry; - struct sock_tx_ctx *tx_ctx; - struct sock_rx_ctx *rx_ctx; - struct sock_pe *pe = (struct sock_pe *)data; - - SOCK_LOG_DBG("Progress thread started\n"); - sock_pe_set_affinity(); - while (*((volatile int *)&pe->do_progress)) { - pthread_mutex_lock(&pe->list_lock); - if (pe->domain->progress_mode == FI_PROGRESS_AUTO && - sock_pe_wait_ok(pe)) { - pthread_mutex_unlock(&pe->list_lock); - sock_pe_wait(pe); - pthread_mutex_lock(&pe->list_lock); - } - - if (!dlist_empty(&pe->tx_list)) { - for (entry = pe->tx_list.next; - entry != 
&pe->tx_list; entry = entry->next) { - tx_ctx = container_of(entry, struct sock_tx_ctx, - pe_entry); - ret = sock_pe_progress_tx_ctx(pe, tx_ctx); - if (ret < 0) { - SOCK_LOG_ERROR("failed to progress TX\n"); - pthread_mutex_unlock(&pe->list_lock); - return NULL; - } - } - } - - if (!dlist_empty(&pe->rx_list)) { - for (entry = pe->rx_list.next; - entry != &pe->rx_list; entry = entry->next) { - rx_ctx = container_of(entry, struct sock_rx_ctx, - pe_entry); - ret = sock_pe_progress_rx_ctx(pe, rx_ctx); - if (ret < 0) { - SOCK_LOG_ERROR("failed to progress RX\n"); - pthread_mutex_unlock(&pe->list_lock); - return NULL; - } - } - } - pthread_mutex_unlock(&pe->list_lock); - } - - SOCK_LOG_DBG("Progress thread terminated\n"); - return NULL; -} - -static void sock_pe_init_table(struct sock_pe *pe) -{ - int i; - - memset(&pe->pe_table, 0, - sizeof(struct sock_pe_entry) * SOCK_PE_MAX_ENTRIES); - - dlist_init(&pe->free_list); - dlist_init(&pe->busy_list); - dlist_init(&pe->pool_list); - - for (i = 0; i < SOCK_PE_MAX_ENTRIES; i++) { - dlist_insert_head(&pe->pe_table[i].entry, &pe->free_list); - pe->pe_table[i].cache_sz = SOCK_PE_COMM_BUFF_SZ; - if (ofi_rbinit(&pe->pe_table[i].comm_buf, SOCK_PE_COMM_BUFF_SZ)) - SOCK_LOG_ERROR("failed to init comm-cache\n"); - } - - pe->num_free_entries = SOCK_PE_MAX_ENTRIES; - SOCK_LOG_DBG("PE table init: OK\n"); -} - -struct sock_pe *sock_pe_init(struct sock_domain *domain) -{ - struct sock_pe *pe; - int ret; - - pe = calloc(1, sizeof(*pe)); - if (!pe) - return NULL; - - sock_pe_init_table(pe); - dlist_init(&pe->tx_list); - dlist_init(&pe->rx_list); - ofi_mutex_init(&pe->lock); - ofi_mutex_init(&pe->signal_lock); - pthread_mutex_init(&pe->list_lock, NULL); - pe->domain = domain; - - - ret = ofi_bufpool_create(&pe->pe_rx_pool, - sizeof(struct sock_pe_entry), 16, 0, 1024, 0); - if (ret) { - SOCK_LOG_ERROR("failed to create buffer pool\n"); - goto err1; - } - - ret = ofi_bufpool_create(&pe->atomic_rx_pool, - SOCK_EP_MAX_ATOMIC_SZ, 16, 0, 32, 0); - 
if (ret) { - SOCK_LOG_ERROR("failed to create atomic rx buffer pool\n"); - goto err2; - } - - if (ofi_epoll_create(&pe->epoll_set) < 0) { - SOCK_LOG_ERROR("failed to create epoll set\n"); - goto err3; - } - - if (domain->progress_mode == FI_PROGRESS_AUTO) { - if (socketpair(AF_UNIX, SOCK_STREAM, 0, pe->signal_fds) < 0) - goto err4; - - if (fd_set_nonblock(pe->signal_fds[SOCK_SIGNAL_RD_FD]) || - ofi_epoll_add(pe->epoll_set, - pe->signal_fds[SOCK_SIGNAL_RD_FD], - OFI_EPOLL_IN, NULL)) - goto err5; - - pe->do_progress = 1; - if (pthread_create(&pe->progress_thread, NULL, - sock_pe_progress_thread, (void *)pe)) { - SOCK_LOG_ERROR("Couldn't create progress thread\n"); - goto err5; - } - } - SOCK_LOG_DBG("PE init: OK\n"); - return pe; - -err5: - ofi_close_socket(pe->signal_fds[0]); - ofi_close_socket(pe->signal_fds[1]); -err4: - ofi_epoll_close(pe->epoll_set); -err3: - ofi_bufpool_destroy(pe->atomic_rx_pool); -err2: - ofi_bufpool_destroy(pe->pe_rx_pool); -err1: - ofi_mutex_destroy(&pe->lock); - free(pe); - return NULL; -} - -static void sock_pe_free_util_pool(struct sock_pe *pe) -{ - struct dlist_entry *entry; - struct sock_pe_entry *pe_entry; - - while (!dlist_empty(&pe->pool_list)) { - entry = pe->pool_list.next; - pe_entry = container_of(entry, struct sock_pe_entry, entry); - ofi_rbfree(&pe_entry->comm_buf); - dlist_remove(&pe_entry->entry); - ofi_buf_free(pe_entry); - } - - ofi_bufpool_destroy(pe->pe_rx_pool); - ofi_bufpool_destroy(pe->atomic_rx_pool); -} - -void sock_pe_finalize(struct sock_pe *pe) -{ - int i; - if (pe->domain->progress_mode == FI_PROGRESS_AUTO) { - pe->do_progress = 0; - sock_pe_signal(pe); - pthread_join(pe->progress_thread, NULL); - ofi_close_socket(pe->signal_fds[0]); - ofi_close_socket(pe->signal_fds[1]); - } - - for (i = 0; i < SOCK_PE_MAX_ENTRIES; i++) { - ofi_rbfree(&pe->pe_table[i].comm_buf); - } - - sock_pe_free_util_pool(pe); - ofi_mutex_destroy(&pe->lock); - ofi_mutex_destroy(&pe->signal_lock); - pthread_mutex_destroy(&pe->list_lock); - 
ofi_epoll_close(pe->epoll_set); - free(pe); - SOCK_LOG_DBG("Progress engine finalize: OK\n"); -} diff --git a/prov/sockets/src/sock_rma.c b/prov/sockets/src/sock_rma.c deleted file mode 100644 index a82c9d225fd..00000000000 --- a/prov/sockets/src/sock_rma.c +++ /dev/null @@ -1,502 +0,0 @@ -/* - * Copyright (c) 2014 Intel Corporation, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "sock.h" -#include "sock_util.h" - -#define SOCK_LOG_DBG(...) 
_SOCK_LOG_DBG(FI_LOG_EP_DATA, __VA_ARGS__) -#define SOCK_LOG_ERROR(...) _SOCK_LOG_ERROR(FI_LOG_EP_DATA, __VA_ARGS__) - -ssize_t sock_ep_rma_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg, - uint64_t flags) -{ - ssize_t ret; - size_t i; - struct sock_op tx_op; - union sock_iov tx_iov; - struct sock_conn *conn; - struct sock_tx_ctx *tx_ctx; - uint64_t total_len, src_len, dst_len, op_flags; - struct sock_ep *sock_ep; - struct sock_ep_attr *ep_attr; - - switch (ep->fid.fclass) { - case FI_CLASS_EP: - sock_ep = container_of(ep, struct sock_ep, ep); - tx_ctx = sock_ep->attr->tx_ctx->use_shared ? - sock_ep->attr->tx_ctx->stx_ctx : sock_ep->attr->tx_ctx; - ep_attr = sock_ep->attr; - op_flags = sock_ep->tx_attr.op_flags; - break; - - case FI_CLASS_TX_CTX: - tx_ctx = container_of(ep, struct sock_tx_ctx, fid.ctx); - ep_attr = tx_ctx->ep_attr; - op_flags = tx_ctx->attr.op_flags; - break; - - default: - SOCK_LOG_ERROR("Invalid EP type\n"); - return -FI_EINVAL; - } - -#if ENABLE_DEBUG - if (msg->iov_count > SOCK_EP_MAX_IOV_LIMIT || - msg->rma_iov_count > SOCK_EP_MAX_IOV_LIMIT) - return -FI_EINVAL; -#endif - - if (!tx_ctx->enabled) - return -FI_EOPBADSTATE; - - ret = sock_ep_get_conn(ep_attr, tx_ctx, msg->addr, &conn); - if (ret) - return ret; - - SOCK_EP_SET_TX_OP_FLAGS(flags); - if (flags & SOCK_USE_OP_FLAGS) - flags |= op_flags; - - if (flags & FI_TRIGGER) { - ret = sock_queue_rma_op(ep, msg, flags, FI_OP_READ); - if (ret != 1) - return ret; - } - - total_len = sizeof(struct sock_op_send) + - (msg->iov_count * sizeof(union sock_iov)) + - (msg->rma_iov_count * sizeof(union sock_iov)); - - sock_tx_ctx_start(tx_ctx); - if (ofi_rbavail(&tx_ctx->rb) < total_len) { - ret = -FI_EAGAIN; - goto err; - } - - memset(&tx_op, 0, sizeof(struct sock_op)); - tx_op.op = SOCK_OP_READ; - tx_op.src_iov_len = (uint8_t) msg->rma_iov_count; - tx_op.dest_iov_len = (uint8_t) msg->iov_count; - - sock_tx_ctx_write_op_send(tx_ctx, &tx_op, flags, - (uintptr_t) msg->context, msg->addr, - 
(uintptr_t) msg->msg_iov[0].iov_base, - ep_attr, conn); - - src_len = 0; - for (i = 0; i < msg->rma_iov_count; i++) { - tx_iov.iov.addr = msg->rma_iov[i].addr; - tx_iov.iov.key = msg->rma_iov[i].key; - tx_iov.iov.len = msg->rma_iov[i].len; - sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov)); - src_len += tx_iov.iov.len; - } - - dst_len = 0; - for (i = 0; i < msg->iov_count; i++) { - tx_iov.iov.addr = (uintptr_t) msg->msg_iov[i].iov_base; - tx_iov.iov.len = msg->msg_iov[i].iov_len; - sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov)); - dst_len += tx_iov.iov.len; - } - -#if ENABLE_DEBUG - if (dst_len != src_len) { - SOCK_LOG_ERROR("Buffer length mismatch\n"); - ret = -FI_EINVAL; - goto err; - } -#endif - - sock_tx_ctx_commit(tx_ctx); - return 0; - -err: - sock_tx_ctx_abort(tx_ctx); - return ret; -} - -static ssize_t sock_ep_rma_read(struct fid_ep *ep, void *buf, size_t len, - void *desc, fi_addr_t src_addr, uint64_t addr, - uint64_t key, void *context) -{ - struct iovec msg_iov = { - .iov_base = (void *)buf, - .iov_len = len, - }; - struct fi_rma_iov rma_iov = { - .addr = addr, - .key = key, - .len = len, - }; - struct fi_msg_rma msg = { - .msg_iov = &msg_iov, - .desc = &desc, - .iov_count = 1, - .rma_iov_count = 1, - .rma_iov = &rma_iov, - .addr = src_addr, - .context = context, - .data = 0, - }; - - return sock_ep_rma_readmsg(ep, &msg, SOCK_USE_OP_FLAGS); -} - -static ssize_t sock_ep_rma_readv(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, - fi_addr_t src_addr, uint64_t addr, uint64_t key, - void *context) -{ - struct fi_rma_iov rma_iov = { - .addr = addr, - .len = ofi_total_iov_len(iov, count), - .key = key, - }; - struct fi_msg_rma msg = { - .msg_iov = iov, - .desc = desc, - .iov_count = count, - .rma_iov_count = 1, - .rma_iov = &rma_iov, - .addr = src_addr, - .context = context, - .data = 0, - }; - - return sock_ep_rma_readmsg(ep, &msg, SOCK_USE_OP_FLAGS); -} - -ssize_t sock_ep_rma_writemsg(struct fid_ep *ep, const struct fi_msg_rma 
*msg, - uint64_t flags) -{ - ssize_t ret; - size_t i; - struct sock_op tx_op; - union sock_iov tx_iov; - struct sock_conn *conn; - struct sock_tx_ctx *tx_ctx; - uint64_t total_len, src_len, dst_len, op_flags; - struct sock_ep *sock_ep; - struct sock_ep_attr *ep_attr; - - switch (ep->fid.fclass) { - case FI_CLASS_EP: - sock_ep = container_of(ep, struct sock_ep, ep); - tx_ctx = sock_ep->attr->tx_ctx->use_shared ? - sock_ep->attr->tx_ctx->stx_ctx : sock_ep->attr->tx_ctx; - ep_attr = sock_ep->attr; - op_flags = sock_ep->tx_attr.op_flags; - break; - - case FI_CLASS_TX_CTX: - tx_ctx = container_of(ep, struct sock_tx_ctx, fid.ctx); - ep_attr = tx_ctx->ep_attr; - op_flags = tx_ctx->attr.op_flags; - break; - - default: - SOCK_LOG_ERROR("Invalid EP type\n"); - return -FI_EINVAL; - } - -#if ENABLE_DEBUG - if (msg->iov_count > SOCK_EP_MAX_IOV_LIMIT || - msg->rma_iov_count > SOCK_EP_MAX_IOV_LIMIT) - return -FI_EINVAL; -#endif - - if (!tx_ctx->enabled) - return -FI_EOPBADSTATE; - - ret = sock_ep_get_conn(ep_attr, tx_ctx, msg->addr, &conn); - if (ret) - return ret; - - SOCK_EP_SET_TX_OP_FLAGS(flags); - if (flags & SOCK_USE_OP_FLAGS) - flags |= op_flags; - - if (flags & FI_TRIGGER) { - ret = sock_queue_rma_op(ep, msg, flags, FI_OP_WRITE); - if (ret != 1) - return ret; - } - - memset(&tx_op, 0, sizeof(struct sock_op)); - tx_op.op = SOCK_OP_WRITE; - tx_op.dest_iov_len = (uint8_t) msg->rma_iov_count; - - total_len = 0; - if (flags & FI_INJECT) { - for (i = 0; i < msg->iov_count; i++) - total_len += msg->msg_iov[i].iov_len; - - if (total_len > SOCK_EP_MAX_INJECT_SZ) - return -FI_EINVAL; - - tx_op.src_iov_len = (uint8_t) total_len; - } else { - total_len += msg->iov_count * sizeof(union sock_iov); - tx_op.src_iov_len = (uint8_t) msg->iov_count; - } - - total_len += (sizeof(struct sock_op_send) + - (msg->rma_iov_count * sizeof(union sock_iov))); - - sock_tx_ctx_start(tx_ctx); - if (ofi_rbavail(&tx_ctx->rb) < total_len) { - ret = -FI_EAGAIN; - goto err; - } - - 
sock_tx_ctx_write_op_send(tx_ctx, &tx_op, flags, - (uintptr_t) msg->context, msg->addr, - (uintptr_t) msg->msg_iov[0].iov_base, ep_attr, conn); - - if (flags & FI_REMOTE_CQ_DATA) - sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(msg->data)); - - src_len = 0; - if (flags & FI_INJECT) { - for (i = 0; i < msg->iov_count; i++) { - sock_tx_ctx_write(tx_ctx, msg->msg_iov[i].iov_base, - msg->msg_iov[i].iov_len); - src_len += msg->msg_iov[i].iov_len; - } - } else { - for (i = 0; i < msg->iov_count; i++) { - tx_iov.iov.addr = (uintptr_t) msg->msg_iov[i].iov_base; - tx_iov.iov.len = msg->msg_iov[i].iov_len; - sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov)); - src_len += tx_iov.iov.len; - } - } - - dst_len = 0; - for (i = 0; i < msg->rma_iov_count; i++) { - tx_iov.iov.addr = msg->rma_iov[i].addr; - tx_iov.iov.key = msg->rma_iov[i].key; - tx_iov.iov.len = msg->rma_iov[i].len; - sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov)); - dst_len += tx_iov.iov.len; - } - -#if ENABLE_DEBUG - if (dst_len != src_len) { - SOCK_LOG_ERROR("Buffer length mismatch\n"); - ret = -FI_EINVAL; - goto err; - } -#endif - - sock_tx_ctx_commit(tx_ctx); - return 0; - -err: - sock_tx_ctx_abort(tx_ctx); - return ret; -} - -static ssize_t sock_ep_rma_write(struct fid_ep *ep, const void *buf, - size_t len, void *desc, fi_addr_t dest_addr, - uint64_t addr, uint64_t key, void *context) -{ - struct iovec msg_iov = { - .iov_base = (void *)buf, - .iov_len = len, - }; - struct fi_rma_iov rma_iov = { - .addr = addr, - .key = key, - .len = len, - }; - struct fi_msg_rma msg = { - .msg_iov = &msg_iov, - .desc = &desc, - .iov_count = 1, - .rma_iov_count = 1, - .rma_iov = &rma_iov, - .addr = dest_addr, - .context = context, - .data = 0, - }; - - return sock_ep_rma_writemsg(ep, &msg, SOCK_USE_OP_FLAGS); -} - -static ssize_t sock_ep_rma_writev(struct fid_ep *ep, const struct iovec *iov, - void **desc, size_t count, fi_addr_t dest_addr, - uint64_t addr, uint64_t key, void *context) -{ - struct fi_rma_iov rma_iov = { - 
.addr = addr, - .key = key, - .len = ofi_total_iov_len(iov, count), - }; - struct fi_msg_rma msg = { - .msg_iov = iov, - .desc = desc, - .iov_count = count, - .rma_iov_count = 1, - .rma_iov = &rma_iov, - .addr = dest_addr, - .context = context, - .data = 0, - }; - - return sock_ep_rma_writemsg(ep, &msg, SOCK_USE_OP_FLAGS); -} - -static ssize_t sock_ep_rma_writedata(struct fid_ep *ep, const void *buf, - size_t len, void *desc, uint64_t data, - fi_addr_t dest_addr, uint64_t addr, - uint64_t key, void *context) -{ - struct iovec msg_iov = { - .iov_base = (void *)buf, - .iov_len = len, - }; - struct fi_rma_iov rma_iov = { - .addr = addr, - .key = key, - .len = len, - }; - struct fi_msg_rma msg = { - .desc = &desc, - .iov_count = 1, - .rma_iov_count = 1, - .rma_iov = &rma_iov, - .msg_iov = &msg_iov, - .addr = dest_addr, - .context = context, - .data = data, - }; - - return sock_ep_rma_writemsg(ep, &msg, FI_REMOTE_CQ_DATA | - SOCK_USE_OP_FLAGS); -} - -static ssize_t sock_ep_rma_inject(struct fid_ep *ep, const void *buf, - size_t len, fi_addr_t dest_addr, uint64_t addr, - uint64_t key) -{ - struct iovec msg_iov = { - .iov_base = (void *)buf, - .iov_len = len, - }; - struct fi_rma_iov rma_iov = { - .addr = addr, - .key = key, - .len = len, - }; - struct fi_msg_rma msg = { - .iov_count = 1, - .rma_iov_count = 1, - .rma_iov = &rma_iov, - .msg_iov = &msg_iov, - .desc = NULL, - .addr = dest_addr, - .context = NULL, - .data = 0, - }; - - return sock_ep_rma_writemsg(ep, &msg, FI_INJECT | - SOCK_NO_COMPLETION | SOCK_USE_OP_FLAGS); -} - -static ssize_t sock_ep_rma_injectdata(struct fid_ep *ep, const void *buf, - size_t len, uint64_t data, - fi_addr_t dest_addr, uint64_t addr, - uint64_t key) -{ - struct iovec msg_iov = { - .iov_base = (void *)buf, - .iov_len = len, - }; - struct fi_rma_iov rma_iov = { - .addr = addr, - .key = key, - .len = len, - }; - struct fi_msg_rma msg = { - .iov_count = 1, - .rma_iov_count = 1, - .rma_iov = &rma_iov, - .msg_iov = &msg_iov, - .desc = NULL, - 
.addr = dest_addr, - .context = NULL, - .data = data, - }; - - return sock_ep_rma_writemsg(ep, &msg, FI_INJECT | FI_REMOTE_CQ_DATA | - SOCK_NO_COMPLETION | SOCK_USE_OP_FLAGS); -} - - -struct fi_ops_rma sock_ep_rma = { - .size = sizeof(struct fi_ops_rma), - .read = sock_ep_rma_read, - .readv = sock_ep_rma_readv, - .readmsg = sock_ep_rma_readmsg, - .write = sock_ep_rma_write, - .writev = sock_ep_rma_writev, - .writemsg = sock_ep_rma_writemsg, - .inject = sock_ep_rma_inject, - .injectdata = sock_ep_rma_injectdata, - .writedata = sock_ep_rma_writedata, -}; - diff --git a/prov/sockets/src/sock_rx_entry.c b/prov/sockets/src/sock_rx_entry.c deleted file mode 100644 index 8f3e082df6a..00000000000 --- a/prov/sockets/src/sock_rx_entry.c +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Copyright (c) 2014 Intel Corporation, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "config.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "sock.h" -#include "sock_util.h" - -#define SOCK_LOG_DBG(...) _SOCK_LOG_DBG(FI_LOG_EP_DATA, __VA_ARGS__) -#define SOCK_LOG_ERROR(...) _SOCK_LOG_ERROR(FI_LOG_EP_DATA, __VA_ARGS__) - -struct sock_rx_entry *sock_rx_new_entry(struct sock_rx_ctx *rx_ctx) -{ - struct sock_rx_entry *rx_entry; - struct slist_entry *entry; - size_t i; - - if (rx_ctx->rx_entry_pool == NULL) { - rx_ctx->rx_entry_pool = calloc(rx_ctx->attr.size, - sizeof(*rx_entry)); - if (!rx_ctx->rx_entry_pool) - return NULL; - - slist_init(&rx_ctx->pool_list); - - for (i = 0; i < rx_ctx->attr.size; i++) { - slist_insert_tail(&rx_ctx->rx_entry_pool[i].pool_entry, - &rx_ctx->pool_list); - rx_ctx->rx_entry_pool[i].is_pool_entry = 1; - } - } - - if (!slist_empty(&rx_ctx->pool_list)) { - entry = slist_remove_head(&rx_ctx->pool_list); - rx_entry = container_of(entry, struct sock_rx_entry, pool_entry); - rx_entry->rx_ctx = rx_ctx; - } else { - rx_entry = calloc(1, sizeof(*rx_entry)); - if (!rx_entry) - return NULL; - } - - rx_entry->is_tagged = 0; - SOCK_LOG_DBG("New rx_entry: %p, ctx: %p\n", rx_entry, rx_ctx); - dlist_init(&rx_entry->entry); - rx_ctx->num_left--; - return rx_entry; -} - -void sock_rx_release_entry(struct sock_rx_entry *rx_entry) -{ - struct sock_rx_ctx *rx_ctx; - SOCK_LOG_DBG("Releasing rx_entry: %p\n", rx_entry); - if (rx_entry->is_pool_entry) { - rx_ctx = rx_entry->rx_ctx; - memset(rx_entry, 0, sizeof(*rx_entry)); - rx_entry->rx_ctx = rx_ctx; - rx_entry->is_pool_entry = 1; - slist_insert_head(&rx_entry->pool_entry, &rx_ctx->pool_list); - } else { - free(rx_entry); - } -} - -struct sock_rx_entry 
*sock_rx_new_buffered_entry(struct sock_rx_ctx *rx_ctx, - size_t len) -{ - struct sock_rx_entry *rx_entry; - - if (rx_ctx->buffered_len + len >= rx_ctx->attr.total_buffered_recv) - SOCK_LOG_ERROR("Exceeded buffered recv limit\n"); - - rx_entry = calloc(1, sizeof(*rx_entry) + len); - if (!rx_entry) - return NULL; - - SOCK_LOG_DBG("New buffered entry:%p len: %lu, ctx: %p\n", - rx_entry, len, rx_ctx); - - rx_entry->is_busy = 1; - rx_entry->is_buffered = 1; - rx_entry->rx_op.dest_iov_len = 1; - rx_entry->iov[0].iov.len = len; - rx_entry->iov[0].iov.addr = (uintptr_t) (rx_entry + 1); - rx_entry->total_len = len; - - rx_ctx->buffered_len += len; - dlist_insert_tail(&rx_entry->entry, &rx_ctx->rx_buffered_list); - rx_ctx->progress_start = &rx_ctx->rx_buffered_list; - - return rx_entry; -} - -struct sock_rx_entry *sock_rx_get_entry(struct sock_rx_ctx *rx_ctx, - uint64_t addr, uint64_t tag, - uint8_t is_tagged) -{ - struct dlist_entry *entry; - struct sock_rx_entry *rx_entry; - - for (entry = rx_ctx->rx_entry_list.next; - entry != &rx_ctx->rx_entry_list; entry = entry->next) { - - rx_entry = container_of(entry, struct sock_rx_entry, entry); - if (rx_entry->is_busy || (is_tagged != rx_entry->is_tagged)) - continue; - - if (((rx_entry->tag & ~rx_entry->ignore) == (tag & ~rx_entry->ignore)) && - (rx_entry->addr == FI_ADDR_UNSPEC || addr == FI_ADDR_UNSPEC || - rx_entry->addr == addr || - (rx_ctx->av && - !sock_av_compare_addr(rx_ctx->av, addr, rx_entry->addr)))) { - rx_entry->is_busy = 1; - return rx_entry; - } - } - return NULL; -} - -struct sock_rx_entry *sock_rx_get_buffered_entry(struct sock_rx_ctx *rx_ctx, - uint64_t addr, uint64_t tag, - uint64_t ignore, - uint8_t is_tagged) -{ - struct dlist_entry *entry; - struct sock_rx_entry *rx_entry; - - for (entry = rx_ctx->rx_buffered_list.next; - entry != &rx_ctx->rx_buffered_list; entry = entry->next) { - - rx_entry = container_of(entry, struct sock_rx_entry, entry); - if (rx_entry->is_busy || (is_tagged != rx_entry->is_tagged) 
|| - rx_entry->is_claimed) - continue; - - if (((rx_entry->tag & ~ignore) == (tag & ~ignore)) && - (rx_entry->addr == FI_ADDR_UNSPEC || addr == FI_ADDR_UNSPEC || - rx_entry->addr == addr || - (rx_ctx->av && - !sock_av_compare_addr(rx_ctx->av, addr, rx_entry->addr)))) { - return rx_entry; - } - } - return NULL; -} diff --git a/prov/sockets/src/sock_trigger.c b/prov/sockets/src/sock_trigger.c deleted file mode 100644 index 07c81e4ab83..00000000000 --- a/prov/sockets/src/sock_trigger.c +++ /dev/null @@ -1,348 +0,0 @@ -/* - * Copyright (c) 2014-2015 Intel Corporation, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "config.h" - -#include -#include -#include -#include - -#include "sock.h" -#include "sock_util.h" - -#define SOCK_LOG_DBG(...) _SOCK_LOG_DBG(FI_LOG_EP_DATA, __VA_ARGS__) -#define SOCK_LOG_ERROR(...) _SOCK_LOG_ERROR(FI_LOG_EP_DATA, __VA_ARGS__) - -ssize_t sock_queue_rma_op(struct fid_ep *ep, const struct fi_msg_rma *msg, - uint64_t flags, enum fi_op_type op_type) -{ - struct sock_cntr *cntr; - struct sock_trigger *trigger; - struct sock_triggered_context *trigger_context; - struct sock_trigger_work *work; - - trigger_context = (struct sock_triggered_context *) msg->context; - if ((flags & FI_INJECT) || !trigger_context || - ((trigger_context->event_type != FI_TRIGGER_THRESHOLD) && - (trigger_context->event_type != SOCK_DEFERRED_WORK))) - return -FI_EINVAL; - - work = &trigger_context->trigger.work; - cntr = container_of(work->triggering_cntr, struct sock_cntr, cntr_fid); - if (ofi_atomic_get32(&cntr->value) >= (int) work->threshold) - return 1; - - trigger = calloc(1, sizeof(*trigger)); - if (!trigger) - return -FI_ENOMEM; - - trigger->context = trigger_context; - trigger->threshold = work->threshold; - - memcpy(&trigger->op.rma.msg, msg, sizeof(*msg)); - trigger->op.rma.msg.msg_iov = &trigger->op.rma.msg_iov[0]; - trigger->op.rma.msg.rma_iov = &trigger->op.rma.rma_iov[0]; - - memcpy(&trigger->op.rma.msg_iov[0], &msg->msg_iov[0], - msg->iov_count * sizeof(struct iovec)); - memcpy(&trigger->op.rma.rma_iov[0], &msg->rma_iov[0], - msg->rma_iov_count * sizeof(struct fi_rma_iov)); - - trigger->op_type = op_type; - trigger->ep = ep; - trigger->flags = flags; - - ofi_mutex_lock(&cntr->trigger_lock); - dlist_insert_tail(&trigger->entry, &cntr->trigger_list); - ofi_mutex_unlock(&cntr->trigger_lock); - sock_cntr_check_trigger_list(cntr); - return 0; -} - -ssize_t sock_queue_msg_op(struct fid_ep *ep, const struct fi_msg *msg, - uint64_t flags, enum fi_op_type op_type) -{ - struct sock_cntr *cntr; - struct sock_trigger *trigger; - struct sock_triggered_context 
*trigger_context; - struct sock_trigger_work *work; - - trigger_context = (struct sock_triggered_context *) msg->context; - if ((flags & FI_INJECT) || !trigger_context || - ((trigger_context->event_type != FI_TRIGGER_THRESHOLD) && - (trigger_context->event_type != SOCK_DEFERRED_WORK))) - return -FI_EINVAL; - - work = &trigger_context->trigger.work; - cntr = container_of(work->triggering_cntr, struct sock_cntr, cntr_fid); - if (ofi_atomic_get32(&cntr->value) >= (int) work->threshold) - return 1; - - trigger = calloc(1, sizeof(*trigger)); - if (!trigger) - return -FI_ENOMEM; - - trigger->context = trigger_context; - trigger->threshold = work->threshold; - - memcpy(&trigger->op.msg.msg, msg, sizeof(*msg)); - trigger->op.msg.msg.msg_iov = &trigger->op.msg.msg_iov[0]; - memcpy((void *) &trigger->op.msg.msg_iov[0], &msg->msg_iov[0], - msg->iov_count * sizeof(struct iovec)); - - trigger->op_type = op_type; - trigger->ep = ep; - trigger->flags = flags; - - ofi_mutex_lock(&cntr->trigger_lock); - dlist_insert_tail(&trigger->entry, &cntr->trigger_list); - ofi_mutex_unlock(&cntr->trigger_lock); - sock_cntr_check_trigger_list(cntr); - return 0; -} - -ssize_t sock_queue_tmsg_op(struct fid_ep *ep, const struct fi_msg_tagged *msg, - uint64_t flags, enum fi_op_type op_type) -{ - struct sock_cntr *cntr; - struct sock_trigger *trigger; - struct sock_triggered_context *trigger_context; - struct sock_trigger_work *work; - - trigger_context = (struct sock_triggered_context *) msg->context; - if ((flags & FI_INJECT) || !trigger_context || - ((trigger_context->event_type != FI_TRIGGER_THRESHOLD) && - (trigger_context->event_type != SOCK_DEFERRED_WORK))) - return -FI_EINVAL; - - work = &trigger_context->trigger.work; - cntr = container_of(work->triggering_cntr, struct sock_cntr, cntr_fid); - if (ofi_atomic_get32(&cntr->value) >= (int) work->threshold) - return 1; - - trigger = calloc(1, sizeof(*trigger)); - if (!trigger) - return -FI_ENOMEM; - - trigger->context = trigger_context; - 
trigger->threshold = work->threshold; - - memcpy(&trigger->op.tmsg.msg, msg, sizeof(*msg)); - trigger->op.tmsg.msg.msg_iov = &trigger->op.tmsg.msg_iov[0]; - memcpy((void *) &trigger->op.tmsg.msg_iov[0], &msg->msg_iov[0], - msg->iov_count * sizeof(struct iovec)); - - trigger->op_type = op_type; - trigger->ep = ep; - trigger->flags = flags; - - ofi_mutex_lock(&cntr->trigger_lock); - dlist_insert_tail(&trigger->entry, &cntr->trigger_list); - ofi_mutex_unlock(&cntr->trigger_lock); - sock_cntr_check_trigger_list(cntr); - return 0; -} - -ssize_t sock_queue_atomic_op(struct fid_ep *ep, const struct fi_msg_atomic *msg, - const struct fi_ioc *comparev, size_t compare_count, - struct fi_ioc *resultv, size_t result_count, - uint64_t flags, enum fi_op_type op_type) -{ - struct sock_cntr *cntr; - struct sock_trigger *trigger; - struct sock_triggered_context *trigger_context; - struct sock_trigger_work *work; - - trigger_context = (struct sock_triggered_context *) msg->context; - if ((flags & FI_INJECT) || !trigger_context || - ((trigger_context->event_type != FI_TRIGGER_THRESHOLD) && - (trigger_context->event_type != SOCK_DEFERRED_WORK))) - return -FI_EINVAL; - - work = &trigger_context->trigger.work; - cntr = container_of(work->triggering_cntr, struct sock_cntr, cntr_fid); - if (ofi_atomic_get32(&cntr->value) >= (int) work->threshold) - return 1; - - trigger = calloc(1, sizeof(*trigger)); - if (!trigger) - return -FI_ENOMEM; - - trigger->context = trigger_context; - trigger->threshold = work->threshold; - - memcpy(&trigger->op.atomic.msg, msg, sizeof(*msg)); - trigger->op.atomic.msg.msg_iov = &trigger->op.atomic.msg_iov[0]; - trigger->op.atomic.msg.rma_iov = &trigger->op.atomic.rma_iov[0]; - - memcpy(&trigger->op.atomic.msg_iov[0], &msg->msg_iov[0], - msg->iov_count * sizeof(struct fi_ioc)); - memcpy(&trigger->op.atomic.rma_iov[0], &msg->rma_iov[0], - msg->iov_count * sizeof(struct fi_rma_ioc)); - - if (comparev) { - memcpy(&trigger->op.atomic.comparev[0], &comparev[0], - 
compare_count * sizeof(struct fi_ioc)); - trigger->op.atomic.compare_count = compare_count; - } - - if (resultv) { - memcpy(&trigger->op.atomic.resultv[0], &resultv[0], - result_count * sizeof(struct fi_ioc)); - trigger->op.atomic.result_count = result_count; - } - - trigger->op_type = op_type; - trigger->ep = ep; - trigger->flags = flags; - - ofi_mutex_lock(&cntr->trigger_lock); - dlist_insert_tail(&trigger->entry, &cntr->trigger_list); - ofi_mutex_unlock(&cntr->trigger_lock); - sock_cntr_check_trigger_list(cntr); - return 0; -} - -ssize_t sock_queue_cntr_op(struct fi_deferred_work *work, uint64_t flags) -{ - struct sock_cntr *cntr; - struct sock_trigger *trigger; - - cntr = container_of(work->triggering_cntr, struct sock_cntr, cntr_fid); - if (ofi_atomic_get32(&cntr->value) >= (int) work->threshold) { - if (work->op_type == FI_OP_CNTR_SET) - fi_cntr_set(work->op.cntr->cntr, work->op.cntr->value); - else - fi_cntr_add(work->op.cntr->cntr, work->op.cntr->value); - return 0; - } - - trigger = calloc(1, sizeof(*trigger)); - if (!trigger) - return -FI_ENOMEM; - - trigger->context = (struct sock_triggered_context *) &work->context; - trigger->op_type = work->op_type; - trigger->threshold = work->threshold; - trigger->flags = flags; - - ofi_mutex_lock(&cntr->trigger_lock); - dlist_insert_tail(&trigger->entry, &cntr->trigger_list); - ofi_mutex_unlock(&cntr->trigger_lock); - sock_cntr_check_trigger_list(cntr); - return 0; -} - -ssize_t sock_queue_work(struct sock_domain *dom, struct fi_deferred_work *work) -{ - struct sock_triggered_context *ctx; - uint64_t flags = SOCK_NO_COMPLETION | SOCK_TRIGGERED_OP | FI_TRIGGER; - - /* We require the operation's context to point back to the fi_context - * embedded within the deferred work item. This is an implementation - * limitation, which we may turn into a requirement. The app must - * keep the fi_deferred_work structure around for the duration of the - * processing anyway. 
- */ - ctx = (struct sock_triggered_context *) &work->context; - ctx->event_type = SOCK_DEFERRED_WORK; - ctx->trigger.work.triggering_cntr = work->triggering_cntr; - ctx->trigger.work.threshold = work->threshold; - ctx->trigger.work.completion_cntr = work->completion_cntr; - - switch (work->op_type) { - case FI_OP_RECV: - if (work->op.msg->msg.context != &work->context) - return -FI_EINVAL; - return sock_ep_recvmsg(work->op.msg->ep, &work->op.msg->msg, - work->op.msg->flags | flags); - case FI_OP_SEND: - if (work->op.msg->msg.context != &work->context) - return -FI_EINVAL; - return sock_ep_sendmsg(work->op.msg->ep, &work->op.msg->msg, - work->op.msg->flags | flags); - case FI_OP_TRECV: - if (work->op.tagged->msg.context != &work->context) - return -FI_EINVAL; - return sock_ep_trecvmsg(work->op.tagged->ep, &work->op.tagged->msg, - work->op.tagged->flags | flags); - case FI_OP_TSEND: - if (work->op.tagged->msg.context != &work->context) - return -FI_EINVAL; - return sock_ep_tsendmsg(work->op.tagged->ep, &work->op.tagged->msg, - work->op.tagged->flags | flags); - case FI_OP_READ: - if (work->op.rma->msg.context != &work->context) - return -FI_EINVAL; - return sock_ep_rma_readmsg(work->op.rma->ep, &work->op.rma->msg, - work->op.rma->flags | flags); - case FI_OP_WRITE: - if (work->op.rma->msg.context != &work->context) - return -FI_EINVAL; - return sock_ep_rma_writemsg(work->op.rma->ep, &work->op.rma->msg, - work->op.rma->flags | flags); - case FI_OP_ATOMIC: - if (work->op.atomic->msg.context != &work->context) - return -FI_EINVAL; - return sock_ep_tx_atomic(work->op.atomic->ep, &work->op.atomic->msg, - NULL, NULL, 0, NULL, NULL, 0, - work->op.atomic->flags | flags); - case FI_OP_FETCH_ATOMIC: - if (work->op.fetch_atomic->msg.context != &work->context) - return -FI_EINVAL; - return sock_ep_tx_atomic(work->op.fetch_atomic->ep, - &work->op.fetch_atomic->msg, - NULL, NULL, 0, - work->op.fetch_atomic->fetch.msg_iov, - work->op.fetch_atomic->fetch.desc, - 
work->op.fetch_atomic->fetch.iov_count, - work->op.fetch_atomic->flags | flags); - case FI_OP_COMPARE_ATOMIC: - if (work->op.compare_atomic->msg.context != &work->context) - return -FI_EINVAL; - return sock_ep_tx_atomic(work->op.compare_atomic->ep, - &work->op.compare_atomic->msg, - work->op.compare_atomic->compare.msg_iov, - work->op.compare_atomic->compare.desc, - work->op.compare_atomic->compare.iov_count, - work->op.compare_atomic->fetch.msg_iov, - work->op.compare_atomic->fetch.desc, - work->op.compare_atomic->fetch.iov_count, - work->op.compare_atomic->flags | flags); - case FI_OP_CNTR_SET: - case FI_OP_CNTR_ADD: - return sock_queue_cntr_op(work, 0); - default: - return -FI_ENOSYS; - } -} diff --git a/prov/sockets/src/sock_wait.c b/prov/sockets/src/sock_wait.c deleted file mode 100644 index d55660b218e..00000000000 --- a/prov/sockets/src/sock_wait.c +++ /dev/null @@ -1,321 +0,0 @@ -/* - * Copyright (c) 2014 Intel Corporation, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "config.h" - -#include -#include -#include -#include - -#include "sock.h" -#include "sock_util.h" - -#define SOCK_LOG_DBG(...) _SOCK_LOG_DBG(FI_LOG_CORE, __VA_ARGS__) -#define SOCK_LOG_ERROR(...) _SOCK_LOG_ERROR(FI_LOG_CORE, __VA_ARGS__) - -enum { - WAIT_READ_FD = 0, - WAIT_WRITE_FD, -}; - -#ifndef _WIN32 /* there is no support of wait objects on windows */ -int sock_wait_get_obj(struct fid_wait *fid, void *arg) -{ - struct fi_mutex_cond mut_cond; - struct sock_wait *wait; - - wait = container_of(fid, struct sock_wait, wait_fid.fid); - if (sock_dom_check_manual_progress(wait->fab)) - return -FI_ENOSYS; - - switch (wait->type) { - case FI_WAIT_FD: - memcpy(arg, &wait->wobj.fd[WAIT_READ_FD], sizeof(int)); - break; - - case FI_WAIT_MUTEX_COND: - mut_cond.mutex = &wait->wobj.mutex_cond.mutex; - mut_cond.cond = &wait->wobj.mutex_cond.cond; - memcpy(arg, &mut_cond, sizeof(mut_cond)); - break; - default: - SOCK_LOG_ERROR("Invalid wait obj type\n"); - return -FI_EINVAL; - } - - return 0; -} -#else /* _WIN32 */ -int sock_wait_get_obj(struct fid_wait *fid, void *arg) -{ - return -FI_ENOSYS; -} -#endif - -static int sock_wait_init(struct sock_wait *wait, enum fi_wait_obj type) -{ - int ret; - - wait->type = type; - - switch (type) { - case FI_WAIT_FD: - if (socketpair(AF_UNIX, SOCK_STREAM, 0, wait->wobj.fd)) - return -ofi_sockerr(); - - ret = fd_set_nonblock(wait->wobj.fd[WAIT_READ_FD]); - if (ret) { - ofi_close_socket(wait->wobj.fd[WAIT_READ_FD]); - 
ofi_close_socket(wait->wobj.fd[WAIT_WRITE_FD]); - return ret; - } - break; - - case FI_WAIT_MUTEX_COND: - pthread_mutex_init(&wait->wobj.mutex_cond.mutex, NULL); - pthread_cond_init(&wait->wobj.mutex_cond.cond, NULL); - break; - - default: - SOCK_LOG_ERROR("Invalid wait object type\n"); - return -FI_EINVAL; - } - return 0; -} - -static int sock_wait_wait(struct fid_wait *wait_fid, int timeout) -{ - struct sock_cq *cq; - struct sock_cntr *cntr; - struct sock_wait *wait; - uint64_t start_ms = 0, end_ms = 0; - struct dlist_entry *p, *head; - struct sock_fid_list *list_item; - int err = 0; - ssize_t ret; - char c; - - wait = container_of(wait_fid, struct sock_wait, wait_fid); - if (timeout > 0) - start_ms = ofi_gettime_ms(); - - head = &wait->fid_list; - for (p = head->next; p != head; p = p->next) { - list_item = container_of(p, struct sock_fid_list, entry); - switch (list_item->fid->fclass) { - case FI_CLASS_CQ: - cq = container_of(list_item->fid, - struct sock_cq, cq_fid); - sock_cq_progress(cq); - if (ofi_rbused(&cq->cqerr_rb)) - return 1; - break; - - case FI_CLASS_CNTR: - cntr = container_of(list_item->fid, - struct sock_cntr, cntr_fid); - sock_cntr_progress(cntr); - break; - } - } - if (timeout > 0) { - end_ms = ofi_gettime_ms(); - timeout -= (int) (end_ms - start_ms); - timeout = timeout < 0 ? 
0 : timeout; - } - - switch (wait->type) { - case FI_WAIT_FD: - err = fi_poll_fd(wait->wobj.fd[WAIT_READ_FD], timeout); - if (err == 0) { - err = -FI_ETIMEDOUT; - } else { - while (err > 0) { - ret = ofi_read_socket(wait->wobj.fd[WAIT_READ_FD], &c, 1); - if (ret != 1) { - SOCK_LOG_ERROR("failed to read wait_fd\n"); - err = 0; - break; - } else - err--; - } - } - break; - - case FI_WAIT_MUTEX_COND: - err = ofi_wait_cond(&wait->wobj.mutex_cond.cond, - &wait->wobj.mutex_cond.mutex, timeout); - break; - - default: - SOCK_LOG_ERROR("Invalid wait object type\n"); - return -FI_EINVAL; - } - return err; -} - -void sock_wait_signal(struct fid_wait *wait_fid) -{ - struct sock_wait *wait; - static char c = 'a'; - ssize_t ret; - - wait = container_of(wait_fid, struct sock_wait, wait_fid); - - switch (wait->type) { - case FI_WAIT_FD: - ret = ofi_write_socket(wait->wobj.fd[WAIT_WRITE_FD], &c, 1); - if (ret != 1) - SOCK_LOG_ERROR("failed to signal\n"); - break; - - case FI_WAIT_MUTEX_COND: - pthread_cond_signal(&wait->wobj.mutex_cond.cond); - break; - default: - SOCK_LOG_ERROR("Invalid wait object type\n"); - return; - } -} - -static struct fi_ops_wait sock_wait_ops = { - .size = sizeof(struct fi_ops_wait), - .wait = sock_wait_wait, -}; - -static int sock_wait_control(struct fid *fid, int command, void *arg) -{ - struct sock_wait *wait; - int ret = 0; - - wait = container_of(fid, struct sock_wait, wait_fid.fid); - switch (command) { - case FI_GETWAIT: - ret = sock_wait_get_obj(&wait->wait_fid, arg); - break; - default: - ret = -FI_EINVAL; - break; - } - return ret; -} - -int sock_wait_close(fid_t fid) -{ - struct sock_fid_list *list_item; - struct dlist_entry *p, *head; - struct sock_wait *wait; - - wait = container_of(fid, struct sock_wait, wait_fid.fid); - head = &wait->fid_list; - - for (p = head->next; p != head;) { - list_item = container_of(p, struct sock_fid_list, entry); - p = p->next; - free(list_item); - } - - if (wait->type == FI_WAIT_FD) { - 
ofi_close_socket(wait->wobj.fd[WAIT_READ_FD]); - ofi_close_socket(wait->wobj.fd[WAIT_WRITE_FD]); - } - - ofi_atomic_dec32(&wait->fab->ref); - free(wait); - return 0; -} - -static struct fi_ops sock_wait_fi_ops = { - .size = sizeof(struct fi_ops), - .close = sock_wait_close, - .bind = fi_no_bind, - .control = sock_wait_control, - .ops_open = fi_no_ops_open, -}; - -static int sock_verify_wait_attr(struct fi_wait_attr *attr) -{ - switch (attr->wait_obj) { - case FI_WAIT_UNSPEC: - case FI_WAIT_FD: - case FI_WAIT_MUTEX_COND: - break; - - default: - SOCK_LOG_ERROR("Invalid wait object type\n"); - return -FI_EINVAL; - } - if (attr->flags) - return -FI_EINVAL; - return 0; -} - -int sock_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr, - struct fid_wait **waitset) -{ - int err; - struct sock_wait *wait; - struct sock_fabric *fab; - enum fi_wait_obj wait_obj_type; - - if (attr && sock_verify_wait_attr(attr)) - return -FI_EINVAL; - - fab = container_of(fabric, struct sock_fabric, fab_fid); - if (!attr || attr->wait_obj == FI_WAIT_UNSPEC) - wait_obj_type = FI_WAIT_FD; - else - wait_obj_type = attr->wait_obj; - - wait = calloc(1, sizeof(*wait)); - if (!wait) - return -FI_ENOMEM; - - err = sock_wait_init(wait, wait_obj_type); - if (err) { - free(wait); - return err; - } - - wait->wait_fid.fid.fclass = FI_CLASS_WAIT; - wait->wait_fid.fid.context = 0; - wait->wait_fid.fid.ops = &sock_wait_fi_ops; - wait->wait_fid.ops = &sock_wait_ops; - wait->fab = fab; - wait->type = wait_obj_type; - ofi_atomic_inc32(&fab->ref); - dlist_init(&wait->fid_list); - - *waitset = &wait->wait_fid; - return 0; -} diff --git a/src/fabric.c b/src/fabric.c index ecdde7ac22f..e3284e9bc86 100644 --- a/src/fabric.c +++ b/src/fabric.c @@ -454,7 +454,7 @@ static void ofi_ordered_provs_init(void) */ /* Before you add ANYTHING here, read the comment above!!! */ - "udp", "tcp", "sockets", "net", /* NOTHING GOES HERE! */ + "udp", "tcp", /* NOTHING GOES HERE! */ /* Seriously, read it! 
*/ /* These are hooking providers only. Their order @@ -540,8 +540,7 @@ static void ofi_register_provider(struct fi_provider *provider, void *dlhandle) /* Prevent utility providers from layering on these core providers * unless explicitly requested. */ - if (!strcasecmp(provider->name, "sockets") || - !strcasecmp(provider->name, "shm") || + if (!strcasecmp(provider->name, "shm") || !strcasecmp(provider->name, "efa") || !strcasecmp(provider->name, "psm3") || !strcasecmp(provider->name, "ucx") || @@ -900,7 +899,6 @@ void fi_ini(void) ofi_register_provider(OPX_INIT, NULL); ofi_register_provider(UCX_INIT, NULL); ofi_register_provider(UDP_INIT, NULL); - ofi_register_provider(SOCKETS_INIT, NULL); ofi_register_provider(TCP_INIT, NULL); ofi_register_provider(HOOK_PERF_INIT, NULL); diff --git a/util/pingpong.c b/util/pingpong.c index b38c23d39a3..18e6d2900ae 100644 --- a/util/pingpong.c +++ b/util/pingpong.c @@ -2000,7 +2000,7 @@ static void pp_pingpong_usage(struct ct_pingpong *ct, char *name, char *desc) fprintf(stderr, " %-20s %s\n", "-d ", "domain name"); fprintf(stderr, " %-20s %s\n", "-p ", - "specific provider name eg sockets, verbs"); + "specific provider name eg tcp, verbs"); fprintf(stderr, " %-20s %s\n", "-e ", "endpoint type: msg|rdm|dgram (dgram)"); From 26380e43d6c05dacea8c7d84e2f42a4c0c3d28da Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 20 Sep 2023 13:28:42 -0700 Subject: [PATCH 07/34] prov/tcp: Add support for FABRIC_DIRECT builds The sockets provider will be removed. This adds the ability to verify the FABRIC_DIRECT build option and provide blank direct header file templates. 
Signed-off-by: Sean Hefty --- .travis.yml | 2 +- prov/tcp/include/rdma/fi_direct.h | 35 ++++++++++++++++++++ prov/tcp/include/rdma/fi_direct_atomic.h | 33 ++++++++++++++++++ prov/tcp/include/rdma/fi_direct_atomic_def.h | 33 ++++++++++++++++++ prov/tcp/include/rdma/fi_direct_cm.h | 33 ++++++++++++++++++ prov/tcp/include/rdma/fi_direct_domain.h | 33 ++++++++++++++++++ prov/tcp/include/rdma/fi_direct_endpoint.h | 33 ++++++++++++++++++ prov/tcp/include/rdma/fi_direct_eq.h | 33 ++++++++++++++++++ prov/tcp/include/rdma/fi_direct_rma.h | 33 ++++++++++++++++++ prov/tcp/include/rdma/fi_direct_tagged.h | 33 ++++++++++++++++++ prov/tcp/include/rdma/fi_direct_trigger.h | 33 ++++++++++++++++++ prov/tcp/provider_FABRIC_1.0.map | 1 + 12 files changed, 334 insertions(+), 1 deletion(-) create mode 100644 prov/tcp/include/rdma/fi_direct.h create mode 100644 prov/tcp/include/rdma/fi_direct_atomic.h create mode 100644 prov/tcp/include/rdma/fi_direct_atomic_def.h create mode 100644 prov/tcp/include/rdma/fi_direct_cm.h create mode 100644 prov/tcp/include/rdma/fi_direct_domain.h create mode 100644 prov/tcp/include/rdma/fi_direct_endpoint.h create mode 100644 prov/tcp/include/rdma/fi_direct_eq.h create mode 100644 prov/tcp/include/rdma/fi_direct_rma.h create mode 100644 prov/tcp/include/rdma/fi_direct_tagged.h create mode 100644 prov/tcp/include/rdma/fi_direct_trigger.h create mode 100644 prov/tcp/provider_FABRIC_1.0.map diff --git a/.travis.yml b/.travis.yml index 5a4f669a5be..10dcb657122 100644 --- a/.travis.yml +++ b/.travis.yml @@ -82,7 +82,7 @@ install: fi # Test fabric direct # (all other providers are automatically disabled by configure) - - ./configure --prefix=$PREFIX --enable-direct=sockets + - ./configure --prefix=$PREFIX --enable-direct=tcp - make -j2 $MAKE_FLAGS # Test loadable library option # List of providers current as of Jan 2020 diff --git a/prov/tcp/include/rdma/fi_direct.h b/prov/tcp/include/rdma/fi_direct.h new file mode 100644 index 00000000000..b20f040df2e --- 
/dev/null +++ b/prov/tcp/include/rdma/fi_direct.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Do not remove this file. The rdma/fi_direct_*.h files are required to support + * the FABRIC_DIRECT option. Also see man/fi_direct.7.md. + */ diff --git a/prov/tcp/include/rdma/fi_direct_atomic.h b/prov/tcp/include/rdma/fi_direct_atomic.h new file mode 100644 index 00000000000..177e37955b2 --- /dev/null +++ b/prov/tcp/include/rdma/fi_direct_atomic.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) Intel Corporation, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Do not remove this file. See fi_direct.h */ diff --git a/prov/tcp/include/rdma/fi_direct_atomic_def.h b/prov/tcp/include/rdma/fi_direct_atomic_def.h new file mode 100644 index 00000000000..177e37955b2 --- /dev/null +++ b/prov/tcp/include/rdma/fi_direct_atomic_def.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) Intel Corporation, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Do not remove this file. See fi_direct.h */ diff --git a/prov/tcp/include/rdma/fi_direct_cm.h b/prov/tcp/include/rdma/fi_direct_cm.h new file mode 100644 index 00000000000..177e37955b2 --- /dev/null +++ b/prov/tcp/include/rdma/fi_direct_cm.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) Intel Corporation, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Do not remove this file. See fi_direct.h */ diff --git a/prov/tcp/include/rdma/fi_direct_domain.h b/prov/tcp/include/rdma/fi_direct_domain.h new file mode 100644 index 00000000000..177e37955b2 --- /dev/null +++ b/prov/tcp/include/rdma/fi_direct_domain.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) Intel Corporation, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Do not remove this file. See fi_direct.h */ diff --git a/prov/tcp/include/rdma/fi_direct_endpoint.h b/prov/tcp/include/rdma/fi_direct_endpoint.h new file mode 100644 index 00000000000..177e37955b2 --- /dev/null +++ b/prov/tcp/include/rdma/fi_direct_endpoint.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) Intel Corporation, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Do not remove this file. See fi_direct.h */ diff --git a/prov/tcp/include/rdma/fi_direct_eq.h b/prov/tcp/include/rdma/fi_direct_eq.h new file mode 100644 index 00000000000..177e37955b2 --- /dev/null +++ b/prov/tcp/include/rdma/fi_direct_eq.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) Intel Corporation, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Do not remove this file. See fi_direct.h */ diff --git a/prov/tcp/include/rdma/fi_direct_rma.h b/prov/tcp/include/rdma/fi_direct_rma.h new file mode 100644 index 00000000000..177e37955b2 --- /dev/null +++ b/prov/tcp/include/rdma/fi_direct_rma.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) Intel Corporation, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Do not remove this file. See fi_direct.h */ diff --git a/prov/tcp/include/rdma/fi_direct_tagged.h b/prov/tcp/include/rdma/fi_direct_tagged.h new file mode 100644 index 00000000000..177e37955b2 --- /dev/null +++ b/prov/tcp/include/rdma/fi_direct_tagged.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) Intel Corporation, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Do not remove this file. See fi_direct.h */ diff --git a/prov/tcp/include/rdma/fi_direct_trigger.h b/prov/tcp/include/rdma/fi_direct_trigger.h new file mode 100644 index 00000000000..177e37955b2 --- /dev/null +++ b/prov/tcp/include/rdma/fi_direct_trigger.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) Intel Corporation, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Do not remove this file. See fi_direct.h */ diff --git a/prov/tcp/provider_FABRIC_1.0.map b/prov/tcp/provider_FABRIC_1.0.map new file mode 100644 index 00000000000..3783ced980e --- /dev/null +++ b/prov/tcp/provider_FABRIC_1.0.map @@ -0,0 +1 @@ +/* Do not remove this file. It is needed for FABRIC_DIRECT build option. */ From 964e193921f86a97bfbc23599ff0d568d9183f50 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 20 Sep 2023 14:20:52 -0700 Subject: [PATCH 08/34] core: Remove internally used definitions from public headers Several defines and values should not have been exposed in the public header files. Remove or move the definitions into internal headers. 
This removes the chance of possible conflicts with application definitions and API breakage. Signed-off-by: Sean Hefty --- fabtests/component/dmabuf-rdma/ofi_ctx_pool.h | 6 ++-- fabtests/functional/rdm_atomic.c | 6 ++-- fabtests/include/ft_osd.h | 1 + fabtests/include/shared.h | 5 ++++ fabtests/ubertest/config.c | 2 +- fabtests/ubertest/fabtest.h | 20 ++++++++----- include/ofi.h | 15 ++++++++++ include/rdma/fabric.h | 23 +++++++------- include/rdma/fi_cm.h | 4 +-- include/rdma/fi_domain.h | 10 +------ include/rdma/fi_endpoint.h | 8 ++--- include/rdma/providers/fi_log.h | 2 -- prov/opx/include/rdma/fi_direct_atomic_def.h | 2 -- prov/opx/include/rdma/opx/fi_opx_atomic.h | 4 +-- prov/opx/include/rdma/opx/fi_opx_rma_ops.h | 2 +- prov/opx/src/fi_opx_atomic.c | 18 +++++------ prov/opx/src/fi_opx_rma.c | 6 ++-- prov/psm2/src/psmx2_atomic.c | 30 +++++++++---------- prov/psm3/src/psmx3_atomic.c | 30 +++++++++---------- prov/sm2/src/sm2.h | 2 +- prov/sm2/src/sm2_av.c | 4 +-- prov/sm2/src/sm2_coordination.c | 8 ++--- prov/sm2/src/sm2_coordination.h | 2 +- prov/sm2/src/sm2_ep.c | 16 +++++----- prov/sm2/src/sm2_init.c | 16 +++++----- prov/tcp/src/xnet_init.c | 2 +- prov/util/src/util_av.c | 10 +++---- src/log.c | 21 +++++++------ 28 files changed, 144 insertions(+), 131 deletions(-) diff --git a/fabtests/component/dmabuf-rdma/ofi_ctx_pool.h b/fabtests/component/dmabuf-rdma/ofi_ctx_pool.h index 2627aa209da..18e6faf0346 100644 --- a/fabtests/component/dmabuf-rdma/ofi_ctx_pool.h +++ b/fabtests/component/dmabuf-rdma/ofi_ctx_pool.h @@ -34,7 +34,7 @@ */ struct context_list { - struct fi_context context; + struct fi_context context; /* keep first */ struct context_list *next; }; @@ -53,7 +53,7 @@ static inline struct context_pool *init_context_pool(size_t pool_size) pool_size * sizeof(struct context_list)); if (!pool) return NULL; - + pool->head = &pool->list[0]; pool->tail = &pool->list[pool_size - 1]; for (i = 0; i < pool_size; i++) @@ -84,7 +84,7 @@ static inline void 
put_context(struct context_pool *pool, if (!ctxt) return; - entry = container_of(ctxt, struct context_list, context); + entry = (struct context_list *) ctxt; entry->next = NULL; pool->tail->next = entry; pool->tail = entry; diff --git a/fabtests/functional/rdm_atomic.c b/fabtests/functional/rdm_atomic.c index b43d20b42d0..638b9e1148b 100644 --- a/fabtests/functional/rdm_atomic.c +++ b/fabtests/functional/rdm_atomic.c @@ -91,7 +91,7 @@ static enum fi_op get_fi_op(char *op) return FI_MSWAP; else { fprintf(stderr, "Not a valid atomic operation\n"); - return FI_ATOMIC_OP_LAST; + return OFI_ATOMIC_OP_CNT; } } @@ -342,7 +342,7 @@ static int run_ops(void) { int ret; - for (op_type = FI_MIN; op_type < FI_ATOMIC_OP_LAST; op_type++) { + for (op_type = FI_MIN; op_type < OFI_ATOMIC_OP_CNT; op_type++) { ret = run_op(); if (ret && ret != -FI_ENOSYS && ret != -FI_EOPNOTSUPP) { FT_PRINTERR("run_op", ret); @@ -464,7 +464,7 @@ int main(int argc, char **argv) } else { run_all_ops = 0; op_type = get_fi_op(optarg); - if (op_type == FI_ATOMIC_OP_LAST) { + if (op_type == OFI_ATOMIC_OP_CNT) { print_opts_usage(argv[0]); return EXIT_FAILURE; } diff --git a/fabtests/include/ft_osd.h b/fabtests/include/ft_osd.h index 6ea377941cd..c9e08d3eac8 100644 --- a/fabtests/include/ft_osd.h +++ b/fabtests/include/ft_osd.h @@ -46,6 +46,7 @@ #endif #define OFI_DATATYPE_CNT (FI_UINT128 + 1) +#define OFI_ATOMIC_OP_CNT (FI_MSWAP + 1) #ifdef HAVE___INT128 typedef __int128 ofi_int128_t; diff --git a/fabtests/include/shared.h b/fabtests/include/shared.h index 3a311174a05..0726e8f7e66 100644 --- a/fabtests/include/shared.h +++ b/fabtests/include/shared.h @@ -64,6 +64,11 @@ extern "C" { #define ALIGN(x, a) ALIGN_MASK(x, (typeof(x))(a) - 1) #define ALIGN_DOWN(x, a) ALIGN((x) - ((a) - 1), (a)) +#ifndef container_of +#define container_of(ptr, type, field) \ + ((type *) ((char *)ptr - offsetof(type, field))) +#endif + #define OFI_MR_BASIC_MAP (FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_VIRT_ADDR) /* exit codes must be 
0-255 */ diff --git a/fabtests/ubertest/config.c b/fabtests/ubertest/config.c index 49a1282a3da..eaa0dac6a3e 100644 --- a/fabtests/ubertest/config.c +++ b/fabtests/ubertest/config.c @@ -166,7 +166,7 @@ static struct key_t keys[] = { .str = "op", .offset = offsetof(struct ft_set, op), .val_type = VAL_NUM, - .val_size = sizeof(((struct ft_set *)0)->op) / FI_ATOMIC_OP_LAST, + .val_size = sizeof(((struct ft_set *)0)->op) / OFI_ATOMIC_OP_CNT, }, { .str = "datatype", diff --git a/fabtests/ubertest/fabtest.h b/fabtests/ubertest/fabtest.h index 6f1616ecdf4..d9b293d4b4c 100644 --- a/fabtests/ubertest/fabtest.h +++ b/fabtests/ubertest/fabtest.h @@ -233,14 +233,18 @@ enum ft_class_function { x == FT_FUNC_FETCH_ATOMICMSG || \ x == FT_FUNC_COMPARE_ATOMICMSG) +enum { + FT_NAME_MAX = 64 +}; + struct ft_set { - char node[FI_NAME_MAX]; - char service[FI_NAME_MAX]; - char prov_name[FI_NAME_MAX]; + char node[FT_NAME_MAX]; + char service[FT_NAME_MAX]; + char prov_name[FT_NAME_MAX]; enum ft_test_type test_type[FT_MAX_TEST]; enum ft_class_function class_function[FT_MAX_FUNCTIONS]; uint64_t msg_flags; - enum fi_op op[FI_ATOMIC_OP_LAST]; + enum fi_op op[OFI_ATOMIC_OP_CNT]; enum fi_datatype datatype[OFI_DATATYPE_CNT]; enum fi_ep_type ep_type[FT_MAX_EP_TYPES]; enum fi_av_type av_type[FT_MAX_AV_TYPES]; @@ -308,10 +312,10 @@ struct ft_info { enum fi_threading threading; uint32_t protocol; uint32_t protocol_version; - char node[FI_NAME_MAX]; - char service[FI_NAME_MAX]; - char prov_name[FI_NAME_MAX]; - char fabric_name[FI_NAME_MAX]; + char node[FT_NAME_MAX]; + char service[FT_NAME_MAX]; + char prov_name[FT_NAME_MAX]; + char fabric_name[FT_NAME_MAX]; uint64_t rx_cq_bind_flags; uint64_t tx_cq_bind_flags; uint64_t rx_op_flags; diff --git a/include/ofi.h b/include/ofi.h index 29a6ac1a518..b5bda966011 100644 --- a/include/ofi.h +++ b/include/ofi.h @@ -72,6 +72,12 @@ extern "C" { FI_VERSION(FI_MAJOR_VERSION * 100 + FI_MINOR_VERSION, \ FI_REVISION_VERSION * 10) +enum { + OFI_NAME_MAX = 64, + 
OFI_ATOMIC_OP_LAST = FI_MSWAP + 1, /* last pt 2 pt atomic */ + OFI_DATATYPE_LAST = FI_LONG_DOUBLE_COMPLEX + 1, /* compatibility */ +}; + #define OFI_GETINFO_INTERNAL (1ULL << 58) #define OFI_CORE_PROV_ONLY (1ULL << 59) #define OFI_GETINFO_HIDDEN (1ULL << 60) @@ -132,6 +138,15 @@ extern "C" { #define OFI_RX_OP_FLAGS \ (FI_COMPLETION | FI_MULTI_RECV) +#ifndef container_of +#define container_of(ptr, type, field) \ + ((type *) ((char *)ptr - offsetof(type, field))) +#endif + +#ifndef count_of +#define count_of(x) \ + ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x]))))) +#endif #define sizeof_field(type, field) sizeof(((type *)0)->field) diff --git a/include/rdma/fabric.h b/include/rdma/fabric.h index 61f9c3bd97a..cd9413ba1f5 100644 --- a/include/rdma/fabric.h +++ b/include/rdma/fabric.h @@ -73,24 +73,21 @@ typedef SSIZE_T ssize_t; extern "C" { #endif -#ifndef container_of -#define container_of(ptr, type, field) \ - ((type *) ((char *)ptr - offsetof(type, field))) -#endif - -#ifndef count_of -#define count_of(x) \ - ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x]))))) -#endif - #define FI_MAJOR_VERSION 1 #define FI_MINOR_VERSION 19 #define FI_REVISION_VERSION 0 +/* Removing these breaks the build for some apps. + * The use of FI_NAME_MAX is undefined. + * FI_ATOMIC_OP_LAST and FI_DATATYPE_LAST values cannot change + * (such as inserting new enum values that they are intended to be the + * last of) without breaking apps that recompile. So, they are hard-coded + * here. 
+ */ enum { - FI_PATH_MAX = 256, - FI_NAME_MAX = 64, - FI_VERSION_MAX = 64 + FI_NAME_MAX = 64, + FI_ATOMIC_OP_LAST = 19, + FI_DATATYPE_LAST = 14, /* not actual last datatype */ }; #define FI_VERSION(major, minor) (((major) << 16) | (minor)) diff --git a/include/rdma/fi_cm.h b/include/rdma/fi_cm.h index e21fec91244..c0faf1f377b 100644 --- a/include/rdma/fi_cm.h +++ b/include/rdma/fi_cm.h @@ -71,13 +71,13 @@ struct fi_ops_cm { static inline int fi_setname(fid_t fid, void *addr, size_t addrlen) { - struct fid_ep *ep = container_of(fid, struct fid_ep, fid); + struct fid_ep *ep = (struct fid_ep *) fid; return ep->cm->setname(fid, addr, addrlen); } static inline int fi_getname(fid_t fid, void *addr, size_t *addrlen) { - struct fid_ep *ep = container_of(fid, struct fid_ep, fid); + struct fid_ep *ep = (struct fid_ep *) fid; return ep->cm->getname(fid, addr, addrlen); } diff --git a/include/rdma/fi_domain.h b/include/rdma/fi_domain.h index 67d098cd5de..d5bc5c18734 100644 --- a/include/rdma/fi_domain.h +++ b/include/rdma/fi_domain.h @@ -210,13 +210,7 @@ enum fi_datatype { FI_DOUBLE_COMPLEX, FI_LONG_DOUBLE, FI_LONG_DOUBLE_COMPLEX, - /* End of point to point atomic datatypes */ - FI_DATATYPE_LAST, - /* - * enums for 128-bit integer atomics, existing ordering and - * FI_DATATYPE_LAST preserved for compatabilty. 
- */ - FI_INT128 = FI_DATATYPE_LAST, + FI_INT128, FI_UINT128, /* Collective datatypes */ @@ -243,8 +237,6 @@ enum fi_op { FI_CSWAP_GE, FI_CSWAP_GT, FI_MSWAP, - /* End of point to point atomic ops */ - FI_ATOMIC_OP_LAST, /* Collective datatypes */ FI_NOOP = FI_COLLECTIVE_OFFSET, diff --git a/include/rdma/fi_endpoint.h b/include/rdma/fi_endpoint.h index cf0611b1bf2..69d4a6c4ff0 100644 --- a/include/rdma/fi_endpoint.h +++ b/include/rdma/fi_endpoint.h @@ -219,7 +219,7 @@ static inline int fi_enable(struct fid_ep *ep) static inline ssize_t fi_cancel(fid_t fid, void *context) { - struct fid_ep *ep = container_of(fid, struct fid_ep, fid); + struct fid_ep *ep = (struct fid_ep *) fid; return ep->ops->cancel(fid, context); } @@ -227,7 +227,7 @@ static inline int fi_setopt(fid_t fid, int level, int optname, const void *optval, size_t optlen) { - struct fid_ep *ep = container_of(fid, struct fid_ep, fid); + struct fid_ep *ep = (struct fid_ep *) fid; return ep->ops->setopt(fid, level, optname, optval, optlen); } @@ -235,7 +235,7 @@ static inline int fi_getopt(fid_t fid, int level, int optname, void *optval, size_t *optlen) { - struct fid_ep *ep = container_of(fid, struct fid_ep, fid); + struct fid_ep *ep = (struct fid_ep *) fid; return ep->ops->getopt(fid, level, optname, optval, optlen); } @@ -246,7 +246,7 @@ static inline int fi_ep_alias(struct fid_ep *ep, struct fid_ep **alias_ep, struct fid *fid; ret = fi_alias(&ep->fid, &fid, flags); if (!ret) - *alias_ep = container_of(fid, struct fid_ep, fid); + *alias_ep = (struct fid_ep *) fid; return ret; } diff --git a/include/rdma/providers/fi_log.h b/include/rdma/providers/fi_log.h index 614551b1d20..9268a2ee86c 100644 --- a/include/rdma/providers/fi_log.h +++ b/include/rdma/providers/fi_log.h @@ -53,7 +53,6 @@ enum fi_log_subsys { FI_LOG_EQ, FI_LOG_MR, FI_LOG_CNTR, - FI_LOG_SUBSYS_MAX }; enum fi_log_level { @@ -61,7 +60,6 @@ enum fi_log_level { FI_LOG_TRACE, FI_LOG_INFO, FI_LOG_DEBUG, - FI_LOG_MAX }; int fi_log_enabled(const struct 
fi_provider *prov, enum fi_log_level level, diff --git a/prov/opx/include/rdma/fi_direct_atomic_def.h b/prov/opx/include/rdma/fi_direct_atomic_def.h index 4bdcc67b5ce..0ba391c654b 100644 --- a/prov/opx/include/rdma/fi_direct_atomic_def.h +++ b/prov/opx/include/rdma/fi_direct_atomic_def.h @@ -51,7 +51,6 @@ enum fi_datatype { FI_DOUBLE_COMPLEX, /* 11 */ FI_LONG_DOUBLE, /* 12 */ FI_LONG_DOUBLE_COMPLEX, /* 13 */ - FI_DATATYPE_LAST /* 14 */ }; enum fi_op { FI_MIN, @@ -73,7 +72,6 @@ enum fi_op { FI_CSWAP_GE, FI_CSWAP_GT, FI_MSWAP, - FI_ATOMIC_OP_LAST }; #endif diff --git a/prov/opx/include/rdma/opx/fi_opx_atomic.h b/prov/opx/include/rdma/opx/fi_opx_atomic.h index ae04b11b717..71131b57ecd 100644 --- a/prov/opx/include/rdma/opx/fi_opx_atomic.h +++ b/prov/opx/include/rdma/opx/fi_opx_atomic.h @@ -96,7 +96,7 @@ extern "C" { static inline size_t sizeofdt(const enum fi_datatype datatype) { - static const size_t sizeofdt[FI_DATATYPE_LAST] = { + static const size_t sizeofdt[OFI_DATATYPE_LAST] = { sizeof(int8_t), /* FI_INT8 */ sizeof(uint8_t), /* FI_UINT8 */ sizeof(int16_t), /* FI_INT16 */ @@ -135,7 +135,7 @@ static inline size_t maxcount(const enum fi_datatype datatype, const unsigned is maxbytes / sizeof(long double), /* FI_LONG_DOUBLE */ \ maxbytes / sizeof(complex long double) /* FI_LONG_DOUBLE_COMPLEX */ - static const size_t maxcount[2][2][FI_DATATYPE_LAST] = { + static const size_t maxcount[2][2][OFI_DATATYPE_LAST] = { { { /* !compare, !fetch */ INIT_MAXCOUNT_ARRAY(FI_OPX_HFI1_PACKET_MTU) }, { /* !compare, fetch */ diff --git a/prov/opx/include/rdma/opx/fi_opx_rma_ops.h b/prov/opx/include/rdma/opx/fi_opx_rma_ops.h index eeb303e5a68..fd0b118b241 100644 --- a/prov/opx/include/rdma/opx/fi_opx_rma_ops.h +++ b/prov/opx/include/rdma/opx/fi_opx_rma_ops.h @@ -533,7 +533,7 @@ FI_OPX_RX_ATOMIC_SPECIALIZED_FUNCS_COMPLEX(LONG_DOUBLE_COMPLEX, complex long dou static inline void fi_opx_rx_atomic_dispatch(const void *buf, void *addr, size_t nbytes, enum fi_datatype dt, enum fi_op op) { - 
static void (*fi_opx_rx_atomic_dispatch_table[FI_DATATYPE_LAST][FI_ATOMIC_OP_LAST])( + static void (*fi_opx_rx_atomic_dispatch_table[OFI_DATATYPE_LAST][OFI_ATOMIC_OP_LAST])( const void *, void *, size_t) = { { FI_OPX_RX_ATOMIC_DISPATCH_FUNC_NAMES(INT8) }, { FI_OPX_RX_ATOMIC_DISPATCH_FUNC_NAMES(UINT8) }, diff --git a/prov/opx/src/fi_opx_atomic.c b/prov/opx/src/fi_opx_atomic.c index 3e13cf32295..f6a1995aa34 100644 --- a/prov/opx/src/fi_opx_atomic.c +++ b/prov/opx/src/fi_opx_atomic.c @@ -66,7 +66,7 @@ static inline int fi_opx_check_atomic(struct fi_opx_ep *opx_ep, enum fi_datatype default: return -FI_EINVAL; } - if (((int)dt >= FI_DATATYPE_LAST) || ((int)dt < 0)) + if (((int)dt >= OFI_DATATYPE_LAST) || ((int)dt < 0)) return -FI_EINVAL; if (!opx_ep) @@ -127,7 +127,7 @@ void fi_opx_atomic_op_internal(struct fi_opx_ep *opx_ep, (FI_COMPLETION | FI_DELIVERY_COMPLETE)); } - assert(dt == FI_VOID || dt < FI_DATATYPE_LAST); + assert(dt == FI_VOID || dt < OFI_DATATYPE_LAST); union fi_opx_hfi1_deferred_work *work = ofi_buf_alloc(opx_ep->tx->work_pending_pool); assert(work); struct fi_opx_hfi1_dput_params *params = &work->dput; @@ -224,7 +224,7 @@ size_t fi_opx_atomic_internal(struct fi_opx_ep *opx_ep, size_t buf_len = count * sizeofdt(datatype); if(op == FI_ATOMIC_READ) { assert(!is_compare); - assert(datatype < FI_DATATYPE_LAST); + assert(datatype < OFI_DATATYPE_LAST); FI_DBG_TRACE(fi_opx_global.prov, FI_LOG_EP_DATA, "===================================== ATOMIC READ (begin)\n"); struct iovec iov = { (void*)fetch_vaddr, buf_len }; @@ -1106,7 +1106,7 @@ ssize_t fi_opx_atomic_compwritemsg(struct fid_ep *ep, const struct fi_msg_atomic int fi_opx_atomic_writevalid(struct fid_ep *ep, enum fi_datatype datatype, enum fi_op op, size_t *count) { - static size_t sizeofdt[FI_DATATYPE_LAST] = { + static size_t sizeofdt[OFI_DATATYPE_LAST] = { sizeof(int8_t), /* FI_INT8 */ sizeof(uint8_t), /* FI_UINT8 */ sizeof(int16_t), /* FI_INT16 */ @@ -1123,7 +1123,7 @@ int 
fi_opx_atomic_writevalid(struct fid_ep *ep, enum fi_datatype datatype, enum sizeof(complex long double) /* FI_LONG_DOUBLE_COMPLEX */ }; - if ((op > FI_ATOMIC_WRITE) || (datatype >= FI_DATATYPE_LAST)) { + if ((op > FI_ATOMIC_WRITE) || (datatype >= OFI_DATATYPE_LAST)) { *count = 0; errno = FI_EOPNOTSUPP; return -errno; @@ -1137,7 +1137,7 @@ int fi_opx_atomic_writevalid(struct fid_ep *ep, enum fi_datatype datatype, enum int fi_opx_atomic_readwritevalid(struct fid_ep *ep, enum fi_datatype datatype, enum fi_op op, size_t *count) { - static size_t sizeofdt[FI_DATATYPE_LAST] = { + static size_t sizeofdt[OFI_DATATYPE_LAST] = { sizeof(int8_t), /* FI_INT8 */ sizeof(uint8_t), /* FI_UINT8 */ sizeof(int16_t), /* FI_INT16 */ @@ -1154,7 +1154,7 @@ int fi_opx_atomic_readwritevalid(struct fid_ep *ep, enum fi_datatype datatype, e sizeof(complex long double) /* FI_LONG_DOUBLE_COMPLEX */ }; - if ((op > FI_ATOMIC_WRITE) || (datatype >= FI_DATATYPE_LAST)) { + if ((op > FI_ATOMIC_WRITE) || (datatype >= OFI_DATATYPE_LAST)) { *count = 0; errno = FI_EOPNOTSUPP; return -errno; @@ -1170,7 +1170,7 @@ int fi_opx_atomic_readwritevalid(struct fid_ep *ep, enum fi_datatype datatype, e int fi_opx_atomic_compwritevalid(struct fid_ep *ep, enum fi_datatype datatype, enum fi_op op, size_t *count) { - static size_t sizeofdt[FI_DATATYPE_LAST] = { + static size_t sizeofdt[OFI_DATATYPE_LAST] = { sizeof(int8_t), /* FI_INT8 */ sizeof(uint8_t), /* FI_UINT8 */ sizeof(int16_t), /* FI_INT16 */ @@ -1187,7 +1187,7 @@ int fi_opx_atomic_compwritevalid(struct fid_ep *ep, enum fi_datatype datatype, e sizeof(complex long double) /* FI_LONG_DOUBLE_COMPLEX */ }; - if ((op < FI_CSWAP) || (op >= FI_ATOMIC_OP_LAST) || (datatype >= FI_DATATYPE_LAST)) { + if ((op < FI_CSWAP) || (op >= OFI_ATOMIC_OP_LAST) || (datatype >= OFI_DATATYPE_LAST)) { *count = 0; errno = FI_EOPNOTSUPP; return -errno; diff --git a/prov/opx/src/fi_opx_rma.c b/prov/opx/src/fi_opx_rma.c index f5414f4ce95..2bacf3e9617 100644 --- a/prov/opx/src/fi_opx_rma.c 
+++ b/prov/opx/src/fi_opx_rma.c @@ -120,7 +120,7 @@ int fi_opx_readv_internal_intranode(struct fi_opx_hfi1_rx_readv_params *params) uint64_t op64 = params->op << 40; uint64_t dt64 = params->dt << 32; assert(FI_OPX_HFI_DPUT_OPCODE_GET == params->opcode); // double check packet type - assert(params->dt == (FI_VOID - 1) || params->dt < FI_DATATYPE_LAST); + assert(params->dt == (FI_VOID - 1) || params->dt < OFI_DATATYPE_LAST); tx_hdr->qw[0] = opx_ep->rx->tx.cts.hdr.qw[0] | params->lrh_dlid | (params->lrh_dws << 32); tx_hdr->qw[1] = opx_ep->rx->tx.cts.hdr.qw[1] | params->bth_rx; tx_hdr->qw[2] = opx_ep->rx->tx.cts.hdr.qw[2]; @@ -145,7 +145,7 @@ int fi_opx_do_readv_internal(union fi_opx_hfi1_deferred_work *work) { struct fi_opx_hfi1_rx_readv_params *params = &work->readv; assert(params->niov <= 1); // TODO, support something ... bigger - assert(params->dt == (FI_VOID - 1) || params->dt < FI_DATATYPE_LAST); + assert(params->dt == (FI_VOID - 1) || params->dt < OFI_DATATYPE_LAST); if (params->is_intranode) { /* compile-time constant expression */ return fi_opx_readv_internal_intranode(params); @@ -458,7 +458,7 @@ void fi_opx_get_daos_av_addr_rank(struct fi_opx_ep *opx_ep, if (cur_av_rank) { union fi_opx_addr addr; addr.fi = cur_av_rank->fi_addr; - + FI_DBG_TRACE(fi_opx_global.prov, FI_LOG_EP_DATA, "Get av_rank_hashmap[%d] = rank:%d, LID:0x%x, fi:%08lx.\n", i++, cur_av_rank->key.rank, addr.uid.lid, addr.fi); diff --git a/prov/psm2/src/psmx2_atomic.c b/prov/psm2/src/psmx2_atomic.c index 0192f2259b7..7a34eac5359 100644 --- a/prov/psm2/src/psmx2_atomic.c +++ b/prov/psm2/src/psmx2_atomic.c @@ -819,8 +819,8 @@ ssize_t psmx2_atomic_write_generic(struct fid_ep *ep, flags); assert(buf); - assert((int)datatype >= 0 && (int)datatype < FI_DATATYPE_LAST); - assert((int)op >= 0 && (int)op < FI_ATOMIC_OP_LAST); + assert((int)datatype >= 0 && (int)datatype < OFI_DATATYPE_LAST); + assert((int)op >= 0 && (int)op < OFI_ATOMIC_OP_LAST); av = ep_priv->av; assert(av); @@ -918,8 +918,8 @@ ssize_t 
psmx2_atomic_writev_generic(struct fid_ep *ep, assert(iov); assert(count); - assert((int)datatype >= 0 && (int)datatype < FI_DATATYPE_LAST); - assert((int)op >= 0 && (int)op < FI_ATOMIC_OP_LAST); + assert((int)datatype >= 0 && (int)datatype < OFI_DATATYPE_LAST); + assert((int)op >= 0 && (int)op < OFI_ATOMIC_OP_LAST); while (count && !iov[count-1].count) count--; @@ -1123,8 +1123,8 @@ ssize_t psmx2_atomic_readwrite_generic(struct fid_ep *ep, context, flags); assert(buf || op == FI_ATOMIC_READ); - assert((int)datatype >= 0 && (int)datatype < FI_DATATYPE_LAST); - assert((int)op >= 0 && (int)op < FI_ATOMIC_OP_LAST); + assert((int)datatype >= 0 && (int)datatype < OFI_DATATYPE_LAST); + assert((int)op >= 0 && (int)op < OFI_ATOMIC_OP_LAST); av = ep_priv->av; assert(av); @@ -1235,8 +1235,8 @@ ssize_t psmx2_atomic_readwritev_generic(struct fid_ep *ep, assert((iov && count) || op == FI_ATOMIC_READ); assert(resultv); assert(result_count); - assert((int)datatype >= 0 && (int)datatype < FI_DATATYPE_LAST); - assert((int)op >= 0 && (int)op < FI_ATOMIC_OP_LAST); + assert((int)datatype >= 0 && (int)datatype < OFI_DATATYPE_LAST); + assert((int)op >= 0 && (int)op < OFI_ATOMIC_OP_LAST); if (iov) { while (count && !iov[count-1].count) @@ -1518,8 +1518,8 @@ ssize_t psmx2_atomic_compwrite_generic(struct fid_ep *ep, context, flags); assert(buf); - assert((int)datatype >= 0 && (int)datatype < FI_DATATYPE_LAST); - assert((int)op >= 0 && (int)op < FI_ATOMIC_OP_LAST); + assert((int)datatype >= 0 && (int)datatype < OFI_DATATYPE_LAST); + assert((int)op >= 0 && (int)op < OFI_ATOMIC_OP_LAST); av = ep_priv->av; assert(av); @@ -1639,8 +1639,8 @@ ssize_t psmx2_atomic_compwritev_generic(struct fid_ep *ep, assert(compare_count); assert(resultv); assert(result_count); - assert((int)datatype >= 0 && (int)datatype < FI_DATATYPE_LAST); - assert((int)op >= 0 && (int)op < FI_ATOMIC_OP_LAST); + assert((int)datatype >= 0 && (int)datatype < OFI_DATATYPE_LAST); + assert((int)op >= 0 && (int)op < 
OFI_ATOMIC_OP_LAST); while (count && !iov[count-1].count) count--; @@ -1905,7 +1905,7 @@ static int psmx2_atomic_writevalid_internal(size_t chunk_size, enum fi_datatype datatype, enum fi_op op, size_t *count) { - if (datatype >= FI_DATATYPE_LAST) + if (datatype >= OFI_DATATYPE_LAST) return -FI_EOPNOTSUPP; switch (op) { @@ -1936,7 +1936,7 @@ static int psmx2_atomic_readwritevalid_internal(size_t chunk_size, enum fi_datatype datatype, enum fi_op op, size_t *count) { - if (datatype >= FI_DATATYPE_LAST) + if (datatype >= OFI_DATATYPE_LAST) return -FI_EOPNOTSUPP; switch (op) { @@ -1969,7 +1969,7 @@ static int psmx2_atomic_compwritevalid_internal(size_t chunk_size, enum fi_op op, size_t *count) { - if (datatype >= FI_DATATYPE_LAST) + if (datatype >= OFI_DATATYPE_LAST) return -FI_EOPNOTSUPP; switch (op) { diff --git a/prov/psm3/src/psmx3_atomic.c b/prov/psm3/src/psmx3_atomic.c index 87e8fc50bc8..4b5af83ea97 100644 --- a/prov/psm3/src/psmx3_atomic.c +++ b/prov/psm3/src/psmx3_atomic.c @@ -828,8 +828,8 @@ ssize_t psmx3_atomic_write_generic(struct fid_ep *ep, flags); assert(buf); - assert((int)datatype >= 0 && (int)datatype < FI_DATATYPE_LAST); - assert((int)op >= 0 && (int)op < FI_ATOMIC_OP_LAST); + assert((int)datatype >= 0 && (int)datatype < OFI_DATATYPE_LAST); + assert((int)op >= 0 && (int)op < OFI_ATOMIC_OP_LAST); av = ep_priv->av; assert(av); @@ -928,8 +928,8 @@ ssize_t psmx3_atomic_writev_generic(struct fid_ep *ep, assert(iov); assert(count); - assert((int)datatype >= 0 && (int)datatype < FI_DATATYPE_LAST); - assert((int)op >= 0 && (int)op < FI_ATOMIC_OP_LAST); + assert((int)datatype >= 0 && (int)datatype < OFI_DATATYPE_LAST); + assert((int)op >= 0 && (int)op < OFI_ATOMIC_OP_LAST); while (count && !iov[count-1].count) count--; @@ -1134,8 +1134,8 @@ ssize_t psmx3_atomic_readwrite_generic(struct fid_ep *ep, context, flags); assert(buf || op == FI_ATOMIC_READ); - assert((int)datatype >= 0 && (int)datatype < FI_DATATYPE_LAST); - assert((int)op >= 0 && (int)op < 
FI_ATOMIC_OP_LAST); + assert((int)datatype >= 0 && (int)datatype < OFI_DATATYPE_LAST); + assert((int)op >= 0 && (int)op < OFI_ATOMIC_OP_LAST); av = ep_priv->av; assert(av); @@ -1247,8 +1247,8 @@ ssize_t psmx3_atomic_readwritev_generic(struct fid_ep *ep, assert((iov && count) || op == FI_ATOMIC_READ); assert(resultv); assert(result_count); - assert((int)datatype >= 0 && (int)datatype < FI_DATATYPE_LAST); - assert((int)op >= 0 && (int)op < FI_ATOMIC_OP_LAST); + assert((int)datatype >= 0 && (int)datatype < OFI_DATATYPE_LAST); + assert((int)op >= 0 && (int)op < OFI_ATOMIC_OP_LAST); dt_size = ofi_datatype_size(datatype); @@ -1532,8 +1532,8 @@ ssize_t psmx3_atomic_compwrite_generic(struct fid_ep *ep, context, flags); assert(buf); - assert((int)datatype >= 0 && (int)datatype < FI_DATATYPE_LAST); - assert((int)op >= 0 && (int)op < FI_ATOMIC_OP_LAST); + assert((int)datatype >= 0 && (int)datatype < OFI_DATATYPE_LAST); + assert((int)op >= 0 && (int)op < OFI_ATOMIC_OP_LAST); av = ep_priv->av; assert(av); @@ -1653,8 +1653,8 @@ ssize_t psmx3_atomic_compwritev_generic(struct fid_ep *ep, assert(compare_count); assert(resultv); assert(result_count); - assert((int)datatype >= 0 && (int)datatype < FI_DATATYPE_LAST); - assert((int)op >= 0 && (int)op < FI_ATOMIC_OP_LAST); + assert((int)datatype >= 0 && (int)datatype < OFI_DATATYPE_LAST); + assert((int)op >= 0 && (int)op < OFI_ATOMIC_OP_LAST); while (count && !iov[count-1].count) count--; @@ -1920,7 +1920,7 @@ static int psmx3_atomic_writevalid_internal(size_t chunk_size, enum fi_datatype datatype, enum fi_op op, size_t *count) { - if (datatype >= FI_DATATYPE_LAST) + if (datatype >= OFI_DATATYPE_LAST) return -FI_EOPNOTSUPP; switch (op) { @@ -1951,7 +1951,7 @@ static int psmx3_atomic_readwritevalid_internal(size_t chunk_size, enum fi_datatype datatype, enum fi_op op, size_t *count) { - if (datatype >= FI_DATATYPE_LAST) + if (datatype >= OFI_DATATYPE_LAST) return -FI_EOPNOTSUPP; switch (op) { @@ -1984,7 +1984,7 @@ static int 
psmx3_atomic_compwritevalid_internal(size_t chunk_size, enum fi_op op, size_t *count) { - if (datatype >= FI_DATATYPE_LAST) + if (datatype >= OFI_DATATYPE_LAST) return -FI_EOPNOTSUPP; switch (op) { diff --git a/prov/sm2/src/sm2.h b/prov/sm2/src/sm2.h index baa2af7f645..c016510b730 100644 --- a/prov/sm2/src/sm2.h +++ b/prov/sm2/src/sm2.h @@ -154,7 +154,7 @@ struct sm2_atomic_entry { }; struct sm2_ep_name { - char name[FI_NAME_MAX]; + char name[OFI_NAME_MAX]; struct sm2_region *region; struct dlist_entry entry; }; diff --git a/prov/sm2/src/sm2_av.c b/prov/sm2/src/sm2_av.c index 8214d292f04..8338aa7086e 100644 --- a/prov/sm2/src/sm2_av.c +++ b/prov/sm2/src/sm2_av.c @@ -172,7 +172,7 @@ static int sm2_av_lookup(struct fid_av *av, fi_addr_t fi_addr, void *addr, struct sm2_ep_allocation_entry *entries; sm2_gid_t gid; - *addrlen = MIN(FI_NAME_MAX, *addrlen); + *addrlen = MIN(OFI_NAME_MAX, *addrlen); util_av = container_of(av, struct util_av, av_fid); sm2_av = container_of(util_av, struct sm2_av, util_av); @@ -193,7 +193,7 @@ static int sm2_av_lookup(struct fid_av *av, fi_addr_t fi_addr, void *addr, entries = (void *) (sm2_av->mmap.base + header->ep_allocation_offset); strncpy(addr, entries[gid].ep_name, *addrlen); - *addrlen = strnlen(entries[gid].ep_name, FI_NAME_MAX); + *addrlen = strnlen(entries[gid].ep_name, OFI_NAME_MAX); FI_DBG(&sm2_prov, FI_LOG_AV, "sm2_av_lookup: %s\n", (char *) addr); diff --git a/prov/sm2/src/sm2_coordination.c b/prov/sm2/src/sm2_coordination.c index c0e6bb0a211..cd247e4e493 100644 --- a/prov/sm2/src/sm2_coordination.c +++ b/prov/sm2/src/sm2_coordination.c @@ -372,7 +372,7 @@ ssize_t sm2_entry_allocate(const char *name, struct sm2_mmap *map, "file size is reset)!\n", item); strncpy(entries[item].ep_name, - ZOMBIE_ALLOCATION_NAME, FI_NAME_MAX); + ZOMBIE_ALLOCATION_NAME, OFI_NAME_MAX); goto retry_lookup; } } @@ -471,8 +471,8 @@ ssize_t sm2_entry_allocate(const char *name, struct sm2_mmap *map, "Using sm2 region at allocation entry[%d] for %s\n", 
item, name); - strncpy(entries[item].ep_name, name, FI_NAME_MAX - 1); - entries[item].ep_name[FI_NAME_MAX - 1] = '\0'; + strncpy(entries[item].ep_name, name, OFI_NAME_MAX - 1); + entries[item].ep_name[OFI_NAME_MAX - 1] = '\0'; *gid = item; @@ -487,7 +487,7 @@ int sm2_entry_lookup(const char *name, struct sm2_mmap *map) entries = sm2_mmap_entries(map); /* TODO Optimize this lookup*/ for (item = 0; item < SM2_MAX_UNIVERSE_SIZE; item++) { - if (0 == strncmp(name, entries[item].ep_name, FI_NAME_MAX)) { + if (0 == strncmp(name, entries[item].ep_name, OFI_NAME_MAX)) { FI_DBG(&sm2_prov, FI_LOG_AV, "Found existing %s in slot %d\n", name, item); return item; diff --git a/prov/sm2/src/sm2_coordination.h b/prov/sm2/src/sm2_coordination.h index daddc217ab8..26f319a5db8 100644 --- a/prov/sm2/src/sm2_coordination.h +++ b/prov/sm2/src/sm2_coordination.h @@ -63,7 +63,7 @@ struct sm2_mmap { struct sm2_ep_allocation_entry { int pid; /* This is for allocation startup */ - char ep_name[FI_NAME_MAX]; + char ep_name[OFI_NAME_MAX]; bool startup_ready; /* TODO Do I need to make atomic */ }; diff --git a/prov/sm2/src/sm2_ep.c b/prov/sm2/src/sm2_ep.c index 6ae2741f7a4..69334e1c30e 100644 --- a/prov/sm2/src/sm2_ep.c +++ b/prov/sm2/src/sm2_ep.c @@ -54,9 +54,9 @@ int sm2_setname(fid_t fid, void *addr, size_t addrlen) struct sm2_ep *ep; char *name; - if (addrlen > FI_NAME_MAX) { + if (addrlen > OFI_NAME_MAX) { FI_WARN(&sm2_prov, FI_LOG_EP_CTRL, - "Addrlen exceeds max addrlen (%d)\n", FI_NAME_MAX); + "Addrlen exceeds max addrlen (%d)\n", OFI_NAME_MAX); return -FI_EINVAL; } @@ -498,8 +498,8 @@ static struct fi_ops sm2_ep_fi_ops = { static int sm2_endpoint_name(struct sm2_ep *ep, char *name, char *addr, size_t addrlen) { - memset(name, 0, FI_NAME_MAX); - if (!addr || addrlen > FI_NAME_MAX) + memset(name, 0, OFI_NAME_MAX); + if (!addr || addrlen > OFI_NAME_MAX) return -FI_EINVAL; pthread_mutex_lock(&sm2_ep_list_lock); @@ -507,11 +507,11 @@ static int sm2_endpoint_name(struct sm2_ep *ep, char *name, 
char *addr, pthread_mutex_unlock(&sm2_ep_list_lock); if (strstr(addr, SM2_PREFIX)) { - snprintf(name, FI_NAME_MAX - 1, "%s:%d:%d", addr, getuid(), + snprintf(name, OFI_NAME_MAX - 1, "%s:%d:%d", addr, getuid(), ep->ep_idx); } else { /* this is an fi_ns:// address.*/ - snprintf(name, FI_NAME_MAX - 1, "%s", addr); + snprintf(name, OFI_NAME_MAX - 1, "%s", addr); } return 0; @@ -522,7 +522,7 @@ int sm2_endpoint(struct fid_domain *domain, struct fi_info *info, { struct sm2_ep *ep; int ret; - char name[FI_NAME_MAX]; + char name[OFI_NAME_MAX]; ep = calloc(1, sizeof(*ep)); if (!ep) @@ -532,7 +532,7 @@ int sm2_endpoint(struct fid_domain *domain, struct fi_info *info, if (ret) goto ep; - ret = sm2_setname(&ep->util_ep.ep_fid.fid, name, FI_NAME_MAX); + ret = sm2_setname(&ep->util_ep.ep_fid.fid, name, OFI_NAME_MAX); if (ret) goto ep; diff --git a/prov/sm2/src/sm2_init.c b/prov/sm2/src/sm2_init.c index 12bb33e9ce2..57dfc45e52d 100644 --- a/prov/sm2/src/sm2_init.c +++ b/prov/sm2/src/sm2_init.c @@ -85,8 +85,8 @@ int sm2_create(const struct fi_provider *prov, const struct sm2_attr *attr, FI_WARN(prov, FI_LOG_EP_CTRL, "calloc error\n"); return -FI_ENOMEM; } - strncpy(ep_name->name, (char *) attr->name, FI_NAME_MAX - 1); - ep_name->name[FI_NAME_MAX - 1] = '\0'; + strncpy(ep_name->name, (char *) attr->name, OFI_NAME_MAX - 1); + ep_name->name[OFI_NAME_MAX - 1] = '\0'; if (ret < 0) { FI_WARN(prov, FI_LOG_EP_CTRL, "ftruncate error\n"); @@ -141,27 +141,27 @@ int sm2_create(const struct fi_provider *prov, const struct sm2_attr *attr, static void sm2_resolve_addr(const char *node, const char *service, char **addr, size_t *addrlen) { - char temp_name[FI_NAME_MAX]; + char temp_name[OFI_NAME_MAX]; FI_INFO(&sm2_prov, FI_LOG_EP_CTRL, "resolving node=%s, service=%s\n", node ? node : "NULL", service ? 
service : "NULL"); if (service) { if (node) *addrlen = - snprintf(temp_name, FI_NAME_MAX - 1, "%s%s:%s", + snprintf(temp_name, OFI_NAME_MAX - 1, "%s%s:%s", SM2_PREFIX_NS, node, service); else - *addrlen = snprintf(temp_name, FI_NAME_MAX - 1, "%s%s", + *addrlen = snprintf(temp_name, OFI_NAME_MAX - 1, "%s%s", SM2_PREFIX_NS, service); } else { if (node) - *addrlen = snprintf(temp_name, FI_NAME_MAX - 1, "%s%s", + *addrlen = snprintf(temp_name, OFI_NAME_MAX - 1, "%s%s", SM2_PREFIX, node); else - *addrlen = snprintf(temp_name, FI_NAME_MAX - 1, "%s%d", + *addrlen = snprintf(temp_name, OFI_NAME_MAX - 1, "%s%d", SM2_PREFIX, getpid()); } - *addr = strndup(temp_name, FI_NAME_MAX - 1); + *addr = strndup(temp_name, OFI_NAME_MAX - 1); FI_INFO(&sm2_prov, FI_LOG_EP_CTRL, "resolved to %s\n", temp_name); } diff --git a/prov/tcp/src/xnet_init.c b/prov/tcp/src/xnet_init.c index de8b2fe6ec5..0805ad94088 100644 --- a/prov/tcp/src/xnet_init.c +++ b/prov/tcp/src/xnet_init.c @@ -42,7 +42,7 @@ #include -static char xnet_prov_name[FI_NAME_MAX] = "tcp"; +static char xnet_prov_name[OFI_NAME_MAX] = "tcp"; static int xnet_getinfo(uint32_t version, const char *node, const char *service, uint64_t flags, const struct fi_info *hints, diff --git a/prov/util/src/util_av.c b/prov/util/src/util_av.c index adcb82b0b65..dcae29fd2ba 100644 --- a/prov/util/src/util_av.c +++ b/prov/util/src/util_av.c @@ -773,8 +773,8 @@ static int ip_av_nodesym_getaddr(struct util_av *av, const char *node, { struct addrinfo hints, *ai; void *addr_temp; - char name[FI_NAME_MAX]; - char svc[FI_NAME_MAX]; + char name[OFI_NAME_MAX]; + char svc[OFI_NAME_MAX]; size_t name_len, n, s; int ret, name_index, svc_index, count = (int)(nodecnt * svccnt); @@ -811,7 +811,7 @@ static int ip_av_nodesym_getaddr(struct util_av *av, const char *node, for (n = 0; n < nodecnt; n++) { if (nodecnt == 1) { strncpy(name, node, sizeof(name) - 1); - name[FI_NAME_MAX - 1] = '\0'; + name[OFI_NAME_MAX - 1] = '\0'; } else { snprintf(name + name_len, 
sizeof(name) - name_len - 1, "%zu", name_index + n); @@ -820,7 +820,7 @@ static int ip_av_nodesym_getaddr(struct util_av *av, const char *node, for (s = 0; s < svccnt; s++) { if (svccnt == 1) { strncpy(svc, service, sizeof(svc) - 1); - svc[FI_NAME_MAX - 1] = '\0'; + svc[OFI_NAME_MAX - 1] = '\0'; } else { snprintf(svc, sizeof(svc) - 1, "%zu", svc_index + s); @@ -854,7 +854,7 @@ int ofi_ip_av_sym_getaddr(struct util_av *av, const char *node, struct in_addr ip4; int ret; - if (strlen(node) >= FI_NAME_MAX || strlen(service) >= FI_NAME_MAX) { + if (strlen(node) >= OFI_NAME_MAX || strlen(service) >= OFI_NAME_MAX) { FI_WARN(av->prov, FI_LOG_AV, "node or service name is too long\n"); return -FI_ENOSYS; diff --git a/src/log.c b/src/log.c index 95ec543a8c6..951bad48198 100644 --- a/src/log.c +++ b/src/log.c @@ -46,6 +46,11 @@ #include "ofi_util.h" +enum { + OFI_LOG_SUBSYS_MAX = 10, + OFI_LOG_MAX = 4 +}; + static const char * const log_subsys[] = { [FI_LOG_CORE] = "core", [FI_LOG_FABRIC] = "fabric", @@ -57,7 +62,6 @@ static const char * const log_subsys[] = { [FI_LOG_EQ] = "eq", [FI_LOG_MR] = "mr", [FI_LOG_CNTR] = "cntr", - [FI_LOG_SUBSYS_MAX] = NULL }; static const char * const log_levels[] = { @@ -65,14 +69,13 @@ static const char * const log_levels[] = { [FI_LOG_TRACE] = "trace", [FI_LOG_INFO] = "info", [FI_LOG_DEBUG] = "debug", - [FI_LOG_MAX] = NULL }; enum { - FI_LOG_SUBSYS_OFFSET = FI_LOG_MAX, - FI_LOG_PROV_OFFSET = FI_LOG_SUBSYS_OFFSET + FI_LOG_SUBSYS_MAX, - FI_LOG_LEVEL_MASK = ((1 << FI_LOG_MAX) - 1), - FI_LOG_SUBSYS_MASK = (((1 << FI_LOG_SUBSYS_MAX) - 1) << + FI_LOG_SUBSYS_OFFSET = OFI_LOG_MAX, + FI_LOG_PROV_OFFSET = FI_LOG_SUBSYS_OFFSET + OFI_LOG_SUBSYS_MAX, + FI_LOG_LEVEL_MASK = ((1 << OFI_LOG_MAX) - 1), + FI_LOG_SUBSYS_MASK = (((1 << OFI_LOG_SUBSYS_MAX) - 1) << FI_LOG_SUBSYS_OFFSET), // FI_LOG_PROV_MASK = (((1 << (64 - FI_LOG_PROV_OFFSET)) - 1) << // FI_LOG_PROV_OFFSET) @@ -131,7 +134,7 @@ void fi_log_init(void) "Specify specific subsystem to log (default: all)"); 
fi_param_get_str(NULL, "log_subsys", &subsysstr); ofi_create_filter(&subsys_filter, subsysstr); - for (i = 0; i < FI_LOG_SUBSYS_MAX; i++) { + for (i = 0; i < OFI_LOG_SUBSYS_MAX; i++) { if (!ofi_apply_filter(&subsys_filter, log_subsys[i])) log_mask |= (1ULL << (i + FI_LOG_SUBSYS_OFFSET)); } @@ -298,7 +301,7 @@ int ofi_open_log(uint32_t version, void *attr, size_t attr_len, void ofi_tostr_log_level(char *buf, size_t len, enum fi_log_level level) { - if (level >= FI_LOG_MAX) + if (level > FI_LOG_DEBUG) ofi_strncatf(buf, len, "Unknown"); else ofi_strncatf(buf, len, log_levels[level]); @@ -306,7 +309,7 @@ void ofi_tostr_log_level(char *buf, size_t len, enum fi_log_level level) void ofi_tostr_log_subsys(char *buf, size_t len, enum fi_log_subsys subsys) { - if (subsys >= FI_LOG_SUBSYS_MAX) + if (subsys > FI_LOG_CNTR) ofi_strncatf(buf, len, "Unknown"); else ofi_strncatf(buf, len, log_subsys[subsys]); From 57984a2b91bb5b8377fc4dfcb1b527328db7470e Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 20 Sep 2023 16:50:27 -0700 Subject: [PATCH 09/34] core: Move FI_PRIORITY to internal flag Flag is only used between rxm and verbs. Signed-off-by: Sean Hefty --- include/ofi_util.h | 1 + include/rdma/fabric.h | 2 +- prov/rxm/src/rxm_domain.c | 2 +- prov/rxm/src/rxm_ep.c | 2 +- prov/verbs/src/verbs_ep.c | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/ofi_util.h b/include/ofi_util.h index 64248a4f83c..81e9eefa393 100644 --- a/include/ofi_util.h +++ b/include/ofi_util.h @@ -1236,6 +1236,7 @@ void *ofi_ns_resolve_name(struct util_ns *ns, const char *server, * the core by calling add_credits. 
*/ #define OFI_OPS_FLOW_CTRL "ofix_flow_ctrl_v1" +#define OFI_PRIORITY (1ULL << 62) struct ofi_ops_flow_ctrl { size_t size; diff --git a/include/rdma/fabric.h b/include/rdma/fabric.h index cd9413ba1f5..93909cffe3c 100644 --- a/include/rdma/fabric.h +++ b/include/rdma/fabric.h @@ -150,7 +150,7 @@ typedef struct fid *fid_t; #define FI_PEEK (1ULL << 19) #define FI_TRIGGER (1ULL << 20) #define FI_FENCE (1ULL << 21) -#define FI_PRIORITY (1ULL << 22) +/* #define FI_PRIORITY (1ULL << 22) */ #define FI_COMPLETION (1ULL << 24) #define FI_EVENT FI_COMPLETION diff --git a/prov/rxm/src/rxm_domain.c b/prov/rxm/src/rxm_domain.c index 643aa153e1b..321297afe1f 100644 --- a/prov/rxm/src/rxm_domain.c +++ b/prov/rxm/src/rxm_domain.c @@ -753,7 +753,7 @@ static ssize_t rxm_send_credits(struct fid_ep *ep, uint64_t credits) msg.context = tx_buf; msg.desc = &tx_buf->hdr.desc; - ret = fi_sendmsg(ep, &msg, FI_PRIORITY); + ret = fi_sendmsg(ep, &msg, OFI_PRIORITY); if (!ret) return FI_SUCCESS; diff --git a/prov/rxm/src/rxm_ep.c b/prov/rxm/src/rxm_ep.c index 0f27100fa1f..116017c036b 100644 --- a/prov/rxm/src/rxm_ep.c +++ b/prov/rxm/src/rxm_ep.c @@ -960,7 +960,7 @@ void rxm_ep_progress_deferred_queue(struct rxm_ep *rxm_ep, msg.msg_iov = &iov; ret = fi_sendmsg(def_tx_entry->rxm_conn->msg_ep, &msg, - FI_PRIORITY); + OFI_PRIORITY); if (ret) { if (ret != -FI_EAGAIN) { rxm_cq_write_error( diff --git a/prov/verbs/src/verbs_ep.c b/prov/verbs/src/verbs_ep.c index dd0ba3c0389..8eacc748299 100644 --- a/prov/verbs/src/verbs_ep.c +++ b/prov/verbs/src/verbs_ep.c @@ -155,7 +155,7 @@ ssize_t vrb_post_send(struct vrb_ep *ep, struct ibv_send_wr *wr, uint64_t flags) } if (vrb_wr_consumes_recv(wr) && !--ep->peer_rq_credits && - !(flags & FI_PRIORITY)) { + !(flags & OFI_PRIORITY)) { /* Last credit is reserved for credit update */ ep->peer_rq_credits++; goto freectx; From 486d48d0466a86ae2885d1844c88f80f6cb8c681 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 20 Sep 2023 16:58:18 -0700 Subject: [PATCH 10/34] 
core: Remove FI_PROVIDER_SPECIFIC The value is constrained to 32-bit int flags, not u64 flags. Signed-off-by: Sean Hefty --- include/rdma/fabric.h | 11 +---------- prov/psm2/include/fi_ext_psm2.h | 2 +- prov/rxm/src/rxm_domain.c | 2 +- src/fi_tostr.c | 10 ++-------- 4 files changed, 5 insertions(+), 20 deletions(-) diff --git a/include/rdma/fabric.h b/include/rdma/fabric.h index 93909cffe3c..0748e5bfcfe 100644 --- a/include/rdma/fabric.h +++ b/include/rdma/fabric.h @@ -115,11 +115,6 @@ struct fid_nic; typedef struct fid *fid_t; -/* - * Provider specific values are indicated by setting the high-order bit. - */ -#define FI_PROV_SPECIFIC (1U << 31) - /* * Flags * The 64-bit flag field is used as follows: @@ -691,11 +686,7 @@ static inline int fi_alias(struct fid *fid, struct fid **alias_fid, uint64_t fla return fi_control(fid, FI_ALIAS, &alias); } -/* fid value names */ -/* - * Currently no common name is defined. Provider specific names should - * have the FI_PROV_SPECIFIC bit set. - */ +/* Provider specific names should set the uppermost bit. 
*/ static inline int fi_get_val(struct fid *fid, int name, void *val) { diff --git a/prov/psm2/include/fi_ext_psm2.h b/prov/psm2/include/fi_ext_psm2.h index 3a48d83e17f..804fbf37ecc 100644 --- a/prov/psm2/include/fi_ext_psm2.h +++ b/prov/psm2/include/fi_ext_psm2.h @@ -38,7 +38,7 @@ extern "C" { #endif /* Provider specific name for fi_set_val() / fi_get_val() */ -#define FI_PSM2_DISCONNECT (1U | FI_PROV_SPECIFIC) +#define FI_PSM2_DISCONNECT (1U | (1UL << 31)) #ifdef __cplusplus } diff --git a/prov/rxm/src/rxm_domain.c b/prov/rxm/src/rxm_domain.c index 321297afe1f..1b159b528c3 100644 --- a/prov/rxm/src/rxm_domain.c +++ b/prov/rxm/src/rxm_domain.c @@ -472,7 +472,7 @@ int rxm_msg_mr_reg_internal(struct rxm_domain *rxm_domain, const void *buf, /* If we can't get a key within 1024 tries, give up */ do { ret = fi_mr_reg(rxm_domain->msg_domain, buf, len, acs, 0, - rxm_domain->mr_key++ | FI_PROV_SPECIFIC, + rxm_domain->mr_key++ | (1UL << 31), flags, mr, NULL); } while (ret == -FI_ENOKEY && tries++ < 1024); diff --git a/src/fi_tostr.c b/src/fi_tostr.c index b4d99bc830b..21d2660ff4d 100644 --- a/src/fi_tostr.c +++ b/src/fi_tostr.c @@ -126,10 +126,7 @@ static void ofi_tostr_addr_format(char *buf, size_t len, uint32_t addr_format) CASEENUMSTRN(FI_ADDR_OPX, len); CASEENUMSTRN(FI_ADDR_CXI, len); default: - if (addr_format & FI_PROV_SPECIFIC) - ofi_strncatf(buf, len, "Provider specific"); - else - ofi_strncatf(buf, len, "Unknown"); + ofi_strncatf(buf, len, "Unknown"); break; } } @@ -276,10 +273,7 @@ static void ofi_tostr_protocol(char *buf, size_t len, uint32_t protocol) CASEENUMSTRN(FI_PROTO_XNET, len); CASEENUMSTRN(FI_PROTO_SM2, len); default: - if (protocol & FI_PROV_SPECIFIC) - ofi_strncatf(buf, len, "Provider specific"); - else - ofi_strncatf(buf, len, "Unknown"); + ofi_strncatf(buf, len, "Unknown"); break; } } From 224195ddfce4b18c03886a87767a623efcb19d43 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 20 Sep 2023 17:27:31 -0700 Subject: [PATCH 11/34] core: Remove 
unimplemented EP types Signed-off-by: Sean Hefty --- fabtests/Makefile.am | 5 - fabtests/functional/stream_msg.c | 277 ------------------------------- include/rdma/fabric.h | 4 +- man/fi_endpoint.3.md | 14 -- src/fi_tostr.c | 2 - util/info.c | 2 - 6 files changed, 2 insertions(+), 302 deletions(-) delete mode 100644 fabtests/functional/stream_msg.c diff --git a/fabtests/Makefile.am b/fabtests/Makefile.am index cee15821b19..e832e8404ee 100644 --- a/fabtests/Makefile.am +++ b/fabtests/Makefile.am @@ -15,7 +15,6 @@ endif bin_PROGRAMS = \ functional/fi_av_xfer \ functional/fi_msg \ - functional/fi_stream_msg \ functional/fi_msg_sockets \ functional/fi_rdm \ functional/fi_rdm_rma_event \ @@ -248,10 +247,6 @@ functional_fi_msg_SOURCES = \ functional/msg.c functional_fi_msg_LDADD = libfabtests.la -functional_fi_stream_msg_SOURCES = \ - functional/stream_msg.c -functional_fi_stream_msg_LDADD = libfabtests.la - functional_fi_rdm_SOURCES = \ functional/rdm.c functional_fi_rdm_LDADD = libfabtests.la diff --git a/fabtests/functional/stream_msg.c b/fabtests/functional/stream_msg.c deleted file mode 100644 index ee6de1eab2f..00000000000 --- a/fabtests/functional/stream_msg.c +++ /dev/null @@ -1,277 +0,0 @@ -/* - * Copyright (c) 2018 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include "shared.h" -#include -#include -#include -#include - -const char *msg = "hello stream!"; - - -int send_stream(struct fid_ep *ep, const char *msg, size_t msg_len) -{ - int offset, ret; - - for (offset = 0; offset < msg_len; ) { - ret = fi_send(ep, (msg + offset), (msg_len - offset), NULL, 0, NULL); - if (ret < 0 && ret != -FI_EAGAIN) { - fprintf(stderr, "%s error %s\n", __func__, fi_strerror(-ret)); - return ret; - } - - if (ret > 0) - offset += ret; - } - - return offset; -} - -int recv_stream(struct fid_ep *ep, char *msg, size_t msg_len) -{ - int offset, ret; - - for (offset = 0; offset < msg_len; ) { - ret = fi_recv(ep, (msg + offset), (msg_len - offset), NULL, 0, NULL); - if (ret < 0 && ret != -FI_EAGAIN) { - fprintf(stderr, "%s error %s\n", __func__, fi_strerror(-ret)); - return ret; - } - if (ret > 0) - offset += ret; - } - - return offset; -} - -static int send_greeting(struct fid_ep *ep) -{ - const size_t msg_len = strlen(msg); - char buffer[msg_len]; - int ret; - - ret = send_stream(ep, msg, msg_len); - if (ret < 0) - return ret; - - ret = recv_stream(ep, buffer, msg_len); - if (ret < 0) - return ret; - - if (strncmp(buffer, msg, msg_len) != 0) { - printf("error recv: %s\n", buffer); - return -FI_EIO; - 
} - - return 0; -} - -static int recv_greeting(struct fid_ep *ep) -{ - const size_t msg_len = strlen(msg); - char buffer[msg_len]; - int ret; - - ret = recv_stream(ep, buffer, msg_len); - if (ret < 0) - return ret; - - if (strncmp(buffer, msg, msg_len) != 0) { - printf("error recv: %s\n", buffer); - return -FI_EIO; - } - - ret = send_stream(ep, msg, msg_len); - if (ret < 0) - return ret; - - return 0; -} - -static int send_recv_greeting(struct fid_ep *ep) -{ - return opts.dst_addr ? recv_greeting(ep) : send_greeting(ep); -} - -int stream_init_ep() -{ - int ret = fi_endpoint(domain, fi, &ep, NULL); - if (ret) { - FT_PRINTERR("fi_endpoint", ret); - return ret; - } - - FT_EP_BIND(ep, eq, 0); - - ret = fi_enable(ep); - if (ret) { - FT_PRINTERR("fi_enable", ret); - return ret; - } - return 0; -} - -void print_address(struct sockaddr_in *addr) -{ - printf(" accepted IPv4: %s port: %u\n", inet_ntoa(addr->sin_addr), - ntohs(addr->sin_port)); -} - -int stream_server_connect(void) -{ - int ret; - struct sockaddr_in peer_addr; - size_t addrlen = sizeof(struct sockaddr_in); - - ret = ft_retrieve_conn_req(eq, &fi); - if (ret) - goto err; - - ret = fi_domain(fabric, fi, &domain, NULL); - if (ret) { - FT_PRINTERR("fi_domain", ret); - goto err; - } - - ret = stream_init_ep(); - if (ret) - goto err; - - ret = ft_accept_connection(ep, eq); - if (ret) - goto err; - - ret = fi_getpeer(ep, &peer_addr, &addrlen); - print_address(&peer_addr); - return 0; - -err: - return ret; -} - - -int stream_client_connect() -{ - int ret; - - ret = ft_getinfo(hints, &fi); - if (ret) - return ret; - - ret = ft_open_fabric_res(); - if (ret) - return ret; - - ret = stream_init_ep(); - if (ret) - return ret; - - ret = ft_connect_ep(ep, eq, fi->dest_addr); - if (ret) - return ret; - - return 0; -} - -void set_stream_hints(void) { - hints->ep_attr->type = FI_EP_SOCK_STREAM; - hints->caps = FI_MSG; - hints->domain_attr->mr_mode = 0; - hints->addr_format = FI_SOCKADDR; - hints->domain_attr->threading = 
FI_THREAD_SAFE; - hints->domain_attr->data_progress = FI_PROGRESS_MANUAL; - hints->domain_attr->control_progress = FI_PROGRESS_AUTO; - hints->tx_attr->msg_order = FI_ORDER_SAS; - hints->rx_attr->msg_order = FI_ORDER_SAS; -} -static int stream_run(void) -{ - int ret; - - if (!opts.dst_addr) { - ret = ft_start_server(); - if (ret) - return ret; - } - - ret = opts.dst_addr ? stream_client_connect() : stream_server_connect(); - if (ret) { - return ret; - } - - ret = send_recv_greeting(ep); - if (ret < 0) - return ret; - - fi_shutdown(ep, 0); - return ret; -} - -int main(int argc, char **argv) -{ - int op, ret; - - opts = INIT_OPTS; - /* remove CQ usage on ep */ - opts.options = FT_OPT_SIZE; - - hints = fi_allocinfo(); - if (!hints) - return EXIT_FAILURE; - - while ((op = getopt(argc, argv, "h" ADDR_OPTS INFO_OPTS)) != -1) { - switch (op) { - default: - ft_parse_addr_opts(op, optarg, &opts); - ft_parseinfo(op, optarg, hints, &opts); - break; - case '?': - case 'h': - ft_usage(argv[0], "A simple MSG client-sever example."); - return EXIT_FAILURE; - } - } - - if (optind < argc) - opts.dst_addr = argv[optind]; - - set_stream_hints(); - - ret = stream_run(); - - ft_free_res(); - return ft_exit_code(ret); -} diff --git a/include/rdma/fabric.h b/include/rdma/fabric.h index 0748e5bfcfe..226f395f772 100644 --- a/include/rdma/fabric.h +++ b/include/rdma/fabric.h @@ -293,8 +293,8 @@ enum fi_ep_type { FI_EP_MSG, FI_EP_DGRAM, FI_EP_RDM, - FI_EP_SOCK_STREAM, - FI_EP_SOCK_DGRAM, + /* FI_EP_SOCK_STREAM, */ + /* FI_EP_SOCK_DGRAM, */ }; /* Endpoint protocol diff --git a/man/fi_endpoint.3.md b/man/fi_endpoint.3.md index 19188184c84..bf7855c185e 100644 --- a/man/fi_endpoint.3.md +++ b/man/fi_endpoint.3.md @@ -655,20 +655,6 @@ desired. Supported types are: transfer service with flow control that maintains message boundaries. -*FI_EP_SOCK_DGRAM* -: A connectionless, unreliable datagram endpoint with UDP socket-like - semantics. 
FI_EP_SOCK_DGRAM is most useful for applications designed - around using UDP sockets. See the SOCKET ENDPOINT section for additional - details and restrictions that apply to datagram socket endpoints. - -*FI_EP_SOCK_STREAM* -: Data streaming endpoint with TCP socket-like semantics. Provides - a reliable, connection-oriented data transfer service that does - not maintain message boundaries. FI_EP_SOCK_STREAM is most useful for - applications designed around using TCP sockets. See the SOCKET - ENDPOINT section for additional details and restrictions that apply - to stream endpoints. - *FI_EP_UNSPEC* : The type of endpoint is not specified. This is usually provided as input, with other attributes of the endpoint or the provider diff --git a/src/fi_tostr.c b/src/fi_tostr.c index 21d2660ff4d..592f43036fd 100644 --- a/src/fi_tostr.c +++ b/src/fi_tostr.c @@ -238,8 +238,6 @@ static void ofi_tostr_ep_type(char *buf, size_t len, enum fi_ep_type ep_type) CASEENUMSTRN(FI_EP_MSG, len); CASEENUMSTRN(FI_EP_DGRAM, len); CASEENUMSTRN(FI_EP_RDM, len); - CASEENUMSTRN(FI_EP_SOCK_STREAM, len); - CASEENUMSTRN(FI_EP_SOCK_DGRAM, len); default: ofi_strncatf(buf, len, "Unknown"); break; diff --git a/util/info.c b/util/info.c index 6dc7789f2d1..c5ae758e6c5 100644 --- a/util/info.c +++ b/util/info.c @@ -177,8 +177,6 @@ static int str2ep_type(char *inputstr, enum fi_ep_type *value) ORCASE(FI_EP_MSG); ORCASE(FI_EP_DGRAM); ORCASE(FI_EP_RDM); - ORCASE(FI_EP_SOCK_STREAM); - ORCASE(FI_EP_SOCK_DGRAM); fprintf(stderr, "error: Unrecognized endpoint type: %s\n", inputstr); From d40be4aa95fd2a5bb5bd615a7c18c8683784912f Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 20 Sep 2023 17:30:02 -0700 Subject: [PATCH 12/34] core: Remove unimplemented FI_VARIABLE_MSG Signed-off-by: Sean Hefty --- fabtests/unit/getinfo_test.c | 3 +-- include/ofi.h | 6 ++---- include/rdma/fabric.h | 2 +- man/fi_endpoint.3.md | 2 +- man/fi_getinfo.3.md | 13 +------------ man/fi_msg.3.md | 28 ++-------------------------- 
man/fi_tagged.3.md | 14 ++++---------- src/fi_tostr.c | 1 - 8 files changed, 12 insertions(+), 57 deletions(-) diff --git a/fabtests/unit/getinfo_test.c b/fabtests/unit/getinfo_test.c index a21e268aff3..d46f96668da 100644 --- a/fabtests/unit/getinfo_test.c +++ b/fabtests/unit/getinfo_test.c @@ -214,8 +214,7 @@ static int init_caps(struct fi_info *hints, uint64_t bits) FI_MULTICAST | FI_NAMED_RX_CTX | FI_HMEM | \ FI_COLLECTIVE) #define PRIMARY_RX_CAPS (FI_MSG | FI_RMA | FI_TAGGED | FI_ATOMIC | \ - FI_DIRECTED_RECV | FI_VARIABLE_MSG | \ - FI_HMEM | FI_COLLECTIVE) + FI_DIRECTED_RECV | FI_HMEM | FI_COLLECTIVE) #define PRIMARY_CAPS (PRIMARY_TX_CAPS | PRIMARY_RX_CAPS) #define DOMAIN_CAPS (FI_LOCAL_COMM | FI_REMOTE_COMM | FI_SHARED_AV) diff --git a/include/ofi.h b/include/ofi.h index b5bda966011..60146dce9a3 100644 --- a/include/ofi.h +++ b/include/ofi.h @@ -103,8 +103,7 @@ enum { #define OFI_PRIMARY_RX_CAPS \ (FI_MSG | FI_RMA | FI_TAGGED | FI_ATOMIC | \ FI_REMOTE_READ | FI_REMOTE_WRITE | FI_RECV | \ - FI_DIRECTED_RECV | FI_VARIABLE_MSG | \ - FI_COLLECTIVE | FI_HMEM) + FI_DIRECTED_RECV | FI_COLLECTIVE | FI_HMEM) #define OFI_SECONDARY_RX_CAPS \ (FI_MULTI_RECV | FI_TRIGGER | FI_RMA_PMEM | FI_SOURCE | \ @@ -125,8 +124,7 @@ enum { #define OFI_IGNORED_TX_CAPS /* older Rx caps not applicable to Tx */ \ (FI_REMOTE_READ | FI_REMOTE_WRITE | FI_RECV | FI_DIRECTED_RECV | \ - FI_VARIABLE_MSG | FI_MULTI_RECV | FI_SOURCE | FI_RMA_EVENT | \ - FI_SOURCE_ERR) + FI_MULTI_RECV | FI_SOURCE | FI_RMA_EVENT | FI_SOURCE_ERR) #define OFI_IGNORED_RX_CAPS /* Older Tx caps not applicable to Rx */ \ (FI_READ | FI_WRITE | FI_SEND | FI_FENCE | FI_MULTICAST | \ FI_NAMED_RX_CTX) diff --git a/include/rdma/fabric.h b/include/rdma/fabric.h index 226f395f772..8ac038ad666 100644 --- a/include/rdma/fabric.h +++ b/include/rdma/fabric.h @@ -165,7 +165,7 @@ typedef struct fid *fid_t; #define FI_HMEM_HOST_ALLOC (1ULL << 45) #define FI_HMEM_DEVICE_ONLY (1ULL << 46) #define FI_HMEM (1ULL << 47) -#define 
FI_VARIABLE_MSG (1ULL << 48) +/* #define FI_VARIABLE_MSG (1ULL << 48) */ #define FI_RMA_PMEM (1ULL << 49) #define FI_SOURCE_ERR (1ULL << 50) #define FI_LOCAL_COMM (1ULL << 51) diff --git a/man/fi_endpoint.3.md b/man/fi_endpoint.3.md index bf7855c185e..cadf455535f 100644 --- a/man/fi_endpoint.3.md +++ b/man/fi_endpoint.3.md @@ -1244,7 +1244,7 @@ capability bits from the fi_info structure will be used. The following capabilities apply to the receive attributes: FI_MSG, FI_RMA, FI_TAGGED, FI_ATOMIC, FI_REMOTE_READ, FI_REMOTE_WRITE, FI_RECV, -FI_HMEM, FI_TRIGGER, FI_RMA_PMEM, FI_DIRECTED_RECV, FI_VARIABLE_MSG, +FI_HMEM, FI_TRIGGER, FI_RMA_PMEM, FI_DIRECTED_RECV, FI_MULTI_RECV, FI_SOURCE, FI_RMA_EVENT, FI_SOURCE_ERR, FI_COLLECTIVE, and FI_XPU. diff --git a/man/fi_getinfo.3.md b/man/fi_getinfo.3.md index cf3a0ffdc26..a0cf53f6e35 100644 --- a/man/fi_getinfo.3.md +++ b/man/fi_getinfo.3.md @@ -428,16 +428,6 @@ additional optimizations. Endpoints support this capability must meet the usage model as described by [`fi_trigger`(3)](fi_trigger.3.html). -*FI_VARIABLE_MSG* - -: Requests that the provider must notify a receiver when a variable - length message is ready to be received prior to attempting to place - the data. Such notification will include the size of the message and - any associated message tag (for FI_TAGGED). See 'Variable Length - Messages' in fi_msg.3 for full details. Variable length messages - are any messages larger than an endpoint configurable size. This - flag requires that FI_MSG and/or FI_TAGGED be set. - *FI_WRITE* : Indicates that the user requires an endpoint capable of initiating writes against remote memory regions. This flag requires that FI_RMA @@ -465,8 +455,7 @@ may optionally report non-selected secondary capabilities if doing so would not compromise performance or security. 
Primary capabilities: FI_MSG, FI_RMA, FI_TAGGED, FI_ATOMIC, FI_MULTICAST, -FI_NAMED_RX_CTX, FI_DIRECTED_RECV, FI_VARIABLE_MSG, FI_HMEM, FI_COLLECTIVE, -FI_XPU +FI_NAMED_RX_CTX, FI_DIRECTED_RECV, FI_HMEM, FI_COLLECTIVE, FI_XPU Primary modifiers: FI_READ, FI_WRITE, FI_RECV, FI_SEND, FI_REMOTE_READ, FI_REMOTE_WRITE diff --git a/man/fi_msg.3.md b/man/fi_msg.3.md index 170c08b2217..dd608ac3c95 100644 --- a/man/fi_msg.3.md +++ b/man/fi_msg.3.md @@ -222,7 +222,7 @@ fi_sendmsg. *FI_CLAIM* : Applies to posted receive operations for endpoints configured - for FI_BUFFERED_RECV or FI_VARIABLE_MSG. This flag is used to + for FI_BUFFERED_RECV. This flag is used to retrieve a message that was buffered by the provider. See the Buffered Receives section for details. @@ -234,7 +234,7 @@ fi_sendmsg. *FI_DISCARD* : Applies to posted receive operations for endpoints configured - for FI_BUFFERED_RECV or FI_VARIABLE_MSG. This flag is used to + for FI_BUFFERED_RECV. This flag is used to free a message that was buffered by the provider. See the Buffered Receives section for details. @@ -389,30 +389,6 @@ restrictions assigned to an endpoint. For example, completions may indicate the order in which received messages arrived at the receiver based on the endpoint attributes. -# Variable Length Messages - -Variable length messages, or simply variable messages, are transfers -where the size of the message is unknown to the receiver prior to the -message being sent. It indicates that the recipient of a message does -not know the amount of data to expect prior to the message arriving. -It is most commonly used when the size of message transfers varies -greatly, with very large messages interspersed with much smaller -messages, making receive side message buffering difficult to manage. -Variable messages are not subject to max message length -restrictions (i.e. struct fi_ep_attr::max_msg_size limits), and may -be up to the maximum value of size_t (e.g. SIZE_MAX) in length. 
- -Variable length messages support requests that the provider allocate and -manage the network message buffers. As a result, the application -requirements and provider behavior is identical as those defined -for supporting the FI_BUFFERED_RECV mode bit. See the Buffered -Receive section above for details. The main difference is that buffered -receives are limited by the fi_ep_attr::max_msg_size threshold, whereas -variable length messages are not. - -Support for variable messages is indicated through the FI_VARIABLE_MSG -capability bit. - # NOTES If an endpoint has been configured with FI_MSG_PREFIX, the application diff --git a/man/fi_tagged.3.md b/man/fi_tagged.3.md index 34378600ddc..39f4eff12a1 100644 --- a/man/fi_tagged.3.md +++ b/man/fi_tagged.3.md @@ -319,8 +319,8 @@ The following flags may be used with fi_trecvmsg. fi_context structure used for an FI_PEEK + FI_CLAIM operation must be used by the paired FI_CLAIM request. - This flag also applies to endpoints configured for FI_BUFFERED_RECV or - FI_VARIABLE_MSG. When set, it is used to retrieve a tagged message that + This flag also applies to endpoints configured for FI_BUFFERED_RECV. + When set, it is used to retrieve a tagged message that was buffered by the provider. See Buffered Tagged Receives section for details. @@ -333,8 +333,8 @@ The following flags may be used with fi_trecvmsg. FI_CLAIM in order to discard a message previously claimed using an FI_PEEK + FI_CLAIM request. - This flag also applies to endpoints configured for FI_BUFFERED_RECV or - FI_VARIABLE_MSG. When set, it indicates that the provider should free + This flag also applies to endpoints configured for FI_BUFFERED_RECV. + When set, it indicates that the provider should free a buffered messages. See Buffered Tagged Receives section for details. If this flag is set, the input buffer(s) and length parameters are ignored. 
@@ -375,12 +375,6 @@ After being notified that a buffered receive has arrived, applications must either claim or discard the message as described in [`fi_msg`(3)](fi_msg.3.html). -# Variable Length Tagged Messages - -Variable length messages are defined in [`fi_msg`(3)](fi_msg.3.html). -The requirements for handling variable length tagged messages is identical -to those defined above for buffered tagged receives. - # RETURN VALUE The tagged send and receive calls return 0 on success. On error, a diff --git a/src/fi_tostr.c b/src/fi_tostr.c index 592f43036fd..61c2a4b092b 100644 --- a/src/fi_tostr.c +++ b/src/fi_tostr.c @@ -216,7 +216,6 @@ static void ofi_tostr_caps(char *buf, size_t len, uint64_t caps) IFFLAGSTRN(caps, FI_TRIGGER, len); IFFLAGSTRN(caps, FI_FENCE, len); - IFFLAGSTRN(caps, FI_VARIABLE_MSG, len); IFFLAGSTRN(caps, FI_RMA_PMEM, len); IFFLAGSTRN(caps, FI_SOURCE_ERR, len); IFFLAGSTRN(caps, FI_LOCAL_COMM, len); From 50717fcadd939ca899c5f571e0b9904c12c75d4d Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 20 Sep 2023 17:32:28 -0700 Subject: [PATCH 13/34] core: Remove unimplemented FI_XPU_TRIGGER Signed-off-by: Sean Hefty --- include/rdma/fabric.h | 2 +- include/rdma/fi_endpoint.h | 2 +- man/fi_endpoint.3.md | 16 ----- man/fi_trigger.3.md | 118 ------------------------------------- 4 files changed, 2 insertions(+), 136 deletions(-) diff --git a/include/rdma/fabric.h b/include/rdma/fabric.h index 8ac038ad666..fee2f71966d 100644 --- a/include/rdma/fabric.h +++ b/include/rdma/fabric.h @@ -161,7 +161,7 @@ typedef struct fid *fid_t; #define FI_MR_DMABUF (1ULL << 40) #define FI_AV_USER_ID (1ULL << 41) #define FI_PEER (1ULL << 43) -#define FI_XPU_TRIGGER (1ULL << 44) +/* #define FI_XPU_TRIGGER (1ULL << 44) */ #define FI_HMEM_HOST_ALLOC (1ULL << 45) #define FI_HMEM_DEVICE_ONLY (1ULL << 46) #define FI_HMEM (1ULL << 47) diff --git a/include/rdma/fi_endpoint.h b/include/rdma/fi_endpoint.h index 69d4a6c4ff0..6b7a561f4b9 100644 --- a/include/rdma/fi_endpoint.h +++ 
b/include/rdma/fi_endpoint.h @@ -67,7 +67,7 @@ enum { FI_OPT_TX_SIZE, FI_OPT_RX_SIZE, FI_OPT_FI_HMEM_P2P, /* int */ - FI_OPT_XPU_TRIGGER, /* struct fi_trigger_xpu */ + FI_OPT_XPU_TRIGGER, /* reserved for compatibility */ FI_OPT_CUDA_API_PERMITTED, /* bool */ }; diff --git a/man/fi_endpoint.3.md b/man/fi_endpoint.3.md index cadf455535f..95f83a60de2 100644 --- a/man/fi_endpoint.3.md +++ b/man/fi_endpoint.3.md @@ -550,22 +550,6 @@ The following option levels and option names and parameters are defined. : The FI_HMEM_DISABLE_P2P environment variable discussed in [`fi_mr`(3)](fi_mr.3.html) takes precedence over this setopt option. -- *FI_OPT_XPU_TRIGGER - struct fi_trigger_xpu \** -: This option only applies to the fi_getopt() call. It is used to query - the maximum number of variables required to support XPU - triggered operations, along with the size of each variable. - - The user provides a filled out struct fi_trigger_xpu on input. The iface - and device fields should reference an HMEM domain. If the provider does not - support XPU triggered operations from the given device, fi_getopt() will - return -FI_EOPNOTSUPP. On input, var should reference an array of - struct fi_trigger_var data structures, with count set to the size of the - referenced array. If count is 0, the var field will be ignored, and the - provider will return the number of fi_trigger_var structures needed. If - count is > 0, the provider will set count to the needed value, and for - each fi_trigger_var available, set the datatype and count of the variable - used for the trigger. - - *FI_OPT_CUDA_API_PERMITTED - bool \** : This option only applies to the fi_setopt call. It is used to control endpoint's behavior in making calls to CUDA API. 
By default, an endpoint diff --git a/man/fi_trigger.3.md b/man/fi_trigger.3.md index 9e4e2036a25..98671803e87 100644 --- a/man/fi_trigger.3.md +++ b/man/fi_trigger.3.md @@ -103,124 +103,6 @@ struct fi_trigger_threshold { they will be triggered in the order in which they were submitted to the endpoint. -# XPU TRIGGERS - -XPU based triggers work in conjunction with heterogenous memory (FI_HMEM -capability). XPU triggers define a split execution model for specifying -a data transfer separately from initiating the transfer. Unlike completion -triggers, the user controls the timing of when the transfer starts by -writing data into a trigger variable location. - -XPU transfers allow the requesting and triggering to occur on separate -computational domains. For example, a process running on the host CPU can -setup a data transfer, with a compute kernel running on a GPU signaling -the start of the transfer. XPU refers to a CPU, GPU, FPGA, or other -acceleration device with some level of computational ability. - -Endpoints must be created with both the FI_TRIGGER and FI_XPU capabilities -to use XPU triggers. XPU triggered enabled endpoints only support XPU -triggered operations. The behavior of mixing XPU triggered operations with -normal data transfers or non-XPU triggered operations is not defined by -the API and subject to provider support and implementation. - -The use of XPU triggers requires coordination between the fabric provider, -application, and submitting XPU. The result is that hardware -implementation details need to be conveyed across the computational domains. -The XPU trigger API abstracts those details. When submitting a XPU trigger -operation, the user identifies the XPU where the triggering will -occur. The triggering XPU must match with the location of the local memory -regions. For example, if triggering will be done by a GPU kernel, the -type of GPU and its local identifier are given. 
As output, the fabric -provider will return a list of variables and corresponding values. -The XPU signals that the data transfer is safe to initiate by writing -the given values to the specified variable locations. The number of -variables and their sizes are provider specific. - -XPU trigger operations are submitted using the FI_TRIGGER flag with -struct fi_triggered_context or struct fi_triggered_context2, as -required by the provider. The trigger event_type is: - -*FI_TRIGGER_XPU* -: Indicates that the data transfer operation will be deferred until - the user writes provider specified data to provider indicated - memory locations. The user indicates which device will initiate - the write. The struct fi_trigger_xpu is used to convey both - input and output data regarding the signaling of the trigger. - -```c -struct fi_trigger_var { - enum fi_datatype datatype; - int count; - void *addr; - union { - uint8_t val8; - uint16_t val16; - uint32_t val32; - uint64_t val64; - uint8_t *data; - } value; -}; - -struct fi_trigger_xpu { - int count; - enum fi_hmem_iface iface; - union { - uint64_t reserved; - int cuda; - int ze; - } device; - struct fi_trigger_var *var; -}; -``` - -On input to a triggered operation, the iface field indicates the software -interface that will be used to write the variables. The device union -specifies the device identifier. For valid iface and device values, see -[`fi_mr`(3)](fi_mr.3.html). The iface and device must match with the -iface and device of any local HMEM memory regions. Count should be set -to the number of fi_trigger_var structures available, with the var field -pointing to an array of struct fi_trigger_var. The user is responsible for -ensuring that there are sufficient fi_trigger_var structures available and of -an appropriate size. The count and size of fi_trigger_var structures -can be obtained by calling fi_getopt() on the endpoint with the -FI_OPT_XPU_TRIGGER option. See [`fi_endpoint`(3)](fi_endpoint.3.html) -for details. 
- -Each fi_trigger_var structure referenced should have the datatype -and count fields initialized to the number of values referenced by the -struct fi_trigger_val. If the count is 1, one of the val fields will be used -to return the necessary data (val8, val16, etc.). If count > 1, the data -field will return all necessary data used to signal the trigger. The data -field must reference a buffer large enough to hold the returned bytes. - -On output, the provider will set the fi_trigger_xpu count to the number of -fi_trigger_var variables that must be signaled. Count will be less than or -equal to the input value. The provider will initialize each valid -fi_trigger_var entry with information needed to signal the trigger. The -datatype indicates the size of the data that must be written. Valid datatype -values are FI_UINT8, FI_UINT16, FI_UINT32, and FI_UINT64. For signal -variables <= 64 bits, the count field will be 1. If a trigger requires writing -more than 64-bits, the datatype field will be set to FI_UINT8, with count set -to the number of bytes that must be written. The data that must be written -to signal the start of an operation is returned through either the value -union val fields or data array. - -Users signal the start of a transfer by writing the returned data to the -given memory address. The write must occur from the specified input XPU -location (based on the iface and device fields). If a transfer cannot -be initiated for some reason, such as an error occurring before the -transfer can start, the triggered operation should -be canceled to release any allocated resources. If multiple variables are -specified, they must be updated in order. - -Note that the provider will not modify the fi_trigger_xpu or fi_trigger_var -structures after returning from the data transfer call. 
- -In order to support multiple provider implementations, users should trigger -data transfer operations in the same order that they are queued and should -serialize the writing of triggers that reference the same endpoint. Providers -may return the same trigger variable for multiple data transfer requests. - # DEFERRED WORK QUEUES The following feature and description are enhancements to triggered From d3ba9f8343c2580c51c1cdd7625d129a3baa86b5 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 20 Sep 2023 17:35:43 -0700 Subject: [PATCH 14/34] core: Remove unused FI_RESTRICTED_COMP and FI_NOTIFY_FLAGS_ONLY Signed-off-by: Sean Hefty --- fabtests/unit/eq_test.c | 4 +--- include/rdma/fabric.h | 4 ++-- man/fi_cq.3.md | 6 ------ man/fi_domain.3.md | 9 --------- man/fi_getinfo.3.md | 14 -------------- src/fi_tostr.c | 2 -- util/info.c | 2 -- 7 files changed, 3 insertions(+), 38 deletions(-) diff --git a/fabtests/unit/eq_test.c b/fabtests/unit/eq_test.c index 96ce477ed66..f7a03a0e7d5 100644 --- a/fabtests/unit/eq_test.c +++ b/fabtests/unit/eq_test.c @@ -610,9 +610,7 @@ int main(int argc, char **argv) } hints->mode = FI_CONTEXT | FI_CONTEXT2 | FI_MSG_PREFIX | FI_ASYNC_IOV | - FI_RX_CQ_DATA | FI_NOTIFY_FLAGS_ONLY | FI_RESTRICTED_COMP | - FI_BUFFERED_RECV; - hints->domain_attr->mode = FI_RESTRICTED_COMP; + FI_RX_CQ_DATA | FI_BUFFERED_RECV; hints->domain_attr->mr_mode = ~(FI_MR_BASIC | FI_MR_SCALABLE); ret = fi_getinfo(FT_FIVERSION, NULL, 0, 0, hints, &fi); diff --git a/include/rdma/fabric.h b/include/rdma/fabric.h index fee2f71966d..b02a17e6169 100644 --- a/include/rdma/fabric.h +++ b/include/rdma/fabric.h @@ -367,8 +367,8 @@ static inline uint8_t fi_tc_dscp_get(uint32_t tclass) #define FI_ASYNC_IOV (1ULL << 57) #define FI_RX_CQ_DATA (1ULL << 56) #define FI_LOCAL_MR (1ULL << 55) -#define FI_NOTIFY_FLAGS_ONLY (1ULL << 54) -#define FI_RESTRICTED_COMP (1ULL << 53) +/* #define FI_NOTIFY_FLAGS_ONLY (1ULL << 54) */ +/* #define FI_RESTRICTED_COMP (1ULL << 53) */ #define 
FI_CONTEXT2 (1ULL << 52) #define FI_BUFFERED_RECV (1ULL << 51) /* #define FI_PEER_TRANSFER (1ULL << 36) */ diff --git a/man/fi_cq.3.md b/man/fi_cq.3.md index 6bf145b001a..f7b81c9147a 100644 --- a/man/fi_cq.3.md +++ b/man/fi_cq.3.md @@ -974,12 +974,6 @@ A completion queue must be bound to at least one enabled endpoint before any operation such as fi_cq_read, fi_cq_readfrom, fi_cq_sread, fi_cq_sreadfrom etc. can be called on it. -Completion flags may be suppressed if the FI_NOTIFY_FLAGS_ONLY mode bit -has been set. When enabled, only the following flags are guaranteed to -be set in completion data when they are valid: FI_REMOTE_READ and -FI_REMOTE_WRITE (when FI_RMA_EVENT capability bit has been set), -FI_REMOTE_CQ_DATA, and FI_MULTI_RECV. - If a completion queue has been overrun, it will be placed into an 'overrun' state. Read operations will continue to return any valid, non-corrupted completions, if available. After all valid completions have been retrieved, diff --git a/man/fi_domain.3.md b/man/fi_domain.3.md index f6b46d47e7a..3ee6b8bfe35 100644 --- a/man/fi_domain.3.md +++ b/man/fi_domain.3.md @@ -727,15 +727,6 @@ See [`fi_getinfo`(3)](fi_getinfo.3.html) for a discussion on primary versus secondary capabilities. All domain capabilities are considered secondary capabilities. -## mode - -The operational mode bit related to using the domain. - -*FI_RESTRICTED_COMP* -: This bit indicates that the domain limits completion queues and counters - to only be used with endpoints, transmit contexts, and receive contexts that - have the same set of capability flags. - ## Default authorization key (auth_key) The default authorization key to associate with endpoint and memory diff --git a/man/fi_getinfo.3.md b/man/fi_getinfo.3.md index a0cf53f6e35..193bb7da6b2 100644 --- a/man/fi_getinfo.3.md +++ b/man/fi_getinfo.3.md @@ -581,20 +581,6 @@ supported set of modes will be returned in the info structure(s). 
must be a contiguous region, though it may or may not be directly adjacent to the payload portion of the buffer. -*FI_NOTIFY_FLAGS_ONLY* -: This bit indicates that general completion flags may not be set by - the provider, and are not needed by the application. If specified, - completion flags which simply report the type of operation that - completed (e.g. send or receive) may not be set. However, - completion flags that are used for remote notifications will still - be set when applicable. See [`fi_cq`(3)](fi_cq.3.html) for details on - which completion flags are valid when this mode bit is enabled. - -*FI_RESTRICTED_COMP* -: This bit indicates that the application will only share completion queues - and counters among endpoints, transmit contexts, and receive contexts that - have the same set of capability flags. - *FI_RX_CQ_DATA* : This mode bit only applies to data transfers that set FI_REMOTE_CQ_DATA. When set, a data transfer that carries remote CQ data will consume a diff --git a/src/fi_tostr.c b/src/fi_tostr.c index 61c2a4b092b..df232ab82c5 100644 --- a/src/fi_tostr.c +++ b/src/fi_tostr.c @@ -282,8 +282,6 @@ static void ofi_tostr_mode(char *buf, size_t len, uint64_t mode) IFFLAGSTRN(mode, FI_ASYNC_IOV, len); IFFLAGSTRN(mode, FI_RX_CQ_DATA, len); IFFLAGSTRN(mode, FI_LOCAL_MR, len); - IFFLAGSTRN(mode, FI_NOTIFY_FLAGS_ONLY, len); - IFFLAGSTRN(mode, FI_RESTRICTED_COMP, len); IFFLAGSTRN(mode, FI_CONTEXT2, len); IFFLAGSTRN(mode, FI_BUFFERED_RECV, len); diff --git a/util/info.c b/util/info.c index c5ae758e6c5..698b340c38f 100644 --- a/util/info.c +++ b/util/info.c @@ -162,8 +162,6 @@ static int str2mode(char *inputstr, uint64_t *value) ORCASE(FI_ASYNC_IOV); ORCASE(FI_RX_CQ_DATA); ORCASE(FI_LOCAL_MR); - ORCASE(FI_NOTIFY_FLAGS_ONLY); - ORCASE(FI_RESTRICTED_COMP); ORCASE(FI_CONTEXT2); fprintf(stderr, "error: Unrecognized mode: %s\n", inputstr); From ec2be575cb82a07d4a3f671c85e87e7f621340b8 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 20 Sep 2023 18:14:27 
-0700 Subject: [PATCH 15/34] core/av: Simplify the AV API Remove support for asynchronous insertions and AV_MAP. The format of the fi_addr_t value will either be indexed based in the standard case or provider defined in more advanced use cases, based on the AV configuration (such as using auth_keys). Signed-off-by: Sean Hefty --- Makefile.am | 1 - fabtests/unit/av_test.c | 444 +--------------------- include/ofi_util.h | 4 - include/rdma/fi_domain.h | 6 - man/fi_av.3.md | 190 +++------ man/fi_domain.3.md | 2 +- prov/efa/src/efa_av.c | 18 +- prov/mrail/src/mrail_av.c | 7 +- prov/psm2/include/rdma/fi_direct_domain.h | 6 - prov/rxd/src/rxd_av.c | 15 +- prov/shm/src/smr_av.c | 11 +- prov/sm2/src/sm2_av.c | 12 +- prov/util/src/rxm_av.c | 20 +- prov/util/src/util_av.c | 83 +--- prov/verbs/src/verbs_dgram_av.c | 7 +- 15 files changed, 77 insertions(+), 749 deletions(-) diff --git a/Makefile.am b/Makefile.am index 6a7e6771988..8aac2b0999b 100644 --- a/Makefile.am +++ b/Makefile.am @@ -305,7 +305,6 @@ dummy_man_pages = \ man/man3/fi_atomic_valid.3 \ man/man3/fi_atomicmsg.3 \ man/man3/fi_atomicv.3 \ - man/man3/fi_av_bind.3 \ man/man3/fi_av_insert.3 \ man/man3/fi_av_insertsvc.3 \ man/man3/fi_av_lookup.3 \ diff --git a/fabtests/unit/av_test.c b/fabtests/unit/av_test.c index 4d2b61e889a..3c4f06ce773 100644 --- a/fabtests/unit/av_test.c +++ b/fabtests/unit/av_test.c @@ -51,91 +51,8 @@ int num_good_addr; char *bad_address; static enum fi_av_type av_type; - static char err_buf[512]; -static int -check_eq_readerr(struct fid_eq *eq, fid_t fid, void *context, int index) -{ - int ret; - struct fi_eq_err_entry err_entry; - - memset(&err_entry, 0, sizeof(err_entry)); - ret = fi_eq_readerr(eq, &err_entry, 0); - if (ret != sizeof(err_entry)) { - sprintf(err_buf, "fi_eq_readerr ret = %d, %s", ret, - (ret < 0) ? 
fi_strerror(-ret) : "unknown"); - return -1; - } - if (err_entry.fid != fid) { - sprintf(err_buf, "fi_eq_readerr fid = %p, should be %p", - err_entry.fid, fid); - return -1; - } - if (err_entry.context != context) { - sprintf(err_buf, "fi_eq_readerr fid = %p, should be %p", - err_entry.context, context); - return -1; - } - if (err_entry.data != index) { - sprintf(err_buf, "fi_eq_readerr index = %" PRIu64 ", should be %d", - err_entry.data, index); - return -1; - } - if (err_entry.err <= 0) { - sprintf(err_buf, "fi_eq_readerr err = %d, should be > 0", - err_entry.err); - return -1; - } - return 0; -} - -static int -check_eq_result(int ret, uint32_t event, struct fi_eq_entry *entry, - fid_t fid, void *context, uint32_t count) -{ - if (ret != sizeof(*entry)) { - sprintf(err_buf, "fi_eq_sread ret = %d, %s", ret, - (ret < 0) ? fi_strerror(-ret) : "unknown"); - return -1; - } - if (event != FI_AV_COMPLETE) { - sprintf(err_buf, "fi_eq_sread event = %u, should be %u", event, - FI_AV_COMPLETE); - return -1; - } - if (entry->fid != fid) { - sprintf(err_buf, "fi_eq_sread fid = %p, should be %p", - entry->fid, fid); - return -1; - } - /* context == NULL means skip check */ - if (context != NULL && entry->context != context) { - sprintf(err_buf, "fi_eq_sread fid = %p, should be %p", entry->context, - context); - return -1; - } - if (count != ~0 && entry->data != count) { - sprintf(err_buf, "count = %" PRIu64 ", should be %u", entry->data, count); - return -1; - } - return 0; -} - -static int -check_eq_sread(struct fid_eq *eq, fid_t fid, void *context, uint32_t count, - int timeout, uint64_t flags) -{ - struct fi_eq_entry entry; - uint32_t event; - int ret; - - event = ~0; - memset(&entry, 0, sizeof(entry)); - - ret = fi_eq_sread(eq, &event, &entry, sizeof(entry), timeout, flags); - return check_eq_result(ret, event, &entry, fid, context, count); -} static int av_test_open_close(enum fi_av_type type, int count, uint64_t flags) @@ -286,10 +203,10 @@ av_create_address_list(char 
*first_address, int base, int num_addr, /* * Tests: - * - synchronous resolution of good address + * - resolution of good address */ static int -av_good_sync() +av_good() { int testret; int ret; @@ -390,10 +307,10 @@ av_null_fi_addr() /* * Tests: - * - synchronous resolution of bad address + * - resolution of bad address */ static int -av_bad_sync() +av_bad() { int testret; int ret; @@ -446,10 +363,10 @@ av_bad_sync() /* * Tests: - * - sync vector with 1 good and 1 bad + * - vector with 1 good and 1 bad */ static int -av_goodbad_vector_sync() +av_goodbad_vector() { int testret; int ret; @@ -517,7 +434,7 @@ av_goodbad_vector_sync() /* * Tests: - * - sync vector with 1 good and 1 bad using FI_SYNC_ERR + * - vector with 1 good and 1 bad using FI_SYNC_ERR */ static int av_goodbad_vector_sync_err() @@ -591,338 +508,6 @@ av_goodbad_vector_sync_err() return TEST_RET_VAL(ret, testret); } -/* - * Tests: - * - async good vector - */ -static int -av_good_vector_async() -{ - int testret; - int ret; - int i; - struct fid_av *av; - struct fi_av_attr attr; - uint8_t addrbuf[4096]; - uint32_t ctx; - int buflen; - fi_addr_t fi_addr[MAX_ADDR]; - - testret = FAIL; - - memset(&attr, 0, sizeof(attr)); - attr.type = av_type; - attr.count = 32; - attr.flags = FI_EVENT; - - av = NULL; - ret = fi_av_open(domain, &attr, &av, NULL); - if (ret != 0) { - sprintf(err_buf, "fi_av_open(%s) = %d, %s", - fi_tostr(&av_type, FI_TYPE_AV_TYPE), - ret, fi_strerror(-ret)); - goto fail; - } - ret = fi_av_bind(av, &eq->fid, 0); - if (ret != 0) { - sprintf(err_buf, "fi_av_bind() = %d, %s", ret, fi_strerror(-ret)); - goto fail; - } - - for (i = 0; i < MAX_ADDR; ++i) { - fi_addr[i] = FI_ADDR_NOTAVAIL; - } - - buflen = sizeof(addrbuf); - ret = av_create_address_list(good_address, 0, num_good_addr, - addrbuf, 0, buflen); - if (ret < 0) { - goto fail; // av_create_address_list filled err_buf - } - - for (i = 0; i < num_good_addr; ++i) { - fi_addr[i] = FI_ADDR_NOTAVAIL; - } - ret = fi_av_insert(av, addrbuf, 
num_good_addr, fi_addr, 0, &ctx); - if (ret) { - sprintf(err_buf, "fi_av_insert ret=%d, %s", ret, fi_strerror(-ret)); - goto fail; - } - - if (check_eq_sread(eq, &av->fid, &ctx, num_good_addr, 20000, 0) != 0) { - goto fail; - } - for (i = 0; i < num_good_addr; ++i) { - if (fi_addr[i] == FI_ADDR_NOTAVAIL) { - sprintf(err_buf, "fi_addr[%d] = FI_ADDR_NOTAVAIL", i); - goto fail; - } - } - - testret = PASS; -fail: - FT_CLOSE_FID(av); - return TEST_RET_VAL(ret, testret); -} - -/* - * Tests: - * - async good vector - */ -static int -av_zero_async() -{ - int testret; - int ret; - struct fid_av *av; - struct fi_av_attr attr; - uint8_t addrbuf[4096]; - uint32_t ctx; - fi_addr_t fi_addr[MAX_ADDR]; - - testret = FAIL; - - memset(&attr, 0, sizeof(attr)); - attr.type = av_type; - attr.count = 32; - attr.flags = FI_EVENT; - - av = NULL; - ret = fi_av_open(domain, &attr, &av, NULL); - if (ret != 0) { - sprintf(err_buf, "fi_av_open(%s) = %d, %s", - fi_tostr(&av_type, FI_TYPE_AV_TYPE), - ret, fi_strerror(-ret)); - goto fail; - } - ret = fi_av_bind(av, &eq->fid, 0); - if (ret != 0) { - sprintf(err_buf, "fi_av_bind() = %d, %s", ret, fi_strerror(-ret)); - goto fail; - } - - ret = fi_av_insert(av, addrbuf, 0, fi_addr, 0, &ctx); - if (ret != 0) { - sprintf(err_buf, "fi_av_insert ret=%d, %s", ret, fi_strerror(-ret)); - goto fail; - } - - if (check_eq_sread(eq, &av->fid, &ctx, 0, 20000, 0) != 0) { - goto fail; - } - - testret = PASS; -fail: - FT_CLOSE_FID(av); - return TEST_RET_VAL(ret, testret); -} - -/* - * Tests: - * - async 2 good vectors - */ -static int -av_good_2vector_async() -{ - int testret; - int ret; - int i; - struct fid_av *av; - struct fi_av_attr attr; - uint8_t addrbuf[4096]; - uint32_t event; - struct fi_eq_entry entry; - uint32_t ctx[2]; - int buflen; - fi_addr_t fi_addr[MAX_ADDR]; - - testret = FAIL; - - memset(&attr, 0, sizeof(attr)); - attr.type = av_type; - attr.count = 32; - attr.flags = FI_EVENT; - - av = NULL; - ret = fi_av_open(domain, &attr, &av, NULL); - if (ret 
!= 0) { - sprintf(err_buf, "fi_av_open(%s) = %d, %s", - fi_tostr(&av_type, FI_TYPE_AV_TYPE), - ret, fi_strerror(-ret)); - goto fail; - } - ret = fi_av_bind(av, &eq->fid, 0); - if (ret != 0) { - sprintf(err_buf, "fi_av_bind() = %d, %s", ret, fi_strerror(-ret)); - goto fail; - } - - for (i = 0; i < MAX_ADDR; ++i) { - fi_addr[i] = FI_ADDR_NOTAVAIL; - } - - buflen = sizeof(addrbuf); - - /* 1st vector is just first address */ - ret = av_create_address_list(good_address, 0, 1, addrbuf, 0, buflen); - if (ret < 0) { - goto fail; // av_create_address_list filled err_buf - } - ret = fi_av_insert(av, addrbuf, 1, fi_addr, FI_MORE, &ctx[0]); - if (ret) { - sprintf(err_buf, "fi_av_insert ret=%d, %s", ret, fi_strerror(-ret)); - goto fail; - } - ctx[0] = 1; - - /* 2nd vector is remaining addresses */ - ret = av_create_address_list(good_address, 1, num_good_addr-1, - addrbuf, 0, buflen); - if (ret < 0) { - goto fail; // av_create_address_list filled err_buf - } - ret = fi_av_insert(av, addrbuf, num_good_addr-1, &fi_addr[1], 0, &ctx[1]); - if (ret != num_good_addr-1) { - sprintf(err_buf, "fi_av_insert ret=%d, %s", ret, fi_strerror(-ret)); - goto fail; - } - ctx[1] = num_good_addr-1; - - /* - * Handle completions in either order - */ - for (i = 0; i < 2; ++i) { - ret = fi_eq_sread(eq, &event, &entry, sizeof(entry), 20000, 0); - ret = check_eq_result(ret, event, &entry, &av->fid, NULL, ~0); - if (ret != 0) { - goto fail; - } - if (entry.context != &ctx[0] && entry.context != &ctx[1]) { - sprintf(err_buf, "bad context: %p", entry.context); - goto fail; - } - if (*(uint32_t *)(entry.context) == ~0) { - sprintf(err_buf, "duplicate context: %p", entry.context); - goto fail; - } - if (*(uint32_t *)(entry.context) != entry.data) { - sprintf(err_buf, "count = %" PRIu64 ", should be %d", entry.data, - *(uint32_t *)(entry.context)); - goto fail; - } - *(uint32_t *)(entry.context) = ~0; - } - for (i = 0; i < num_good_addr; ++i) { - if (fi_addr[i] == FI_ADDR_NOTAVAIL) { - sprintf(err_buf, 
"fi_addr[%d] = FI_ADDR_NOTAVAIL", i); - goto fail; - } - } - - testret = PASS; -fail: - FT_CLOSE_FID(av); - return TEST_RET_VAL(ret, testret); -} - -/* - * Tests: - * - async vector with 1 good and 1 bad - */ -static int -av_goodbad_vector_async() -{ - int testret; - int ret; - int i; - struct fid_av *av; - struct fi_av_attr attr; - uint8_t addrbuf[4096]; - uint32_t event; - uint32_t ctx; - struct fi_eq_entry entry; - int buflen; - fi_addr_t fi_addr[MAX_ADDR]; - - testret = FAIL; - - memset(&attr, 0, sizeof(attr)); - attr.type = av_type; - attr.count = 32; - attr.flags = FI_EVENT; - - av = NULL; - ret = fi_av_open(domain, &attr, &av, NULL); - if (ret != 0) { - sprintf(err_buf, "fi_av_open(%s) = %d, %s", - fi_tostr(&av_type, FI_TYPE_AV_TYPE), - ret, fi_strerror(-ret)); - goto fail; - } - ret = fi_av_bind(av, &eq->fid, 0); - if (ret != 0) { - sprintf(err_buf, "fi_av_bind() = %d, %s", ret, fi_strerror(-ret)); - goto fail; - } - - for (i = 0; i < MAX_ADDR; ++i) { - fi_addr[i] = FI_ADDR_NOTAVAIL; - } - fi_addr[1] = ~FI_ADDR_NOTAVAIL; - - buflen = sizeof(addrbuf); - - /* vector is good address + bad address */ - ret = av_create_address_list(good_address, 0, 1, addrbuf, 0, buflen); - if (ret < 0) { - goto fail; // av_create_address_list filled err_buf - } - ret = av_create_address_list(bad_address, 0, 1, addrbuf, 1, buflen); - if (ret < 0) { - goto fail; // av_create_address_list filled err_buf - } - ret = fi_av_insert(av, addrbuf, 2, fi_addr, 0, &ctx); - if (ret) { - sprintf(err_buf, "fi_av_insert ret=%d, %s", ret, fi_strerror(-ret)); - goto fail; - } - - /* - * Read event after sync, verify we get FI_EAVAIL, then read and - * verify the error completion - */ - ret = fi_eq_sread(eq, &event, &entry, sizeof(entry), 20000, 0); - if (ret != -FI_EAVAIL) { - sprintf(err_buf, "fi_eq_sread ret = %d, should be -FI_EAVAIL", ret); - goto fail; - } - ret = check_eq_readerr(eq, &av->fid, &ctx, 1); - if (ret != 0) { - goto fail; - } - - /* - * Now we should get a good completion, and 
all fi_addr except fd_addr[1] - * should have good values. - */ - if (check_eq_sread(eq, &av->fid, &ctx, 1, 20000, 0) != 0) { - goto fail; - } - if (fi_addr[0] == FI_ADDR_NOTAVAIL) { - sprintf(err_buf, "fi_addr[0] = FI_ADDR_NOTAVAIL"); - goto fail; - } - if (fi_addr[1] != FI_ADDR_NOTAVAIL) { - sprintf(err_buf, "fi_addr[1] != FI_ADDR_NOTAVAIL"); - goto fail; - } - - testret = PASS; -fail: - FT_CLOSE_FID(av); - return TEST_RET_VAL(ret, testret); -} - /* * Test AV insert at different stages */ @@ -1031,23 +616,16 @@ av_insert_stages(void) struct test_entry test_array_good[] = { TEST_ENTRY(av_open_close, "Test open and close AVs of varying sizes"), - TEST_ENTRY(av_good_sync, "Test sync AV insert with good address"), + TEST_ENTRY(av_good, "Test AV insert with good address"), TEST_ENTRY(av_null_fi_addr, "Test AV insert without specifying fi_addr"), - TEST_ENTRY(av_good_vector_async, - "Test async AV insert with vector of good addresses"), - TEST_ENTRY(av_zero_async, "Test async insert AV insert of zero addresses"), - TEST_ENTRY(av_good_2vector_async, - "Test async AV inserts with two address vectors"), TEST_ENTRY(av_insert_stages, "Test AV insert at various stages"), { NULL, "" } }; struct test_entry test_array_bad[] = { - TEST_ENTRY(av_bad_sync, "Test sync AV insert of bad address"), - TEST_ENTRY(av_goodbad_vector_sync, - "Test sync AV insert of 1 good and 1 bad address"), - TEST_ENTRY(av_goodbad_vector_async, - "Test async AV insert with good and bad address"), + TEST_ENTRY(av_bad, "Test AV insert of bad address"), + TEST_ENTRY(av_goodbad_vector, + "Test AV insert of 1 good and 1 bad address"), TEST_ENTRY(av_goodbad_vector_sync_err, "Test AV insert of 1 good, 1 bad address using FI_SYNC_ERR"), { NULL, "" } diff --git a/include/ofi_util.h b/include/ofi_util.h index 81e9eefa393..4b4110f4655 100644 --- a/include/ofi_util.h +++ b/include/ofi_util.h @@ -863,7 +863,6 @@ struct util_av_entry { struct util_av { struct fid_av av_fid; struct util_domain *domain; - struct util_eq 
*eq; ofi_atomic32_t ref; ofi_mutex_t lock; const struct fi_provider *prov; @@ -968,9 +967,6 @@ int ofi_av_insert_addr(struct util_av *av, const void *addr, fi_addr_t *fi_addr) int ofi_av_remove_addr(struct util_av *av, fi_addr_t fi_addr); fi_addr_t ofi_av_lookup_fi_addr_unsafe(struct util_av *av, const void *addr); fi_addr_t ofi_av_lookup_fi_addr(struct util_av *av, const void *addr); -int ofi_av_bind(struct fid *av_fid, struct fid *eq_fid, uint64_t flags); -void ofi_av_write_event(struct util_av *av, uint64_t data, - int err, void *context); int ofi_ip_av_create(struct fid_domain *domain_fid, struct fi_av_attr *attr, struct fid_av **av, void *context); diff --git a/include/rdma/fi_domain.h b/include/rdma/fi_domain.h index d5bc5c18734..ec9834fd89c 100644 --- a/include/rdma/fi_domain.h +++ b/include/rdma/fi_domain.h @@ -477,12 +477,6 @@ fi_av_open(struct fid_domain *domain, struct fi_av_attr *attr, return domain->ops->av_open(domain, attr, av, context); } -static inline int -fi_av_bind(struct fid_av *av, struct fid *fid, uint64_t flags) -{ - return av->fid.ops->bind(&av->fid, fid, flags); -} - static inline int fi_av_insert(struct fid_av *av, const void *addr, size_t count, fi_addr_t *fi_addr, uint64_t flags, void *context) diff --git a/man/fi_av.3.md b/man/fi_av.3.md index 2c9c3eb7453..142b4c58eb0 100644 --- a/man/fi_av.3.md +++ b/man/fi_av.3.md @@ -12,9 +12,6 @@ fi_av \- Address vector operations fi_av_open / fi_close : Open or close an address vector -fi_av_bind -: Associate an address vector with an event queue. - fi_av_insert / fi_av_insertsvc / fi_av_remove : Insert/remove an address into/from the address vector. 
@@ -34,8 +31,6 @@ int fi_av_open(struct fid_domain *domain, struct fi_av_attr *attr, int fi_close(struct fid *av); -int fi_av_bind(struct fid_av *av, struct fid *eq, uint64_t flags); - int fi_av_insert(struct fid_av *av, void *addr, size_t count, fi_addr_t *fi_addr, uint64_t flags, void *context); @@ -108,17 +103,11 @@ endpoint communicates using a proprietary network protocol. The purpose of the AV is to associate a higher-level address with a simpler, more efficient value that can be used by the libfabric API in a fabric agnostic way. The mapped address is of type fi_addr_t and is -returned through an AV insertion call. The fi_addr_t is designed such -that it may be a simple index into an array, a pointer to a structure, -or a compact network address that may be placed directly into protocol -headers. +returned through an AV insertion call. The process of mapping an address is fabric and provider specific, but may involve lengthy address resolution and fabric management -protocols. AV operations are synchronous by default, but may be set -to operate asynchronously by specifying the FI_EVENT flag to -`fi_av_open`. When requesting asynchronous operation, the application -must first bind an event queue to the AV before inserting addresses. See +protocols. AV operations are synchronous by default. See the NOTES section for AV restrictions on duplicate addresses. ## fi_av_open @@ -139,34 +128,36 @@ struct fi_av_attr { ``` *type* -: An AV type corresponds to a conceptual implementation of an address - vector. The type specifies how an application views data stored in - the AV, including how it may be accessed. Valid values are: - -- *FI_AV_MAP* -: Addresses which are inserted into an AV are mapped to a native - fabric address for use by the application. The use of FI_AV_MAP - requires that an application store the returned fi_addr_t value - that is associated with each inserted address. 
The advantage of - using FI_AV_MAP is that the returned fi_addr_t value may contain - encoded address data, which is immediately available when - processing data transfer requests. This can eliminate or reduce - the number of memory lookups needed when initiating a transfer. - The disadvantage of FI_AV_MAP is the increase in memory usage - needed to store the returned addresses. Addresses are stored in - the AV using a provider specific mechanism, including, but not - limited to a tree, hash table, or maintained on the heap. +: This field provides compatibility with the libfabric version 1 series. + The AV type defines a conceptual implementation of an address + vector as visible to the application. The type specifies how an + application views data stored in the AV along with requirements on + how addresses are accessed. Valid values are: - *FI_AV_TABLE* -: Addresses which are inserted into an AV of type FI_AV_TABLE are - accessible using a simple index. Conceptually, the AV may be - treated as an array of addresses, though the provider may implement - the AV using a variety of mechanisms. When FI_AV_TABLE is used, the - returned fi_addr_t is an index, with the index for an inserted - address the same as its insertion order into the table. The index - of the first address inserted into an FI_AV_TABLE will be 0, and - successive insertions will be given sequential indices. Sequential - indices will be assigned across insertion calls on the same AV. +: Addresses inserted into an AV of type FI_AV_TABLE are accessible using + a simple index. Conceptually, the AV may be treated as an array of + addresses. When FI_AV_TABLE is used, the fi_addr_t assigned to an inserted + address is an index that corresponds to its insertion order into the table. + The index of the first address inserted into an FI_AV_TABLE will be 0, + and successive insertions will be given sequential indices. Sequential + indices will be assigned across insertion calls on the same AV.
Because + the fi_addr_t values returned from an insertion call are deterministic, + applications may not need to provide the fi_addr_t output parameters to + insertion calls. The exception is when authentication keys are required + for communication. + + By default, all AVs act as FI_AV_TABLE. + +- *FI_AV_MAP* +: In the libfabric version 1 series, FI_AV_MAP allowed the provider to assign + an arbitrary value (such as a virtual address) to the fi_addr_t value + associated with an inserted address. As a result, the use of FI_AV_MAP required + that an application store the returned fi_addr_t value associated with each + inserted address. In the version 2 series, the behavior of FI_AV_MAP is + aligned with that of FI_AV_TABLE. The returned fi_addr_t values will + correspond with an index based on the address' insertion order. An exception + is made when authentication keys are required for communication. - *FI_AV_UNSPEC* : Provider will choose its preferred AV type. The AV type used will @@ -224,36 +215,6 @@ struct fi_av_attr { *flags* : The following flags may be used when opening an AV. -- *FI_EVENT* -: When the flag FI_EVENT is specified, all insert operations on this - AV will occur asynchronously. There will be one EQ error entry - generated for each failed address insertion, followed by one - non-error event indicating that the insertion operation has - completed. There will always be one non-error completion event for - each insert operation, even if all addresses fail. The context - field in all completions will be the context specified to the insert - call, and the data field in the final completion entry will report - the number of addresses successfully inserted. - If an error occurs during the asynchronous insertion, an error - completion entry is returned (see [`fi_eq`(3)](fi_eq.3.html) for a - discussion of the fi_eq_err_entry error completion struct). 
The - context field of the error completion will be the context that was - specified in the insert call; the data field will contain the index - of the failed address. There will be one error completion returned - for each address that fails to insert into the AV. - - If an AV is opened with FI_EVENT, any insertions attempted before an - EQ is bound to the AV will fail with -FI_ENOEQ. - - Error completions for failed insertions will contain the index of - the failed address in the index field of the error completion entry. - - Note that the order of delivery of insert completions may not match - the order in which the calls to fi_av_insert were made. The only - guarantee is that all error completions for a given call to - fi_av_insert will precede the single associated non-error - completion. - - *FI_READ* : Opens an AV for read-only access. An AV opened for read-only access must be named (name attribute specified), and the AV must exist. @@ -276,14 +237,6 @@ When closing the address vector, there must be no opened endpoints associated with the AV. If resources are still associated with the AV when attempting to close, the call will return -FI_EBUSY. -## fi_av_bind - -Associates an event queue with the AV. If an AV has been opened with -`FI_EVENT`, then an event queue must be bound to the AV before any -insertion calls are attempted. Any calls to insert addresses before -an event queue has been bound will fail with `-FI_ENOEQ`. Flags are -reserved for future use and must be 0. - ## fi_av_insert The fi_av_insert call inserts zero or more addresses into an AV. The @@ -294,19 +247,8 @@ as specified in the addr_format field of the fi_info struct provided when opening the corresponding domain. When using the `FI_ADDR_STR` format, the `addr` parameter should reference an array of strings (char \*\*). -For AV's of type FI_AV_MAP, once inserted addresses have been mapped, -the mapped values are written into the buffer referenced by fi_addr. 
-The fi_addr buffer must remain valid until the AV insertion has -completed and an event has been generated to an associated event -queue. The value of the returned fi_addr should be considered opaque -by the application for AVs of type FI_AV_MAP. The returned value may -point to an internal structure or a provider specific encoding of -low-level addressing data, for example. In the latter case, use of -FI_AV_MAP may be able to avoid memory references during data transfer -operations. - -For AV's of type FI_AV_TABLE, addresses are placed into the table in -order. An address is inserted at the lowest index that corresponds to +Inserted addresses are placed into the table in order. An address is +inserted at the lowest index that corresponds to an unused table location, with indices starting at 0. That is, the first address inserted may be referenced at index 0, the second at index 1, and so forth. When addresses are inserted into an AV table, @@ -320,13 +262,10 @@ Because insertions occur at a pre-determined index, the fi_addr parameter may be NULL. If fi_addr is non-NULL, it must reference an array of fi_addr_t, and the buffer must remain valid until the insertion operation completes. Note that if fi_addr is NULL and -synchronous operation is requested without using FI_SYNC_ERR flag, individual +the FI_SYNC_ERR flag is not set, individual insertion failures cannot be reported and the application must use other calls, such as `fi_av_lookup` to learn which specific addresses -failed to insert. Since fi_av_remove is provider-specific, it is recommended -that calls to fi_av_insert following a call to fi_av_remove always reference a -valid buffer in the fi_addr parameter. Otherwise it may be difficult to -determine what the next assigned index will be. +failed to insert. *flags* : The following flag may be passed to AV insertion calls: fi_av_insert, @@ -343,10 +282,9 @@ determine what the next assigned index will be. Providers are free to ignore FI_MORE. 
- *FI_SYNC_ERR* -: This flag applies to synchronous insertions only, and is used to - retrieve error details of failed insertions. If set, the context - parameter of insertion calls references an array of integers, with - context set to address of the first element of the array. +: This flag may be used to retrieve error details of failed insertions. + If set, the context parameter of insertion calls references an array + of integers, with context set to the address of the first element of the array. The resulting status of attempting to insert each address will be written to the corresponding array location. Successful insertions will be updated to 0. Failures will contain a fabric errno code. @@ -399,20 +337,14 @@ Supported flags are the same as for fi_av_insert. ## fi_av_remove -fi_av_remove removes a set of addresses from an address vector. All -resources associated with the indicated addresses are released. -The removed address - either the mapped address (in the case of FI_AV_MAP) -or index (FI_AV_TABLE) - is invalid until it is returned again by a -new fi_av_insert. - -The behavior of operations in progress that reference the removed addresses -is undefined. +fi_av_remove removes a set of addresses from an address vector. +The corresponding fi_addr_t values are invalidated and may not +be used in data transfer calls. The behavior of operations in +progress that reference the removed addresses is undefined. -The use of fi_av_remove is an optimization that applications may use -to free memory allocated with addresses that will no longer be -accessed. Inserted addresses are not required to be removed. -fi_av_close will automatically cleanup any resources associated with -addresses remaining in the AV when it is invoked. +Note that removing an address may not disable receiving data from the +peer endpoint. fi_av_close will automatically clean up any associated +resources. Flags are reserved for future use and must be 0.
@@ -430,9 +362,7 @@ address, which may be larger than the input value. This function is used to convert an endpoint address, returned by fi_av_insert, into an address that specifies a target receive context. -The specified fi_addr parameter must either be a value returned from -fi_av_insert, in the case of FI_AV_MAP, or an index, in the case of -FI_AV_TABLE. The value for rx_ctx_bits must match that specified in +The value for rx_ctx_bits must match that specified in the AV attributes for the given address. Connected endpoints that support multiple receive contexts, but are @@ -462,12 +392,6 @@ into a given AV in order to avoid duplicate entries. However, providers are required to support the removal, followed by the re-insertion of an address. Only duplicate insertions are restricted. -Providers may implement AV's using a variety of mechanisms. -Specifically, a provider may begin resolving inserted addresses as -soon as they have been added to an AV, even if asynchronous operation -has been specified. Similarly, a provider may lazily release -resources from removed entries. - # USER IDENTIFIERS FOR ADDRESSES As described above, endpoint addresses that are inserted into an AV are @@ -501,26 +425,14 @@ used for all data transfer operations. # RETURN VALUES -Insertion calls for an AV opened for synchronous operation will return -the number of addresses that were successfully inserted. In the case of -failure, the return value will be less than the number of addresses that -was specified. - -Insertion calls for an AV opened for asynchronous operation (with FI_EVENT -flag specified) will return 0 if the operation was successfully initiated. -In the case of failure, a negative fabric errno will be returned. Providers -are allowed to abort insertion operations in the case of an error. Addresses -that are not inserted because they were aborted will fail with an error code -of FI_ECANCELED. 
- -In both the synchronous and asynchronous modes of operation, the fi_addr -buffer associated with a failed or aborted insertion will be set to -FI_ADDR_NOTAVAIL. +Insertion calls will return the number of addresses that were successfully +inserted. In the case of failure, the return value will be less than the +number of addresses that were specified. Providers may abort inserting +addresses on the first insertion failure. The fi_addr buffer associated with +a failed or aborted insertion will be set to FI_ADDR_NOTAVAIL. All other calls return 0 on success, or a negative value corresponding to -fabric errno on error. -Fabric errno values are defined in -`rdma/fi_errno.h`. +fabric errno on error. Fabric errno values are defined in `rdma/fi_errno.h`. # SEE ALSO diff --git a/man/fi_domain.3.md b/man/fi_domain.3.md index 3ee6b8bfe35..7de8ad9e812 100644 --- a/man/fi_domain.3.md +++ b/man/fi_domain.3.md @@ -167,7 +167,7 @@ asynchronously, with the completion reported through the event queue. If an event queue is not bound to the domain with the FI_REG_MR flag, then memory registration requests complete synchronously.
-See [`fi_av_bind`(3)](fi_av_bind.3.html), +See [`fi_ep_bind`(3)](fi_ep_bind.3.html), [`fi_mr_bind`(3)](fi_mr_bind.3.html), [`fi_pep_bind`(3)](fi_pep_bind.3.html), and diff --git a/prov/efa/src/efa_av.c b/prov/efa/src/efa_av.c index af99fcb9660..af0bd560a22 100644 --- a/prov/efa/src/efa_av.c +++ b/prov/efa/src/efa_av.c @@ -694,17 +694,10 @@ int efa_av_insert(struct fid_av *av_fid, const void *addr, /* cancel remaining request and log to event queue */ for (; i < count ; i++) { - if (av->util_av.eq) - ofi_av_write_event(&av->util_av, i, FI_ECANCELED, - context); if (fi_addr) fi_addr[i] = FI_ADDR_NOTAVAIL; } - /* update success to event queue */ - if (av->util_av.eq) - ofi_av_write_event(&av->util_av, success_cnt, 0, context); - return success_cnt; } @@ -784,10 +777,6 @@ static int efa_av_remove(struct fid_av *av_fid, fi_addr_t *fi_addr, if (i < count) { /* something went wrong, so err cannot be zero */ assert(err); - if (av->util_av.eq) { - for (; i < count; ++i) - ofi_av_write_event(&av->util_av, i, FI_ECANCELED, NULL); - } } ofi_mutex_unlock(&av->util_av.lock); @@ -859,15 +848,10 @@ static int efa_av_close(struct fid *fid) return err; } -static int efa_av_bind(struct fid *fid, struct fid *bfid, uint64_t flags) -{ - return ofi_av_bind(fid, bfid, flags); -} - static struct fi_ops efa_av_fi_ops = { .size = sizeof(struct fi_ops), .close = efa_av_close, - .bind = efa_av_bind, + .bind = fi_no_bind, .control = fi_no_control, .ops_open = fi_no_ops_open, }; diff --git a/prov/mrail/src/mrail_av.c b/prov/mrail/src/mrail_av.c index f4d53ae29c4..4d3aeee0548 100644 --- a/prov/mrail/src/mrail_av.c +++ b/prov/mrail/src/mrail_av.c @@ -52,11 +52,6 @@ static int mrail_av_close(struct fid *fid) return retv; } -static int mrail_av_bind(struct fid *fid, struct fid *bfid, uint64_t flags) -{ - return ofi_av_bind(fid, bfid, flags); -} - static const char *mrail_av_straddr(struct fid_av *av, const void *addr, char *buf, size_t *len) { @@ -167,7 +162,7 @@ static struct fi_ops_av 
mrail_av_ops = { static struct fi_ops mrail_av_fi_ops = { .size = sizeof(struct fi_ops), .close = mrail_av_close, - .bind = mrail_av_bind, + .bind = fi_no_bind, .control = fi_no_control, .ops_open = fi_no_ops_open, }; diff --git a/prov/psm2/include/rdma/fi_direct_domain.h b/prov/psm2/include/rdma/fi_direct_domain.h index 6d5b7247d35..8488c9ee177 100644 --- a/prov/psm2/include/rdma/fi_direct_domain.h +++ b/prov/psm2/include/rdma/fi_direct_domain.h @@ -211,12 +211,6 @@ fi_av_open(struct fid_domain *domain, struct fi_av_attr *attr, return psmx2_av_open(domain, attr, av, context); } -static inline int -fi_av_bind(struct fid_av *av, struct fid *fid, uint64_t flags) -{ - return psmx2_av_bind(&av->fid, fid, flags); -} - static inline int fi_av_insert(struct fid_av *av, const void *addr, size_t count, fi_addr_t *fi_addr, uint64_t flags, void *context) diff --git a/prov/rxd/src/rxd_av.c b/prov/rxd/src/rxd_av.c index 12849af0534..e3adc042778 100644 --- a/prov/rxd/src/rxd_av.c +++ b/prov/rxd/src/rxd_av.c @@ -249,8 +249,6 @@ static int rxd_av_insert(struct fid_av *av_fid, const void *addr, size_t count, i, -ret, fi_strerror(-ret)); if (fi_addr) fi_addr[i] = FI_ADDR_NOTAVAIL; - if (av->util_av.eq) - ofi_av_write_event(&av->util_av, i, -ret, context); else if (sync_err) sync_err[i] = -ret; i++; @@ -262,16 +260,10 @@ static int rxd_av_insert(struct fid_av *av_fid, const void *addr, size_t count, for (; i < count; i++) { if (fi_addr) fi_addr[i] = FI_ADDR_NOTAVAIL; - if (av->util_av.eq) - ofi_av_write_event(&av->util_av, i, FI_ECANCELED, context); else if (sync_err) sync_err[i] = FI_ECANCELED; } - if (av->util_av.eq) { - ofi_av_write_event(&av->util_av, success_cnt, 0, context); - return 0; - } return success_cnt; } @@ -392,15 +384,10 @@ static int rxd_av_close(struct fid *fid) return 0; } -static int rxd_av_bind(struct fid *fid, struct fid *bfid, uint64_t flags) -{ - return ofi_av_bind(fid, bfid, flags); -} - static struct fi_ops rxd_av_fi_ops = { .size = sizeof(struct fi_ops), 
.close = rxd_av_close, - .bind = rxd_av_bind, + .bind = fi_no_bind, .control = fi_no_control, .ops_open = fi_no_ops_open, }; diff --git a/prov/shm/src/smr_av.c b/prov/shm/src/smr_av.c index f4805498fae..78d9c189ea4 100644 --- a/prov/shm/src/smr_av.c +++ b/prov/shm/src/smr_av.c @@ -105,8 +105,6 @@ static int smr_av_insert(struct fid_av *av_fid, const void *addr, size_t count, if (ret) { if (fi_addr) fi_addr[i] = util_addr; - if (util_av->eq) - ofi_av_write_event(util_av, i, -ret, context); if (shm_id >= 0) smr_map_del(smr_av->smr_map, shm_id); continue; @@ -138,12 +136,7 @@ static int smr_av_insert(struct fid_av *av_fid, const void *addr, size_t count, } } - if (!(flags & FI_EVENT)) - return succ_count; - - assert(util_av->eq); - ofi_av_write_event(util_av, succ_count, 0, context); - return 0; + return succ_count; } static int smr_av_remove(struct fid_av *av_fid, fi_addr_t *fi_addr, size_t count, @@ -226,7 +219,7 @@ static const char *smr_av_straddr(struct fid_av *av, const void *addr, static struct fi_ops smr_av_fi_ops = { .size = sizeof(struct fi_ops), .close = smr_av_close, - .bind = ofi_av_bind, + .bind = fi_no_bind, .control = fi_no_control, .ops_open = fi_no_ops_open, }; diff --git a/prov/sm2/src/sm2_av.c b/prov/sm2/src/sm2_av.c index 8338aa7086e..18606ee4856 100644 --- a/prov/sm2/src/sm2_av.c +++ b/prov/sm2/src/sm2_av.c @@ -91,18 +91,13 @@ static int sm2_av_insert(struct fid_av *av_fid, const void *addr, size_t count, "resulting AV Found = %d\n", gid); - if (ret) { - if (util_av->eq) - ofi_av_write_event(util_av, i, -ret, context); + if (ret) continue; - } ofi_mutex_lock(&util_av->lock); ret = ofi_av_insert_addr(util_av, &gid, &util_addr); if (ret) { - if (util_av->eq) - ofi_av_write_event(util_av, i, -ret, context); ofi_mutex_unlock(&util_av->lock); continue; } @@ -128,9 +123,6 @@ static int sm2_av_insert(struct fid_av *av_fid, const void *addr, size_t count, srx->owner_ops->foreach_unspec_addr(srx, &sm2_get_addr); } - if (flags & FI_EVENT) - 
ofi_av_write_event(util_av, succ_count, 0, context); - return succ_count; } @@ -214,7 +206,7 @@ static const char *sm2_av_straddr(struct fid_av *av, const void *addr, static struct fi_ops sm2_av_fi_ops = { .size = sizeof(struct fi_ops), .close = sm2_av_close, - .bind = ofi_av_bind, + .bind = fi_no_bind, .control = fi_no_control, .ops_open = fi_no_ops_open, }; diff --git a/prov/util/src/rxm_av.c b/prov/util/src/rxm_av.c index a921b3f09b2..69a68a884db 100644 --- a/prov/util/src/rxm_av.c +++ b/prov/util/src/rxm_av.c @@ -283,26 +283,15 @@ static int rxm_av_insert(struct fid_av *av_fid, const void *addr, size_t count, if (ret < 0) return ret; - if (!av->util_av.eq) - count = ret; + count = ret; ret = rxm_av_add_peers(av, addr, count, fi_addr); if (ret) { - /* If insert was async, ofi_ip_av_insert() will have written - * an event to the EQ with the number of insertions. For - * correctness we need to delay writing the event to the EQ - * until all processing has completed. This should be done - * when separating the rxm av from the util av. For now, - * assume synchronous operation (most common case) and fail - * the insert. This could leave a bogus entry on the EQ. - * But the app should detect that insert failed and is likely - * to abort. - */ rxm_av_remove(av_fid, fi_addr, count, flags); return ret; } - return av->util_av.eq ? 0 : (int) count; + return (int) count; } static int rxm_av_insertsym(struct fid_av *av_fid, const char *node, @@ -332,13 +321,12 @@ static int rxm_av_insertsym(struct fid_av *av_fid, const char *node, ret = rxm_av_add_peers(av, addr, count, fi_addr); if (ret) { - /* See comment in rxm_av_insert. */ rxm_av_remove(av_fid, fi_addr, count, flags); return ret; } free(addr); - return av->util_av.eq ? 
0 : (int) count; + return (int) count; } int rxm_av_insertsvc(struct fid_av *av, const char *node, const char *service, @@ -397,7 +385,7 @@ static int rxm_av_close(struct fid *av_fid) static struct fi_ops rxm_av_fi_ops = { .size = sizeof(struct fi_ops), .close = rxm_av_close, - .bind = ofi_av_bind, + .bind = fi_no_bind, .control = fi_no_control, .ops_open = fi_no_ops_open, }; diff --git a/prov/util/src/util_av.c b/prov/util/src/util_av.c index dcae29fd2ba..cd8a8b42b23 100644 --- a/prov/util/src/util_av.c +++ b/prov/util/src/util_av.c @@ -258,18 +258,6 @@ void *ofi_av_addr_context(struct util_av *av, fi_addr_t fi_addr) int ofi_verify_av_insert(struct util_av *av, uint64_t flags, void *context) { - if (av->flags & FI_EVENT) { - if (!av->eq) { - FI_WARN(av->prov, FI_LOG_AV, "no EQ bound to AV\n"); - return -FI_ENOEQ; - } - - if (flags & FI_SYNC_ERR) { - FI_WARN(av->prov, FI_LOG_AV, "invalid flag\n"); - return -FI_EBADFLAGS; - } - } - if (flags & ~(FI_MORE | FI_SYNC_ERR)) { FI_WARN(av->prov, FI_LOG_AV, "unsupported flags\n"); return -FI_EBADFLAGS; @@ -358,35 +346,6 @@ ofi_av_lookup_addr(struct util_av *av, fi_addr_t fi_addr, size_t *addrlen) return ofi_av_get_addr(av, fi_addr); } -int ofi_av_bind(struct fid *av_fid, struct fid *eq_fid, uint64_t flags) -{ - struct util_av *av; - struct util_eq *eq; - - av = container_of(av_fid, struct util_av, av_fid.fid); - if (eq_fid->fclass != FI_CLASS_EQ) { - FI_WARN(av->prov, FI_LOG_AV, "invalid fid class\n"); - return -FI_EINVAL; - } - - if (!(av->flags & FI_EVENT)) { - FI_WARN(av->prov, FI_LOG_AV, "cannot bind EQ to an AV that was " - "configured for synchronous operation: FI_EVENT flag was" - " not specified in fi_av_attr when AV was opened\n"); - return -FI_EINVAL; - } - - if (flags) { - FI_WARN(av->prov, FI_LOG_AV, "invalid flags\n"); - return -FI_EINVAL; - } - - eq = container_of(eq_fid, struct util_eq, eq_fid.fid); - av->eq = eq; - ofi_atomic_inc32(&eq->ref); - return 0; -} - static void util_av_close(struct util_av *av) { 
HASH_CLEAR(hh, av->hash); @@ -400,9 +359,6 @@ int ofi_av_close_lightweight(struct util_av *av) return -FI_EBUSY; } - if (av->eq) - ofi_atomic_dec32(&av->eq->ref); - ofi_genlock_destroy(&av->ep_list_lock); ofi_atomic_dec32(&av->domain->ref); @@ -572,34 +528,6 @@ int ofi_av_init(struct util_domain *domain, const struct fi_av_attr *attr, return ret; } -void ofi_av_write_event(struct util_av *av, uint64_t data, - int err, void *context) -{ - struct fi_eq_err_entry entry = { 0 }; - size_t size; - ssize_t ret; - uint64_t flags; - - entry.fid = &av->av_fid.fid; - entry.context = context; - entry.data = data; - - if (err) { - FI_INFO(av->prov, FI_LOG_AV, "writing error entry to EQ\n"); - entry.err = err; - size = sizeof(struct fi_eq_err_entry); - flags = UTIL_FLAG_ERROR; - } else { - FI_DBG(av->prov, FI_LOG_AV, "writing entry to EQ\n"); - size = sizeof(struct fi_eq_entry); - flags = 0; - } - - ret = fi_eq_write(&av->eq->eq_fid, FI_AV_COMPLETE, &entry, - size, flags); - if ((size_t) ret != size) - FI_WARN(av->prov, FI_LOG_AV, "error writing to EQ\n"); -} /************************************************************************* * @@ -669,20 +597,13 @@ int ofi_ip_av_insertv(struct util_av *av, const void *addr, size_t addrlen, fi_addr ? 
&fi_addr[i] : NULL, context); if (!ret) success_cnt++; - else if (av->eq) - ofi_av_write_event(av, i, -ret, context); else if (sync_err) sync_err[i] = -ret; } done: FI_DBG(av->prov, FI_LOG_AV, "%d addresses successful\n", success_cnt); - if (av->eq) { - ofi_av_write_event(av, success_cnt, 0, context); - ret = 0; - } else { - ret = success_cnt; - } + ret = success_cnt; return ret; } @@ -984,7 +905,7 @@ static int ip_av_close(struct fid *av_fid) static struct fi_ops ip_av_fi_ops = { .size = sizeof(struct fi_ops), .close = ip_av_close, - .bind = ofi_av_bind, + .bind = fi_no_bind, .control = fi_no_control, .ops_open = fi_no_ops_open, }; diff --git a/prov/verbs/src/verbs_dgram_av.c b/prov/verbs/src/verbs_dgram_av.c index 2142373ab73..cb5449dec12 100644 --- a/prov/verbs/src/verbs_dgram_av.c +++ b/prov/verbs/src/verbs_dgram_av.c @@ -42,11 +42,6 @@ static inline int vrb_dgram_av_is_addr_valid(struct vrb_dgram_av *av, static inline int vrb_dgram_verify_av_flags(struct util_av *av, uint64_t flags) { - if ((av->flags & FI_EVENT) && !av->eq) { - VRB_WARN(FI_LOG_AV, "No EQ bound to AV\n"); - return -FI_ENOEQ; - } - if (flags & ~(FI_MORE)) { VRB_WARN(FI_LOG_AV, "Unsupported flags\n"); return -FI_ENOEQ; @@ -213,7 +208,7 @@ static int vrb_dgram_av_close(struct fid *av_fid) static struct fi_ops vrb_dgram_fi_ops = { .size = sizeof(vrb_dgram_fi_ops), .close = vrb_dgram_av_close, - .bind = ofi_av_bind, + .bind = fi_no_bind, .control = fi_no_control, .ops_open = fi_no_ops_open, }; From c59d16c96744437dae93207eda534cc691ac8b7f Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 21 Sep 2023 12:24:49 -0700 Subject: [PATCH 16/34] core: Move FI_BUFFERED_RECV to internal flag Remove FI_BUFFERED_RECV as an exported API option. Since it's currently used internally between mrail and rxm, make it an internal only option. It has a limited use case for multirail over rxm over connected endpoints where shared receive queues are not available. 
With shared receive queues, the feature wouldn't be needed, as mrail could own the buffers outright. Signed-off-by: Sean Hefty --- fabtests/unit/eq_test.c | 2 +- include/ofi.h | 4 ++ include/rdma/fabric.h | 2 +- man/fi_cq.3.md | 14 ------- man/fi_endpoint.3.md | 28 ------------- man/fi_getinfo.3.md | 11 ----- man/fi_msg.3.md | 84 ------------------------------------- man/fi_tagged.3.md | 45 -------------------- prov/mrail/src/mrail_init.c | 6 +-- prov/rxm/src/rxm_cq.c | 6 +-- prov/rxm/src/rxm_ep.c | 24 ----------- prov/rxm/src/rxm_init.c | 4 +- prov/rxm/src/rxm_msg.c | 4 +- prov/rxm/src/rxm_tagged.c | 6 +-- src/fi_tostr.c | 1 - 15 files changed, 18 insertions(+), 223 deletions(-) diff --git a/fabtests/unit/eq_test.c b/fabtests/unit/eq_test.c index f7a03a0e7d5..80cfeb4a720 100644 --- a/fabtests/unit/eq_test.c +++ b/fabtests/unit/eq_test.c @@ -610,7 +610,7 @@ int main(int argc, char **argv) } hints->mode = FI_CONTEXT | FI_CONTEXT2 | FI_MSG_PREFIX | FI_ASYNC_IOV | - FI_RX_CQ_DATA | FI_BUFFERED_RECV; + FI_RX_CQ_DATA; hints->domain_attr->mr_mode = ~(FI_MR_BASIC | FI_MR_SCALABLE); ret = fi_getinfo(FT_FIVERSION, NULL, 0, 0, hints, &fi); diff --git a/include/ofi.h b/include/ofi.h index 60146dce9a3..ed7799614b2 100644 --- a/include/ofi.h +++ b/include/ofi.h @@ -83,6 +83,10 @@ enum { #define OFI_GETINFO_HIDDEN (1ULL << 60) #define OFI_OFFLOAD_PROV_ONLY (1ULL << 61) +/* internal mode bit carried over from v1 */ +#define OFI_BUFFERED_RECV (1ULL << 51) + + #define OFI_ORDER_RAR_SET (FI_ORDER_RAR | FI_ORDER_RMA_RAR | \ FI_ORDER_ATOMIC_RAR) #define OFI_ORDER_RAW_SET (FI_ORDER_RAW | FI_ORDER_RMA_RAW | \ diff --git a/include/rdma/fabric.h b/include/rdma/fabric.h index b02a17e6169..72133042c3a 100644 --- a/include/rdma/fabric.h +++ b/include/rdma/fabric.h @@ -370,7 +370,7 @@ static inline uint8_t fi_tc_dscp_get(uint32_t tclass) /* #define FI_NOTIFY_FLAGS_ONLY (1ULL << 54) */ /* #define FI_RESTRICTED_COMP (1ULL << 53) */ #define FI_CONTEXT2 (1ULL << 52) -#define FI_BUFFERED_RECV 
(1ULL << 51) +/* #define FI_BUFFERED_RECV (1ULL << 51) */ /* #define FI_PEER_TRANSFER (1ULL << 36) */ struct fi_tx_attr { diff --git a/man/fi_cq.3.md b/man/fi_cq.3.md index f7b81c9147a..4a45ad50ab3 100644 --- a/man/fi_cq.3.md +++ b/man/fi_cq.3.md @@ -621,20 +621,6 @@ operation. The following completion flags are defined. buffer has been released, and the completion entry is not associated with a received message. -*FI_MORE* -: See the 'Buffered Receives' section in `fi_msg`(3) for more details. - This flag is associated with receive completions on endpoints that - have FI_BUFFERED_RECV mode enabled. When set to one, it indicates that - the buffer referenced by the completion is limited by the - FI_OPT_BUFFERED_LIMIT threshold, and additional message data must be - retrieved by the application using an FI_CLAIM operation. - -*FI_CLAIM* -: See the 'Buffered Receives' section in `fi_msg`(3) for more details. - This flag is set on completions associated with receive operations - that claim buffered receive data. Note that this flag only applies - to endpoints configured with the FI_BUFFERED_RECV mode bit. - # COMPLETION EVENT SEMANTICS Libfabric defines several completion 'levels', identified using operational diff --git a/man/fi_endpoint.3.md b/man/fi_endpoint.3.md index 95f83a60de2..e3411e289f6 100644 --- a/man/fi_endpoint.3.md +++ b/man/fi_endpoint.3.md @@ -484,34 +484,6 @@ The following option levels and option names and parameters are defined. *FI_OPT_ENDPOINT* -- *FI_OPT_BUFFERED_LIMIT - size_t* -: Defines the maximum size of a buffered message that will be reported - to users as part of a receive completion when the FI_BUFFERED_RECV mode - is enabled on an endpoint. - - fi_getopt() will return the currently configured threshold, or the - provider's default threshold if one has not be set by the application. - fi_setopt() allows an application to configure the threshold. 
If the - provider cannot support the requested threshold, it will fail the - fi_setopt() call with FI_EMSGSIZE. Calling fi_setopt() with the - threshold set to SIZE_MAX will set the threshold to the maximum - supported by the provider. fi_getopt() can then be used to retrieve - the set size. - - In most cases, the sending and receiving endpoints must be - configured to use the same threshold value, and the threshold must be - set prior to enabling the endpoint. - -- *FI_OPT_BUFFERED_MIN - size_t* -: Defines the minimum size of a buffered message that will be reported. - Applications would set this to a size that's big enough to decide whether - to discard or claim a buffered receive or when to claim a buffered receive - on getting a buffered receive completion. The value is typically used by a - provider when sending a rendezvous protocol request where it would send - at least FI_OPT_BUFFERED_MIN bytes of application data along with it. A smaller - sized rendezvous protocol message usually results in better latency for the - overall transfer of a large message. - - *FI_OPT_CM_DATA_SIZE - size_t* : Defines the size of available space in CM messages for user-defined data. This value limits the amount of data that applications can exchange diff --git a/man/fi_getinfo.3.md b/man/fi_getinfo.3.md index 193bb7da6b2..d217c27fcd2 100644 --- a/man/fi_getinfo.3.md +++ b/man/fi_getinfo.3.md @@ -494,17 +494,6 @@ supported set of modes will be returned in the info structure(s). related memory descriptor array, until the associated operation has completed. -*FI_BUFFERED_RECV* -: The buffered receive mode bit indicates that the provider owns the - data buffer(s) that are accessed by the networking layer for received - messages. Typically, this implies that data must be copied from the - provider buffer into the application buffer. Applications that can - handle message processing from network allocated data buffers can set - this mode bit to avoid copies. 
For full details on application - requirements to support this mode, see the 'Buffered Receives' section - in [`fi_msg`(3)](fi_msg.3.html). This mode bit applies to FI_MSG and - FI_TAGGED receive operations. - *FI_CONTEXT* : Specifies that the provider requires that applications use struct fi_context as their per operation context parameter for operations diff --git a/man/fi_msg.3.md b/man/fi_msg.3.md index dd608ac3c95..e2f4320031c 100644 --- a/man/fi_msg.3.md +++ b/man/fi_msg.3.md @@ -305,90 +305,6 @@ fi_sendmsg. be used in all multicast transfers, in conjunction with a multicast fi_addr_t. -# Buffered Receives - -Buffered receives indicate that the networking layer allocates and -manages the data buffers used to receive network data transfers. As -a result, received messages must be copied from the network buffers -into application buffers for processing. However, applications can -avoid this copy if they are able to process the message in place -(directly from the networking buffers). - -Handling buffered receives differs based on the size of the message -being sent. In general, smaller messages are passed directly to the -application for processing. However, for large messages, an application -will only receive the start of the message and must claim the rest. -The details for how small messages are reported and large messages may -be claimed are described below. - -When a provider receives a message, it will write an entry to the completion -queue associated with the receiving endpoint. For discussion purposes, -the completion queue is assumed to be configured for FI_CQ_FORMAT_DATA. -Since buffered receives are not associated with application posted buffers, -the CQ entry op_context will point to a struct fi_recv_context. - -{% highlight c %} -struct fi_recv_context { - struct fid_ep *ep; - void *context; -}; -{% endhighlight %} - -The 'ep' field will point to the receiving endpoint or Rx context, and -'context' will be NULL. 
The CQ entry's 'buf' will point to a provider -managed buffer where the start of the received message is located, and -'len' will be set to the total size of the message. - -The maximum sized message that a provider can buffer is limited by -an FI_OPT_BUFFERED_LIMIT. This threshold can be obtained and may be adjusted -by the application using the fi_getopt and fi_setopt calls, respectively. -Any adjustments must be made prior to enabling the endpoint. The CQ entry 'buf' -will point to a buffer of received data. If the sent message is larger than the -buffered amount, the CQ entry 'flags' will have the FI_MORE bit set. When the -FI_MORE bit is set, 'buf' will reference at least FI_OPT_BUFFERED_MIN bytes -of data (see fi_endpoint.3 for more info). - -After being notified that a buffered receive has arrived, -applications must either claim or discard the message. Typically, -small messages are processed and discarded, while large messages -are claimed. However, an application is free to claim or discard any -message regardless of message size. - -To claim a message, an application must post a receive operation with the -FI_CLAIM flag set. The struct fi_recv_context returned as part of the -notification must be provided as the receive operation's context. The -struct fi_recv_context contains a 'context' field. Applications may -modify this field prior to claiming the message. When the claim -operation completes, a standard receive completion entry will be -generated on the completion queue. The 'context' of the associated -CQ entry will be set to the 'context' value passed in through -the fi_recv_context structure, and the CQ entry flags will have the -FI_CLAIM bit set. - -Buffered receives that are not claimed must be discarded by the application -when it is done processing the CQ entry data. To discard a message, an -application must post a receive operation with the FI_DISCARD flag set. 
-The struct fi_recv_context returned as part of the notification must be -provided as the receive operation's context. When the FI_DISCARD flag is set -for a receive operation, the receive input buffer(s) and length parameters -are ignored. - -IMPORTANT: Buffered receives must be claimed or discarded in a timely manner. -Failure to do so may result in increased memory usage for network buffering -or communication stalls. Once a buffered receive has been claimed or -discarded, the original CQ entry 'buf' or struct fi_recv_context data may no -longer be accessed by the application. - -The use of the FI_CLAIM and FI_DISCARD operation flags is also -described with respect to tagged message transfers in fi_tagged.3. -Buffered receives of tagged messages will include the message tag as part -of the CQ entry, if available. - -The handling of buffered receives follows all message ordering -restrictions assigned to an endpoint. For example, completions -may indicate the order in which received messages arrived at the -receiver based on the endpoint attributes. - # NOTES If an endpoint has been configured with FI_MSG_PREFIX, the application diff --git a/man/fi_tagged.3.md b/man/fi_tagged.3.md index 39f4eff12a1..eca1080beea 100644 --- a/man/fi_tagged.3.md +++ b/man/fi_tagged.3.md @@ -319,11 +319,6 @@ The following flags may be used with fi_trecvmsg. fi_context structure used for an FI_PEEK + FI_CLAIM operation must be used by the paired FI_CLAIM request. - This flag also applies to endpoints configured for FI_BUFFERED_RECV. - When set, it is used to retrieve a tagged message that - was buffered by the provider. See Buffered Tagged Receives section for - details. - *FI_DISCARD* : This flag may be used in conjunction with either FI_PEEK or FI_CLAIM. If this flag is used in conjunction with FI_PEEK, it indicates if the @@ -333,48 +328,8 @@ The following flags may be used with fi_trecvmsg. 
FI_CLAIM in order to discard a message previously claimed using an FI_PEEK + FI_CLAIM request. - This flag also applies to endpoints configured for FI_BUFFERED_RECV. - When set, it indicates that the provider should free - a buffered messages. See Buffered Tagged Receives section for details. - If this flag is set, the input buffer(s) and length parameters are ignored. -# Buffered Tagged Receives - -See [`fi_msg`(3)](fi_msg.3.html) for an introduction to buffered receives. -The handling of buffered receives differs between fi_msg operations and -fi_tagged. Although the provider is responsible for allocating and -managing network buffers, the application is responsible for identifying -the tags that will be used to match incoming messages. The provider -handles matching incoming receives to the application specified tags. - -When FI_BUFFERED_RECV is enabled, the application posts the tags that -will be used for matching purposes. Tags are posted using fi_trecv, -fi_trecvv, and fi_trecvmsg; however, parameters related -to the input buffers are ignored (e.g. buf, len, iov, desc). When -a provider receives a message for which there is a matching tag, -it will write an entry to the completion queue associated with the -receiving endpoint. - -For discussion purposes, the completion queue is assumed to be configured -for FI_CQ_FORMAT_TAGGED. The op_context field will point to a struct -fi_recv_context. - -{% highlight c %} -struct fi_recv_context { - struct fid_ep *ep; - void *context; -}; -{% endhighlight %} - -The 'ep' field will be NULL. The 'context' field will match the -application context specified when posting the tag. Other fields are -set as defined in [`fi_msg`(3)](fi_msg.3.html). - -After being notified that a buffered receive has arrived, -applications must either claim or discard the message as described in -[`fi_msg`(3)](fi_msg.3.html). - # RETURN VALUE The tagged send and receive calls return 0 on success. 
On error, a diff --git a/prov/mrail/src/mrail_init.c b/prov/mrail/src/mrail_init.c index 5a230eb9967..94c4b1acb1f 100644 --- a/prov/mrail/src/mrail_init.c +++ b/prov/mrail/src/mrail_init.c @@ -182,7 +182,7 @@ static struct fi_info *mrail_create_core_hints(const struct fi_info *hints) } } - core_hints->mode |= FI_BUFFERED_RECV; + core_hints->mode |= OFI_BUFFERED_RECV; core_hints->caps |= FI_SOURCE; if (!core_hints->fabric_attr) { @@ -378,7 +378,7 @@ static int mrail_get_core_info(uint32_t version, const char *node, const char *s static void mrail_adjust_info(struct fi_info *info, const struct fi_info *hints) { - info->mode &= ~FI_BUFFERED_RECV; + info->mode &= ~OFI_BUFFERED_RECV; if (!hints) return; @@ -432,8 +432,6 @@ static struct fi_info *mrail_get_prefix_info(struct fi_info *core_info, int id) assert(fi->tx_attr->iov_limit); fi->tx_attr->iov_limit--; - /* Claiming messages larger than FI_OPT_BUFFERED_LIMIT would consume - * a scatter/gather entry for mrail_hdr */ fi->rx_attr->iov_limit--; if (fi->tx_attr->inject_size < sizeof(struct mrail_hdr)) diff --git a/prov/rxm/src/rxm_cq.c b/prov/rxm/src/rxm_cq.c index b5cddc59051..8e062869064 100644 --- a/prov/rxm/src/rxm_cq.c +++ b/prov/rxm/src/rxm_cq.c @@ -165,7 +165,7 @@ static void rxm_finish_recv(struct rxm_rx_buf *rx_buf, size_t done_len) } if (rx_buf->recv_entry->flags & FI_COMPLETION || - rx_buf->ep->rxm_info->mode & FI_BUFFERED_RECV) { + rx_buf->ep->rxm_info->mode & OFI_BUFFERED_RECV) { rxm_cq_write_recv_comp(rx_buf, rx_buf->recv_entry->context, rx_buf->recv_entry->comp_flags | rx_buf->pkt.hdr.flags | @@ -436,7 +436,7 @@ static void rxm_handle_seg_data(struct rxm_rx_buf *rx_buf) int done; rxm_process_seg_data(rx_buf, &done); - if (done || !(rx_buf->ep->rxm_info->mode & FI_BUFFERED_RECV)) + if (done || !(rx_buf->ep->rxm_info->mode & OFI_BUFFERED_RECV)) return; recv_entry = rx_buf->recv_entry; @@ -767,7 +767,7 @@ static ssize_t rxm_handle_recv_comp(struct rxm_rx_buf *rx_buf) match_attr.addr = 
rx_buf->conn->peer->fi_addr; } - if (rx_buf->ep->rxm_info->mode & FI_BUFFERED_RECV) { + if (rx_buf->ep->rxm_info->mode & OFI_BUFFERED_RECV) { rxm_finish_buf_recv(rx_buf); return 0; } diff --git a/prov/rxm/src/rxm_ep.c b/prov/rxm/src/rxm_ep.c index 116017c036b..3174675abed 100644 --- a/prov/rxm/src/rxm_ep.c +++ b/prov/rxm/src/rxm_ep.c @@ -489,11 +489,6 @@ static int rxm_ep_getopt(fid_t fid, int level, int optname, void *optval, *(size_t *)optval = rxm_ep->buffered_min; *optlen = sizeof(size_t); break; - case FI_OPT_BUFFERED_LIMIT: - assert(sizeof(rxm_ep->buffered_limit) == sizeof(size_t)); - *(size_t *)optval = rxm_ep->buffered_limit; - *optlen = sizeof(size_t); - break; default: return -FI_ENOPROTOOPT; } @@ -535,25 +530,6 @@ static int rxm_ep_setopt(fid_t fid, int level, int optname, rxm_ep->buffered_min); } break; - case FI_OPT_BUFFERED_LIMIT: - if (rxm_ep->rx_pool) { - FI_WARN(&rxm_prov, FI_LOG_EP_DATA, - "Endpoint already enabled. Can't set opt now!\n"); - ret = -FI_EOPBADSTATE; - /* We do not check for maximum as we allow sizes up to SIZE_MAX */ - } else if (*(size_t *)optval < rxm_ep->buffered_min) { - FI_WARN(&rxm_prov, FI_LOG_EP_DATA, - "Invalid value for FI_OPT_BUFFERED_LIMIT: %zu" - " ( < FI_OPT_BUFFERED_MIN: %zu)\n", - *(size_t *)optval, rxm_ep->buffered_min); - ret = -FI_EINVAL; - } else { - rxm_ep->buffered_limit = *(size_t *)optval; - FI_INFO(&rxm_prov, FI_LOG_CORE, - "FI_OPT_BUFFERED_LIMIT set to %zu\n", - rxm_ep->buffered_limit); - } - break; case FI_OPT_CUDA_API_PERMITTED: if (!hmem_ops[FI_HMEM_CUDA].initialized) { FI_WARN(&rxm_prov, FI_LOG_EP_DATA, diff --git a/prov/rxm/src/rxm_init.c b/prov/rxm/src/rxm_init.c index 3ca3c22593f..011acb88e94 100644 --- a/prov/rxm/src/rxm_init.c +++ b/prov/rxm/src/rxm_init.c @@ -461,8 +461,8 @@ static void rxm_alter_info(const struct fi_info *hints, struct fi_info *info) cur->rx_attr->caps &= ~FI_DIRECTED_RECV; } - if (hints->mode & FI_BUFFERED_RECV) - cur->mode |= FI_BUFFERED_RECV; + if (hints->mode & 
OFI_BUFFERED_RECV) + cur->mode |= OFI_BUFFERED_RECV; if (hints->caps & FI_ATOMIC) { cur->tx_attr->msg_order &= diff --git a/prov/rxm/src/rxm_msg.c b/prov/rxm/src/rxm_msg.c index e48746394f2..40d01608d56 100644 --- a/prov/rxm/src/rxm_msg.c +++ b/prov/rxm/src/rxm_msg.c @@ -240,7 +240,7 @@ rxm_recvmsg(struct fid_ep *ep_fid, const struct fi_msg *msg, uint64_t flags) struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep, util_ep.ep_fid.fid); - if (rxm_ep->rxm_info->mode & FI_BUFFERED_RECV) + if (rxm_ep->rxm_info->mode & OFI_BUFFERED_RECV) return rxm_buf_recv(rxm_ep, msg->msg_iov, msg->desc, msg->iov_count, msg->addr, msg->context, flags | rxm_ep->util_ep.rx_msg_flags); @@ -329,7 +329,7 @@ rxm_alloc_rndv_buf(struct rxm_ep *rxm_ep, struct rxm_conn *rxm_conn, len = sizeof(struct rxm_pkt) + sizeof(struct rxm_rndv_hdr); - if (rxm_ep->rxm_info->mode & FI_BUFFERED_RECV) { + if (rxm_ep->rxm_info->mode & OFI_BUFFERED_RECV) { ret = ofi_copy_from_hmem_iov(rxm_pkt_rndv_data(&(*rndv_buf)->pkt), rxm_ep->buffered_min, iface, device, iov, count, 0); diff --git a/prov/rxm/src/rxm_tagged.c b/prov/rxm/src/rxm_tagged.c index 7c0883d88c0..78e3d3ff0e9 100644 --- a/prov/rxm/src/rxm_tagged.c +++ b/prov/rxm/src/rxm_tagged.c @@ -166,14 +166,14 @@ rxm_trecvmsg(struct fid_ep *ep_fid, const struct fi_msg_tagged *msg, flags |= rxm_ep->util_ep.rx_msg_flags; if (!(flags & (FI_CLAIM | FI_PEEK)) && - !(rxm_ep->rxm_info->mode & FI_BUFFERED_RECV)) { + !(rxm_ep->rxm_info->mode & OFI_BUFFERED_RECV)) { return rxm_trecv_common(rxm_ep, msg->msg_iov, msg->desc, msg->iov_count, msg->addr, msg->tag, msg->ignore, context, flags); } ofi_genlock_lock(&rxm_ep->util_ep.lock); - if (rxm_ep->rxm_info->mode & FI_BUFFERED_RECV) { + if (rxm_ep->rxm_info->mode & OFI_BUFFERED_RECV) { recv_ctx = msg->context; context = recv_ctx->context; rx_buf = container_of(recv_ctx, struct rxm_rx_buf, recv_context); @@ -216,7 +216,7 @@ rxm_trecvmsg(struct fid_ep *ep_fid, const struct fi_msg_tagged *msg, goto unlock; } - if 
(rxm_ep->rxm_info->mode & FI_BUFFERED_RECV) + if (rxm_ep->rxm_info->mode & OFI_BUFFERED_RECV) recv_entry->comp_flags |= FI_CLAIM; rx_buf->recv_entry = recv_entry; diff --git a/src/fi_tostr.c b/src/fi_tostr.c index df232ab82c5..5567bd93493 100644 --- a/src/fi_tostr.c +++ b/src/fi_tostr.c @@ -283,7 +283,6 @@ static void ofi_tostr_mode(char *buf, size_t len, uint64_t mode) IFFLAGSTRN(mode, FI_RX_CQ_DATA, len); IFFLAGSTRN(mode, FI_LOCAL_MR, len); IFFLAGSTRN(mode, FI_CONTEXT2, len); - IFFLAGSTRN(mode, FI_BUFFERED_RECV, len); ofi_remove_comma(buf); } From ce9622f7fd70fc4a7bfe9aa885a506afa6d822d5 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 21 Sep 2023 16:47:09 -0700 Subject: [PATCH 17/34] core: Document preferred threading model for scalable endpoints Recommend that applications and providers use FI_THREAD_COMPLETION as the preferred threading model for lockless operation when using scalable endpoints. This helps align application design with the provider implementation. Signed-off-by: Sean Hefty --- man/fi_domain.3.md | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/man/fi_domain.3.md b/man/fi_domain.3.md index 7de8ad9e812..1bd38c218b0 100644 --- a/man/fi_domain.3.md +++ b/man/fi_domain.3.md @@ -243,18 +243,16 @@ serialization in their access of provider allocated resources and interfaces enables a provider to eliminate lower-level locks. *FI_THREAD_COMPLETION* -: The completion threading model is intended for providers that make use - of manual progress. Applications must serialize access to all objects - that are associated through the use of having a shared completion - structure. This includes endpoint, transmit context, receive context, - completion queue, counter, wait set, and poll set objects. - - For example, threads must serialize access to an endpoint and its - bound completion queue(s) and/or counters. Access to endpoints that - share the same completion queue must also be serialized. 
- - The use of FI_THREAD_COMPLETION can increase parallelism over - FI_THREAD_SAFE, but requires the use of isolated resources. +: The completion threading model is best suited for multi-threaded applications + using scalable endpoints which desire lockless operation. Applications must + serialize access to all objects that are associated by a common completion + mechanism (for example, endpoints bound to the same CQ or counter). It is + recommended that providers which support scalable endpoints also support this + threading model. + + Applications wanting to leverage FI_THREAD_COMPLETION should allocate + transmit contexts, receive contexts, and completion queues and counters to + individual threads. *FI_THREAD_DOMAIN* : A domain serialization model requires applications to serialize From f3ac4bc3312ce5ac5044e17d573e871c3180b8d2 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 21 Sep 2023 16:58:23 -0700 Subject: [PATCH 18/34] core: Simplify threading models Remove overly complicated threading models and focus on specific models to allow better alignment between application designs and provider implementation. Use FI_THREAD_DOMAIN as the preferred lockless threading model for standard endpoints. Signed-off-by: Sean Hefty --- fabtests/man/fabtests.7.md | 3 +- fabtests/ubertest/config.c | 2 - man/fi_domain.3.md | 49 +++++---------------- prov/opx/configure.m4 | 4 +- prov/opx/include/rdma/opx/fi_opx_internal.h | 3 -- prov/util/src/util_attr.c | 10 ++--- src/fi_tostr.c | 2 - 7 files changed, 16 insertions(+), 57 deletions(-) diff --git a/fabtests/man/fabtests.7.md b/fabtests/man/fabtests.7.md index e588fb11773..d916d529c29 100644 --- a/fabtests/man/fabtests.7.md +++ b/fabtests/man/fabtests.7.md @@ -361,8 +361,7 @@ The following keys and respective key values may be used in the config file. 
: FI_WAIT_NONE, FI_WAIT_UNSPEC, FI_WAIT_FD, FI_WAIT_MUTEX_COND *threading* -: FI_THREAD_UNSPEC, FI_THREAD_SAFE, FI_THREAD_FID, FI_THREAD_DOMAIN, - FI_THREAD_COMPLETION, FI_THREAD_ENDPOINT +: FI_THREAD_UNSPEC, FI_THREAD_SAFE, FI_THREAD_DOMAIN, FI_THREAD_COMPLETION *progress* : FI_PROGRESS_MANUAL, FI_PROGRESS_AUTO, FI_PROGRESS_UNSPEC diff --git a/fabtests/ubertest/config.c b/fabtests/ubertest/config.c index eaa0dac6a3e..7ab97fc482f 100644 --- a/fabtests/ubertest/config.c +++ b/fabtests/ubertest/config.c @@ -373,10 +373,8 @@ static int ft_parse_num(char *str, int len, struct key_t *key, void *buf) } else if (!strncmp(key->str, "threading", strlen("threading"))) { TEST_ENUM_SET_N_RETURN(str, len, FI_THREAD_UNSPEC, int, buf); TEST_ENUM_SET_N_RETURN(str, len, FI_THREAD_SAFE, int, buf); - TEST_ENUM_SET_N_RETURN(str, len, FI_THREAD_FID, int, buf); TEST_ENUM_SET_N_RETURN(str, len, FI_THREAD_DOMAIN, int, buf); TEST_ENUM_SET_N_RETURN(str, len, FI_THREAD_COMPLETION, int, buf); - TEST_ENUM_SET_N_RETURN(str, len, FI_THREAD_ENDPOINT, int, buf); FT_ERR("Unknown threading level"); } else if (!strncmp(key->str, "constant_caps", strlen("constant_caps"))) { TEST_ENUM_SET_N_RETURN(str, len, FI_RMA, uint64_t, buf); diff --git a/man/fi_domain.3.md b/man/fi_domain.3.md index 1bd38c218b0..e5521bf2117 100644 --- a/man/fi_domain.3.md +++ b/man/fi_domain.3.md @@ -246,49 +246,20 @@ interfaces enables a provider to eliminate lower-level locks. : The completion threading model is best suited for multi-threaded applications using scalable endpoints which desire lockless operation. Applications must serialize access to all objects that are associated by a common completion - mechanism (for example, endpoints bound to the same CQ or counter). It is - recommended that providers which support scalable endpoints also support this - threading model. + mechanism (for example, transmit and receive contexts bound to the same CQ + or counter). 
It is recommended that providers which support scalable + endpoints support this threading model. - Applications wanting to leverage FI_THREAD_COMPLETION should allocate - transmit contexts, receive contexts, and completion queues and counters to + Applications wanting to leverage FI_THREAD_COMPLETION should dedicate + transmit contexts, receive contexts, completion queues, and counters to individual threads. *FI_THREAD_DOMAIN* -: A domain serialization model requires applications to serialize - access to all objects belonging to a domain. - -*FI_THREAD_ENDPOINT* -: The endpoint threading model is similar to FI_THREAD_FID, but with - the added restriction that serialization is required when accessing - the same endpoint, even if multiple transmit and receive contexts are - used. Conceptually, FI_THREAD_ENDPOINT maps well to providers that - implement fabric services in hardware but use a single command - queue to access different data flows. - -*FI_THREAD_FID* -: A fabric descriptor (FID) serialization model requires applications - to serialize access to individual fabric resources associated with - data transfer operations and completions. Multiple threads must - be serialized when accessing the same endpoint, transmit context, - receive context, completion queue, counter, wait set, or poll set. - Serialization is required only by threads accessing the same object. - - For example, one thread may be initiating a data transfer on an - endpoint, while another thread reads from a completion queue - associated with the endpoint. - - Serialization to endpoint access is only required when accessing - the same endpoint data flow. Multiple threads may initiate transfers - on different transmit contexts of the same endpoint without serializing, - and no serialization is required between the submission of data - transmit requests and data receive operations. 
- - In general, FI_THREAD_FID allows the provider to be implemented - without needing internal locking when handling data transfers. - Conceptually, FI_THREAD_FID maps well to providers that implement - fabric services in hardware and provide separate command queues to - different data flows. +: The domain threading model is best suited for single-threaded applications + and multi-threaded applications using standard endpoints which desire lockless + operation. Applications must serialize access to all objects + under the same domain. This includes endpoints, transmit and receive contexts, + completion queues and counters, and registered memory regions. *FI_THREAD_SAFE* : A thread safe serialization model allows a multi-threaded diff --git a/prov/opx/configure.m4 b/prov/opx/configure.m4 index c98a13382af..3da849ffd45 100644 --- a/prov/opx/configure.m4 +++ b/prov/opx/configure.m4 @@ -81,8 +81,8 @@ AC_DEFUN([FI_OPX_CONFIGURE],[ AC_SUBST(opx_mr, [$OPX_MR_MODE]) AC_DEFINE_UNQUOTED(OPX_MR, [$OPX_MR_MODE], [fabric direct memory region]) - dnl Only FI_THREAD_ENDPOINT is supported by the opx provider - OPX_THREAD_MODE=FI_THREAD_ENDPOINT + dnl Only FI_THREAD_DOMAIN is supported by the opx provider + OPX_THREAD_MODE=FI_THREAD_DOMAIN AC_SUBST(opx_thread, [$OPX_THREAD_MODE]) AC_DEFINE_UNQUOTED(OPX_THREAD, [$OPX_THREAD_MODE], [fabric direct thread]) diff --git a/prov/opx/include/rdma/opx/fi_opx_internal.h b/prov/opx/include/rdma/opx/fi_opx_internal.h index 2eb970b7500..a9873a4c03d 100644 --- a/prov/opx/include/rdma/opx/fi_opx_internal.h +++ b/prov/opx/include/rdma/opx/fi_opx_internal.h @@ -175,17 +175,14 @@ struct fi_opx_context_ext { static inline int fi_opx_threading_unknown(const enum fi_threading threading) { return threading != FI_THREAD_DOMAIN && // Most likely - threading != FI_THREAD_ENDPOINT && threading != FI_THREAD_SAFE && threading != FI_THREAD_COMPLETION && - threading != FI_THREAD_FID && threading != FI_THREAD_UNSPEC; // Least likely } static inline int 
fi_opx_threading_lock_required(const enum fi_threading threading, enum fi_progress progress) { return !(threading == FI_THREAD_DOMAIN || - threading == FI_THREAD_ENDPOINT || threading == FI_THREAD_COMPLETION) || progress == FI_PROGRESS_AUTO; } diff --git a/prov/util/src/util_attr.c b/prov/util/src/util_attr.c index f40e2bf8330..d152993b2b5 100644 --- a/prov/util/src/util_attr.c +++ b/prov/util/src/util_attr.c @@ -429,16 +429,12 @@ static int fi_thread_level(enum fi_threading thread_model) switch (thread_model) { case FI_THREAD_SAFE: return 1; - case FI_THREAD_FID: - return 2; - case FI_THREAD_ENDPOINT: - return 3; case FI_THREAD_COMPLETION: - return 4; + return 2; case FI_THREAD_DOMAIN: - return 5; + return 3; case FI_THREAD_UNSPEC: - return 6; + return 4; default: return -1; } diff --git a/src/fi_tostr.c b/src/fi_tostr.c index 5567bd93493..851d45bc7b3 100644 --- a/src/fi_tostr.c +++ b/src/fi_tostr.c @@ -149,10 +149,8 @@ ofi_tostr_threading(char *buf, size_t len, enum fi_threading threading) switch (threading) { CASEENUMSTRN(FI_THREAD_UNSPEC, len); CASEENUMSTRN(FI_THREAD_SAFE, len); - CASEENUMSTRN(FI_THREAD_FID, len); CASEENUMSTRN(FI_THREAD_DOMAIN, len); CASEENUMSTRN(FI_THREAD_COMPLETION, len); - CASEENUMSTRN(FI_THREAD_ENDPOINT, len); default: ofi_strncatf(buf, len, "Unknown"); break; From 141348671d0ed39f04826a8e0912532daf14af3f Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 25 Sep 2023 16:01:41 -0700 Subject: [PATCH 19/34] core: Simplify progress definition Combine data and control progress into one progress option. 
Signed-off-by: Sean Hefty --- fabtests/component/dmabuf-rdma/fi-rdmabw-xe.c | 1 - fabtests/multinode/src/core_coll.c | 1 - fabtests/unit/getinfo_test.c | 30 ------------------- include/rdma/fabric.h | 5 +++- man/fi_domain.3.md | 10 +++++-- man/fi_info.1.md | 3 +- man/fi_setup.7.md | 3 +- prov/coll/src/coll_attr.c | 3 +- prov/efa/src/efa_user_info.c | 1 - prov/opx/src/fi_opx_domain.c | 15 +++++----- prov/psm2/src/psmx2_attr.c | 5 ---- prov/util/src/util_attr.c | 18 ++++------- src/fi_tostr.c | 7 ++--- 13 files changed, 28 insertions(+), 74 deletions(-) diff --git a/fabtests/component/dmabuf-rdma/fi-rdmabw-xe.c b/fabtests/component/dmabuf-rdma/fi-rdmabw-xe.c index 3bde5141238..b50259075f0 100644 --- a/fabtests/component/dmabuf-rdma/fi-rdmabw-xe.c +++ b/fabtests/component/dmabuf-rdma/fi-rdmabw-xe.c @@ -345,7 +345,6 @@ static int init_nic(int nic, char *domain_name, char *server_name, int port, if (buf_location != MALLOC) hints->caps |= FI_HMEM; hints->mode = FI_CONTEXT; - hints->domain_attr->control_progress = FI_PROGRESS_MANUAL; hints->domain_attr->data_progress = FI_PROGRESS_MANUAL; hints->domain_attr->mr_mode = FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_VIRT_ADDR | FI_MR_LOCAL | diff --git a/fabtests/multinode/src/core_coll.c b/fabtests/multinode/src/core_coll.c index a8341ca9f05..eeac1e97d5d 100644 --- a/fabtests/multinode/src/core_coll.c +++ b/fabtests/multinode/src/core_coll.c @@ -525,7 +525,6 @@ static inline void setup_hints(void) hints->ep_attr->type = FI_EP_RDM; hints->caps = FI_MSG | FI_COLLECTIVE; hints->mode = FI_CONTEXT; - hints->domain_attr->control_progress = FI_PROGRESS_MANUAL; hints->domain_attr->data_progress = FI_PROGRESS_MANUAL; } diff --git a/fabtests/unit/getinfo_test.c b/fabtests/unit/getinfo_test.c index d46f96668da..5cce2289f1e 100644 --- a/fabtests/unit/getinfo_test.c +++ b/fabtests/unit/getinfo_test.c @@ -624,18 +624,6 @@ static int init_data_auto(struct fi_info *hints) return 0; } -static int init_ctrl_manual(struct fi_info *hints) -{ - 
hints->domain_attr->control_progress = FI_PROGRESS_MANUAL; - return 0; -} - -static int init_ctrl_auto(struct fi_info *hints) -{ - hints->domain_attr->control_progress = FI_PROGRESS_AUTO; - return 0; -} - static int check_data_manual(struct fi_info *info) { return (info->domain_attr->data_progress != FI_PROGRESS_MANUAL) ? @@ -648,18 +636,6 @@ static int check_data_auto(struct fi_info *info) EXIT_FAILURE : 0; } -static int check_ctrl_manual(struct fi_info *info) -{ - return (info->domain_attr->control_progress != FI_PROGRESS_MANUAL) ? - EXIT_FAILURE : 0; -} - -static int check_ctrl_auto(struct fi_info *info) -{ - return (info->domain_attr->control_progress != FI_PROGRESS_AUTO) ? - EXIT_FAILURE : 0; -} - static int init_domain_caps(struct fi_info *hints, uint64_t caps) { @@ -946,10 +922,6 @@ getinfo_test(progress, 1, "Test data manual progress", NULL, NULL, 0, hints, init_data_manual, NULL, check_data_manual, 0) getinfo_test(progress, 2, "Test data auto progress", NULL, NULL, 0, hints, init_data_auto, NULL, check_data_auto, 0) -getinfo_test(progress, 3, "Test ctrl manual progress", NULL, NULL, 0, - hints, init_ctrl_manual, NULL, check_ctrl_manual, 0) -getinfo_test(progress, 4, "Test ctrl auto progress", NULL, NULL, 0, - hints, init_ctrl_auto, NULL, check_ctrl_auto, 0) /* Capability test */ getinfo_test(caps, 1, "Test capability bits supported are set", @@ -1040,8 +1012,6 @@ int main(int argc, char **argv) TEST_ENTRY_GETINFO(mr_mode6), TEST_ENTRY_GETINFO(progress1), TEST_ENTRY_GETINFO(progress2), - TEST_ENTRY_GETINFO(progress3), - TEST_ENTRY_GETINFO(progress4), TEST_ENTRY_GETINFO(caps1), TEST_ENTRY_GETINFO(caps2), TEST_ENTRY_GETINFO(caps3), diff --git a/include/rdma/fabric.h b/include/rdma/fabric.h index 72133042c3a..c114a5defd0 100644 --- a/include/rdma/fabric.h +++ b/include/rdma/fabric.h @@ -418,7 +418,10 @@ struct fi_domain_attr { char *name; enum fi_threading threading; enum fi_progress control_progress; - enum fi_progress data_progress; + union { + enum 
fi_progress data_progress; + enum fi_progress progress; + }; enum fi_resource_mgmt resource_mgmt; enum fi_av_type av_type; int mr_mode; diff --git a/man/fi_domain.3.md b/man/fi_domain.3.md index e5521bf2117..6d82e520b9f 100644 --- a/man/fi_domain.3.md +++ b/man/fi_domain.3.md @@ -190,8 +190,7 @@ struct fi_domain_attr { struct fid_domain *domain; char *name; enum fi_threading threading; - enum fi_progress control_progress; - enum fi_progress data_progress; + enum fi_progress progress; enum fi_resource_mgmt resource_mgmt; enum fi_av_type av_type; int mr_mode; @@ -273,7 +272,7 @@ interfaces enables a provider to eliminate lower-level locks. providers will return a threading model that allows for the greatest level of parallelism. -## Progress Models (control_progress / data_progress) +## Progress Models (progress) Progress is the ability of the underlying implementation to complete processing of an asynchronous request. In many cases, the processing @@ -295,6 +294,11 @@ progress on data transfer operations. This includes message queue, RMA, tagged messaging, and atomic operations, along with their completion processing. +The progress field defines the behavior of both control and data operations. +For applications that require compilation portability between the version 1 +and version 2 libfabric series, the progress field may be referenced as +data_progress. + Progress frequently requires action being taken at both the transmitting and receiving sides of an operation. This is often a requirement for reliable transfers, as a result of retry and acknowledgement processing. 
diff --git a/man/fi_info.1.md b/man/fi_info.1.md index 437ee1db1e5..590c0ee0e76 100644 --- a/man/fi_info.1.md +++ b/man/fi_info.1.md @@ -176,8 +176,7 @@ fi_info: domain: 0x0 name: mlx5_0-dgram threading: FI_THREAD_SAFE - control_progress: FI_PROGRESS_MANUAL - data_progress: FI_PROGRESS_MANUAL + progress: FI_PROGRESS_MANUAL resource_mgmt: FI_RM_ENABLED av_type: FI_AV_UNSPEC mr_mode: [ ] diff --git a/man/fi_setup.7.md b/man/fi_setup.7.md index 430aa95c493..7c836e38a03 100644 --- a/man/fi_setup.7.md +++ b/man/fi_setup.7.md @@ -371,8 +371,7 @@ struct fi_domain_attr { struct fid_domain *domain; char *name; enum fi_threading threading; - enum fi_progress control_progress; - enum fi_progress data_progress; + enum fi_progress progress; enum fi_resource_mgmt resource_mgmt; enum fi_av_type av_type; enum fi_mr_mode mr_mode; diff --git a/prov/coll/src/coll_attr.c b/prov/coll/src/coll_attr.c index 80875e2bacc..b541fb90191 100644 --- a/prov/coll/src/coll_attr.c +++ b/prov/coll/src/coll_attr.c @@ -72,8 +72,7 @@ static struct fi_domain_attr coll_domain_attr = { .name = "util-coll", .caps = COLL_DOMAIN_CAPS, .threading = FI_THREAD_SAFE, - .control_progress = FI_PROGRESS_AUTO, - .data_progress = FI_PROGRESS_AUTO, + .progress = FI_PROGRESS_AUTO, .resource_mgmt = FI_RM_ENABLED, .av_type = FI_AV_UNSPEC, .mr_mode = 0, diff --git a/prov/efa/src/efa_user_info.c b/prov/efa/src/efa_user_info.c index 8eef343d9d0..11544f8add8 100644 --- a/prov/efa/src/efa_user_info.c +++ b/prov/efa/src/efa_user_info.c @@ -449,7 +449,6 @@ int efa_user_info_alter_rdm(int version, struct fi_info *info, const struct fi_i /* We only support manual progress for RMA operations */ if (hints->caps & FI_RMA) { - info->domain_attr->control_progress = FI_PROGRESS_MANUAL; info->domain_attr->data_progress = FI_PROGRESS_MANUAL; } diff --git a/prov/opx/src/fi_opx_domain.c b/prov/opx/src/fi_opx_domain.c index a48e543c4ae..48eb6f0cf22 100644 --- a/prov/opx/src/fi_opx_domain.c +++ b/prov/opx/src/fi_opx_domain.c @@ -191,7 +191,6 
@@ int fi_opx_choose_domain(uint64_t caps, struct fi_domain_attr *domain_attr, stru } else { if (hints->threading) domain_attr->threading = hints->threading; - if (hints->control_progress) domain_attr->control_progress = hints->control_progress; if (hints->resource_mgmt) domain_attr->resource_mgmt = hints->resource_mgmt; if (hints->av_type) domain_attr->av_type = hints->av_type; if (hints->mr_key_size) domain_attr->mr_key_size = hints->mr_key_size; @@ -230,7 +229,7 @@ int fi_opx_check_domain_attr(struct fi_domain_attr *attr) FI_DBG(fi_opx_global.prov, FI_LOG_DOMAIN, "incorrect threading level\n"); goto err; } - + if (attr->mr_mode == FI_MR_UNSPEC) { attr->mr_mode = OPX_MR == FI_MR_UNSPEC ? FI_MR_BASIC : OPX_MR; } @@ -290,11 +289,11 @@ int fi_opx_domain(struct fid_fabric *fabric, if (fi_opx_global.default_domain_attr == NULL) { if (fi_opx_alloc_default_domain_attr(&fi_opx_global.default_domain_attr)) { FI_DBG(fi_opx_global.prov, FI_LOG_DOMAIN, "alloc function could not allocate block of memory\n"); - errno = FI_ENOMEM; + errno = FI_ENOMEM; goto err; } } - + struct opx_tid_domain *opx_tid_domain; struct opx_tid_fabric *opx_tid_fabric = opx_fabric->tid_fabric; @@ -346,7 +345,7 @@ int fi_opx_domain(struct fid_fabric *fabric, opx_domain->domain_fid.fid.context = context; opx_domain->domain_fid.fid.ops = &fi_opx_fi_ops; opx_domain->domain_fid.ops = &fi_opx_domain_ops; - + char * env_var_prog_affinity = OPX_DEFAULT_PROG_AFFINITY_STR; get_param_check = fi_param_get_str(fi_opx_global.prov, "prog_affinity", &env_var_prog_affinity); if (get_param_check == FI_SUCCESS) { @@ -359,7 +358,7 @@ int fi_opx_domain(struct fid_fabric *fabric, } else { env_var_prog_affinity = OPX_DEFAULT_PROG_AFFINITY_STR; } - + if (strncmp(env_var_prog_affinity, OPX_DEFAULT_PROG_AFFINITY_STR, OPX_JOB_KEY_STR_SIZE)){ goto skip; @@ -424,7 +423,7 @@ int fi_opx_domain(struct fid_fabric *fabric, FI_WARN(fi_opx_global.prov, FI_LOG_DOMAIN, "UUID too long. UUID must consist of 1-32 hexadecimal digits. 
Using default OPX uuid instead\n"); env_var_uuid = OPX_DEFAULT_JOB_KEY_STR; - } + } int i; for (i=0; i < OPX_JOB_KEY_STR_SIZE && env_var_uuid[i] != 0; i++) { @@ -434,7 +433,7 @@ int fi_opx_domain(struct fid_fabric *fabric, env_var_uuid = OPX_DEFAULT_JOB_KEY_STR; } } - + // Copy the job key and guarantee null termination. strncpy(opx_domain->unique_job_key_str, env_var_uuid, OPX_JOB_KEY_STR_SIZE-1); opx_domain->unique_job_key_str[OPX_JOB_KEY_STR_SIZE-1] = '\0'; diff --git a/prov/psm2/src/psmx2_attr.c b/prov/psm2/src/psmx2_attr.c index c582868e7a4..e45467d6907 100644 --- a/prov/psm2/src/psmx2_attr.c +++ b/prov/psm2/src/psmx2_attr.c @@ -418,11 +418,6 @@ void psmx2_alter_prov_info(uint32_t api_version, * checking. Now change them back to the preferred values. */ for (; info; info = info->next) { - if (!hints || !hints->domain_attr || - !hints->domain_attr->control_progress) - info->domain_attr->control_progress = - FI_PROGRESS_MANUAL; - if (!hints || !hints->domain_attr || !hints->domain_attr->data_progress) info->domain_attr->data_progress = diff --git a/prov/util/src/util_attr.c b/prov/util/src/util_attr.c index d152993b2b5..29d52096c29 100644 --- a/prov/util/src/util_attr.c +++ b/prov/util/src/util_attr.c @@ -562,15 +562,9 @@ int ofi_check_domain_attr(const struct fi_provider *prov, uint32_t api_version, return -FI_ENODATA; } - if (fi_progress_level(user_attr->control_progress) < - fi_progress_level(prov_attr->control_progress)) { - FI_INFO(prov, FI_LOG_CORE, "Invalid control progress model\n"); - return -FI_ENODATA; - } - - if (fi_progress_level(user_attr->data_progress) < - fi_progress_level(prov_attr->data_progress)) { - FI_INFO(prov, FI_LOG_CORE, "Invalid data progress model\n"); + if (fi_progress_level(user_attr->progress) < + fi_progress_level(prov_attr->progress)) { + FI_INFO(prov, FI_LOG_CORE, "Invalid progress model\n"); return -FI_ENODATA; } @@ -1158,10 +1152,8 @@ static void fi_alter_domain_attr(struct fi_domain_attr *attr, if (hints->threading) 
attr->threading = hints->threading; - if (hints->control_progress) - attr->control_progress = hints->control_progress; - if (hints->data_progress) - attr->data_progress = hints->data_progress; + if (hints->progress) + attr->progress = hints->progress; if (hints->av_type) attr->av_type = hints->av_type; } diff --git a/src/fi_tostr.c b/src/fi_tostr.c index 851d45bc7b3..c76841333cd 100644 --- a/src/fi_tostr.c +++ b/src/fi_tostr.c @@ -506,11 +506,8 @@ ofi_tostr_domain_attr(char *buf, size_t len, const struct fi_domain_attr *attr, ofi_tostr_threading(buf, len, attr->threading); ofi_strncatf(buf, len, "\n"); - ofi_strncatf(buf, len, "%s%scontrol_progress: ", prefix,TAB); - ofi_tostr_progress(buf, len, attr->control_progress); - ofi_strncatf(buf, len, "\n"); - ofi_strncatf(buf, len, "%s%sdata_progress: ", prefix, TAB); - ofi_tostr_progress(buf, len, attr->data_progress); + ofi_strncatf(buf, len, "%s%sprogress: ", prefix, TAB); + ofi_tostr_progress(buf, len, attr->progress); ofi_strncatf(buf, len, "\n"); ofi_strncatf(buf, len, "%s%sresource_mgmt: ", prefix, TAB); ofi_tostr_resource_mgmt(buf, len, attr->resource_mgmt); From 1466a8081cf9726e1ea8cabcfbeb48d3c461ea66 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 25 Sep 2023 17:47:13 -0700 Subject: [PATCH 20/34] core: Remove comp_order attribute Completions are always unordered. 
Signed-off-by: Sean Hefty --- man/fi_endpoint.3.md | 49 +++--------------------------- man/fi_info.1.md | 2 -- man/fi_setup.7.md | 2 -- prov/coll/src/coll_attr.c | 2 -- prov/efa/src/efa_prov_info.c | 4 --- prov/mrail/src/mrail_attr.c | 2 -- prov/opx/src/fi_opx_ep.c | 58 +++++++++++++++++------------------- prov/opx/src/fi_opx_info.c | 4 +-- prov/psm2/src/psmx2.h | 1 - prov/psm2/src/psmx2_attr.c | 2 -- prov/rxd/src/rxd_attr.c | 2 -- prov/rxm/src/rxm_attr.c | 6 ---- prov/rxm/src/rxm_init.c | 6 ---- prov/shm/src/smr_attr.c | 4 --- prov/sm2/src/sm2_attr.c | 4 --- prov/tcp/src/xnet_attr.c | 6 ---- prov/ucx/src/ucx_init.c | 2 -- prov/udp/src/udpx_attr.c | 2 -- prov/util/src/util_attr.c | 4 +-- prov/verbs/src/verbs_info.c | 4 --- src/fi_tostr.c | 21 ------------- 21 files changed, 35 insertions(+), 152 deletions(-) diff --git a/man/fi_endpoint.3.md b/man/fi_endpoint.3.md index e3411e289f6..1fedf0576b6 100644 --- a/man/fi_endpoint.3.md +++ b/man/fi_endpoint.3.md @@ -1049,36 +1049,8 @@ transfer operation in order to guarantee that ordering is met. ## comp_order - Completion Ordering -Completion ordering refers to the order in which completed requests are -written into the completion queue. Completion ordering is similar to -message order. Relaxed completion order may enable faster reporting of -completed transfers, allow acknowledgments to be sent over different -fabric paths, and support more sophisticated retry mechanisms. -This can result in lower-latency completions, particularly when -using connectionless endpoints. Strict completion ordering may require -that providers queue completed operations or limit available optimizations. - -For transmit requests, completion ordering depends on the endpoint -communication type. For unreliable communication, completion ordering -applies to all data transfer requests submitted to an endpoint. -For reliable communication, completion ordering only applies to requests -that target a single destination endpoint. 
Completion ordering of -requests that target different endpoints over a reliable transport -is not defined. - -Applications should specify the completion ordering that they support -or require. Providers should return the completion order that they -actually provide, with the constraint that the returned ordering is -stricter than that specified by the application. Supported completion -order values are: - -*FI_ORDER_NONE* -: No ordering is defined for completed operations. Requests submitted - to the transmit context may complete in any order. - -*FI_ORDER_STRICT* -: Requests complete in the order in which they are submitted to the - transmit context. +This field is provided for version 1 compatibility and should be set +to 0. ## inject_size @@ -1247,21 +1219,8 @@ FI_ORDER_ATOMIC_RAW, FI_ORDER_ATOMIC_WAR, and FI_ORDER_ATOMIC_WAW. ## comp_order - Completion Ordering -For a description of completion ordering, see the comp_order field in -the _Transmit Context Attribute_ section. - -*FI_ORDER_DATA* -: When set, this bit indicates that received data is written into memory - in order. Data ordering applies to memory accessed as part of a single - operation and between operations if message ordering is guaranteed. - -*FI_ORDER_NONE* -: No ordering is defined for completed operations. Receive operations may - complete in any order, regardless of their submission order. - -*FI_ORDER_STRICT* -: Receive operations complete in the order in which they are processed by - the receive context, based on the receive side msg_order attribute. +This field is provided for version 1 compatibility and should be set +to 0. 
## total_buffered_recv diff --git a/man/fi_info.1.md b/man/fi_info.1.md index 590c0ee0e76..cbc93c54658 100644 --- a/man/fi_info.1.md +++ b/man/fi_info.1.md @@ -144,7 +144,6 @@ fi_info: mode: [ ] op_flags: [ ] msg_order: [ FI_ORDER_RAR, FI_ORDER_RAW, FI_ORDER_RAS, FI_ORDER_WAW, FI_ORDER_WAS, FI_ORDER_SAW, FI_ORDER_SAS, FI_ORDER_RMA_RAR, FI_ORDER_RMA_RAW, FI_ORDER_RMA_WAW, FI_ORDER_ATOMIC_RAR, FI_ORDER_ATOMIC_RAW, FI_ORDER_ATOMIC_WAR, FI_ORDER_ATOMIC_WAW ] - comp_order: [ FI_ORDER_NONE ] inject_size: 3840 size: 1024 iov_limit: 4 @@ -155,7 +154,6 @@ fi_info: mode: [ ] op_flags: [ ] msg_order: [ FI_ORDER_RAR, FI_ORDER_RAW, FI_ORDER_RAS, FI_ORDER_WAW, FI_ORDER_WAS, FI_ORDER_SAW, FI_ORDER_SAS, FI_ORDER_RMA_RAR, FI_ORDER_RMA_RAW, FI_ORDER_RMA_WAW, FI_ORDER_ATOMIC_RAR, FI_ORDER_ATOMIC_RAW, FI_ORDER_ATOMIC_WAR, FI_ORDER_ATOMIC_WAW ] - comp_order: [ FI_ORDER_NONE ] total_buffered_recv: 0 size: 1024 iov_limit: 4 diff --git a/man/fi_setup.7.md b/man/fi_setup.7.md index 7c836e38a03..8964ea15863 100644 --- a/man/fi_setup.7.md +++ b/man/fi_setup.7.md @@ -754,7 +754,6 @@ struct fi_rx_attr { uint64_t mode; uint64_t op_flags; uint64_t msg_order; - uint64_t comp_order; ... }; @@ -763,7 +762,6 @@ struct fi_tx_attr { uint64_t mode; uint64_t op_flags; uint64_t msg_order; - uint64_t comp_order; size_t inject_size; ... 
}; diff --git a/prov/coll/src/coll_attr.c b/prov/coll/src/coll_attr.c index b541fb90191..b7a15f1f90b 100644 --- a/prov/coll/src/coll_attr.c +++ b/prov/coll/src/coll_attr.c @@ -40,7 +40,6 @@ static struct fi_tx_attr coll_tx_attr = { .caps = COLL_TX_CAPS | FI_COLLECTIVE, .op_flags = COLL_TX_OP_FLAGS, .msg_order = ~0x0ULL, - .comp_order = FI_ORDER_NONE, .size = COLL_TX_SIZE, .iov_limit = COLL_IOV_LIMIT, .rma_iov_limit = COLL_IOV_LIMIT, @@ -50,7 +49,6 @@ static struct fi_rx_attr coll_rx_attr = { .caps = COLL_RX_CAPS | FI_COLLECTIVE, .op_flags = COLL_RX_OP_FLAGS, .msg_order = ~0x0ULL, - .comp_order = FI_ORDER_NONE, .size = COLL_RX_SIZE, .iov_limit= COLL_IOV_LIMIT, }; diff --git a/prov/efa/src/efa_prov_info.c b/prov/efa/src/efa_prov_info.c index ecc5fe89096..e42115d7551 100644 --- a/prov/efa/src/efa_prov_info.c +++ b/prov/efa/src/efa_prov_info.c @@ -232,7 +232,6 @@ const struct fi_tx_attr efa_dgrm_tx_attr = { .mode = FI_MSG_PREFIX, .op_flags = EFA_TX_OP_FLAGS, .msg_order = EFA_MSG_ORDER, - .comp_order = FI_ORDER_NONE, .inject_size = 0, .rma_iov_limit = 0, }; @@ -245,7 +244,6 @@ const struct fi_rx_attr efa_dgrm_rx_attr = { .mode = FI_MSG_PREFIX | EFA_RX_MODE, .op_flags = EFA_RX_DGRM_OP_FLAGS, .msg_order = EFA_MSG_ORDER, - .comp_order = FI_ORDER_NONE, .total_buffered_recv = 0, .iov_limit = 1 }; @@ -258,7 +256,6 @@ const struct fi_tx_attr efa_rdm_tx_attr = { .mode = 0, .op_flags = EFA_TX_OP_FLAGS, .msg_order = EFA_MSG_ORDER, - .comp_order = FI_ORDER_NONE, .inject_size = 0, .rma_iov_limit = 1, }; @@ -271,7 +268,6 @@ const struct fi_rx_attr efa_rdm_rx_attr = { .mode = EFA_RX_MODE, .op_flags = EFA_RX_RDM_OP_FLAGS, .msg_order = EFA_MSG_ORDER, - .comp_order = FI_ORDER_NONE, .total_buffered_recv = 0, .iov_limit = 1 }; diff --git a/prov/mrail/src/mrail_attr.c b/prov/mrail/src/mrail_attr.c index 5b9d81413ec..4a3d58b5dcc 100644 --- a/prov/mrail/src/mrail_attr.c +++ b/prov/mrail/src/mrail_attr.c @@ -36,7 +36,6 @@ struct fi_tx_attr mrail_tx_attr = { .caps = ~0x0ULL, .op_flags = 
MRAIL_PASSTHRU_TX_OP_FLAGS | MRAIL_TX_OP_FLAGS, .msg_order = ~0x0ULL, - .comp_order = ~0x0ULL, .inject_size = SIZE_MAX, .size = SIZE_MAX, .iov_limit = MRAIL_IOV_LIMIT, @@ -47,7 +46,6 @@ struct fi_rx_attr mrail_rx_attr = { .caps = ~0x0ULL, .op_flags = MRAIL_PASSTHRU_RX_OP_FLAGS | MRAIL_RX_OP_FLAGS, .msg_order = ~0x0ULL, - .comp_order = ~0x0ULL, .total_buffered_recv = SIZE_MAX, .size = SIZE_MAX, .iov_limit = SIZE_MAX, diff --git a/prov/opx/src/fi_opx_ep.c b/prov/opx/src/fi_opx_ep.c index c9ca6f8be21..abe7d9df136 100644 --- a/prov/opx/src/fi_opx_ep.c +++ b/prov/opx/src/fi_opx_ep.c @@ -388,8 +388,8 @@ static void fi_opx_unbind_cq_ep(struct fi_opx_cq *cq, struct fi_opx_ep *ep) } if (found && ind < cq->progress.ep_count - 1) { cq->progress.ep[ind] = cq->progress.ep[ind+1]; - } - } + } + } if (found) { cq->progress.ep_count--; } @@ -401,16 +401,16 @@ static void fi_opx_unbind_cq_ep(struct fi_opx_cq *cq, struct fi_opx_ep *ep) } if (found && ind < cq->ep_bind_count - 1) { cq->ep[ind] = cq->ep[ind+1]; - } - } + } + } if (found) { cq->ep_bind_count--; } - + } static int fi_opx_close_ep(fid_t fid) -{ +{ FI_DBG_TRACE(fi_opx_global.prov, FI_LOG_EP_DATA, "close ep\n"); if (!fid) { FI_LOG(fi_opx_global.prov, FI_LOG_DEBUG, FI_LOG_FABRIC, @@ -756,9 +756,9 @@ static int fi_opx_ep_tx_init (struct fi_opx_ep *opx_ep, opx_ep->tx->pio_credits_addr = hfi->info.pio.credits_addr; /* Now that we know how many PIO Tx send credits we have, calculate the threshold to switch from EAGER send to RTS/CTS - * With max credits, there should be enough PIO Eager buffer to send 1 full-size message and 1 credit leftover for min reliablity. + * With max credits, there should be enough PIO Eager buffer to send 1 full-size message and 1 credit leftover for min reliablity. 
*/ - uint64_t l_pio_max_eager_tx_bytes = MIN(FI_OPX_HFI1_PACKET_MTU, + uint64_t l_pio_max_eager_tx_bytes = MIN(FI_OPX_HFI1_PACKET_MTU, ((hfi->state.pio.credits_total - FI_OPX_HFI1_TX_RELIABILITY_RESERVED_CREDITS) * 64)); assert(l_pio_max_eager_tx_bytes < ((2<<15) -1) ); // Make sure the value won't wrap a uint16_t @@ -766,16 +766,16 @@ static int fi_opx_ep_tx_init (struct fi_opx_ep *opx_ep, assert((l_pio_max_eager_tx_bytes & 0x3f) == 0); //Make sure the value is 64 bit aligned opx_ep->tx->pio_max_eager_tx_bytes = l_pio_max_eager_tx_bytes; - OPX_LOG_OBSERVABLE(FI_LOG_EP_DATA, "Credits_total is %d, so set pio_max_eager_tx_bytes to %d \n", + OPX_LOG_OBSERVABLE(FI_LOG_EP_DATA, "Credits_total is %d, so set pio_max_eager_tx_bytes to %d \n", hfi->state.pio.credits_total, opx_ep->tx->pio_max_eager_tx_bytes); /* Similar logic to l_pio_max_eager_tx_bytes, calculate l_pio_flow_eager_tx_bytes to be an 'optimal' value for PIO - * credit count that respects the HFI credit return threshold. The threshold is default 33%, so multiply credits_total - * by .66. The idea is to not wait for an overly long time on credit-constrained systems to get almost all the PIO + * credit count that respects the HFI credit return threshold. The threshold is default 33%, so multiply credits_total + * by .66. The idea is to not wait for an overly long time on credit-constrained systems to get almost all the PIO * send credits back, rather wait to get the optimal number of credits determined by the return threshold. - * TODO: multiply by user_credit_return_threshold from the hfi1 driver parms. Default is 33 + * TODO: multiply by user_credit_return_threshold from the hfi1 driver parms. 
Default is 33 */ - uint64_t l_pio_flow_eager_tx_bytes = MIN(FI_OPX_HFI1_PACKET_MTU, + uint64_t l_pio_flow_eager_tx_bytes = MIN(FI_OPX_HFI1_PACKET_MTU, ((uint16_t)((hfi->state.pio.credits_total - FI_OPX_HFI1_TX_RELIABILITY_RESERVED_CREDITS) * .66) * 64) ); assert((l_pio_flow_eager_tx_bytes & 0x3f) == 0); //Make sure the value is 64 bit aligned @@ -787,8 +787,8 @@ static int fi_opx_ep_tx_init (struct fi_opx_ep *opx_ep, OPX_LOG_OBSERVABLE(FI_LOG_EP_DATA, "Set pio_flow_eager_tx_bytes to %d \n", opx_ep->tx->pio_flow_eager_tx_bytes); - /* Set delivery completion max threshold. Any messages larger than this value in bytes will not be copied to - * replay bounce buffers. Instead, hold the sender's large message buffer until we get all ACKs back from the Rx + /* Set delivery completion max threshold. Any messages larger than this value in bytes will not be copied to + * replay bounce buffers. Instead, hold the sender's large message buffer until we get all ACKs back from the Rx * side of the message. Since no copy of the message is made, it will need to be used to handle NAKs. */ int l_dcomp_threshold; @@ -800,16 +800,16 @@ static int fi_opx_ep_tx_init (struct fi_opx_ep *opx_ep, } else if (l_dcomp_threshold < OPX_MIN_DCOMP_THRESHOLD || l_dcomp_threshold > (OPX_MAX_DCOMP_THRESHOLD)) { opx_ep->tx->dcomp_threshold = OPX_DEFAULT_DCOMP_THRESHOLD; FI_WARN(fi_opx_global.prov, FI_LOG_EP_DATA, - "Error: FI_OPX_DELIVERY_COMPLETION_THRESHOLD was set but is outside of MIN/MAX thresholds. Using default setting of %d\n", + "Error: FI_OPX_DELIVERY_COMPLETION_THRESHOLD was set but is outside of MIN/MAX thresholds. Using default setting of %d\n", opx_ep->tx->dcomp_threshold); } else { opx_ep->tx->dcomp_threshold = l_dcomp_threshold; - OPX_LOG_OBSERVABLE(FI_LOG_EP_DATA, "FI_OPX_DELIVERY_COMPLETION_THRESHOLD was specified. Set to %d\n", + OPX_LOG_OBSERVABLE(FI_LOG_EP_DATA, "FI_OPX_DELIVERY_COMPLETION_THRESHOLD was specified. 
Set to %d\n", opx_ep->tx->dcomp_threshold); } - OPX_LOG_OBSERVABLE(FI_LOG_EP_DATA, "Multi-packet eager max message length is %d, chunk-size is %d.\n", - FI_OPX_MP_EGR_MAX_PAYLOAD_BYTES, FI_OPX_MP_EGR_CHUNK_SIZE); + OPX_LOG_OBSERVABLE(FI_LOG_EP_DATA, "Multi-packet eager max message length is %d, chunk-size is %d.\n", + FI_OPX_MP_EGR_MAX_PAYLOAD_BYTES, FI_OPX_MP_EGR_CHUNK_SIZE); opx_ep->tx->force_credit_return = 0; @@ -821,7 +821,7 @@ static int fi_opx_ep_tx_init (struct fi_opx_ep *opx_ep, int sdma_disable; if (fi_param_get_int(fi_opx_global.prov, "sdma_disable", &sdma_disable) == FI_SUCCESS) { opx_ep->tx->use_sdma = !sdma_disable; - OPX_LOG_OBSERVABLE(FI_LOG_EP_DATA, + OPX_LOG_OBSERVABLE(FI_LOG_EP_DATA, "sdma_disable parm specified as %0hhX; opx_ep->tx->use_sdma set to %0hhX\n", sdma_disable, opx_ep->tx->use_sdma); } else { OPX_LOG_OBSERVABLE(FI_LOG_EP_DATA, "sdma_disable parm not specified; using SDMA\n"); @@ -988,7 +988,7 @@ static int fi_opx_ep_rx_init (struct fi_opx_ep *opx_ep) FI_OPX_SHM_FIFO_SIZE, FI_OPX_SHM_PACKET_SIZE); } - /* Now that endpoint is complete enough to have context information from the hfi, + /* Now that endpoint is complete enough to have context information from the hfi, ** update the function pointers in the cq for the rx polling loop */ fi_opx_cq_finalize_ops((struct fid_ep *) opx_ep); @@ -1595,7 +1595,7 @@ int fi_opx_ep_rx_cancel (struct fi_opx_ep_rx * rx, prev = item; item = item->next; - } + } /* context not found in 'kind' match queue */ FI_DBG_TRACE(fi_opx_global.prov, FI_LOG_EP_DATA, "(end) not found\n"); @@ -1617,7 +1617,7 @@ ssize_t fi_opx_cancel(fid_t fid, void *context) FI_MSG, (const union fi_opx_context *) context, FI_OPX_LOCK_NOT_REQUIRED); - + } if (opx_ep->rx->caps & FI_TAGGED) { @@ -1666,7 +1666,6 @@ int fi_opx_alloc_default_rx_attr(struct fi_rx_attr **rx_attr) attr->mode = FI_CONTEXT2 | FI_ASYNC_IOV; attr->op_flags = 0; attr->msg_order = FI_OPX_DEFAULT_MSG_ORDER; - attr->comp_order = FI_ORDER_NONE; attr->total_buffered_recv 
= FI_OPX_HFI1_PACKET_MTU; attr->size = SIZE_MAX; //FI_OPX_RX_SIZE; attr->iov_limit = FI_OPX_IOV_LIMIT; @@ -1682,7 +1681,7 @@ int fi_opx_alloc_default_rx_attr(struct fi_rx_attr **rx_attr) int fi_opx_check_rx_attr(struct fi_rx_attr *attr) { /* TODO: more error checking of rx_attr */ - if (attr->comp_order && attr->comp_order == FI_ORDER_STRICT) { + if (attr->comp_order) { FI_WARN(fi_opx_global.prov, FI_LOG_EP_DATA, "unavailable [bad rx comp_order (%lx)] ", attr->comp_order); @@ -1707,7 +1706,6 @@ int fi_opx_alloc_default_tx_attr(struct fi_tx_attr **tx_attr) attr->mode = FI_CONTEXT2 | FI_ASYNC_IOV; attr->op_flags = FI_TRANSMIT_COMPLETE; attr->msg_order = FI_OPX_DEFAULT_MSG_ORDER; - attr->comp_order = FI_ORDER_NONE; attr->inject_size = FI_OPX_HFI1_PACKET_IMM; attr->size = SIZE_MAX; attr->iov_limit = FI_OPX_IOV_LIMIT; @@ -1731,7 +1729,7 @@ int fi_opx_check_tx_attr(struct fi_tx_attr *attr) } /* TODO: more error checking of tx_attr */ - if (attr->comp_order && attr->comp_order == FI_ORDER_STRICT) { + if (attr->comp_order) { FI_LOG(fi_opx_global.prov, FI_LOG_DEBUG, FI_LOG_EP_DATA, "unavailable [bad tx comp_order (%lx)] ", attr->comp_order); @@ -2096,7 +2094,7 @@ void fi_opx_ep_rx_process_context_noinline (struct fi_opx_ep * opx_ep, assert((ext->opx_context.flags & FI_OPX_CQ_CONTEXT_EXT) != 0); } else { if (posix_memalign((void**)&ext, 32, sizeof(struct fi_opx_context_ext))) { - FI_WARN(fi_opx_global.prov, FI_LOG_EP_DATA, + FI_WARN(fi_opx_global.prov, FI_LOG_EP_DATA, "Out of memory.\n"); abort(); } @@ -2135,7 +2133,7 @@ void fi_opx_ep_rx_process_context_noinline (struct fi_opx_ep * opx_ep, struct fi_opx_hfi1_ue_packet * claimed_pkt = context->claim; const unsigned is_intranode = - fi_opx_hfi_is_intranode(claimed_pkt->hdr.stl.lrh.slid); + fi_opx_hfi_is_intranode(claimed_pkt->hdr.stl.lrh.slid); complete_receive_operation(ep, &claimed_pkt->hdr, @@ -2516,7 +2514,7 @@ static void fi_opx_update_daos_av_rank(struct fi_opx_ep *opx_ep, fi_addr_t addr) if (cur_av_rank) { union 
fi_opx_addr cur_av_addr; cur_av_addr.fi = cur_av_rank->fi_addr; - + if (cur_av_addr.fi == addr) { found = 1; cur_av_rank->updated++; diff --git a/prov/opx/src/fi_opx_info.c b/prov/opx/src/fi_opx_info.c index 23827c08596..4373ca39f6f 100644 --- a/prov/opx/src/fi_opx_info.c +++ b/prov/opx/src/fi_opx_info.c @@ -46,7 +46,6 @@ void fi_opx_set_info(struct fi_info *fi, enum fi_progress progress) .mode = FI_OPX_DEFAULT_MODE, .op_flags = FI_TRANSMIT_COMPLETE, .msg_order = FI_OPX_DEFAULT_MSG_ORDER, - .comp_order = FI_ORDER_NONE, .inject_size = FI_OPX_HFI1_PACKET_IMM, .size = SIZE_MAX, .iov_limit = SIZE_MAX, @@ -58,7 +57,6 @@ void fi_opx_set_info(struct fi_info *fi, enum fi_progress progress) .mode = FI_OPX_DEFAULT_MODE, .op_flags = FI_MULTI_RECV, .msg_order = FI_OPX_DEFAULT_MSG_ORDER, - .comp_order = FI_ORDER_NONE, .total_buffered_recv = FI_OPX_HFI1_PACKET_MTU + 64 /* header */, .size = SIZE_MAX, .iov_limit = SIZE_MAX @@ -144,7 +142,7 @@ int fi_opx_set_default_info() fi_opx_set_info(fi, FI_PROGRESS_MANUAL); fi_opx_set_info(fi_auto, FI_PROGRESS_AUTO); fi->next = fi_auto; - + return 0; err: diff --git a/prov/psm2/src/psmx2.h b/prov/psm2/src/psmx2.h index 93f4b1f8d06..cea411aaabf 100644 --- a/prov/psm2/src/psmx2.h +++ b/prov/psm2/src/psmx2.h @@ -110,7 +110,6 @@ extern struct fi_provider psmx2_prov; #define PSMX2_RMA_ORDER_SIZE (4096) #define PSMX2_MSG_ORDER (FI_ORDER_SAS | OFI_ORDER_RAR_SET | OFI_ORDER_RAW_SET | \ OFI_ORDER_WAR_SET | OFI_ORDER_WAW_SET) -#define PSMX2_COMP_ORDER FI_ORDER_NONE /* * Four bits are reserved from the 64-bit tag space as a flags to identify the diff --git a/prov/psm2/src/psmx2_attr.c b/prov/psm2/src/psmx2_attr.c index e45467d6907..5c174624363 100644 --- a/prov/psm2/src/psmx2_attr.c +++ b/prov/psm2/src/psmx2_attr.c @@ -50,7 +50,6 @@ static struct fi_tx_attr psmx2_tx_attr = { .mode = FI_CONTEXT, /* 0 */ .op_flags = PSMX2_OP_FLAGS, .msg_order = PSMX2_MSG_ORDER, - .comp_order = PSMX2_COMP_ORDER, .inject_size = 64, /* psmx2_env.inject_size */ .size = 
UINT64_MAX, .iov_limit = PSMX2_IOV_MAX_COUNT, @@ -62,7 +61,6 @@ static struct fi_rx_attr psmx2_rx_attr = { .mode = FI_CONTEXT, /* 0 */ .op_flags = PSMX2_OP_FLAGS, .msg_order = PSMX2_MSG_ORDER, - .comp_order = PSMX2_COMP_ORDER, .total_buffered_recv = UINT64_MAX, .size = UINT64_MAX, .iov_limit = 1, diff --git a/prov/rxd/src/rxd_attr.c b/prov/rxd/src/rxd_attr.c index 26b45798c47..f59b581103c 100644 --- a/prov/rxd/src/rxd_attr.c +++ b/prov/rxd/src/rxd_attr.c @@ -50,7 +50,6 @@ struct fi_tx_attr rxd_tx_attr = { .caps = RXD_TX_CAPS, .op_flags = RXD_TX_OP_FLAGS, - .comp_order = FI_ORDER_NONE, .msg_order = RXD_MSG_ORDER, .inject_size = RXD_MAX_MTU_SIZE - sizeof(struct rxd_base_hdr), .size = (1ULL << RXD_MAX_TX_BITS), @@ -61,7 +60,6 @@ struct fi_tx_attr rxd_tx_attr = { struct fi_rx_attr rxd_rx_attr = { .caps = RXD_RX_CAPS, .op_flags = RXD_RX_OP_FLAGS, - .comp_order = FI_ORDER_NONE, .msg_order = RXD_MSG_ORDER, .total_buffered_recv = 0, .size = (1ULL << RXD_MAX_RX_BITS), diff --git a/prov/rxm/src/rxm_attr.c b/prov/rxm/src/rxm_attr.c index bfb7cc1a584..7d20bd84ec0 100644 --- a/prov/rxm/src/rxm_attr.c +++ b/prov/rxm/src/rxm_attr.c @@ -52,7 +52,6 @@ struct fi_tx_attr rxm_tx_attr = { .caps = RXM_TX_CAPS | FI_HMEM, .op_flags = RXM_PASSTHRU_TX_OP_FLAGS | RXM_TX_OP_FLAGS, .msg_order = ~0x0ULL, - .comp_order = FI_ORDER_NONE, .size = RXM_TX_SIZE, .iov_limit = RXM_IOV_LIMIT, .rma_iov_limit = RXM_IOV_LIMIT, @@ -62,7 +61,6 @@ struct fi_rx_attr rxm_rx_attr = { .caps = RXM_RX_CAPS | FI_HMEM, .op_flags = RXM_PASSTHRU_RX_OP_FLAGS | RXM_RX_OP_FLAGS, .msg_order = ~0x0ULL, - .comp_order = FI_ORDER_NONE, .size = RXM_RX_SIZE, .iov_limit= RXM_IOV_LIMIT, }; @@ -71,7 +69,6 @@ static struct fi_tx_attr rxm_tx_attr_coll = { .caps = RXM_TX_CAPS | FI_COLLECTIVE, .op_flags = RXM_PASSTHRU_TX_OP_FLAGS | RXM_TX_OP_FLAGS, .msg_order = ~0x0ULL, - .comp_order = FI_ORDER_NONE, .size = RXM_TX_SIZE, .iov_limit = RXM_IOV_LIMIT, .rma_iov_limit = RXM_IOV_LIMIT, @@ -81,7 +78,6 @@ static struct fi_rx_attr 
rxm_rx_attr_coll = { .caps = RXM_RX_CAPS | FI_COLLECTIVE, .op_flags = RXM_PASSTHRU_RX_OP_FLAGS | RXM_RX_OP_FLAGS, .msg_order = ~0x0ULL, - .comp_order = FI_ORDER_NONE, .size = RXM_RX_SIZE, .iov_limit= RXM_IOV_LIMIT, }; @@ -155,7 +151,6 @@ static struct fi_tx_attr rxm_tx_thru_attr = { .caps = OFI_PRIMARY_TX_CAPS | OFI_SECONDARY_TX_CAPS, .op_flags = OFI_TX_OP_FLAGS, .msg_order = ~0x0ULL, - .comp_order = FI_ORDER_NONE, .inject_size = SIZE_MAX, .size = RXM_TX_SIZE, .iov_limit = SIZE_MAX, @@ -166,7 +161,6 @@ static struct fi_rx_attr rxm_rx_thru_attr = { .caps = OFI_PRIMARY_RX_CAPS | OFI_SECONDARY_RX_CAPS, .op_flags = OFI_RX_OP_FLAGS, .msg_order = ~0x0ULL, - .comp_order = FI_ORDER_NONE, .size = SIZE_MAX, .iov_limit= SIZE_MAX, }; diff --git a/prov/rxm/src/rxm_init.c b/prov/rxm/src/rxm_init.c index 011acb88e94..cfd8c150116 100644 --- a/prov/rxm/src/rxm_init.c +++ b/prov/rxm/src/rxm_init.c @@ -232,13 +232,11 @@ int rxm_info_to_core(uint32_t version, const struct fi_info *hints, core_info->tx_attr->op_flags = hints->tx_attr->op_flags & RXM_PASSTHRU_TX_OP_FLAGS; core_info->tx_attr->msg_order = hints->tx_attr->msg_order; - core_info->tx_attr->comp_order = hints->tx_attr->comp_order; } if (hints->rx_attr) { core_info->rx_attr->op_flags = hints->rx_attr->op_flags & RXM_PASSTHRU_RX_OP_FLAGS; core_info->rx_attr->msg_order = hints->rx_attr->msg_order; - core_info->rx_attr->comp_order = hints->rx_attr->comp_order; } if ((hints->caps & FI_HMEM) && ofi_hmem_p2p_disabled()) return -FI_ENODATA; @@ -274,11 +272,9 @@ rxm_info_thru_rxm(uint32_t version, const struct fi_info *core_info, info->mode = core_info->mode; *info->tx_attr = *core_info->tx_attr; - info->tx_attr->comp_order = base_info->tx_attr->comp_order; info->tx_attr->size = MIN(base_info->tx_attr->size, rxm_def_tx_size); *info->rx_attr = *core_info->rx_attr; - info->rx_attr->comp_order = base_info->rx_attr->comp_order; info->rx_attr->size = MIN(base_info->rx_attr->size, rxm_def_rx_size); *info->ep_attr = *base_info->ep_attr; @@ 
-320,7 +316,6 @@ int rxm_info_to_rxm(uint32_t version, const struct fi_info *core_info, info->tx_attr->caps = base_info->tx_attr->caps; info->tx_attr->mode = info->mode; info->tx_attr->msg_order = core_info->tx_attr->msg_order; - info->tx_attr->comp_order = base_info->tx_attr->comp_order; /* If the core provider requires registering send buffers, it's * usually faster to copy small transfer through bounce buffers @@ -355,7 +350,6 @@ int rxm_info_to_rxm(uint32_t version, const struct fi_info *core_info, info->rx_attr->caps = base_info->rx_attr->caps; info->rx_attr->mode = info->rx_attr->mode & ~FI_RX_CQ_DATA; info->rx_attr->msg_order = core_info->rx_attr->msg_order; - info->rx_attr->comp_order = base_info->rx_attr->comp_order; info->rx_attr->iov_limit = MIN(base_info->rx_attr->iov_limit, core_info->rx_attr->iov_limit); diff --git a/prov/shm/src/smr_attr.c b/prov/shm/src/smr_attr.c index 1c60d5c8ef3..70ebee0aa4f 100644 --- a/prov/shm/src/smr_attr.c +++ b/prov/shm/src/smr_attr.c @@ -45,7 +45,6 @@ struct fi_tx_attr smr_tx_attr = { .caps = SMR_TX_CAPS, .op_flags = SMR_TX_OP_FLAGS, - .comp_order = FI_ORDER_NONE, .msg_order = SMR_RMA_ORDER | FI_ORDER_SAS, .inject_size = SMR_INJECT_SIZE, .size = 1024, @@ -56,7 +55,6 @@ struct fi_tx_attr smr_tx_attr = { struct fi_rx_attr smr_rx_attr = { .caps = SMR_RX_CAPS, .op_flags = SMR_RX_OP_FLAGS, - .comp_order = FI_ORDER_STRICT, .msg_order = SMR_RMA_ORDER | FI_ORDER_SAS, .size = 1024, .iov_limit = SMR_IOV_LIMIT @@ -65,7 +63,6 @@ struct fi_rx_attr smr_rx_attr = { struct fi_tx_attr smr_hmem_tx_attr = { .caps = SMR_HMEM_TX_CAPS, .op_flags = SMR_TX_OP_FLAGS, - .comp_order = FI_ORDER_NONE, .msg_order = SMR_RMA_ORDER | FI_ORDER_SAS, .inject_size = 0, .size = 1024, @@ -76,7 +73,6 @@ struct fi_tx_attr smr_hmem_tx_attr = { struct fi_rx_attr smr_hmem_rx_attr = { .caps = SMR_HMEM_RX_CAPS, .op_flags = SMR_RX_OP_FLAGS, - .comp_order = FI_ORDER_STRICT, .msg_order = SMR_RMA_ORDER | FI_ORDER_SAS, .size = 1024, .iov_limit = SMR_IOV_LIMIT diff --git 
a/prov/sm2/src/sm2_attr.c b/prov/sm2/src/sm2_attr.c index 3562b4d09fc..148bff5792a 100644 --- a/prov/sm2/src/sm2_attr.c +++ b/prov/sm2/src/sm2_attr.c @@ -49,7 +49,6 @@ struct fi_tx_attr sm2_tx_attr = { .caps = SM2_TX_CAPS, .op_flags = SM2_TX_OP_FLAGS, - .comp_order = FI_ORDER_NONE, .msg_order = FI_ORDER_SAS, .inject_size = SM2_ATOMIC_INJECT_SIZE, .size = 1024, @@ -60,7 +59,6 @@ struct fi_tx_attr sm2_tx_attr = { struct fi_rx_attr sm2_rx_attr = { .caps = SM2_RX_CAPS, .op_flags = SM2_RX_OP_FLAGS, - .comp_order = FI_ORDER_STRICT, .msg_order = FI_ORDER_SAS, .size = 1024, .iov_limit = SM2_IOV_LIMIT, @@ -69,7 +67,6 @@ struct fi_rx_attr sm2_rx_attr = { struct fi_tx_attr sm2_hmem_tx_attr = { .caps = SM2_HMEM_TX_CAPS, .op_flags = SM2_TX_OP_FLAGS, - .comp_order = FI_ORDER_NONE, .msg_order = FI_ORDER_SAS, .inject_size = 0, .size = 1024, @@ -80,7 +77,6 @@ struct fi_tx_attr sm2_hmem_tx_attr = { struct fi_rx_attr sm2_hmem_rx_attr = { .caps = SM2_HMEM_RX_CAPS, .op_flags = SM2_RX_OP_FLAGS, - .comp_order = FI_ORDER_STRICT, .msg_order = FI_ORDER_SAS, .size = 1024, .iov_limit = SM2_IOV_LIMIT, diff --git a/prov/tcp/src/xnet_attr.c b/prov/tcp/src/xnet_attr.c index 0a7b7be53fa..93711caa550 100644 --- a/prov/tcp/src/xnet_attr.c +++ b/prov/tcp/src/xnet_attr.c @@ -58,7 +58,6 @@ static struct fi_tx_attr xnet_tx_attr = { .caps = XNET_EP_CAPS | XNET_TX_CAPS, .op_flags = XNET_TX_OP_FLAGS, - .comp_order = FI_ORDER_STRICT, .msg_order = XNET_MSG_ORDER, .inject_size = XNET_DEF_INJECT, .size = 1024, @@ -69,7 +68,6 @@ static struct fi_tx_attr xnet_tx_attr = { static struct fi_rx_attr xnet_rx_attr = { .caps = XNET_EP_CAPS | XNET_RX_CAPS, .op_flags = XNET_RX_OP_FLAGS, - .comp_order = FI_ORDER_STRICT, .msg_order = XNET_MSG_ORDER, .total_buffered_recv = 0, .size = 1024, @@ -90,7 +88,6 @@ static struct fi_ep_attr xnet_ep_attr = { static struct fi_tx_attr xnet_srx_tx_attr = { .caps = XNET_SRX_EP_CAPS | XNET_TX_CAPS, .op_flags = XNET_TX_OP_FLAGS, - .comp_order = FI_ORDER_NONE, .msg_order = XNET_MSG_ORDER, 
.inject_size = XNET_DEF_INJECT, .size = 1024, @@ -101,7 +98,6 @@ static struct fi_tx_attr xnet_srx_tx_attr = { static struct fi_rx_attr xnet_srx_rx_attr = { .caps = XNET_SRX_EP_CAPS | XNET_SRX_CAPS, .op_flags = XNET_SRX_OP_FLAGS, - .comp_order = FI_ORDER_NONE, .msg_order = XNET_MSG_ORDER, .total_buffered_recv = 0, .size = 65536, @@ -123,7 +119,6 @@ static struct fi_ep_attr xnet_srx_ep_attr = { static struct fi_tx_attr xnet_rdm_tx_attr = { .caps = XNET_RDM_EP_CAPS | XNET_TX_CAPS, .op_flags = XNET_TX_OP_FLAGS, - .comp_order = FI_ORDER_STRICT, .msg_order = XNET_MSG_ORDER, .inject_size = XNET_DEF_INJECT, .size = 65536, @@ -134,7 +129,6 @@ static struct fi_tx_attr xnet_rdm_tx_attr = { static struct fi_rx_attr xnet_rdm_rx_attr = { .caps = XNET_RDM_EP_CAPS | XNET_SRX_CAPS, .op_flags = XNET_SRX_OP_FLAGS, - .comp_order = FI_ORDER_STRICT, .msg_order = XNET_MSG_ORDER, .total_buffered_recv = 0, .size = 65536, diff --git a/prov/ucx/src/ucx_init.c b/prov/ucx/src/ucx_init.c index ec246fe8d0e..8bb7ba26c30 100644 --- a/prov/ucx/src/ucx_init.c +++ b/prov/ucx/src/ucx_init.c @@ -109,7 +109,6 @@ static struct fi_rx_attr ucx_rx_attrs = { .mode = FI_UCX_MODE, .op_flags = FI_UCX_RX_FLAGS, .msg_order = FI_ORDER_SAS, - .comp_order = FI_ORDER_NONE, .total_buffered_recv = ~(0ULL), .size = 384, .iov_limit = 4, @@ -120,7 +119,6 @@ static struct fi_tx_attr ucx_tx_attrs = { .mode = FI_UCX_MODE, .op_flags = FI_UCX_TX_FLAGS, .msg_order = FI_ORDER_SAS, - .comp_order = FI_ORDER_NONE, .inject_size = FI_UCX_DEFAULT_INJECT_SIZE, /* Should be setup after init */ .size = 384, .iov_limit = 1, diff --git a/prov/udp/src/udpx_attr.c b/prov/udp/src/udpx_attr.c index fb1f974c9a6..62e2437754c 100644 --- a/prov/udp/src/udpx_attr.c +++ b/prov/udp/src/udpx_attr.c @@ -38,7 +38,6 @@ struct fi_tx_attr udpx_tx_attr = { .caps = UDPX_TX_CAPS, - .comp_order = FI_ORDER_STRICT, .inject_size = 1472, .size = 1024, .iov_limit = UDPX_IOV_LIMIT @@ -46,7 +45,6 @@ struct fi_tx_attr udpx_tx_attr = { struct fi_rx_attr udpx_rx_attr = 
{ .caps = UDPX_RX_CAPS, - .comp_order = FI_ORDER_STRICT, .total_buffered_recv = (1 << 16), .size = 1024, .iov_limit = UDPX_IOV_LIMIT diff --git a/prov/util/src/util_attr.c b/prov/util/src/util_attr.c index 29d52096c29..b4d0af0901f 100644 --- a/prov/util/src/util_attr.c +++ b/prov/util/src/util_attr.c @@ -816,7 +816,7 @@ int ofi_check_rx_attr(const struct fi_provider *prov, return -FI_ENODATA; } - if (user_attr->comp_order & ~(prov_attr->comp_order)) { + if (user_attr->comp_order) { FI_INFO(prov, FI_LOG_CORE, "comp_order not supported\n"); OFI_INFO_CHECK(prov, prov_attr, user_attr, comp_order, FI_TYPE_MSG_ORDER); @@ -914,7 +914,7 @@ int ofi_check_tx_attr(const struct fi_provider *prov, return -FI_ENODATA; } - if (user_attr->comp_order & ~(prov_attr->comp_order)) { + if (user_attr->comp_order) { FI_INFO(prov, FI_LOG_CORE, "comp_order not supported\n"); OFI_INFO_CHECK(prov, prov_attr, user_attr, comp_order, FI_TYPE_MSG_ORDER); diff --git a/prov/verbs/src/verbs_info.c b/prov/verbs/src/verbs_info.c index b692788e82d..f2f8a8ecd89 100644 --- a/prov/verbs/src/verbs_info.c +++ b/prov/verbs/src/verbs_info.c @@ -109,7 +109,6 @@ const struct fi_rx_attr verbs_rx_attr = { .mode = VERBS_RX_MODE, .op_flags = FI_COMPLETION, .msg_order = VERBS_MSG_ORDER, - .comp_order = FI_ORDER_STRICT | FI_ORDER_DATA, .total_buffered_recv = 0, }; @@ -118,7 +117,6 @@ const struct fi_rx_attr verbs_dgram_rx_attr = { .mode = VERBS_DGRAM_RX_MODE | VERBS_RX_MODE, .op_flags = FI_COMPLETION, .msg_order = VERBS_MSG_ORDER, - .comp_order = FI_ORDER_STRICT | FI_ORDER_DATA, .total_buffered_recv = 0, }; @@ -127,7 +125,6 @@ const struct fi_tx_attr verbs_tx_attr = { .mode = 0, .op_flags = VERBS_TX_OP_FLAGS, .msg_order = VERBS_MSG_ORDER, - .comp_order = FI_ORDER_STRICT, .inject_size = 0, .rma_iov_limit = 1, }; @@ -137,7 +134,6 @@ const struct fi_tx_attr verbs_dgram_tx_attr = { .mode = 0, .op_flags = VERBS_TX_OP_FLAGS, .msg_order = VERBS_MSG_ORDER, - .comp_order = FI_ORDER_STRICT, .inject_size = 0, .rma_iov_limit = 
1, }; diff --git a/src/fi_tostr.c b/src/fi_tostr.c index c76841333cd..1e5bec86379 100644 --- a/src/fi_tostr.c +++ b/src/fi_tostr.c @@ -180,19 +180,6 @@ static void ofi_tostr_msgorder(char *buf, size_t len, uint64_t flags) ofi_remove_comma(buf); } -static void ofi_tostr_comporder(char *buf, size_t len, uint64_t flags) -{ - if ((flags & FI_ORDER_STRICT) == FI_ORDER_NONE) { - ofi_strncatf(buf, len, "FI_ORDER_NONE, "); - } else if ((flags & FI_ORDER_STRICT) == FI_ORDER_STRICT) { - ofi_strncatf(buf, len, "FI_ORDER_STRICT, "); - } - - IFFLAGSTRN(flags, FI_ORDER_DATA, len); - - ofi_remove_comma(buf); -} - static void ofi_tostr_caps(char *buf, size_t len, uint64_t caps) { IFFLAGSTRN(caps, FI_MSG, len); @@ -328,10 +315,6 @@ ofi_tostr_tx_attr(char *buf, size_t len, const struct fi_tx_attr *attr, ofi_tostr_msgorder(buf, len, attr->msg_order); ofi_strncatf(buf, len, " ]\n"); - ofi_strncatf(buf, len, "%s%scomp_order: [ ", prefix, TAB); - ofi_tostr_comporder(buf, len, attr->comp_order); - ofi_strncatf(buf, len, " ]\n"); - ofi_strncatf(buf, len, "%s%sinject_size: %zu\n", prefix, TAB, attr->inject_size); ofi_strncatf(buf, len, "%s%ssize: %zu\n", prefix, TAB, attr->size); @@ -368,10 +351,6 @@ ofi_tostr_rx_attr(char *buf, size_t len, const struct fi_rx_attr *attr, ofi_tostr_msgorder(buf, len, attr->msg_order); ofi_strncatf(buf, len, " ]\n"); - ofi_strncatf(buf, len, "%s%scomp_order: [ ", prefix, TAB); - ofi_tostr_comporder(buf, len, attr->comp_order); - ofi_strncatf(buf, len, " ]\n"); - ofi_strncatf(buf, len, "%s%stotal_buffered_recv: %zu\n", prefix, TAB, attr->total_buffered_recv); ofi_strncatf(buf, len, "%s%ssize: %zu\n", prefix, TAB, attr->size); From 133d96578f1444d4c5f28e6c09585a4f6ea688ab Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 25 Sep 2023 17:59:06 -0700 Subject: [PATCH 21/34] core: Remove total_buffered_recv Field was deprecated and only serves as a placeholder for compatibility. 
Signed-off-by: Sean Hefty --- fabtests/functional/unexpected_msg.c | 1 - man/fi_endpoint.3.md | 18 ++---------------- man/fi_info.1.md | 1 - man/fi_tagged.3.md | 2 +- prov/efa/src/efa_prov_info.c | 2 -- prov/mrail/src/mrail_attr.c | 1 - prov/opx/include/rdma/opx/fi_opx_endpoint.h | 17 ++++++++--------- prov/opx/src/fi_opx_ep.c | 3 --- prov/opx/src/fi_opx_info.c | 1 - prov/opx/src/fi_opx_init.c | 13 +++++-------- prov/opx/src/fi_opx_sep.c | 5 ++--- prov/psm2/src/psmx2_attr.c | 1 - prov/rxd/src/rxd_attr.c | 1 - prov/tcp/src/xnet_attr.c | 3 --- prov/ucx/src/ucx_init.c | 1 - prov/udp/src/udpx_attr.c | 1 - prov/util/src/util_attr.c | 13 +------------ prov/verbs/src/verbs_info.c | 2 -- src/fi_tostr.c | 2 -- 19 files changed, 19 insertions(+), 69 deletions(-) diff --git a/fabtests/functional/unexpected_msg.c b/fabtests/functional/unexpected_msg.c index 273657656c9..b01300c4210 100644 --- a/fabtests/functional/unexpected_msg.c +++ b/fabtests/functional/unexpected_msg.c @@ -383,7 +383,6 @@ int main(int argc, char **argv) hints->mode = FI_CONTEXT; hints->domain_attr->mr_mode = opts.mr_mode; hints->domain_attr->resource_mgmt = FI_RM_ENABLED; - hints->rx_attr->total_buffered_recv = 0; hints->caps = FI_TAGGED; hints->addr_format = opts.address_format; diff --git a/man/fi_endpoint.3.md b/man/fi_endpoint.3.md index 1fedf0576b6..aa6892138b9 100644 --- a/man/fi_endpoint.3.md +++ b/man/fi_endpoint.3.md @@ -1156,7 +1156,6 @@ struct fi_rx_attr { uint64_t op_flags; uint64_t msg_order; uint64_t comp_order; - size_t total_buffered_recv; size_t size; size_t iov_limit; }; @@ -1224,21 +1223,8 @@ to 0. ## total_buffered_recv -This field is supported for backwards compatibility purposes. -It is a hint to the provider of the total available space -that may be needed to buffer messages that are received for which there -is no matching receive operation. The provider may adjust or ignore -this value. The allocation of internal network buffering among received -message is provider specific. 
For instance, a provider may limit the size -of messages which can be buffered or the amount of buffering allocated to -a single message. - -If receive side buffering is disabled (total_buffered_recv = 0) -and a message is received by an endpoint, then the behavior is dependent on -whether resource management has been enabled (FI_RM_ENABLED has be set or not). -See the Resource Management section of fi_domain.3 for further clarification. -It is recommended that applications enable resource management if they -anticipate receiving unexpected messages, rather than modifying this value. +This field is provided for version 1 compatibility and should be set +to 0. ## size diff --git a/man/fi_info.1.md b/man/fi_info.1.md index cbc93c54658..3b02c2f778d 100644 --- a/man/fi_info.1.md +++ b/man/fi_info.1.md @@ -154,7 +154,6 @@ fi_info: mode: [ ] op_flags: [ ] msg_order: [ FI_ORDER_RAR, FI_ORDER_RAW, FI_ORDER_RAS, FI_ORDER_WAW, FI_ORDER_WAS, FI_ORDER_SAW, FI_ORDER_SAS, FI_ORDER_RMA_RAR, FI_ORDER_RMA_RAW, FI_ORDER_RMA_WAW, FI_ORDER_ATOMIC_RAR, FI_ORDER_ATOMIC_RAW, FI_ORDER_ATOMIC_WAR, FI_ORDER_ATOMIC_WAW ] - total_buffered_recv: 0 size: 1024 iov_limit: 4 fi_ep_attr: diff --git a/man/fi_tagged.3.md b/man/fi_tagged.3.md index eca1080beea..63c126645db 100644 --- a/man/fi_tagged.3.md +++ b/man/fi_tagged.3.md @@ -290,7 +290,7 @@ The following flags may be used with fi_trecvmsg. *FI_PEEK* : The peek flag may be used to see if a specified message has arrived. A peek request is often useful on endpoints that have provider - allocated buffering enabled (see fi_rx_attr total_buffered_recv). + allocated buffering enabled. Unlike standard receive operations, a receive operation with the FI_PEEK flag set does not remain queued with the provider after the peek completes successfully. 
The peek operation operates asynchronously, and the results diff --git a/prov/efa/src/efa_prov_info.c b/prov/efa/src/efa_prov_info.c index e42115d7551..827cb29146b 100644 --- a/prov/efa/src/efa_prov_info.c +++ b/prov/efa/src/efa_prov_info.c @@ -244,7 +244,6 @@ const struct fi_rx_attr efa_dgrm_rx_attr = { .mode = FI_MSG_PREFIX | EFA_RX_MODE, .op_flags = EFA_RX_DGRM_OP_FLAGS, .msg_order = EFA_MSG_ORDER, - .total_buffered_recv = 0, .iov_limit = 1 }; @@ -268,7 +267,6 @@ const struct fi_rx_attr efa_rdm_rx_attr = { .mode = EFA_RX_MODE, .op_flags = EFA_RX_RDM_OP_FLAGS, .msg_order = EFA_MSG_ORDER, - .total_buffered_recv = 0, .iov_limit = 1 }; diff --git a/prov/mrail/src/mrail_attr.c b/prov/mrail/src/mrail_attr.c index 4a3d58b5dcc..e8fda11fe2b 100644 --- a/prov/mrail/src/mrail_attr.c +++ b/prov/mrail/src/mrail_attr.c @@ -46,7 +46,6 @@ struct fi_rx_attr mrail_rx_attr = { .caps = ~0x0ULL, .op_flags = MRAIL_PASSTHRU_RX_OP_FLAGS | MRAIL_RX_OP_FLAGS, .msg_order = ~0x0ULL, - .total_buffered_recv = SIZE_MAX, .size = SIZE_MAX, .iov_limit = SIZE_MAX, }; diff --git a/prov/opx/include/rdma/opx/fi_opx_endpoint.h b/prov/opx/include/rdma/opx/fi_opx_endpoint.h index 4114084d3df..9977bd7f47c 100644 --- a/prov/opx/include/rdma/opx/fi_opx_endpoint.h +++ b/prov/opx/include/rdma/opx/fi_opx_endpoint.h @@ -362,7 +362,6 @@ struct fi_opx_ep_rx { uint64_t caps; uint64_t mode; - size_t total_buffered_recv; /* TODO - is this only used by receive operations? 
*/ union fi_opx_addr self; struct fi_opx_context_slist *cq_err_ptr; @@ -646,7 +645,7 @@ static void fi_opx_dump_daos_av_addr_rank(struct fi_opx_ep *opx_ep, if (cur_av_rank) { union fi_opx_addr addr; addr.fi = cur_av_rank->fi_addr; - + if ((addr.uid.lid == find_addr.uid.lid) && (cur_av_rank->key.rank == opx_ep->daos_info.rank)) { found = 1; FI_DBG_TRACE(fi_opx_global.prov, FI_LOG_EP_DATA, "Dump av_rank_hashmap[%d] = rank:%d LID:0x%x fi_addr:0x%08lx - Found.\n", @@ -707,7 +706,7 @@ static struct fi_opx_daos_av_rank * fi_opx_get_daos_av_rank(struct fi_opx_ep *op if (cur_av_rank) { union fi_opx_addr addr; addr.fi = cur_av_rank->fi_addr; - + FI_DBG_TRACE(fi_opx_global.prov, FI_LOG_EP_DATA, "GET Dump av_rank_hashmap[%d] = rank:%d LID:0x%x fi_addr:0x%08lx\n", i++, cur_av_rank->key.rank, addr.uid.lid, addr.fi); @@ -971,7 +970,7 @@ void complete_receive_operation_internal (struct fid_ep *ep, abort(); break; } - + FI_DBG_TRACE(fi_opx_global.prov, FI_LOG_EP_DATA, "INJECT send_len %lu <= recv_len %lu; enqueue cq (completed)\n", send_len, recv_len); @@ -1536,8 +1535,8 @@ void complete_receive_operation_internal (struct fid_ep *ep, if (lock_required) { fprintf(stderr, "%s:%s():%d\n", __FILE__, __func__, __LINE__); abort(); } fi_opx_context_slist_insert_tail(context, rx->cq_pending_ptr); - /* Post a E_TRUNC to our local RX error queue because a client called receive - with too small a buffer. Tell them about it via the error cq */ + /* Post a E_TRUNC to our local RX error queue because a client called receive + with too small a buffer. 
Tell them about it via the error cq */ struct fi_opx_context_ext * ext = NULL; if (is_context_ext) { @@ -3136,7 +3135,7 @@ ssize_t fi_opx_ep_rx_recvmsg_internal (struct fi_opx_ep *opx_ep, FI_WARN(fi_opx_global.prov, FI_LOG_EP_DATA,"===================================== POST RECVMSG RETURN FI_ENOMEM\n"); return -FI_ENOMEM; } - + ext->opx_context.flags = flags | FI_OPX_CQ_CONTEXT_EXT; ext->opx_context.byte_counter = (uint64_t)-1; @@ -3604,11 +3603,11 @@ ssize_t fi_opx_ep_tx_send_internal (struct fid_ep *ep, FI_DBG_TRACE(fi_opx_global.prov, FI_LOG_EP_DATA, "===================================== SEND -- Eager send failed, trying next method\n"); } - + #ifndef FI_OPX_MP_EGR_DISABLE if (is_contiguous && total_len <= FI_OPX_MP_EGR_MAX_PAYLOAD_BYTES && - total_len > FI_OPX_MP_EGR_CHUNK_PAYLOAD_SIZE && + total_len > FI_OPX_MP_EGR_CHUNK_PAYLOAD_SIZE && !fi_opx_hfi1_tx_is_intranode(ep, addr.fi, caps)) { rc = fi_opx_hfi1_tx_send_try_mp_egr(ep, buf, len, desc, addr.fi, tag, context, data, lock_required, override_flags, diff --git a/prov/opx/src/fi_opx_ep.c b/prov/opx/src/fi_opx_ep.c index abe7d9df136..70454d51b5e 100644 --- a/prov/opx/src/fi_opx_ep.c +++ b/prov/opx/src/fi_opx_ep.c @@ -1029,8 +1029,6 @@ static int fi_opx_apply_info_and_init_ops(struct fi_opx_ep *opx_ep) { opx_ep->rx->caps |= info->rx_attr ? info->rx_attr->caps : info->caps; opx_ep->rx->mode |= info->rx_attr ? info->rx_attr->mode : 0; opx_ep->rx->op_flags |= info->rx_attr ? info->rx_attr->op_flags : 0; - opx_ep->rx->total_buffered_recv = info->rx_attr ? 
- info->rx_attr->total_buffered_recv : 0; // Init oprations per endpoint FI_DBG_TRACE(fi_opx_global.prov, FI_LOG_EP_DATA, "\n"); @@ -1666,7 +1664,6 @@ int fi_opx_alloc_default_rx_attr(struct fi_rx_attr **rx_attr) attr->mode = FI_CONTEXT2 | FI_ASYNC_IOV; attr->op_flags = 0; attr->msg_order = FI_OPX_DEFAULT_MSG_ORDER; - attr->total_buffered_recv = FI_OPX_HFI1_PACKET_MTU; attr->size = SIZE_MAX; //FI_OPX_RX_SIZE; attr->iov_limit = FI_OPX_IOV_LIMIT; diff --git a/prov/opx/src/fi_opx_info.c b/prov/opx/src/fi_opx_info.c index 4373ca39f6f..90473e9a7e0 100644 --- a/prov/opx/src/fi_opx_info.c +++ b/prov/opx/src/fi_opx_info.c @@ -57,7 +57,6 @@ void fi_opx_set_info(struct fi_info *fi, enum fi_progress progress) .mode = FI_OPX_DEFAULT_MODE, .op_flags = FI_MULTI_RECV, .msg_order = FI_OPX_DEFAULT_MSG_ORDER, - .total_buffered_recv = FI_OPX_HFI1_PACKET_MTU + 64 /* header */, .size = SIZE_MAX, .iov_limit = SIZE_MAX }; diff --git a/prov/opx/src/fi_opx_init.c b/prov/opx/src/fi_opx_init.c index 58a17b825ec..9c0a537894d 100644 --- a/prov/opx/src/fi_opx_init.c +++ b/prov/opx/src/fi_opx_init.c @@ -138,7 +138,7 @@ static int fi_opx_fillinfo(struct fi_info *fi, const char *node, uint64_t caps; union fi_opx_addr *addr; uint32_t fmt; - size_t len; + size_t len; if (!fi) goto err; @@ -322,9 +322,6 @@ static int fi_opx_fillinfo(struct fi_info *fi, const char *node, /* adjust parameters down from what requested if required */ fi->rx_attr->op_flags = hints->rx_attr->op_flags; - if (hints->rx_attr->total_buffered_recv > 0 && - hints->rx_attr->total_buffered_recv < fi_opx_global.default_rx_attr->total_buffered_recv) - fi->rx_attr->total_buffered_recv = hints->rx_attr->total_buffered_recv; } else if (hints && hints->caps) { fi->rx_attr->caps = hints->caps; } @@ -516,7 +513,7 @@ static int fi_opx_getinfo(uint32_t version, const char *node, *info = NULL; fi_opx_count = opx_hfi_get_hfi1_count(); FI_LOG(fi_opx_global.prov, FI_LOG_TRACE, FI_LOG_FABRIC, - "Detected %d hfi1(s) in the system\n", 
fi_opx_count); + "Detected %d hfi1(s) in the system\n", fi_opx_count); if (!fi_opx_count) { return -FI_ENODATA; @@ -532,7 +529,7 @@ static int fi_opx_getinfo(uint32_t version, const char *node, } FI_LOG(fi_opx_global.prov, FI_LOG_TRACE, FI_LOG_FABRIC, - "Successfully got getinfo for HFI %d\n", i); + "Successfully got getinfo for HFI %d\n", i); if (!*info) { *info = cur; @@ -558,7 +555,7 @@ static void fi_opx_fini() * so do our best and free storage */ pthread_mutex_trylock(&mm_lock); int locked = pthread_mutex_unlock(&mm_lock); /* rc 0 is unlocked */ - + struct dlist_entry *tmp; struct opx_tid_domain *tid_domain; @@ -644,7 +641,7 @@ OPX_INI fi_opx_global.progress = FI_PROGRESS_MANUAL; fi_opx_set_default_info(); // TODO: fold into fi_opx_set_defaults - /* Refrain from allocating memory dynamically in this INI function. + /* Refrain from allocating memory dynamically in this INI function. That sort of behavior will results in memory leaks for the fi_info executable. */ diff --git a/prov/opx/src/fi_opx_sep.c b/prov/opx/src/fi_opx_sep.c index 18879b90025..433430e8941 100644 --- a/prov/opx/src/fi_opx_sep.c +++ b/prov/opx/src/fi_opx_sep.c @@ -292,7 +292,6 @@ static int fi_opx_rx_ctx(struct fid_ep *sep, int index, info.rx_attr->mode = attr->mode; info.rx_attr->op_flags = attr->op_flags; info.rx_attr->msg_order = attr->msg_order; - info.rx_attr->total_buffered_recv = attr->total_buffered_recv; info.rx_attr->iov_limit = attr->iov_limit; info.ep_attr = calloc(1, sizeof(*info.ep_attr)); @@ -367,7 +366,7 @@ static int fi_opx_rx_ctx(struct fid_ep *sep, int index, info.domain_attr = NULL; info.ep_attr = NULL; info.tx_attr = NULL; - + return -errno; } @@ -512,7 +511,7 @@ int fi_opx_scalable_ep (struct fid_domain *domain, } memptr = opx_sep->memptr; free(memptr); - opx_sep = NULL; + opx_sep = NULL; } return -errno; } diff --git a/prov/psm2/src/psmx2_attr.c b/prov/psm2/src/psmx2_attr.c index 5c174624363..02f1a33d854 100644 --- a/prov/psm2/src/psmx2_attr.c +++ 
b/prov/psm2/src/psmx2_attr.c @@ -61,7 +61,6 @@ static struct fi_rx_attr psmx2_rx_attr = { .mode = FI_CONTEXT, /* 0 */ .op_flags = PSMX2_OP_FLAGS, .msg_order = PSMX2_MSG_ORDER, - .total_buffered_recv = UINT64_MAX, .size = UINT64_MAX, .iov_limit = 1, }; diff --git a/prov/rxd/src/rxd_attr.c b/prov/rxd/src/rxd_attr.c index f59b581103c..d06515bec98 100644 --- a/prov/rxd/src/rxd_attr.c +++ b/prov/rxd/src/rxd_attr.c @@ -61,7 +61,6 @@ struct fi_rx_attr rxd_rx_attr = { .caps = RXD_RX_CAPS, .op_flags = RXD_RX_OP_FLAGS, .msg_order = RXD_MSG_ORDER, - .total_buffered_recv = 0, .size = (1ULL << RXD_MAX_RX_BITS), .iov_limit = RXD_IOV_LIMIT }; diff --git a/prov/tcp/src/xnet_attr.c b/prov/tcp/src/xnet_attr.c index 93711caa550..2ae79529cc5 100644 --- a/prov/tcp/src/xnet_attr.c +++ b/prov/tcp/src/xnet_attr.c @@ -69,7 +69,6 @@ static struct fi_rx_attr xnet_rx_attr = { .caps = XNET_EP_CAPS | XNET_RX_CAPS, .op_flags = XNET_RX_OP_FLAGS, .msg_order = XNET_MSG_ORDER, - .total_buffered_recv = 0, .size = 1024, .iov_limit = XNET_IOV_LIMIT, }; @@ -99,7 +98,6 @@ static struct fi_rx_attr xnet_srx_rx_attr = { .caps = XNET_SRX_EP_CAPS | XNET_SRX_CAPS, .op_flags = XNET_SRX_OP_FLAGS, .msg_order = XNET_MSG_ORDER, - .total_buffered_recv = 0, .size = 65536, .iov_limit = XNET_IOV_LIMIT, }; @@ -130,7 +128,6 @@ static struct fi_rx_attr xnet_rdm_rx_attr = { .caps = XNET_RDM_EP_CAPS | XNET_SRX_CAPS, .op_flags = XNET_SRX_OP_FLAGS, .msg_order = XNET_MSG_ORDER, - .total_buffered_recv = 0, .size = 65536, .iov_limit = XNET_IOV_LIMIT, }; diff --git a/prov/ucx/src/ucx_init.c b/prov/ucx/src/ucx_init.c index 8bb7ba26c30..ebd02a898ee 100644 --- a/prov/ucx/src/ucx_init.c +++ b/prov/ucx/src/ucx_init.c @@ -109,7 +109,6 @@ static struct fi_rx_attr ucx_rx_attrs = { .mode = FI_UCX_MODE, .op_flags = FI_UCX_RX_FLAGS, .msg_order = FI_ORDER_SAS, - .total_buffered_recv = ~(0ULL), .size = 384, .iov_limit = 4, }; diff --git a/prov/udp/src/udpx_attr.c b/prov/udp/src/udpx_attr.c index 62e2437754c..c6ad7df46fe 100644 --- 
a/prov/udp/src/udpx_attr.c +++ b/prov/udp/src/udpx_attr.c @@ -45,7 +45,6 @@ struct fi_tx_attr udpx_tx_attr = { struct fi_rx_attr udpx_rx_attr = { .caps = UDPX_RX_CAPS, - .total_buffered_recv = (1 << 16), .size = 1024, .iov_limit = UDPX_IOV_LIMIT }; diff --git a/prov/util/src/util_attr.c b/prov/util/src/util_attr.c index b4d0af0901f..de9a7335c7d 100644 --- a/prov/util/src/util_attr.c +++ b/prov/util/src/util_attr.c @@ -784,7 +784,6 @@ int ofi_check_rx_attr(const struct fi_provider *prov, const struct fi_rx_attr *user_attr, uint64_t info_mode) { const struct fi_rx_attr *prov_attr = prov_info->rx_attr; - int rm_enabled = (prov_info->domain_attr->resource_mgmt == FI_RM_ENABLED); if (user_attr->caps & ~OFI_IGNORED_RX_CAPS) FI_INFO(prov, FI_LOG_CORE, "Tx only caps ignored in Rx caps\n"); @@ -823,7 +822,7 @@ int ofi_check_rx_attr(const struct fi_provider *prov, return -FI_ENODATA; } - if (user_attr->total_buffered_recv > prov_attr->total_buffered_recv) { + if (user_attr->total_buffered_recv) { FI_INFO(prov, FI_LOG_CORE, "total_buffered_recv too large\n"); OFI_INFO_CHECK_SIZE(prov, prov_attr, user_attr, total_buffered_recv); @@ -842,15 +841,6 @@ int ofi_check_rx_attr(const struct fi_provider *prov, return -FI_ENODATA; } - if (!rm_enabled && - user_attr->total_buffered_recv > prov_attr->total_buffered_recv) { - /* Just log a notification, but ignore the value */ - FI_INFO(prov, FI_LOG_CORE, - "Total buffered recv size exceeds supported size\n"); - OFI_INFO_CHECK_SIZE(prov, prov_attr, user_attr, - total_buffered_recv); - } - return 0; } @@ -1188,7 +1178,6 @@ static void fi_alter_rx_attr(struct fi_rx_attr *attr, return; attr->op_flags = hints->op_flags; - attr->total_buffered_recv = hints->total_buffered_recv; if (hints->size) attr->size = hints->size; if (hints->iov_limit) diff --git a/prov/verbs/src/verbs_info.c b/prov/verbs/src/verbs_info.c index f2f8a8ecd89..f6bc3b42e11 100644 --- a/prov/verbs/src/verbs_info.c +++ b/prov/verbs/src/verbs_info.c @@ -109,7 +109,6 @@ const 
struct fi_rx_attr verbs_rx_attr = { .mode = VERBS_RX_MODE, .op_flags = FI_COMPLETION, .msg_order = VERBS_MSG_ORDER, - .total_buffered_recv = 0, }; const struct fi_rx_attr verbs_dgram_rx_attr = { @@ -117,7 +116,6 @@ const struct fi_rx_attr verbs_dgram_rx_attr = { .mode = VERBS_DGRAM_RX_MODE | VERBS_RX_MODE, .op_flags = FI_COMPLETION, .msg_order = VERBS_MSG_ORDER, - .total_buffered_recv = 0, }; const struct fi_tx_attr verbs_tx_attr = { diff --git a/src/fi_tostr.c b/src/fi_tostr.c index 1e5bec86379..aa765772295 100644 --- a/src/fi_tostr.c +++ b/src/fi_tostr.c @@ -351,8 +351,6 @@ ofi_tostr_rx_attr(char *buf, size_t len, const struct fi_rx_attr *attr, ofi_tostr_msgorder(buf, len, attr->msg_order); ofi_strncatf(buf, len, " ]\n"); - ofi_strncatf(buf, len, "%s%stotal_buffered_recv: %zu\n", prefix, TAB, - attr->total_buffered_recv); ofi_strncatf(buf, len, "%s%ssize: %zu\n", prefix, TAB, attr->size); ofi_strncatf(buf, len, "%s%siov_limit: %zu\n", prefix, TAB, attr->iov_limit); From d629944e2c1d80c80834af08ef318fbadb7a1a56 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Tue, 26 Sep 2023 09:46:27 -0700 Subject: [PATCH 22/34] core: Remove fid_wait API Support for wait sets adds significant complexity to the provider implementation and is basically an abstraction around the OS constructs for poll/epoll (on Linux). Remove the feature from the API, but keep the internal implementation for now. This allows providers to move away from wait set support. Note that blocking support and support for native wait objects (e.g. epoll fd's) are still supported by the API. Only the wait set abstraction is removed, which allows providers control over creating wait objects. 
Signed-off-by: Sean Hefty --- Makefile.am | 2 - fabtests/Makefile.am | 6 - fabtests/Makefile.win | 4 +- fabtests/common/shared.c | 1 - fabtests/fabtests.vcxproj | 1 - fabtests/fabtests.vcxproj.filters | 3 - fabtests/functional/dgram_waitset.c | 192 ------------------- fabtests/man/fabtests.7.md | 3 - fabtests/man/man1/fi_dgram_waitset.1 | 1 - fabtests/pytest/default/test_dgram.py | 6 - fabtests/scripts/runfabtests.sh | 1 - fabtests/test_configs/efa/efa-neuron.exclude | 3 - fabtests/test_configs/efa/efa.exclude | 3 - fabtests/test_configs/psm3/psm3.exclude | 1 - fabtests/ubertest/uber.c | 2 - include/ofi.h | 38 ++++ include/ofi_enosys.h | 7 - include/ofi_util.h | 2 +- include/rdma/fi_domain.h | 7 - include/rdma/fi_eq.h | 36 +--- man/fi_cntr.3.md | 18 +- man/fi_cq.3.md | 19 +- man/fi_eq.3.md | 26 +-- man/fi_poll.3.md | 124 ++---------- man/man3/fi_wait.3 | 1 - man/man3/fi_wait_open.3 | 1 - prov/efa/src/efa_cntr.c | 2 +- prov/hook/src/hook_cntr.c | 3 - prov/hook/src/hook_cq.c | 3 - prov/hook/src/hook_eq.c | 3 - prov/hook/src/hook_wait.c | 8 +- prov/hook/trace/src/hook_trace.c | 8 +- prov/opx/include/rdma/fi_direct_domain.h | 2 +- prov/opx/include/rdma/fi_direct_eq.h | 2 +- prov/psm2/include/rdma/fi_direct_domain.h | 2 +- prov/psm2/include/rdma/fi_direct_eq.h | 2 +- prov/psm2/src/psmx2_cntr.c | 13 +- prov/psm2/src/psmx2_cq.c | 13 +- prov/psm2/src/psmx2_wait.c | 6 +- prov/psm3/psm3/ptl_ips/ptl_rcvthread.c | 4 +- prov/psm3/src/psmx3_cntr.c | 11 +- prov/psm3/src/psmx3_cq.c | 11 +- prov/rxd/src/rxd_cntr.c | 2 +- prov/rxd/src/rxd_cq.c | 2 +- prov/util/src/util_cntr.c | 16 +- prov/util/src/util_cq.c | 13 +- prov/util/src/util_eq.c | 14 +- prov/util/src/util_wait.c | 8 +- src/enosys.c | 6 +- src/fi_tostr.c | 1 - 50 files changed, 109 insertions(+), 554 deletions(-) delete mode 100644 fabtests/functional/dgram_waitset.c delete mode 100644 fabtests/man/man1/fi_dgram_waitset.1 delete mode 100644 man/man3/fi_wait.3 delete mode 100644 man/man3/fi_wait_open.3 diff --git 
a/Makefile.am b/Makefile.am index 8aac2b0999b..57d2bb3a6cb 100644 --- a/Makefile.am +++ b/Makefile.am @@ -413,8 +413,6 @@ dummy_man_pages = \ man/man3/fi_tsendmsg.3 \ man/man3/fi_tsendv.3 \ man/man3/fi_tx_size_left.3 \ - man/man3/fi_wait.3 \ - man/man3/fi_wait_open.3 \ man/man3/fi_write.3 \ man/man3/fi_writedata.3 \ man/man3/fi_writemsg.3 \ diff --git a/fabtests/Makefile.am b/fabtests/Makefile.am index e832e8404ee..c6757044b89 100644 --- a/fabtests/Makefile.am +++ b/fabtests/Makefile.am @@ -22,7 +22,6 @@ bin_PROGRAMS = \ functional/fi_rdm_deferred_wq \ functional/fi_dgram \ functional/fi_mcast \ - functional/fi_dgram_waitset \ functional/fi_rdm_tagged_peek \ functional/fi_cq_data \ functional/fi_poll \ @@ -275,10 +274,6 @@ functional_fi_mcast_SOURCES = \ functional/mcast.c functional_fi_mcast_LDADD = libfabtests.la -functional_fi_dgram_waitset_SOURCES = \ - functional/dgram_waitset.c -functional_fi_dgram_waitset_LDADD = libfabtests.la - functional_fi_rdm_tagged_peek_SOURCES = \ functional/rdm_tagged_peek.c functional_fi_rdm_tagged_peek_LDADD = libfabtests.la @@ -592,7 +587,6 @@ dummy_man_pages = \ man/man1/fi_cm_data.1 \ man/man1/fi_cq_data.1 \ man/man1/fi_dgram.1 \ - man/man1/fi_dgram_waitset.1 \ man/man1/fi_inj_complete.1 \ man/man1/fi_mcast.1 \ man/man1/fi_msg.1 \ diff --git a/fabtests/Makefile.win b/fabtests/Makefile.win index 48487cd56dc..bb579e24f1d 100644 --- a/fabtests/Makefile.win +++ b/fabtests/Makefile.win @@ -78,7 +78,7 @@ benchmarks: $(outdir)\dgram_pingpong.exe $(outdir)\msg_bw.exe \ $(outdir)\rdm_tagged_pingpong.exe $(outdir)\rma_bw.exe functional: $(outdir)\av_xfer.exe $(outdir)\bw.exe $(outdir)\cm_data.exe $(outdir)\cq_data.exe \ - $(outdir)\dgram.exe $(outdir)\dgram_waitset.exe $(outdir)\msg.exe $(outdir)\msg_epoll.exe \ + $(outdir)\dgram.exe $(outdir)\msg.exe $(outdir)\msg_epoll.exe \ $(outdir)\inject_test.exe $(outdir)\msg_sockets.exe $(outdir)\multi_mr.exe \ $(outdir)\multi_ep.exe $(outdir)\multi_recv.exe $(outdir)\poll.exe $(outdir)\rdm.exe \ 
$(outdir)\rdm_atomic.exe $(outdir)\rdm_multi_client.exe $(outdir)\rdm_rma_event.exe \ @@ -124,8 +124,6 @@ $(outdir)\cq_data.exe: {functional}cq_data.c $(basedeps) $(outdir)\dgram.exe: {functional}dgram.c $(basedeps) -$(outdir)\dgram_waitset.exe: {functional}dgram_waitset.c $(basedeps) - $(outdir)\msg.exe: {functional}msg.c $(basedeps) $(outdir)\msg_epoll.exe: {functional}msg_epoll.c $(basedeps) diff --git a/fabtests/common/shared.c b/fabtests/common/shared.c index e3e6065df18..d5a5ab3d3eb 100644 --- a/fabtests/common/shared.c +++ b/fabtests/common/shared.c @@ -1732,7 +1732,6 @@ void ft_close_fids(void) FT_CLOSE_FID(stx); FT_CLOSE_FID(domain); FT_CLOSE_FID(eq); - FT_CLOSE_FID(waitset); FT_CLOSE_FID(fabric); } diff --git a/fabtests/fabtests.vcxproj b/fabtests/fabtests.vcxproj index 6f393187490..d4062c0749a 100644 --- a/fabtests/fabtests.vcxproj +++ b/fabtests/fabtests.vcxproj @@ -224,7 +224,6 @@ - diff --git a/fabtests/fabtests.vcxproj.filters b/fabtests/fabtests.vcxproj.filters index e6fc8842b58..fa97ff0698e 100644 --- a/fabtests/fabtests.vcxproj.filters +++ b/fabtests/fabtests.vcxproj.filters @@ -78,9 +78,6 @@ Source Files\functional - - Source Files\functional - Source Files\functional diff --git a/fabtests/functional/dgram_waitset.c b/fabtests/functional/dgram_waitset.c deleted file mode 100644 index 8b72d76e254..00000000000 --- a/fabtests/functional/dgram_waitset.c +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Copyright (c) 2013-2015 Intel Corporation. All rights reserved. - * Copyright (c) 2015-2016 Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under the BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include - -#include - -#include - -static int alloc_ep_res(struct fi_info *fi) -{ - struct fi_wait_attr wait_attr; - int ret; - - memset(&wait_attr, 0, sizeof wait_attr); - wait_attr.wait_obj = FI_WAIT_UNSPEC; - ret = fi_wait_open(fabric, &wait_attr, &waitset); - if (ret) { - FT_PRINTERR("fi_wait_open", ret); - return ret; - } - - ret = ft_alloc_active_res(fi); - if (ret) - return ret; - - return 0; -} - -static int init_fabric(void) -{ - int ret; - - ret = ft_getinfo(hints, &fi); - if (ret) - return ret; - - ret = ft_open_fabric_res(); - if (ret) - return ret; - - ret = alloc_ep_res(fi); - if (ret) - return ret; - - ret = ft_enable_ep_recv(); - if (ret) - return ret; - - return 0; -} - -static int send_recv() -{ - struct fi_cq_entry comp; - int ret; - - ret = fi_recv(ep, rx_buf, rx_size + ft_rx_prefix_size(), - mr_desc, 0, &rx_ctx); - if (ret) - return ret; - - ft_sync(); - - fprintf(stdout, "Posting a send...\n"); - ret = ft_post_tx(ep, remote_fi_addr, tx_size, NO_CQ_DATA, &tx_ctx); - if (ret) - return ret; - - while ((tx_cq_cntr < tx_seq) || (rx_cq_cntr < rx_seq)) { - /* Wait for completion events on CQs */ - ret = fi_wait(waitset, -1); - if (ret < 0) { - FT_PRINTERR("fi_wait", ret); - return ret; - } - - /* Read 
the send completion entry */ - ret = fi_cq_read(txcq, &comp, 1); - if (ret > 0) { - tx_cq_cntr++; - fprintf(stdout, "Received send completion event!\n"); - } else if (ret < 0 && ret != -FI_EAGAIN) { - if (ret == -FI_EAVAIL) { - ret = ft_cq_readerr(txcq); - } else { - FT_PRINTERR("fi_cq_read", ret); - } - return ret; - } - - /* Read the recv completion entry */ - ret = fi_cq_read(rxcq, &comp, 1); - if (ret > 0) { - rx_cq_cntr++; - fprintf(stdout, "Received recv completion event!\n"); - } else if (ret < 0 && ret != -FI_EAGAIN) { - if (ret == -FI_EAVAIL) { - ret = ft_cq_readerr(rxcq); - } else { - FT_PRINTERR("fi_cq_read", ret); - } - return ret; - } - } - - return 0; -} - -static int run(void) -{ - int ret; - - ret = init_fabric(); - if (ret) - return ret; - - ret = ft_init_av(); - if (ret) - return ret; - - return send_recv(); -} - -int main(int argc, char **argv) -{ - int op, ret = 0; - - opts = INIT_OPTS; - opts.options |= FT_OPT_SIZE; - opts.comp_method = FT_COMP_WAITSET; - - hints = fi_allocinfo(); - if (!hints) - return EXIT_FAILURE; - - while ((op = getopt(argc, argv, "h" ADDR_OPTS INFO_OPTS)) != -1) { - switch (op) { - default: - ft_parse_addr_opts(op, optarg, &opts); - ft_parseinfo(op, optarg, hints, &opts); - break; - case '?': - case 'h': - ft_usage(argv[0], "A DGRAM client-server example that uses waitset.\n"); - return EXIT_FAILURE; - } - } - - if (optind < argc) - opts.dst_addr = argv[optind]; - - hints->ep_attr->type = FI_EP_DGRAM; - hints->caps = FI_MSG; - hints->mode = FI_CONTEXT; - hints->domain_attr->mr_mode = opts.mr_mode; - hints->addr_format = opts.address_format; - - ret = run(); - - ft_free_res(); - return ft_exit_code(ret); -} diff --git a/fabtests/man/fabtests.7.md b/fabtests/man/fabtests.7.md index d916d529c29..15f44705594 100644 --- a/fabtests/man/fabtests.7.md +++ b/fabtests/man/fabtests.7.md @@ -50,9 +50,6 @@ features of libfabric. *fi_dgram* : A basic datagram endpoint example. 
-*fi_dgram_waitset* -: Transfers datagrams using waitsets for completion notification. - *fi_inj_complete* : Sends messages using the FI_INJECT_COMPLETE operation flag. diff --git a/fabtests/man/man1/fi_dgram_waitset.1 b/fabtests/man/man1/fi_dgram_waitset.1 deleted file mode 100644 index 3f6ccf96f11..00000000000 --- a/fabtests/man/man1/fi_dgram_waitset.1 +++ /dev/null @@ -1 +0,0 @@ -.so man7/fabtests.7 diff --git a/fabtests/pytest/default/test_dgram.py b/fabtests/pytest/default/test_dgram.py index af118f6fcab..3cffa601532 100644 --- a/fabtests/pytest/default/test_dgram.py +++ b/fabtests/pytest/default/test_dgram.py @@ -12,12 +12,6 @@ def test_dgram(cmdline_args): test = ClientServerTest(cmdline_args, "fi_dgram") test.run() -@pytest.mark.functional -def test_dgram_waitset(cmdline_args): - from common import ClientServerTest - test = ClientServerTest(cmdline_args, "fi_dgram_waitset") - test.run() - @pytest.mark.parametrize("iteration_type", [pytest.param("short", marks=pytest.mark.short), pytest.param("standard", marks=pytest.mark.standard)]) diff --git a/fabtests/scripts/runfabtests.sh b/fabtests/scripts/runfabtests.sh index 3fbf846631f..508c859146d 100755 --- a/fabtests/scripts/runfabtests.sh +++ b/fabtests/scripts/runfabtests.sh @@ -108,7 +108,6 @@ functional_tests=( "fi_cq_data -e rdm -o writedata" "fi_cq_data -e dgram -o writedata" "fi_dgram" - "fi_dgram_waitset" "fi_msg" "fi_msg_epoll" "fi_msg_sockets" diff --git a/fabtests/test_configs/efa/efa-neuron.exclude b/fabtests/test_configs/efa/efa-neuron.exclude index c5a8fd706c3..49aa4408e33 100644 --- a/fabtests/test_configs/efa/efa-neuron.exclude +++ b/fabtests/test_configs/efa/efa-neuron.exclude @@ -69,9 +69,6 @@ cmatose # shared AV isn't supported shared_av -# wait isn't supported -dgram_waitset - # Remove this once ubertest supports setting MR modes ubertest diff --git a/fabtests/test_configs/efa/efa.exclude b/fabtests/test_configs/efa/efa.exclude index 6743d1d3f77..6798f678936 100644 --- 
a/fabtests/test_configs/efa/efa.exclude +++ b/fabtests/test_configs/efa/efa.exclude @@ -74,9 +74,6 @@ cmatose # shared AV isn't supported shared_av -# wait isn't supported -dgram_waitset - # Remove this once ubertest supports setting MR modes ubertest diff --git a/fabtests/test_configs/psm3/psm3.exclude b/fabtests/test_configs/psm3/psm3.exclude index b02ce740b93..1e07f776e88 100644 --- a/fabtests/test_configs/psm3/psm3.exclude +++ b/fabtests/test_configs/psm3/psm3.exclude @@ -15,5 +15,4 @@ scalable_ep shared_av rdm_cntr_pingpong multi_recv -dgram_waitset multinode diff --git a/fabtests/ubertest/uber.c b/fabtests/ubertest/uber.c index 35e9719a45c..3139fdda06f 100644 --- a/fabtests/ubertest/uber.c +++ b/fabtests/ubertest/uber.c @@ -159,8 +159,6 @@ static char *ft_wait_obj_str(enum fi_wait_obj enum_str) return "wait_none"; case FI_WAIT_UNSPEC: return "wait_unspec"; - case FI_WAIT_SET: - return "wait_set"; case FI_WAIT_FD: return "wait_fd"; case FI_WAIT_MUTEX_COND: diff --git a/include/ofi.h b/include/ofi.h index ed7799614b2..95f186f197e 100644 --- a/include/ofi.h +++ b/include/ofi.h @@ -140,6 +140,44 @@ enum { #define OFI_RX_OP_FLAGS \ (FI_COMPLETION | FI_MULTI_RECV) + +struct fi_wait_attr { + enum fi_wait_obj wait_obj; + uint64_t flags; +}; + +struct ofi_ops_wait { + size_t size; + int (*wait)(struct fid_wait *waitset, int timeout); +}; + +struct fid_wait { + struct fid fid; + struct ofi_ops_wait *ops; +}; + +#ifndef _WIN32 +// TODO: Remove +struct ofi_mutex_cond { + pthread_mutex_t *mutex; + pthread_cond_t *cond; +}; +#endif /* _WIN32 */ + + +static inline int +ofi_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr, + struct fid_wait **waitset) +{ + return fabric->ops->wait_open(fabric, attr, waitset); +} + +static inline int +ofi_wait(struct fid_wait *waitset, int timeout) +{ + return waitset->ops->wait(waitset, timeout); +} + #ifndef container_of #define container_of(ptr, type, field) \ ((type *) ((char *)ptr - offsetof(type, field))) diff --git 
a/include/ofi_enosys.h b/include/ofi_enosys.h index 322ae0cc935..c59e63f7b0d 100644 --- a/include/ofi_enosys.h +++ b/include/ofi_enosys.h @@ -304,13 +304,6 @@ ssize_t fi_no_msg_senddata(struct fid_ep *ep, const void *buf, size_t len, void ssize_t fi_no_msg_injectdata(struct fid_ep *ep, const void *buf, size_t len, uint64_t data, fi_addr_t dest_addr); -/* -static struct fi_ops_wait X = { - .size = sizeof(struct fi_ops_wait), - .wait = X, -}; -*/ - /* static struct fi_ops_poll X = { .size = sizeof(struct fi_ops_poll), diff --git a/include/ofi_util.h b/include/ofi_util.h index 4b4110f4655..df9c820a139 100644 --- a/include/ofi_util.h +++ b/include/ofi_util.h @@ -421,7 +421,7 @@ struct util_wait { int ofi_wait_init(struct util_fabric *fabric, struct fi_wait_attr *attr, struct util_wait *wait); -int fi_wait_cleanup(struct util_wait *wait); +int ofi_wait_cleanup(struct util_wait *wait); struct util_wait_fd { struct util_wait util_wait; diff --git a/include/rdma/fi_domain.h b/include/rdma/fi_domain.h index ec9834fd89c..a386824a9f6 100644 --- a/include/rdma/fi_domain.h +++ b/include/rdma/fi_domain.h @@ -367,13 +367,6 @@ fi_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, return domain->ops->cntr_open(domain, attr, cntr, context); } -static inline int -fi_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr, - struct fid_wait **waitset) -{ - return fabric->ops->wait_open(fabric, attr, waitset); -} - static inline int fi_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr, struct fid_poll **pollset) diff --git a/include/rdma/fi_eq.h b/include/rdma/fi_eq.h index f3d78779380..13b45c47f38 100644 --- a/include/rdma/fi_eq.h +++ b/include/rdma/fi_eq.h @@ -60,33 +60,11 @@ enum fi_wait_obj { FI_WAIT_UNSPEC, FI_WAIT_SET, FI_WAIT_FD, - FI_WAIT_MUTEX_COND, /* pthread mutex & cond */ + FI_WAIT_MUTEX_COND, /* pthread mutex & cond */ // 'remove' FI_WAIT_YIELD, FI_WAIT_POLLFD, }; -struct fi_wait_attr { - enum fi_wait_obj wait_obj; - uint64_t flags; -}; - 
-struct fi_ops_wait { - size_t size; - int (*wait)(struct fid_wait *waitset, int timeout); -}; - -struct fid_wait { - struct fid fid; - struct fi_ops_wait *ops; -}; - -#ifndef _WIN32 -struct fi_mutex_cond { - pthread_mutex_t *mutex; - pthread_cond_t *cond; -}; -#endif /* _WIN32 */ - struct fi_wait_pollfd { uint64_t change_index; size_t nfds; @@ -126,7 +104,7 @@ struct fi_eq_attr { uint64_t flags; enum fi_wait_obj wait_obj; int signaling_vector; - struct fid_wait *wait_set; + void *wait_set; }; /* Standard EQ events */ @@ -253,7 +231,7 @@ struct fi_cq_attr { enum fi_wait_obj wait_obj; int signaling_vector; enum fi_cq_wait_cond wait_cond; - struct fid_wait *wait_set; + void *wait_set; }; struct fi_ops_cq { @@ -290,7 +268,7 @@ enum fi_cntr_events { struct fi_cntr_attr { enum fi_cntr_events events; enum fi_wait_obj wait_obj; - struct fid_wait *wait_set; + void *wait_set; uint64_t flags; }; @@ -323,12 +301,6 @@ fi_trywait(struct fid_fabric *fabric, struct fid **fids, int count) return fabric->ops->trywait(fabric, fids, count); } -static inline int -fi_wait(struct fid_wait *waitset, int timeout) -{ - return waitset->ops->wait(waitset, timeout); -} - static inline int fi_poll(struct fid_poll *pollset, void **context, int count) { diff --git a/man/fi_cntr.3.md b/man/fi_cntr.3.md index cc87f5ab909..e3335a19db8 100644 --- a/man/fi_cntr.3.md +++ b/man/fi_cntr.3.md @@ -106,7 +106,7 @@ behavior of the counter are defined by `struct fi_cntr_attr`. struct fi_cntr_attr { enum fi_cntr_events events; /* type of events to count */ enum fi_wait_obj wait_obj; /* requested wait object */ - struct fid_wait *wait_set; /* optional wait set */ + void *wait_set; /* compatibility */ uint64_t flags; /* operation flags */ }; ``` @@ -131,7 +131,7 @@ struct fi_cntr_attr { object associated with a counter, in order to use it in other system calls. 
The following values may be used to specify the type of wait object associated with a counter: FI_WAIT_NONE, FI_WAIT_UNSPEC, - FI_WAIT_SET, FI_WAIT_FD, FI_WAIT_MUTEX_COND, and FI_WAIT_YIELD. + FI_WAIT_FD, FI_WAIT_MUTEX_COND, and FI_WAIT_YIELD. The default is FI_WAIT_NONE. - *FI_WAIT_NONE* @@ -146,11 +146,6 @@ struct fi_cntr_attr { Applications that select FI_WAIT_UNSPEC are not guaranteed to retrieve the underlying wait object. -- *FI_WAIT_SET* -: Indicates that the event counter should use a wait set object to - wait for events. If specified, the wait_set field must reference an - existing wait set object. - - *FI_WAIT_FD* : Indicates that the counter should use a file descriptor as its wait mechanism. A file descriptor wait object must be usable in select, @@ -166,12 +161,7 @@ struct fi_cntr_attr { yield on every wait. Allows usage of fi_cntr_wait through a spin. *wait_set* -: If wait_obj is FI_WAIT_SET, this field references a wait object to - which the event counter should attach. When an event is added to - the event counter, the corresponding wait set will be signaled if - all necessary conditions are met. The use of a wait_set enables an - optimized method of waiting for events across multiple event - counters. This field is ignored if wait_obj is not FI_WAIT_SET. +: This field is for version 1 compatibility and must be set to NULL. *flags* : Flags are reserved for future use, and must be set to 0. @@ -246,7 +236,7 @@ If the call returns due to timeout, -FI_ETIMEDOUT will be returned. The error value associated with the counter remains unchanged. It is invalid for applications to call this function if the counter -has been configured with a wait object of FI_WAIT_NONE or FI_WAIT_SET. +has been configured with a wait object of FI_WAIT_NONE. 
# RETURN VALUES diff --git a/man/fi_cq.3.md b/man/fi_cq.3.md index 4a45ad50ab3..a7b3301e7f0 100644 --- a/man/fi_cq.3.md +++ b/man/fi_cq.3.md @@ -136,7 +136,7 @@ struct fi_cq_attr { enum fi_wait_obj wait_obj; /* requested wait object */ int signaling_vector; /* interrupt affinity */ enum fi_cq_wait_cond wait_cond; /* wait condition format */ - struct fid_wait *wait_set; /* optional wait set */ + void *wait_set; /* compatibility */ }; ``` @@ -221,7 +221,7 @@ struct fi_cq_tagged_entry { fi_control to retrieve the underlying wait object associated with a CQ, in order to use it in other system calls. The following values may be used to specify the type of wait object associated with a - CQ: FI_WAIT_NONE, FI_WAIT_UNSPEC, FI_WAIT_SET, FI_WAIT_FD, + CQ: FI_WAIT_NONE, FI_WAIT_UNSPEC, FI_WAIT_FD, FI_WAIT_MUTEX_COND, and FI_WAIT_YIELD. The default is FI_WAIT_NONE. - *FI_WAIT_NONE* @@ -237,11 +237,6 @@ struct fi_cq_tagged_entry { mechanisms. Applications that select FI_WAIT_UNSPEC are not guaranteed to retrieve the underlying wait object. -- *FI_WAIT_SET* -: Indicates that the completion queue should use a wait set object to - wait for completions. If specified, the wait_set field must - reference an existing wait set object. - - *FI_WAIT_FD* : Indicates that the CQ should use a file descriptor as its wait mechanism. A file descriptor wait object must be usable in select, @@ -287,13 +282,7 @@ struct fi_cq_tagged_entry { This field is ignored if wait_obj is set to FI_WAIT_NONE. *wait_set* -: If wait_obj is FI_WAIT_SET, this field references a wait object to - which the completion queue should attach. When an event is inserted - into the completion queue, the corresponding wait set will be - signaled if all necessary conditions are met. The use of a wait_set - enables an optimized method of waiting for events across multiple - event and completion queues. This field is ignored if wait_obj is - not FI_WAIT_SET. +: This field is for version 1 compatibility and must be set to NULL. 
## fi_close @@ -389,7 +378,7 @@ they are signaled by some external source. This is true even if the timeout has not occurred or was specified as infinite. It is invalid for applications to call these functions if the CQ -has been configured with a wait object of FI_WAIT_NONE or FI_WAIT_SET. +has been configured with a wait object of FI_WAIT_NONE. ## fi_cq_readerr diff --git a/man/fi_eq.3.md b/man/fi_eq.3.md index cb39caba4cf..ea84b9659a7 100644 --- a/man/fi_eq.3.md +++ b/man/fi_eq.3.md @@ -116,11 +116,11 @@ fi_eq_attr`. ```c struct fi_eq_attr { - size_t size; /* # entries for EQ */ - uint64_t flags; /* operation flags */ - enum fi_wait_obj wait_obj; /* requested wait object */ - int signaling_vector; /* interrupt affinity */ - struct fid_wait *wait_set; /* optional wait set */ + size_t size; /* # entries for EQ */ + uint64_t flags; /* operation flags */ + enum fi_wait_obj wait_obj; /* requested wait object */ + int signaling_vector; /* interrupt affinity */ + void *wait_set; /* compatibility */ }; ``` @@ -161,11 +161,6 @@ struct fi_eq_attr { that select FI_WAIT_UNSPEC are not guaranteed to retrieve the underlying wait object. -- *FI_WAIT_SET* -: Indicates that the event queue should use a wait set object to wait - for events. If specified, the wait_set field must reference an - existing wait set object. - - *FI_WAIT_FD* : Indicates that the EQ should use a file descriptor as its wait mechanism. A file descriptor wait object must be usable in select, @@ -187,12 +182,7 @@ struct fi_eq_attr { ignored if the provider does not support interrupt affinity. *wait_set* -: If wait_obj is FI_WAIT_SET, this field references a wait object to - which the event queue should attach. When an event is inserted into - the event queue, the corresponding wait set will be signaled if all - necessary conditions are met. The use of a wait_set enables an - optimized method of waiting for events across multiple event queues. - This field is ignored if wait_obj is not FI_WAIT_SET. 
+: This field is for version 1 compatibility and must be set to NULL. ## fi_close @@ -217,7 +207,7 @@ commands are usable with an EQ. parameter should be an address where a pointer to the returned wait object will be written. This should be an 'int *' for FI_WAIT_FD, or 'struct fi_mutex_cond' for FI_WAIT_MUTEX_COND. - + ```c struct fi_mutex_cond { pthread_mutex_t *mutex; @@ -360,7 +350,7 @@ they are signaled by some external source. This is true even if the timeout has not occurred or was specified as infinite. It is invalid for applications to call this function if the EQ -has been configured with a wait object of FI_WAIT_NONE or FI_WAIT_SET. +has been configured with a wait object of FI_WAIT_NONE. ## fi_eq_readerr diff --git a/man/fi_poll.3.md b/man/fi_poll.3.md index 2ab5a27b108..2b6d5530f49 100644 --- a/man/fi_poll.3.md +++ b/man/fi_poll.3.md @@ -19,17 +19,11 @@ fi_poll : Poll for progress and events across multiple completion queues and counters. -fi_wait_open / fi_close -: Open/close a wait set - -fi_wait -: Waits for one or more wait objects in a set to be signaled. - fi_trywait : Indicate when it is safe to block on wait objects using native OS calls. fi_control -: Control wait set operation or attributes. +: Control fid attributes. 
# SYNOPSIS @@ -49,16 +43,9 @@ int fi_poll_del(struct fid_poll *pollset, struct fid *event_fid, int fi_poll(struct fid_poll *pollset, void **context, int count); -int fi_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr, - struct fid_wait **waitset); - -int fi_close(struct fid *waitset); - -int fi_wait(struct fid_wait *waitset, int timeout); - int fi_trywait(struct fid_fabric *fabric, struct fid **fids, size_t count); -int fi_control(struct fid *waitset, int command, void *arg); +int fi_control(struct fid *fid, int command, void *arg); ``` # ARGUMENTS @@ -72,11 +59,8 @@ int fi_control(struct fid *waitset, int command, void *arg); *pollset* : Event poll set -*waitset* -: Wait object set - *attr* -: Poll or wait set attributes +: Poll set attributes *context* : On success, an array of user context values associated with @@ -89,14 +73,11 @@ int fi_control(struct fid *waitset, int command, void *arg); *count* : Number of entries in context or fids array. -*timeout* -: Time to wait for a signal, in milliseconds. - *command* -: Command of control operation to perform on the wait set. +: Command of control operation to perform on the fid. *arg* -: Optional control argument. +: Optional control argument # DESCRIPTION @@ -151,81 +132,6 @@ should drive their progress based on the results of reading events from a completion queue or reading counter values. The fi_poll function will always return all completion queues and counters that do have new events. -## fi_wait_open - -fi_wait_open allocates a new wait set. A wait set enables an -optimized method of waiting for events across multiple completion queues -and counters. Where possible, a wait set uses a single underlying -wait object that is signaled when a specified condition occurs on an -associated completion queue or counter. - -The properties and behavior of a wait set are defined by struct -fi_wait_attr. 
- -```c -struct fi_wait_attr { - enum fi_wait_obj wait_obj; /* requested wait object */ - uint64_t flags; /* operation flags */ -}; -``` - -*wait_obj* -: Wait sets are associated with specific wait object(s). Wait objects - allow applications to block until the wait object is signaled, - indicating that an event is available to be read. The following - values may be used to specify the type of wait object associated - with a wait set: FI_WAIT_UNSPEC, FI_WAIT_FD, FI_WAIT_MUTEX_COND, - and FI_WAIT_YIELD. - -- *FI_WAIT_UNSPEC* -: Specifies that the user will only wait on the wait set using - fabric interface calls, such as fi_wait. In this case, the - underlying provider may select the most appropriate or highest - performing wait object available, including custom wait mechanisms. - Applications that select FI_WAIT_UNSPEC are not guaranteed to - retrieve the underlying wait object. - -- *FI_WAIT_FD* -: Indicates that the wait set should use a single file descriptor as - its wait mechanism, as exposed to the application. Internally, this - may require the use of epoll in order to support waiting on a single - file descriptor. File descriptor wait objects must be usable in the - POSIX select(2) and poll(2), and Linux epoll(7) routines (if - available). Provider signal an FD wait object by marking it as - readable or with an error. - -- *FI_WAIT_MUTEX_COND* -: Specifies that the wait set should use a pthread mutex and cond - variable as a wait object. - -- *FI_WAIT_POLLFD* -: This option is similar to FI_WAIT_FD, but allows the wait mechanism to use - multiple file descriptors as its wait mechanism, as viewed by the - application. The use of FI_WAIT_POLLFD can eliminate the need to use - epoll to abstract away needing to check multiple file descriptors when - waiting for events. The file descriptors must be usable in the POSIX - select(2) and poll(2) routines, and match directly to being used with - poll. See the NOTES section below for details on using pollfd. 
- -- *FI_WAIT_YIELD* -: Indicates that the wait set will wait without a wait object but instead - yield on every wait. - -*flags* -: Flags that set the default operation of the wait set. The use of - this field is reserved and must be set to 0 by the caller. - -## fi_close - -The fi_close call releases all resources associated with a wait set. -The wait set must not be bound to any other opened resources prior to -being closed, otherwise the call will return -FI_EBUSY. - -## fi_wait - -Waits on a wait set until one or more of its underlying wait objects -is signaled. - ## fi_trywait The fi_trywait call was introduced in libfabric version 1.3. The behavior @@ -286,24 +192,22 @@ processing. ## fi_control The fi_control call is used to access provider or implementation specific -details of a fids that support blocking calls, such as wait sets, completion -queues, counters, and event queues. Access to the wait set or fid should be +details of fids that support blocking calls, such as completion +queues, counters, and event queues. Access to the fid should be serialized across all calls when fi_control is invoked, as it may redirect the implementation of wait set operations. The following control commands are usable with a wait set or fid. *FI_GETWAIT (void \*\*)* : This command allows the user to retrieve the low-level wait object - associated with a wait set or fid. The format of the wait set is specified - during wait set creation, through the wait set attributes. The fi_control + associated with a fid. The format of the wait object is specified + during its creation, through the corresponding attributes. The fi_control arg parameter should be an address where a pointer to the returned wait - object will be written. This should be an 'int *' for FI_WAIT_FD, - 'struct fi_mutex_cond' for FI_WAIT_MUTEX_COND, or 'struct fi_wait_pollfd' - for FI_WAIT_POLLFD. Support for FI_GETWAIT is provider specific. + object will be written. This should be an 'int *' for FI_WAIT_FD.
+ Support for FI_GETWAIT is provider specific. *FI_GETWAITOBJ (enum fi_wait_obj \*)* -: This command returns the type of wait object associated with a wait set - or fid. +: This command returns the type of wait object associated with a fid. # RETURN VALUES @@ -330,8 +234,8 @@ to support FI_WAIT_FD. However, in order to support waiting on multiple file descriptors on systems where epoll support is not available, or where epoll performance may negatively impact performance, FI_WAIT_POLLFD provides this mechanism. -A significant different between using POLLFD versus FD wait objects -is that with FI_WAIT_POLLFD, the file descriptors may change dynamically. +A significant difference between using FI_WAIT_POLLFD versus FI_WAIT_FD +is that the file descriptors to poll may change dynamically. As an example, the file descriptors associated with a completion queues' wait set may change as endpoint associations with the CQ are added and removed. diff --git a/man/man3/fi_wait.3 b/man/man3/fi_wait.3 deleted file mode 100644 index d969259bf84..00000000000 --- a/man/man3/fi_wait.3 +++ /dev/null @@ -1 +0,0 @@ -.so man3/fi_poll.3 diff --git a/man/man3/fi_wait_open.3 b/man/man3/fi_wait_open.3 deleted file mode 100644 index d969259bf84..00000000000 --- a/man/man3/fi_wait_open.3 +++ /dev/null @@ -1 +0,0 @@ -.so man3/fi_poll.3 diff --git a/prov/efa/src/efa_cntr.c b/prov/efa/src/efa_cntr.c index 3f76d689614..9a35a8677e8 100644 --- a/prov/efa/src/efa_cntr.c +++ b/prov/efa/src/efa_cntr.c @@ -76,7 +76,7 @@ static int efa_cntr_wait(struct fid_cntr *cntr_fid, uint64_t threshold, int time } } - ret = fi_wait(&cntr->wait->wait_fid, waitim); + ret = ofi_wait(&cntr->wait->wait_fid, waitim); if (ret == -FI_ETIMEDOUT) ret = 0; diff --git a/prov/hook/src/hook_cntr.c b/prov/hook/src/hook_cntr.c index e4d3cfa3ab5..5cd0e7ff45f 100644 --- a/prov/hook/src/hook_cntr.c +++ b/prov/hook/src/hook_cntr.c @@ -113,9 +113,6 @@ int hook_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, mycntr->cntr.ops = 
&hook_cntr_ops; hattr = *attr; - if (attr->wait_obj == FI_WAIT_SET) - hattr.wait_set = hook_to_hwait(attr->wait_set); - ret = fi_cntr_open(dom->hdomain, &hattr, &mycntr->hcntr, &mycntr->cntr.fid); if (ret) diff --git a/prov/hook/src/hook_cq.c b/prov/hook/src/hook_cq.c index ad93b0db6ed..5b425718268 100644 --- a/prov/hook/src/hook_cq.c +++ b/prov/hook/src/hook_cq.c @@ -116,9 +116,6 @@ int hook_cq_init(struct fid_domain *domain, struct fi_cq_attr *attr, mycq->cq.ops = &hook_cq_ops; hattr = *attr; - if (attr->wait_obj == FI_WAIT_SET) - hattr.wait_set = hook_to_hwait(attr->wait_set); - ret = fi_cq_open(dom->hdomain, &hattr, &mycq->hcq, &mycq->cq.fid); if (ret) return ret; diff --git a/prov/hook/src/hook_eq.c b/prov/hook/src/hook_eq.c index 4e2f5f659fe..f02b1cae649 100644 --- a/prov/hook/src/hook_eq.c +++ b/prov/hook/src/hook_eq.c @@ -124,9 +124,6 @@ int hook_eq_init(struct fid_fabric *fabric, struct fi_eq_attr *attr, int ret; hattr = *attr; - if (attr->wait_obj == FI_WAIT_SET) - hattr.wait_set = hook_to_hwait(attr->wait_set); - ret = fi_eq_open(fab->hfabric, &hattr, &myeq->heq, &myeq->eq.fid); if (ret) return ret; diff --git a/prov/hook/src/hook_wait.c b/prov/hook/src/hook_wait.c index 31e8fa47e4a..1aba7886be2 100644 --- a/prov/hook/src/hook_wait.c +++ b/prov/hook/src/hook_wait.c @@ -121,11 +121,11 @@ static int hook_do_wait(struct fid_wait *waitset, int timeout) { struct hook_wait *wait = container_of(waitset, struct hook_wait, wait); - return fi_wait(wait->hwait, timeout); + return ofi_wait(wait->hwait, timeout); } -static struct fi_ops_wait hook_wait_ops = { - .size = sizeof(struct fi_ops_wait), +static struct ofi_ops_wait hook_wait_ops = { + .size = sizeof(struct ofi_ops_wait), .wait = hook_do_wait, }; @@ -145,7 +145,7 @@ int hook_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr, wait->wait.fid.ops = &hook_fid_ops; wait->wait.ops = &hook_wait_ops; - ret = fi_wait_open(fab->hfabric, attr, &wait->hwait); + ret = ofi_wait_open(fab->hfabric, attr, 
&wait->hwait); if (ret) free(wait); else diff --git a/prov/hook/trace/src/hook_trace.c b/prov/hook/trace/src/hook_trace.c index 1a743504420..80d481551fb 100644 --- a/prov/hook/trace/src/hook_trace.c +++ b/prov/hook/trace/src/hook_trace.c @@ -223,7 +223,7 @@ trace_cq_err(struct hook_cq *cq, const char *func, int line, "ctx %p flags 0x%lx, len %zu buf %p data %lu tag 0x%lx " "olen %zu err %d (%s) prov_errno %d (%s)\n", entry->op_context, entry->flags, entry->len, entry->buf, - entry->data, entry->tag, entry->olen, + entry->data, entry->tag, entry->olen, entry->err, fi_strerror(entry->err), entry->prov_errno, err_buf); } else { @@ -1207,9 +1207,6 @@ trace_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, mycq->cq.fid.ops = &hook_fid_ops; mycq->cq.ops = &trace_cq_ops; - if (attr->wait_obj == FI_WAIT_SET) - hattr.wait_set = hook_to_hwait(attr->wait_set); - ret = fi_cq_open(dom->hdomain, &hattr, &mycq->hcq, &mycq->cq.fid); if (!ret) { FI_TRACE(dom->fabric->hprov, FI_LOG_DOMAIN, @@ -1288,9 +1285,6 @@ trace_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, mycntr->cntr.fid.ops = &hook_fid_ops; mycntr->cntr.ops = &hook_cntr_ops; - if (attr->wait_obj == FI_WAIT_SET) - hattr.wait_set = hook_to_hwait(attr->wait_set); - ret = fi_cntr_open(dom->hdomain, &hattr, &mycntr->hcntr, &mycntr->cntr.fid); if (!ret) { diff --git a/prov/opx/include/rdma/fi_direct_domain.h b/prov/opx/include/rdma/fi_direct_domain.h index cf57bb1833e..02f334ebeb0 100644 --- a/prov/opx/include/rdma/fi_direct_domain.h +++ b/prov/opx/include/rdma/fi_direct_domain.h @@ -137,7 +137,7 @@ fi_rx_addr(fi_addr_t fi_addr, int rx_index, int rx_ctx_bits) return (fi_addr_t) (((uint64_t) rx_index << (64 - rx_ctx_bits)) | fi_addr); } -static inline int fi_wait_open(struct fid_fabric *fabric, +static inline int ofi_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr, struct fid_wait **waitset) { diff --git a/prov/opx/include/rdma/fi_direct_eq.h b/prov/opx/include/rdma/fi_direct_eq.h index 
278a9b22959..abfce3c57ec 100644 --- a/prov/opx/include/rdma/fi_direct_eq.h +++ b/prov/opx/include/rdma/fi_direct_eq.h @@ -82,7 +82,7 @@ int fi_trywait(struct fid_fabric *fabric, struct fid **fids, int count) } static inline -int fi_wait(struct fid_wait *waitset, int timeout) +int ofi_wait(struct fid_wait *waitset, int timeout) { return -FI_ENOSYS; /* TODO - implement this */ } diff --git a/prov/psm2/include/rdma/fi_direct_domain.h b/prov/psm2/include/rdma/fi_direct_domain.h index 8488c9ee177..c0fdfdb513f 100644 --- a/prov/psm2/include/rdma/fi_direct_domain.h +++ b/prov/psm2/include/rdma/fi_direct_domain.h @@ -118,7 +118,7 @@ fi_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, } static inline int -fi_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr, +ofi_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr, struct fid_wait **waitset) { return psmx2_wait_open(fabric, attr, waitset); diff --git a/prov/psm2/include/rdma/fi_direct_eq.h b/prov/psm2/include/rdma/fi_direct_eq.h index c3c43358a67..b1be82f7b02 100644 --- a/prov/psm2/include/rdma/fi_direct_eq.h +++ b/prov/psm2/include/rdma/fi_direct_eq.h @@ -79,7 +79,7 @@ fi_trywait(struct fid_fabric *fabric, struct fid **fids, int count) } static inline int -fi_wait(struct fid_wait *waitset, int timeout) +ofi_wait(struct fid_wait *waitset, int timeout) { return psmx2_wait_wait(waitset, timeout); } diff --git a/prov/psm2/src/psmx2_cntr.c b/prov/psm2/src/psmx2_cntr.c index 50fdf12d633..861dc3b8096 100644 --- a/prov/psm2/src/psmx2_cntr.c +++ b/prov/psm2/src/psmx2_cntr.c @@ -225,7 +225,7 @@ STATIC int psmx2_cntr_wait(struct fid_cntr *cntr, uint64_t threshold, int timeou } if (cntr_priv->wait) { - ret = fi_wait((struct fid_wait *)cntr_priv->wait, + ret = ofi_wait((struct fid_wait *)cntr_priv->wait, timeout - msec_passed); if (ret == -FI_ETIMEDOUT) break; @@ -378,20 +378,11 @@ int psmx2_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, case FI_WAIT_UNSPEC: break; - case 
FI_WAIT_SET: - if (!attr->wait_set) { - FI_INFO(&psmx2_prov, FI_LOG_CQ, - "FI_WAIT_SET is specified but attr->wait_set is NULL\n"); - return -FI_EINVAL; - } - wait = attr->wait_set; - break; - case FI_WAIT_FD: case FI_WAIT_MUTEX_COND: wait_attr.wait_obj = attr->wait_obj; wait_attr.flags = 0; - err = fi_wait_open(&domain_priv->fabric->util_fabric.fabric_fid, + err = ofi_wait_open(&domain_priv->fabric->util_fabric.fabric_fid, &wait_attr, (struct fid_wait **)&wait); if (err) return err; diff --git a/prov/psm2/src/psmx2_cq.c b/prov/psm2/src/psmx2_cq.c index 958304780fa..3f9cf760e08 100644 --- a/prov/psm2/src/psmx2_cq.c +++ b/prov/psm2/src/psmx2_cq.c @@ -1734,7 +1734,7 @@ STATIC ssize_t psmx2_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count, ofi_atomic_set32(&cq_priv->signaled, 0); return -FI_ECANCELED; } - fi_wait((struct fid_wait *)cq_priv->wait, timeout); + ofi_wait((struct fid_wait *)cq_priv->wait, timeout); } else { clock_gettime(CLOCK_REALTIME, &ts0); while (!sth_happened) { @@ -1941,21 +1941,12 @@ int psmx2_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, case FI_WAIT_NONE: break; - case FI_WAIT_SET: - if (!attr->wait_set) { - FI_INFO(&psmx2_prov, FI_LOG_CQ, - "FI_WAIT_SET is specified but attr->wait_set is NULL\n"); - return -FI_EINVAL; - } - wait = attr->wait_set; - break; - case FI_WAIT_UNSPEC: case FI_WAIT_FD: case FI_WAIT_MUTEX_COND: wait_attr.wait_obj = attr->wait_obj; wait_attr.flags = 0; - err = fi_wait_open(&domain_priv->fabric->util_fabric.fabric_fid, + err = ofi_wait_open(&domain_priv->fabric->util_fabric.fabric_fid, &wait_attr, (struct fid_wait **)&wait); if (err) return err; diff --git a/prov/psm2/src/psmx2_wait.c b/prov/psm2/src/psmx2_wait.c index 62df2d6dafc..929ca2fb1b3 100644 --- a/prov/psm2/src/psmx2_wait.c +++ b/prov/psm2/src/psmx2_wait.c @@ -137,8 +137,8 @@ static void psmx2_wait_stop_progress(void) ; } -static struct fi_ops_wait *psmx2_wait_ops_save; -static struct fi_ops_wait psmx2_wait_ops; +static struct ofi_ops_wait 
*psmx2_wait_ops_save; +static struct ofi_ops_wait psmx2_wait_ops; DIRECT_FN STATIC int psmx2_wait_wait(struct fid_wait *wait, int timeout) @@ -146,7 +146,7 @@ STATIC int psmx2_wait_wait(struct fid_wait *wait, int timeout) struct util_wait *wait_priv; struct psmx2_fid_fabric *fabric; int err; - + wait_priv = container_of(wait, struct util_wait, wait_fid); fabric = container_of(wait_priv->fabric, struct psmx2_fid_fabric, util_fabric); diff --git a/prov/psm3/psm3/ptl_ips/ptl_rcvthread.c b/prov/psm3/psm3/ptl_ips/ptl_rcvthread.c index f1cee4faffd..0aae04bbd8a 100644 --- a/prov/psm3/psm3/ptl_ips/ptl_rcvthread.c +++ b/prov/psm3/psm3/ptl_ips/ptl_rcvthread.c @@ -376,7 +376,7 @@ psm2_error_t psm3_wait(int timeout) goto inval; } - // TBD - similar test in fi_wait_open or when PSM3 YIELD_MODE env parsed + // TBD - similar test in ofi_wait_open or when PSM3 YIELD_MODE env parsed // want warning if FI_PSM3_YIELD_MODE without NIC loopback fully enabled // such as without ips enabled, or with shm enabled instead of NIC_LOOPBACK if (! 
psm3_ep_device_is_enabled(ep, PTL_DEVID_IPS)) { @@ -440,7 +440,7 @@ psm2_error_t psm3_wait(int timeout) wait_time.tv_sec += timeout / 1000; wait_time.tv_nsec += (timeout % 1000) * 1000; if (wait_time.tv_nsec > 1000000000) { // handle carry from nsec to sec - wait_time.tv_sec++; + wait_time.tv_sec++; wait_time.tv_nsec -= 1000000000; } if (0 > pthread_cond_timedwait(&wait_condvar, &wait_mutex, &wait_time)) { diff --git a/prov/psm3/src/psmx3_cntr.c b/prov/psm3/src/psmx3_cntr.c index e3d58c5c6a2..6712fd4b862 100644 --- a/prov/psm3/src/psmx3_cntr.c +++ b/prov/psm3/src/psmx3_cntr.c @@ -383,20 +383,11 @@ int psmx3_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, case FI_WAIT_UNSPEC: break; - case FI_WAIT_SET: - if (!attr->wait_set) { - PSMX3_INFO(&psmx3_prov, FI_LOG_CQ, - "FI_WAIT_SET is specified but attr->wait_set is NULL\n"); - return -FI_EINVAL; - } - wait = attr->wait_set; - break; - case FI_WAIT_FD: case FI_WAIT_MUTEX_COND: wait_attr.wait_obj = attr->wait_obj; wait_attr.flags = 0; - err = fi_wait_open(&domain_priv->fabric->util_fabric.fabric_fid, + err = ofi_wait_open(&domain_priv->fabric->util_fabric.fabric_fid, &wait_attr, (struct fid_wait **)&wait); if (err) return err; diff --git a/prov/psm3/src/psmx3_cq.c b/prov/psm3/src/psmx3_cq.c index 84a88979f98..614e3f6bdcb 100644 --- a/prov/psm3/src/psmx3_cq.c +++ b/prov/psm3/src/psmx3_cq.c @@ -1219,21 +1219,12 @@ int psmx3_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, case FI_WAIT_NONE: break; - case FI_WAIT_SET: - if (!attr->wait_set) { - PSMX3_INFO(&psmx3_prov, FI_LOG_CQ, - "FI_WAIT_SET is specified but attr->wait_set is NULL\n"); - return -FI_EINVAL; - } - wait = attr->wait_set; - break; - case FI_WAIT_UNSPEC: case FI_WAIT_FD: case FI_WAIT_MUTEX_COND: wait_attr.wait_obj = attr->wait_obj; wait_attr.flags = 0; - err = fi_wait_open(&domain_priv->fabric->util_fabric.fabric_fid, + err = ofi_wait_open(&domain_priv->fabric->util_fabric.fabric_fid, &wait_attr, (struct fid_wait **)&wait); if (err) 
return err; diff --git a/prov/rxd/src/rxd_cntr.c b/prov/rxd/src/rxd_cntr.c index 1ccaa5738df..14f6c547c62 100644 --- a/prov/rxd/src/rxd_cntr.c +++ b/prov/rxd/src/rxd_cntr.c @@ -72,7 +72,7 @@ static int rxd_cntr_wait(struct fid_cntr *cntr_fid, uint64_t threshold, int time } ofi_genlock_unlock(&cntr->ep_list_lock); - ret = fi_wait(&cntr->wait->wait_fid, ep_retry == -1 ? + ret = ofi_wait(&cntr->wait->wait_fid, ep_retry == -1 ? timeout : rxd_get_timeout(ep_retry)); if (ep_retry != -1 && ret == -FI_ETIMEDOUT) ret = 0; diff --git a/prov/rxd/src/rxd_cq.c b/prov/rxd/src/rxd_cq.c index 4eb33994937..cac02a89a26 100644 --- a/prov/rxd/src/rxd_cq.c +++ b/prov/rxd/src/rxd_cq.c @@ -1269,7 +1269,7 @@ ssize_t rxd_cq_sreadfrom(struct fid_cq *cq_fid, void *buf, size_t count, } ofi_genlock_unlock(&cq->ep_list_lock); - ret = fi_wait(&cq->wait->wait_fid, ep_retry == -1 ? + ret = ofi_wait(&cq->wait->wait_fid, ep_retry == -1 ? timeout : rxd_get_timeout(ep_retry)); if (ep_retry != -1 && ret == -FI_ETIMEDOUT) diff --git a/prov/util/src/util_cntr.c b/prov/util/src/util_cntr.c index 4b47624045d..8f4fb48554f 100644 --- a/prov/util/src/util_cntr.c +++ b/prov/util/src/util_cntr.c @@ -50,13 +50,6 @@ static int ofi_check_cntr_attr(const struct fi_provider *prov, switch (attr->wait_obj) { case FI_WAIT_NONE: case FI_WAIT_YIELD: - break; - case FI_WAIT_SET: - if (!attr->wait_set) { - FI_WARN(prov, FI_LOG_CNTR, "invalid wait set\n"); - return -FI_EINVAL; - } - /* fall through */ case FI_WAIT_UNSPEC: case FI_WAIT_FD: case FI_WAIT_POLLFD: @@ -164,7 +157,7 @@ int ofi_cntr_wait(struct fid_cntr *cntr_fid, uint64_t threshold, int timeout) /* * Temporary work-around to avoid a thread hanging in underlying - * epoll_wait called from fi_wait. This can happen if one thread + * epoll_wait called from ofi_wait. This can happen if one thread * updates the counter, another thread reads it (thereby resetting * cntr signal fd) and the current thread is about to wait. 
The * current thread would never wake up and doesn't know the counter @@ -175,7 +168,7 @@ int ofi_cntr_wait(struct fid_cntr *cntr_fid, uint64_t threshold, int timeout) timeout_quantum = (timeout < 0 ? OFI_TIMEOUT_QUANTUM_MS : MIN(OFI_TIMEOUT_QUANTUM_MS, timeout)); - ret = fi_wait(&cntr->wait->wait_fid, timeout_quantum); + ret = ofi_wait(&cntr->wait->wait_fid, timeout_quantum); } while (!ret || (ret == -FI_ETIMEDOUT && (timeout < 0 || timeout_quantum < timeout))); @@ -368,14 +361,11 @@ int ofi_cntr_init(const struct fi_provider *prov, struct fid_domain *domain, memset(&wait_attr, 0, sizeof wait_attr); wait_attr.wait_obj = attr->wait_obj; cntr->internal_wait = 1; - ret = fi_wait_open(&cntr->domain->fabric->fabric_fid, + ret = ofi_wait_open(&cntr->domain->fabric->fabric_fid, &wait_attr, &wait); if (ret) return ret; break; - case FI_WAIT_SET: - wait = attr->wait_set; - break; default: assert(0); return -FI_EINVAL; diff --git a/prov/util/src/util_cq.c b/prov/util/src/util_cq.c index 329ea990545..99ad02077f7 100644 --- a/prov/util/src/util_cq.c +++ b/prov/util/src/util_cq.c @@ -192,12 +192,6 @@ int ofi_check_cq_attr(const struct fi_provider *prov, case FI_WAIT_NONE: case FI_WAIT_YIELD: break; - case FI_WAIT_SET: - if (!attr->wait_set) { - FI_WARN(prov, FI_LOG_CQ, "invalid wait set\n"); - return -FI_EINVAL; - } - /* fall through */ case FI_WAIT_UNSPEC: case FI_WAIT_FD: case FI_WAIT_POLLFD: @@ -350,7 +344,7 @@ ssize_t ofi_cq_sreadfrom(struct fid_cq *cq_fid, void *buf, size_t count, return -FI_EAGAIN; } - ret = fi_wait(&cq->wait->wait_fid, timeout); + ret = ofi_wait(&cq->wait->wait_fid, timeout); } while (!ret); return ret == -FI_ETIMEDOUT ? 
-FI_EAGAIN : ret; @@ -737,14 +731,11 @@ int ofi_cq_init(const struct fi_provider *prov, struct fid_domain *domain, memset(&wait_attr, 0, sizeof wait_attr); wait_attr.wait_obj = attr->wait_obj; cq->internal_wait = 1; - ret = fi_wait_open(&cq->domain->fabric->fabric_fid, + ret = ofi_wait_open(&cq->domain->fabric->fabric_fid, &wait_attr, &wait); if (ret) goto cleanup; break; - case FI_WAIT_SET: - wait = attr->wait_set; - break; default: assert(0); goto cleanup; diff --git a/prov/util/src/util_eq.c b/prov/util/src/util_eq.c index b1a0c8522e5..66366fd795a 100644 --- a/prov/util/src/util_eq.c +++ b/prov/util/src/util_eq.c @@ -190,7 +190,7 @@ ssize_t ofi_eq_sread(struct fid_eq *eq_fid, uint32_t *event, void *buf, if (ofi_adjust_timeout(endtime, &timeout)) return -FI_EAGAIN; - ret = fi_wait(&eq->wait->wait_fid, timeout); + ret = ofi_wait(&eq->wait->wait_fid, timeout); } while (!ret); return ret == -FI_ETIMEDOUT ? -FI_EAGAIN : ret; @@ -306,15 +306,11 @@ static int util_eq_init(struct fid_fabric *fabric, struct util_eq *eq, memset(&wait_attr, 0, sizeof wait_attr); wait_attr.wait_obj = attr->wait_obj; eq->internal_wait = 1; - ret = fi_wait_open(fabric, &wait_attr, &wait); + ret = ofi_wait_open(fabric, &wait_attr, &wait); if (ret) return ret; eq->wait = container_of(wait, struct util_wait, wait_fid); break; - case FI_WAIT_SET: - eq->wait = container_of(attr->wait_set, struct util_wait, - wait_fid); - break; default: assert(0); return -FI_EINVAL; @@ -379,12 +375,6 @@ static int util_verify_eq_attr(const struct fi_provider *prov, case FI_WAIT_MUTEX_COND: case FI_WAIT_YIELD: break; - case FI_WAIT_SET: - if (!attr->wait_set) { - FI_WARN(prov, FI_LOG_EQ, "invalid wait set\n"); - return -FI_EINVAL; - } - break; default: FI_WARN(prov, FI_LOG_EQ, "invalid wait object type\n"); return -FI_EINVAL; diff --git a/prov/util/src/util_wait.c b/prov/util/src/util_wait.c index d8f035e9050..40e3fcc6b5f 100644 --- a/prov/util/src/util_wait.c +++ b/prov/util/src/util_wait.c @@ -484,8 +484,8 @@ 
static int util_wait_fd_close(struct fid *fid) return 0; } -static struct fi_ops_wait util_wait_fd_ops = { - .size = sizeof(struct fi_ops_wait), +static struct ofi_ops_wait util_wait_fd_ops = { + .size = sizeof(struct ofi_ops_wait), .wait = util_wait_fd_run, }; @@ -633,8 +633,8 @@ static int util_wait_yield_close(struct fid *fid) return 0; } -static struct fi_ops_wait util_wait_yield_ops = { - .size = sizeof(struct fi_ops_wait), +static struct ofi_ops_wait util_wait_yield_ops = { + .size = sizeof(struct ofi_ops_wait), .wait = util_wait_yield_run, }; diff --git a/src/enosys.c b/src/enosys.c index d0e402a5786..e1d79d53192 100644 --- a/src/enosys.c +++ b/src/enosys.c @@ -93,7 +93,7 @@ int fi_no_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr, { return -FI_ENOSYS; } -int fi_no_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr, +int ofi_no_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr, struct fid_wait **waitset) { return -FI_ENOSYS; @@ -414,10 +414,6 @@ ssize_t fi_no_msg_injectdata(struct fid_ep *ep, const void *buf, size_t len, return -FI_ENOSYS; } -/* - * struct fi_ops_wait - */ - /* * struct fi_ops_poll */ diff --git a/src/fi_tostr.c b/src/fi_tostr.c index aa765772295..11dcc4428a8 100644 --- a/src/fi_tostr.c +++ b/src/fi_tostr.c @@ -769,7 +769,6 @@ ofi_tostr_wait_obj(char *buf, size_t len, enum fi_wait_obj obj) switch (obj) { CASEENUMSTRN(FI_WAIT_NONE, len); CASEENUMSTRN(FI_WAIT_UNSPEC, len); - CASEENUMSTRN(FI_WAIT_SET, len); CASEENUMSTRN(FI_WAIT_FD, len); CASEENUMSTRN(FI_WAIT_MUTEX_COND, len); CASEENUMSTRN(FI_WAIT_YIELD, len); From 32adc2c2159f2199f066ef1d6895ab816528a658 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Tue, 26 Sep 2023 16:05:01 -0700 Subject: [PATCH 23/34] core: Remove fid_poll from the public API Poll sets are a simple iterator around progressing multiple objects. Remove from the API to reduce provider complexity. 
Signed-off-by: Sean Hefty --- Makefile.am | 8 +- fabtests/Makefile.am | 7 - fabtests/common/shared.c | 1 - fabtests/functional/poll.c | 290 ---------------- fabtests/man/fabtests.7.md | 4 - fabtests/man/man1/fi_poll.1 | 1 - fabtests/pytest/default/test_poll.py | 9 - include/ofi.h | 47 ++- include/ofi_enosys.h | 9 - include/ofi_util.h | 4 +- include/rdma/fi_domain.h | 9 +- include/rdma/fi_eq.h | 40 --- man/fi_poll.3.md | 93 ----- man/man3/fi_poll.3 | 397 ---------------------- man/man3/fi_poll_add.3 | 1 - man/man3/fi_poll_del.3 | 1 - man/man3/fi_poll_open.3 | 1 - prov/coll/src/coll_domain.c | 2 +- prov/efa/src/efa_domain.c | 2 +- prov/hook/src/hook_wait.c | 12 +- prov/opx/include/rdma/fi_direct_eq.h | 6 +- prov/psm2/include/rdma/fi_direct_domain.h | 2 +- prov/psm2/include/rdma/fi_direct_eq.h | 6 +- prov/psm2/src/psmx2_cntr.c | 4 +- prov/psm2/src/psmx2_cq.c | 4 +- prov/psm2/src/psmx2_domain.c | 2 +- prov/rxd/src/rxd_domain.c | 2 +- prov/rxm/src/rxm_domain.c | 2 +- prov/shm/src/smr_domain.c | 2 +- prov/sm2/src/sm2_domain.c | 2 +- prov/tcp/src/xnet_domain.c | 2 +- prov/ucx/src/ucx_domain.c | 2 +- prov/udp/src/udpx_domain.c | 2 +- prov/util/src/util_cntr.c | 4 +- prov/util/src/util_cq.c | 4 +- prov/util/src/util_eq.c | 4 +- prov/util/src/util_poll.c | 10 +- prov/util/src/util_wait.c | 4 +- src/enosys.c | 4 - 39 files changed, 92 insertions(+), 914 deletions(-) delete mode 100644 fabtests/functional/poll.c delete mode 100644 fabtests/man/man1/fi_poll.1 delete mode 100644 fabtests/pytest/default/test_poll.py delete mode 100644 man/man3/fi_poll.3 delete mode 100644 man/man3/fi_poll_add.3 delete mode 100644 man/man3/fi_poll_del.3 delete mode 100644 man/man3/fi_poll_open.3 diff --git a/Makefile.am b/Makefile.am index 57d2bb3a6cb..bfc9068e6af 100644 --- a/Makefile.am +++ b/Makefile.am @@ -280,8 +280,7 @@ real_man_pages = \ man/man3/fi_getinfo.3 \ man/man3/fi_mr.3 \ man/man3/fi_msg.3 \ - man/man3/fi_nic.3 \ - man/man3/fi_poll.3 \ + man/man3/fi_nic.3 \ man/man3/fi_rma.3 \ 
man/man3/fi_tagged.3 \ man/man3/fi_trigger.3 \ @@ -290,7 +289,7 @@ real_man_pages = \ man/man7/fi_arch.7 \ man/man7/fi_direct.7 \ man/man7/fi_guide.7 \ - man/man7/fi_hook.7 \ + man/man7/fi_hook.7 \ man/man7/fi_intro.7 \ man/man7/fi_provider.7 \ man/man7/fi_setup.7 @@ -377,9 +376,6 @@ dummy_man_pages = \ man/man3/fi_open_ops.3 \ man/man3/fi_passive_ep.3 \ man/man3/fi_pep_bind.3 \ - man/man3/fi_poll_add.3 \ - man/man3/fi_poll_del.3 \ - man/man3/fi_poll_open.3 \ man/man3/fi_query_atomic.3 \ man/man3/fi_query_collective.3 \ man/man3/fi_read.3 \ diff --git a/fabtests/Makefile.am b/fabtests/Makefile.am index c6757044b89..a9b9d049e52 100644 --- a/fabtests/Makefile.am +++ b/fabtests/Makefile.am @@ -24,7 +24,6 @@ bin_PROGRAMS = \ functional/fi_mcast \ functional/fi_rdm_tagged_peek \ functional/fi_cq_data \ - functional/fi_poll \ functional/fi_scalable_ep \ functional/fi_shared_ctx \ functional/fi_msg_epoll \ @@ -143,7 +142,6 @@ nobase_dist_config_DATA = \ pytest/default/test_msg.py \ pytest/default/test_multinode.py \ pytest/default/test_multi_recv.py \ - pytest/default/test_poll.py \ pytest/default/test_rdm.py \ pytest/default/test_recv_cancel.py \ pytest/default/test_rma_bw.py \ @@ -294,10 +292,6 @@ functional_fi_shared_ctx_SOURCES = \ functional/shared_ctx.c functional_fi_shared_ctx_LDADD = libfabtests.la -functional_fi_poll_SOURCES = \ - functional/poll.c -functional_fi_poll_LDADD = libfabtests.la - functional_fi_multi_ep_SOURCES = \ functional/multi_ep.c functional_fi_multi_ep_LDADD = libfabtests.la @@ -594,7 +588,6 @@ dummy_man_pages = \ man/man1/fi_msg_sockets.1 \ man/man1/fi_multi_ep.1 \ man/man1/fi_multi_mr.1 \ - man/man1/fi_poll.1 \ man/man1/fi_rdm.1 \ man/man1/fi_rdm_atomic.1 \ man/man1/fi_rdm_deferred_wq.1 \ diff --git a/fabtests/common/shared.c b/fabtests/common/shared.c index d5a5ab3d3eb..ecc5a3e92d7 100644 --- a/fabtests/common/shared.c +++ b/fabtests/common/shared.c @@ -1724,7 +1724,6 @@ void ft_close_fids(void) FT_CLOSE_FID(rxcntr); FT_CLOSE_FID(txcntr); 
FT_CLOSE_FID(rma_cntr); - FT_CLOSE_FID(pollset); if (mr != &no_mr) FT_CLOSE_FID(mr); FT_CLOSE_FID(av); diff --git a/fabtests/functional/poll.c b/fabtests/functional/poll.c deleted file mode 100644 index f9a2079c94f..00000000000 --- a/fabtests/functional/poll.c +++ /dev/null @@ -1,290 +0,0 @@ -/* - * Copyright (c) 2013-2015 Intel Corporation. All rights reserved. - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. - * - * This software is available to you under the BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include -#include -#include -#include -#include - -#include -#include - -#include - -#define MAX_POLL_CNT 10 - -static int alloc_ep_res(struct fi_info *fi) -{ - struct fi_poll_attr poll_attr; - int ret; - - ret = ft_alloc_active_res(fi); - if (ret) - return ret; - - memset(&poll_attr, 0, sizeof poll_attr); - ret = fi_poll_open(domain, &poll_attr, &pollset); - if (ret) { - FT_PRINTERR("fi_poll_open", ret); - return ret; - } - - if (txcq) { - ret = fi_poll_add(pollset, &txcq->fid, 0); - if (ret) - goto err; - } - - if (rxcq) { - ret = fi_poll_add(pollset, &rxcq->fid, 0); - if (ret) - goto err; - } - - if (txcntr) { - ret = fi_poll_add(pollset, &txcntr->fid, 0); - if (ret) - goto err; - } - - if (rxcntr) { - ret = fi_poll_add(pollset, &rxcntr->fid, 0); - if (ret) - goto err; - } - - return 0; -err: - FT_PRINTERR("fi_poll_add", ret); - return ret; -} - -static int free_poll_res(void) -{ - int ret; - - if (!pollset) - return 0; - - if (txcq) { - ret = fi_poll_del(pollset, &txcq->fid, 0); - if (ret) - goto err; - } - - if (rxcq) { - ret = fi_poll_del(pollset, &rxcq->fid, 0); - if (ret) - goto err; - } - - if (txcntr) { - ret = fi_poll_del(pollset, &txcntr->fid, 0); - if (ret) - goto err; - } - - if (rxcntr) { - ret = fi_poll_del(pollset, &rxcntr->fid, 0); - if (ret) - goto err; - } - return 0; -err: - FT_PRINTERR("fi_poll_del", ret); - return ret; -} - -static int init_fabric(void) -{ - int ret; - - ret = ft_init(); - if (ret) - return ret; - - ret = ft_init_oob(); - if (ret) - return ret; - - ret = ft_getinfo(hints, &fi); - if (ret) - return ret; - - ret = ft_open_fabric_res(); - if (ret) - return ret; - - ret = alloc_ep_res(fi); - if (ret) - return ret; - - ret = ft_enable_ep_recv(); - if (ret) - return ret; - return 0; -} - -static int send_recv() -{ - struct fid_cq *cq; - void *context[MAX_POLL_CNT]; - struct fi_cq_entry comp; - int ret; - int ret_count = 0; - int i, tx_cntr_val = 0, rx_cntr_val = 0; - - fprintf(stdout, "Posting a send...\n"); - ret = 
ft_post_tx(ep, remote_fi_addr, tx_size, NO_CQ_DATA, &tx_ctx); - if (ret) - return ret; - - while (((opts.options & FT_OPT_TX_CQ) && (tx_cq_cntr < tx_seq)) || - ((opts.options & FT_OPT_TX_CNTR) && (tx_cntr_val < tx_seq)) || - ((opts.options & FT_OPT_RX_CQ) && (rx_cq_cntr < rx_seq)) || - ((opts.options & FT_OPT_RX_CNTR) && (rx_cntr_val < rx_seq))) { - - /* Poll send and recv CQs/Cntrs */ - do { - ret_count = fi_poll(pollset, context, MAX_POLL_CNT); - if (ret_count < 0) { - FT_PRINTERR("fi_poll", ret_count); - return ret_count; - } - } while (!ret_count); - - fprintf(stdout, "Retrieved %d event(s)\n", ret_count); - - for (i = 0; i < ret_count; i++) { - if (context[i] == &txcq) { - printf("Send completion received\n"); - cq = txcq; - tx_cq_cntr++; - } else if (context[i] == &rxcq) { - printf("Recv completion received\n"); - cq = rxcq; - rx_cq_cntr++; - } else if (context[i] == &txcntr) { - printf("Send counter poll-event\n"); - tx_cntr_val = fi_cntr_read(txcntr); - if (tx_cntr_val > tx_seq) { - FT_ERR("Invalid tx counter event\n"); - FT_ERR("expected: %" PRIu64 ", found: " - "%d\n", tx_seq, tx_cntr_val); - return -1; - } - continue; - } else if (context[i] == &rxcntr) { - printf("Recv counter poll-event\n"); - rx_cntr_val = fi_cntr_read(rxcntr); - if (rx_cntr_val > rx_seq) { - FT_ERR("Invalid rx counter event\n"); - FT_ERR("expected: %" PRIu64 ", found: " - "%d\n", rx_seq, rx_cntr_val); - return -1; - } - continue; - } else { - FT_ERR("Unknown completion received\n"); - return -1; - } - - /* Read the completion entry */ - ret = fi_cq_read(cq, &comp, 1); - if (ret < 0) { - if (ret == -FI_EAVAIL) { - ret = ft_cq_readerr(cq); - } else { - FT_PRINTERR("fi_cq_read", ret); - } - return ret; - } - } - } - - return 0; -} - -static int run(void) -{ - int ret; - - ret = init_fabric(); - if (ret) - return ret; - - ret = ft_init_av(); - if (ret) - return ret; - - return send_recv(); -} - -int main(int argc, char **argv) -{ - int op, ret = 0; - - opts = INIT_OPTS; - opts.options |= 
FT_OPT_SIZE; - - hints = fi_allocinfo(); - if (!hints) - return EXIT_FAILURE; - - while ((op = getopt(argc, argv, "h" CS_OPTS INFO_OPTS)) != -1) { - switch (op) { - default: - ft_parse_addr_opts(op, optarg, &opts); - ft_parseinfo(op, optarg, hints, &opts); - ft_parsecsopts(op, optarg, &opts); - break; - case '?': - case 'h': - ft_usage(argv[0], "A client-server example that uses poll.\n"); - FT_PRINT_OPTS_USAGE("-t ", "completion type [queue, counter]"); - return EXIT_FAILURE; - } - } - - if (optind < argc) - opts.dst_addr = argv[optind]; - - hints->ep_attr->type = FI_EP_RDM; - hints->caps = FI_MSG; - hints->mode = FI_CONTEXT; - hints->domain_attr->mr_mode = opts.mr_mode; - hints->addr_format = opts.address_format; - - ret = run(); - - free_poll_res(); - ft_free_res(); - return ft_exit_code(ret); -} diff --git a/fabtests/man/fabtests.7.md b/fabtests/man/fabtests.7.md index 15f44705594..63f51ec624d 100644 --- a/fabtests/man/fabtests.7.md +++ b/fabtests/man/fabtests.7.md @@ -77,10 +77,6 @@ features of libfabric. completion counters of inbound writes as the notification mechanism. -*fi_poll* -: Exchanges data over RDM endpoints using poll sets to drive - completion notifications. - *fi_rdm* : A basic RDM endpoint example. 
diff --git a/fabtests/man/man1/fi_poll.1 b/fabtests/man/man1/fi_poll.1 deleted file mode 100644 index 3f6ccf96f11..00000000000 --- a/fabtests/man/man1/fi_poll.1 +++ /dev/null @@ -1 +0,0 @@ -.so man7/fabtests.7 diff --git a/fabtests/pytest/default/test_poll.py b/fabtests/pytest/default/test_poll.py deleted file mode 100644 index a3aa6ec35fc..00000000000 --- a/fabtests/pytest/default/test_poll.py +++ /dev/null @@ -1,9 +0,0 @@ -import pytest - -@pytest.mark.functional -@pytest.mark.parametrize("poll_type", ["queue", "counter"]) -def test_poll(cmdline_args, poll_type): - from common import ClientServerTest - test = ClientServerTest(cmdline_args, "fi_poll -t " + poll_type) - test.run() - diff --git a/include/ofi.h b/include/ofi.h index 95f186f197e..181db0e0b3c 100644 --- a/include/ofi.h +++ b/include/ofi.h @@ -164,7 +164,6 @@ struct ofi_mutex_cond { }; #endif /* _WIN32 */ - static inline int ofi_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr, struct fid_wait **waitset) @@ -178,6 +177,52 @@ ofi_wait(struct fid_wait *waitset, int timeout) return waitset->ops->wait(waitset, timeout); } + +struct fi_poll_attr { + uint64_t flags; +}; + +struct ofi_ops_poll { + size_t size; + int (*poll)(struct fid_poll *pollset, void **context, int count); + int (*poll_add)(struct fid_poll *pollset, struct fid *event_fid, + uint64_t flags); + int (*poll_del)(struct fid_poll *pollset, struct fid *event_fid, + uint64_t flags); +}; + +struct fid_poll { + struct fid fid; + struct ofi_ops_poll *ops; +}; + +static inline int +ofi_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr, + struct fid_poll **pollset) +{ + return domain->ops->poll_open(domain, attr, pollset); +} + +static inline int +ofi_poll(struct fid_poll *pollset, void **context, int count) +{ + return pollset->ops->poll(pollset, context, count); +} + +static inline int +ofi_poll_add(struct fid_poll *pollset, struct fid *event_fid, uint64_t flags) +{ + return pollset->ops->poll_add(pollset, event_fid, 
flags); +} + +static inline int +ofi_poll_del(struct fid_poll *pollset, struct fid *event_fid, uint64_t flags) +{ + return pollset->ops->poll_del(pollset, event_fid, flags); +} + + + #ifndef container_of #define container_of(ptr, type, field) \ ((type *) ((char *)ptr - offsetof(type, field))) diff --git a/include/ofi_enosys.h b/include/ofi_enosys.h index c59e63f7b0d..be5650c6b8e 100644 --- a/include/ofi_enosys.h +++ b/include/ofi_enosys.h @@ -304,15 +304,6 @@ ssize_t fi_no_msg_senddata(struct fid_ep *ep, const void *buf, size_t len, void ssize_t fi_no_msg_injectdata(struct fid_ep *ep, const void *buf, size_t len, uint64_t data, fi_addr_t dest_addr); -/* -static struct fi_ops_poll X = { - .size = sizeof(struct fi_ops_poll), - .poll = X, - .poll_add = X, - .poll_del = X, -}; -*/ - /* static struct fi_ops_eq X = { .size = sizeof(struct fi_ops_eq), diff --git a/include/ofi_util.h b/include/ofi_util.h index df9c820a139..5f1100f0f43 100644 --- a/include/ofi_util.h +++ b/include/ofi_util.h @@ -1015,9 +1015,9 @@ struct util_poll { const struct fi_provider *prov; }; -int fi_poll_create_(const struct fi_provider *prov, struct fid_domain *domain, +int ofi_poll_create_(const struct fi_provider *prov, struct fid_domain *domain, struct fi_poll_attr *attr, struct fid_poll **pollset); -int fi_poll_create(struct fid_domain *domain, struct fi_poll_attr *attr, +int ofi_poll_create(struct fid_domain *domain, struct fi_poll_attr *attr, struct fid_poll **pollset); /* diff --git a/include/rdma/fi_domain.h b/include/rdma/fi_domain.h index a386824a9f6..0bce6853f9b 100644 --- a/include/rdma/fi_domain.h +++ b/include/rdma/fi_domain.h @@ -265,6 +265,8 @@ struct fi_atomic_attr; struct fi_cq_attr; struct fi_cntr_attr; struct fi_collective_attr; +struct fi_wait_attr; +struct fi_poll_attr; struct fi_ops_domain { size_t size; @@ -367,13 +369,6 @@ fi_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, return domain->ops->cntr_open(domain, attr, cntr, context); } -static inline int 
-fi_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr, - struct fid_poll **pollset) -{ - return domain->ops->poll_open(domain, attr, pollset); -} - static inline int fi_mr_reg(struct fid_domain *domain, const void *buf, size_t len, uint64_t acs, uint64_t offset, uint64_t requested_key, diff --git a/include/rdma/fi_eq.h b/include/rdma/fi_eq.h index 13b45c47f38..aed91242306 100644 --- a/include/rdma/fi_eq.h +++ b/include/rdma/fi_eq.h @@ -71,28 +71,6 @@ struct fi_wait_pollfd { struct pollfd *fd; }; -/* - * Poll Set - * Allows polling multiple event queues and counters for progress - */ - -struct fi_poll_attr { - uint64_t flags; -}; - -struct fi_ops_poll { - size_t size; - int (*poll)(struct fid_poll *pollset, void **context, int count); - int (*poll_add)(struct fid_poll *pollset, struct fid *event_fid, - uint64_t flags); - int (*poll_del)(struct fid_poll *pollset, struct fid *event_fid, - uint64_t flags); -}; - -struct fid_poll { - struct fid fid; - struct fi_ops_poll *ops; -}; /* * EQ = Event Queue @@ -301,24 +279,6 @@ fi_trywait(struct fid_fabric *fabric, struct fid **fids, int count) return fabric->ops->trywait(fabric, fids, count); } -static inline int -fi_poll(struct fid_poll *pollset, void **context, int count) -{ - return pollset->ops->poll(pollset, context, count); -} - -static inline int -fi_poll_add(struct fid_poll *pollset, struct fid *event_fid, uint64_t flags) -{ - return pollset->ops->poll_add(pollset, event_fid, flags); -} - -static inline int -fi_poll_del(struct fid_poll *pollset, struct fid *event_fid, uint64_t flags) -{ - return pollset->ops->poll_del(pollset, event_fid, flags); -} - static inline int fi_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr, struct fid_eq **eq, void *context) diff --git a/man/fi_poll.3.md b/man/fi_poll.3.md index 2b6d5530f49..c0b04cc373b 100644 --- a/man/fi_poll.3.md +++ b/man/fi_poll.3.md @@ -7,18 +7,6 @@ tagline: Libfabric Programmer's Manual # NAME -fi_poll \- Polling and wait set operations - 
-fi_poll_open / fi_close -: Open/close a polling set - -fi_poll_add / fi_poll_del -: Add/remove a completion queue or counter to/from a poll set. - -fi_poll -: Poll for progress and events across multiple completion queues - and counters. - fi_trywait : Indicate when it is safe to block on wait objects using native OS calls. @@ -30,19 +18,6 @@ fi_control ```c #include -int fi_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr, - struct fid_poll **pollset); - -int fi_close(struct fid *pollset); - -int fi_poll_add(struct fid_poll *pollset, struct fid *event_fid, - uint64_t flags); - -int fi_poll_del(struct fid_poll *pollset, struct fid *event_fid, - uint64_t flags); - -int fi_poll(struct fid_poll *pollset, void **context, int count); - int fi_trywait(struct fid_fabric *fabric, struct fid **fids, size_t count); int fi_control(struct fid *fid, int command, void *arg); @@ -53,19 +28,6 @@ int fi_control(struct fid *fid, int command, void *arg); *fabric* : Fabric provider -*domain* -: Resource domain - -*pollset* -: Event poll set - -*attr* -: Poll set attributes - -*context* -: On success, an array of user context values associated with - completion queues or counters. - *fids* : An array of fabric descriptors, each one associated with a native wait object. @@ -81,57 +43,6 @@ int fi_control(struct fid *fid, int command, void *arg); # DESCRIPTION - -## fi_poll_open - -fi_poll_open creates a new polling set. A poll set enables an -optimized method for progressing asynchronous operations across -multiple completion queues and counters and checking for their completions. - -A poll set is defined with the following attributes. - -```c -struct fi_poll_attr { - uint64_t flags; /* operation flags */ -}; -``` - -*flags* -: Flags that set the default operation of the poll set. The use of - this field is reserved and must be set to 0 by the caller. - -## fi_close - -The fi_close call releases all resources associated with a poll set. 
-The poll set must not be associated with any other resources prior to -being closed, otherwise the call will return -FI_EBUSY. - -## fi_poll_add - -Associates a completion queue or counter with a poll set. - -## fi_poll_del - -Removes a completion queue or counter from a poll set. - -## fi_poll - -Progresses all completion queues and counters associated with a poll set -and checks for events. If events might have occurred, contexts associated -with the completion queues and/or counters are returned. Completion -queues will return their context if they are not empty. The context -associated with a counter will be returned if the counter's success -value or error value have changed since the last time fi_poll, fi_cntr_set, -or fi_cntr_add were called. The number of contexts is limited to the -size of the context array, indicated by the count parameter. - -Note that fi_poll only indicates that events might be available. In some -cases, providers may consume such events internally, to drive progress, for -example. This can result in fi_poll returning false positives. Applications -should drive their progress based on the results of reading events from a -completion queue or reading counter values. The fi_poll function will always -return all completion queues and counters that do have new events. - ## fi_trywait The fi_trywait call was introduced in libfabric version 1.3. The behavior @@ -217,10 +128,6 @@ fabric errno is returned. Fabric errno values are defined in `rdma/fi_errno.h`. -fi_poll -: On success, if events are available, returns the number of entries - written to the context array. 
- # NOTES In many situations, blocking calls may need to wait on signals sent diff --git a/man/man3/fi_poll.3 b/man/man3/fi_poll.3 deleted file mode 100644 index 9d528a91273..00000000000 --- a/man/man3/fi_poll.3 +++ /dev/null @@ -1,397 +0,0 @@ -.\" Automatically generated by Pandoc 2.9.2.1 -.\" -.TH "fi_poll" "3" "2022\-12\-09" "Libfabric Programmer\[cq]s Manual" "#VERSION#" -.hy -.SH NAME -.PP -fi_poll - Polling and wait set operations -.TP -fi_poll_open / fi_close -Open/close a polling set -.TP -fi_poll_add / fi_poll_del -Add/remove a completion queue or counter to/from a poll set. -.TP -fi_poll -Poll for progress and events across multiple completion queues and -counters. -.TP -fi_wait_open / fi_close -Open/close a wait set -.TP -fi_wait -Waits for one or more wait objects in a set to be signaled. -.TP -fi_trywait -Indicate when it is safe to block on wait objects using native OS calls. -.TP -fi_control -Control wait set operation or attributes. -.SH SYNOPSIS -.IP -.nf -\f[C] -#include - -int fi_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr, - struct fid_poll **pollset); - -int fi_close(struct fid *pollset); - -int fi_poll_add(struct fid_poll *pollset, struct fid *event_fid, - uint64_t flags); - -int fi_poll_del(struct fid_poll *pollset, struct fid *event_fid, - uint64_t flags); - -int fi_poll(struct fid_poll *pollset, void **context, int count); - -int fi_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr, - struct fid_wait **waitset); - -int fi_close(struct fid *waitset); - -int fi_wait(struct fid_wait *waitset, int timeout); - -int fi_trywait(struct fid_fabric *fabric, struct fid **fids, size_t count); - -int fi_control(struct fid *waitset, int command, void *arg); -\f[R] -.fi -.SH ARGUMENTS -.TP -\f[I]fabric\f[R] -Fabric provider -.TP -\f[I]domain\f[R] -Resource domain -.TP -\f[I]pollset\f[R] -Event poll set -.TP -\f[I]waitset\f[R] -Wait object set -.TP -\f[I]attr\f[R] -Poll or wait set attributes -.TP -\f[I]context\f[R] -On 
success, an array of user context values associated with completion -queues or counters. -.TP -\f[I]fids\f[R] -An array of fabric descriptors, each one associated with a native wait -object. -.TP -\f[I]count\f[R] -Number of entries in context or fids array. -.TP -\f[I]timeout\f[R] -Time to wait for a signal, in milliseconds. -.TP -\f[I]command\f[R] -Command of control operation to perform on the wait set. -.TP -\f[I]arg\f[R] -Optional control argument. -.SH DESCRIPTION -.SS fi_poll_open -.PP -fi_poll_open creates a new polling set. -A poll set enables an optimized method for progressing asynchronous -operations across multiple completion queues and counters and checking -for their completions. -.PP -A poll set is defined with the following attributes. -.IP -.nf -\f[C] -struct fi_poll_attr { - uint64_t flags; /* operation flags */ -}; -\f[R] -.fi -.TP -\f[I]flags\f[R] -Flags that set the default operation of the poll set. -The use of this field is reserved and must be set to 0 by the caller. -.SS fi_close -.PP -The fi_close call releases all resources associated with a poll set. -The poll set must not be associated with any other resources prior to -being closed, otherwise the call will return -FI_EBUSY. -.SS fi_poll_add -.PP -Associates a completion queue or counter with a poll set. -.SS fi_poll_del -.PP -Removes a completion queue or counter from a poll set. -.SS fi_poll -.PP -Progresses all completion queues and counters associated with a poll set -and checks for events. -If events might have occurred, contexts associated with the completion -queues and/or counters are returned. -Completion queues will return their context if they are not empty. -The context associated with a counter will be returned if the -counter\[cq]s success value or error value have changed since the last -time fi_poll, fi_cntr_set, or fi_cntr_add were called. -The number of contexts is limited to the size of the context array, -indicated by the count parameter. 
-.PP -Note that fi_poll only indicates that events might be available. -In some cases, providers may consume such events internally, to drive -progress, for example. -This can result in fi_poll returning false positives. -Applications should drive their progress based on the results of reading -events from a completion queue or reading counter values. -The fi_poll function will always return all completion queues and -counters that do have new events. -.SS fi_wait_open -.PP -fi_wait_open allocates a new wait set. -A wait set enables an optimized method of waiting for events across -multiple completion queues and counters. -Where possible, a wait set uses a single underlying wait object that is -signaled when a specified condition occurs on an associated completion -queue or counter. -.PP -The properties and behavior of a wait set are defined by struct -fi_wait_attr. -.IP -.nf -\f[C] -struct fi_wait_attr { - enum fi_wait_obj wait_obj; /* requested wait object */ - uint64_t flags; /* operation flags */ -}; -\f[R] -.fi -.TP -\f[I]wait_obj\f[R] -Wait sets are associated with specific wait object(s). -Wait objects allow applications to block until the wait object is -signaled, indicating that an event is available to be read. -The following values may be used to specify the type of wait object -associated with a wait set: FI_WAIT_UNSPEC, FI_WAIT_FD, -FI_WAIT_MUTEX_COND, and FI_WAIT_YIELD. -.TP -- \f[I]FI_WAIT_UNSPEC\f[R] -Specifies that the user will only wait on the wait set using fabric -interface calls, such as fi_wait. -In this case, the underlying provider may select the most appropriate or -highest performing wait object available, including custom wait -mechanisms. -Applications that select FI_WAIT_UNSPEC are not guaranteed to retrieve -the underlying wait object. -.TP -- \f[I]FI_WAIT_FD\f[R] -Indicates that the wait set should use a single file descriptor as its -wait mechanism, as exposed to the application. 
-Internally, this may require the use of epoll in order to support -waiting on a single file descriptor. -File descriptor wait objects must be usable in the POSIX select(2) and -poll(2), and Linux epoll(7) routines (if available). -Provider signal an FD wait object by marking it as readable or with an -error. -.TP -- \f[I]FI_WAIT_MUTEX_COND\f[R] -Specifies that the wait set should use a pthread mutex and cond variable -as a wait object. -.TP -- \f[I]FI_WAIT_POLLFD\f[R] -This option is similar to FI_WAIT_FD, but allows the wait mechanism to -use multiple file descriptors as its wait mechanism, as viewed by the -application. -The use of FI_WAIT_POLLFD can eliminate the need to use epoll to -abstract away needing to check multiple file descriptors when waiting -for events. -The file descriptors must be usable in the POSIX select(2) and poll(2) -routines, and match directly to being used with poll. -See the NOTES section below for details on using pollfd. -.TP -- \f[I]FI_WAIT_YIELD\f[R] -Indicates that the wait set will wait without a wait object but instead -yield on every wait. -.TP -\f[I]flags\f[R] -Flags that set the default operation of the wait set. -The use of this field is reserved and must be set to 0 by the caller. -.SS fi_close -.PP -The fi_close call releases all resources associated with a wait set. -The wait set must not be bound to any other opened resources prior to -being closed, otherwise the call will return -FI_EBUSY. -.SS fi_wait -.PP -Waits on a wait set until one or more of its underlying wait objects is -signaled. -.SS fi_trywait -.PP -The fi_trywait call was introduced in libfabric version 1.3. -The behavior of using native wait objects without the use of fi_trywait -is provider specific and should be considered non-deterministic. -.PP -The fi_trywait() call is used in conjunction with native operating -system calls to block on wait objects, such as file descriptors. 
-The application must call fi_trywait and obtain a return value of -FI_SUCCESS prior to blocking on a native wait object. -Failure to do so may result in the wait object not being signaled, and -the application not observing the desired events. -The following pseudo-code demonstrates the use of fi_trywait in -conjunction with the OS select(2) call. -.IP -.nf -\f[C] -fi_control(&cq->fid, FI_GETWAIT, (void *) &fd); -FD_ZERO(&fds); -FD_SET(fd, &fds); - -while (1) { - if (fi_trywait(&cq, 1) == FI_SUCCESS) - select(fd + 1, &fds, NULL, &fds, &timeout); - - do { - ret = fi_cq_read(cq, &comp, 1); - } while (ret > 0); -} -\f[R] -.fi -.PP -fi_trywait() will return FI_SUCCESS if it is safe to block on the wait -object(s) corresponding to the fabric descriptor(s), or -FI_EAGAIN if -there are events queued on the fabric descriptor or if blocking could -hang the application. -.PP -The call takes an array of fabric descriptors. -For each wait object that will be passed to the native wait routine, the -corresponding fabric descriptor should first be passed to fi_trywait. -All fabric descriptors passed into a single fi_trywait call must make -use of the same underlying wait object type. -.PP -The following types of fabric descriptors may be passed into fi_trywait: -event queues, completion queues, counters, and wait sets. -Applications that wish to use native wait calls should select specific -wait objects when allocating such resources. -For example, by setting the item\[cq]s creation attribute wait_obj value -to FI_WAIT_FD. -.PP -In the case the wait object to check belongs to a wait set, only the -wait set itself needs to be passed into fi_trywait. -The fabric resources associated with the wait set do not. -.PP -On receiving a return value of -FI_EAGAIN from fi_trywait, an -application should read all queued completions and events, and call -fi_trywait again before attempting to block. 
-Applications can make use of a fabric poll set to identify completion -queues and counters that may require processing. -.SS fi_control -.PP -The fi_control call is used to access provider or implementation -specific details of a fids that support blocking calls, such as wait -sets, completion queues, counters, and event queues. -Access to the wait set or fid should be serialized across all calls when -fi_control is invoked, as it may redirect the implementation of wait set -operations. -The following control commands are usable with a wait set or fid. -.TP -\f[I]FI_GETWAIT (void **)\f[R] -This command allows the user to retrieve the low-level wait object -associated with a wait set or fid. -The format of the wait set is specified during wait set creation, -through the wait set attributes. -The fi_control arg parameter should be an address where a pointer to the -returned wait object will be written. -This should be an \[cq]int *\[cq] for FI_WAIT_FD, `struct fi_mutex_cond' -for FI_WAIT_MUTEX_COND, or `struct fi_wait_pollfd' for FI_WAIT_POLLFD. -Support for FI_GETWAIT is provider specific. -.TP -\f[I]FI_GETWAITOBJ (enum fi_wait_obj *)\f[R] -This command returns the type of wait object associated with a wait set -or fid. -.SH RETURN VALUES -.PP -Returns FI_SUCCESS on success. -On error, a negative value corresponding to fabric errno is returned. -.PP -Fabric errno values are defined in \f[C]rdma/fi_errno.h\f[R]. -.TP -fi_poll -On success, if events are available, returns the number of entries -written to the context array. -.SH NOTES -.PP -In many situations, blocking calls may need to wait on signals sent to a -number of file descriptors. -For example, this is the case for socket based providers, such as tcp -and udp, as well as utility providers such as multi-rail. -For simplicity, when epoll is available, it can be used to limit the -number of file descriptors that an application must monitor. -The use of epoll may also be required in order to support FI_WAIT_FD. 
-.PP -However, in order to support waiting on multiple file descriptors on -systems where epoll support is not available, or where epoll performance -may negatively impact performance, FI_WAIT_POLLFD provides this -mechanism. -A significant different between using POLLFD versus FD wait objects is -that with FI_WAIT_POLLFD, the file descriptors may change dynamically. -As an example, the file descriptors associated with a completion -queues\[cq] wait set may change as endpoint associations with the CQ are -added and removed. -.PP -Struct fi_wait_pollfd is used to retrieve all file descriptors for fids -using FI_WAIT_POLLFD to support blocking calls. -.IP -.nf -\f[C] -struct fi_wait_pollfd { - uint64_t change_index; - size_t nfds; - struct pollfd *fd; -}; -\f[R] -.fi -.TP -\f[I]change_index\f[R] -The change_index may be used to determine if there have been any changes -to the file descriptor list. -Anytime a file descriptor is added, removed, or its events are updated, -this field is incremented by the provider. -Applications wishing to wait on file descriptors directly should cache -the change_index value. -Before blocking on file descriptor events, the app should use -fi_control() to retrieve the current change_index and compare that -against its cached value. -If the values differ, then the app should update its file descriptor -list prior to blocking. -.TP -\f[I]nfds\f[R] -On input to fi_control(), this indicates the number of entries in the -struct pollfd * array. -On output, this will be set to the number of entries needed to store the -current number of file descriptors. -If the input value is smaller than the output value, fi_control() will -return the error -FI_ETOOSMALL. -Note that setting nfds = 0 allows an efficient way of checking the -change_index. -.TP -\f[I]fd\f[R] -This points to an array of struct pollfd entries. -The number of entries is specified through the nfds field. 
-If the number of needed entries is less than or equal to the number of -entries available, the struct pollfd array will be filled out with a -list of file descriptors and corresponding events that can be used in -the select(2) and poll(2) calls. -.PP -The change_index is updated only when the file descriptors associated -with the pollfd file set has changed. -Checking the change_index is an additional step needed when working with -FI_WAIT_POLLFD wait objects directly. -The use of the fi_trywait() function is still required if accessing wait -objects directly. -.SH SEE ALSO -.PP -\f[C]fi_getinfo\f[R](3), \f[C]fi_domain\f[R](3), \f[C]fi_cntr\f[R](3), -\f[C]fi_eq\f[R](3) -.SH AUTHORS -OpenFabrics. diff --git a/man/man3/fi_poll_add.3 b/man/man3/fi_poll_add.3 deleted file mode 100644 index d969259bf84..00000000000 --- a/man/man3/fi_poll_add.3 +++ /dev/null @@ -1 +0,0 @@ -.so man3/fi_poll.3 diff --git a/man/man3/fi_poll_del.3 b/man/man3/fi_poll_del.3 deleted file mode 100644 index d969259bf84..00000000000 --- a/man/man3/fi_poll_del.3 +++ /dev/null @@ -1 +0,0 @@ -.so man3/fi_poll.3 diff --git a/man/man3/fi_poll_open.3 b/man/man3/fi_poll_open.3 deleted file mode 100644 index d969259bf84..00000000000 --- a/man/man3/fi_poll_open.3 +++ /dev/null @@ -1 +0,0 @@ -.so man3/fi_poll.3 diff --git a/prov/coll/src/coll_domain.c b/prov/coll/src/coll_domain.c index 95ea77e3c18..5fae16f67fd 100644 --- a/prov/coll/src/coll_domain.c +++ b/prov/coll/src/coll_domain.c @@ -39,7 +39,7 @@ static struct fi_ops_domain coll_domain_ops = { .endpoint = coll_endpoint, .scalable_ep = fi_no_scalable_ep, .cntr_open = fi_no_cntr_open, - .poll_open = fi_poll_create, + .poll_open = ofi_poll_create, .stx_ctx = fi_no_stx_context, .srx_ctx = fi_no_srx_context, .query_atomic = fi_no_query_atomic, diff --git a/prov/efa/src/efa_domain.c b/prov/efa/src/efa_domain.c index 704b6f45e30..a53dc514207 100644 --- a/prov/efa/src/efa_domain.c +++ b/prov/efa/src/efa_domain.c @@ -76,7 +76,7 @@ static struct fi_ops_domain 
efa_ops_domain_rdm = { .endpoint = efa_rdm_ep_open, .scalable_ep = fi_no_scalable_ep, .cntr_open = efa_cntr_open, - .poll_open = fi_poll_create, + .poll_open = ofi_poll_create, .stx_ctx = fi_no_stx_context, .srx_ctx = fi_no_srx_context, .query_atomic = efa_rdm_atomic_query, diff --git a/prov/hook/src/hook_wait.c b/prov/hook/src/hook_wait.c index 1aba7886be2..2cedb532a69 100644 --- a/prov/hook/src/hook_wait.c +++ b/prov/hook/src/hook_wait.c @@ -40,7 +40,7 @@ static int hook_do_poll(struct fid_poll *pollset, void **context, int count) struct fid *fid; int i, ret; - ret = fi_poll(poll->hpoll, context, count); + ret = ofi_poll(poll->hpoll, context, count); for (i = 0; i < ret; i++) { fid = context[i]; context[i] = fid->context; @@ -54,7 +54,7 @@ static int hook_poll_add(struct fid_poll *pollset, struct fid *event_fid, { struct hook_poll *poll = container_of(pollset, struct hook_poll, poll); - return fi_poll_add(poll->hpoll, hook_to_hfid(event_fid), flags); + return ofi_poll_add(poll->hpoll, hook_to_hfid(event_fid), flags); } static int hook_poll_del(struct fid_poll *pollset, struct fid *event_fid, @@ -62,11 +62,11 @@ static int hook_poll_del(struct fid_poll *pollset, struct fid *event_fid, { struct hook_poll *poll = container_of(pollset, struct hook_poll, poll); - return fi_poll_del(poll->hpoll, hook_to_hfid(event_fid), flags); + return ofi_poll_del(poll->hpoll, hook_to_hfid(event_fid), flags); } -static struct fi_ops_poll hook_poll_ops = { - .size = sizeof(struct fi_ops_poll), +static struct ofi_ops_poll hook_poll_ops = { + .size = sizeof(struct ofi_ops_poll), .poll = hook_do_poll, .poll_add = hook_poll_add, .poll_del = hook_poll_del, @@ -88,7 +88,7 @@ int hook_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr, poll->poll.fid.ops = &hook_fid_ops; poll->poll.ops = &hook_poll_ops; - ret = fi_poll_open(dom->hdomain, attr, &poll->hpoll); + ret = ofi_poll_open(dom->hdomain, attr, &poll->hpoll); if (ret) free(poll); else diff --git 
a/prov/opx/include/rdma/fi_direct_eq.h b/prov/opx/include/rdma/fi_direct_eq.h index abfce3c57ec..c600e802727 100644 --- a/prov/opx/include/rdma/fi_direct_eq.h +++ b/prov/opx/include/rdma/fi_direct_eq.h @@ -88,20 +88,20 @@ int ofi_wait(struct fid_wait *waitset, int timeout) } static inline -int fi_poll(struct fid_poll *pollset, void **context, int count) +int ofi_poll(struct fid_poll *pollset, void **context, int count) { return -FI_ENOSYS; /* TODO - implement this */ } static inline -int fi_poll_add(struct fid_poll *pollset, struct fid *event_fid, +int ofi_poll_add(struct fid_poll *pollset, struct fid *event_fid, uint64_t flags) { return -FI_ENOSYS; /* TODO - implement this */ } static inline -int fi_poll_del(struct fid_poll *pollset, struct fid *event_fid, +int ofi_poll_del(struct fid_poll *pollset, struct fid *event_fid, uint64_t flags) { return -FI_ENOSYS; /* TODO - implement this */ diff --git a/prov/psm2/include/rdma/fi_direct_domain.h b/prov/psm2/include/rdma/fi_direct_domain.h index c0fdfdb513f..ecd889d4190 100644 --- a/prov/psm2/include/rdma/fi_direct_domain.h +++ b/prov/psm2/include/rdma/fi_direct_domain.h @@ -125,7 +125,7 @@ ofi_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr, } static inline int -fi_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr, +ofi_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr, struct fid_poll **pollset) { return domain->ops->poll_open(domain, attr, pollset); diff --git a/prov/psm2/include/rdma/fi_direct_eq.h b/prov/psm2/include/rdma/fi_direct_eq.h index b1be82f7b02..1e68723b83b 100644 --- a/prov/psm2/include/rdma/fi_direct_eq.h +++ b/prov/psm2/include/rdma/fi_direct_eq.h @@ -89,19 +89,19 @@ ofi_wait(struct fid_wait *waitset, int timeout) */ static inline int -fi_poll(struct fid_poll *pollset, void **context, int count) +ofi_poll(struct fid_poll *pollset, void **context, int count) { return pollset->ops->poll(pollset, context, count); } static inline int -fi_poll_add(struct fid_poll 
*pollset, struct fid *event_fid, uint64_t flags) +ofi_poll_add(struct fid_poll *pollset, struct fid *event_fid, uint64_t flags) { return pollset->ops->poll_add(pollset, event_fid, flags); } static inline int -fi_poll_del(struct fid_poll *pollset, struct fid *event_fid, uint64_t flags) +ofi_poll_del(struct fid_poll *pollset, struct fid *event_fid, uint64_t flags) { return pollset->ops->poll_del(pollset, event_fid, flags); } diff --git a/prov/psm2/src/psmx2_cntr.c b/prov/psm2/src/psmx2_cntr.c index 861dc3b8096..2112cd8b0f7 100644 --- a/prov/psm2/src/psmx2_cntr.c +++ b/prov/psm2/src/psmx2_cntr.c @@ -282,7 +282,7 @@ static int psmx2_cntr_close(fid_t fid) } if (cntr->wait) { - fi_poll_del(&cntr->wait->pollset->poll_fid, &cntr->cntr.fid, 0); + ofi_poll_del(&cntr->wait->pollset->poll_fid, &cntr->cntr.fid, 0); if (cntr->wait_is_local) fi_close((fid_t)cntr->wait); } @@ -420,7 +420,7 @@ int psmx2_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, ofi_spin_init(&cntr_priv->trigger_lock); if (wait) - fi_poll_add(&cntr_priv->wait->pollset->poll_fid, + ofi_poll_add(&cntr_priv->wait->pollset->poll_fid, &cntr_priv->cntr.fid, 0); psmx2_domain_acquire(domain_priv); diff --git a/prov/psm2/src/psmx2_cq.c b/prov/psm2/src/psmx2_cq.c index 3f9cf760e08..d3c8bd60a80 100644 --- a/prov/psm2/src/psmx2_cq.c +++ b/prov/psm2/src/psmx2_cq.c @@ -1840,7 +1840,7 @@ static int psmx2_cq_close(fid_t fid) ofi_spin_destroy(&cq->lock); if (cq->wait) { - fi_poll_del(&cq->wait->pollset->poll_fid, &cq->cq.fid, 0); + ofi_poll_del(&cq->wait->pollset->poll_fid, &cq->cq.fid, 0); if (cq->wait_is_local) fi_close(&cq->wait->wait_fid.fid); } @@ -2014,7 +2014,7 @@ int psmx2_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, } if (wait) - fi_poll_add(&cq_priv->wait->pollset->poll_fid, &cq_priv->cq.fid, 0); + ofi_poll_add(&cq_priv->wait->pollset->poll_fid, &cq_priv->cq.fid, 0); *cq = &cq_priv->cq; return 0; diff --git a/prov/psm2/src/psmx2_domain.c b/prov/psm2/src/psmx2_domain.c index 
189250c02d2..0f4ca31a3ea 100644 --- a/prov/psm2/src/psmx2_domain.c +++ b/prov/psm2/src/psmx2_domain.c @@ -291,7 +291,7 @@ static struct fi_ops_domain psmx2_domain_ops = { .endpoint = psmx2_ep_open, .scalable_ep = psmx2_sep_open, .cntr_open = psmx2_cntr_open, - .poll_open = fi_poll_create, + .poll_open = ofi_poll_create, .stx_ctx = psmx2_stx_ctx, .srx_ctx = fi_no_srx_context, .query_atomic = psmx2_query_atomic, diff --git a/prov/rxd/src/rxd_domain.c b/prov/rxd/src/rxd_domain.c index 7a5a7d4f1ec..e94a0d597f4 100644 --- a/prov/rxd/src/rxd_domain.c +++ b/prov/rxd/src/rxd_domain.c @@ -44,7 +44,7 @@ static struct fi_ops_domain rxd_domain_ops = { .endpoint = rxd_endpoint, .scalable_ep = fi_no_scalable_ep, .cntr_open = rxd_cntr_open, - .poll_open = fi_poll_create, + .poll_open = ofi_poll_create, .stx_ctx = fi_no_stx_context, .srx_ctx = fi_no_srx_context, .query_atomic = rxd_query_atomic, diff --git a/prov/rxm/src/rxm_domain.c b/prov/rxm/src/rxm_domain.c index 1b159b528c3..8b65bdc796f 100644 --- a/prov/rxm/src/rxm_domain.c +++ b/prov/rxm/src/rxm_domain.c @@ -344,7 +344,7 @@ static struct fi_ops_domain rxm_domain_ops = { .endpoint = rxm_endpoint, .scalable_ep = fi_no_scalable_ep, .cntr_open = rxm_cntr_open, - .poll_open = fi_poll_create, + .poll_open = ofi_poll_create, .stx_ctx = fi_no_stx_context, .srx_ctx = fi_no_srx_context, .query_atomic = rxm_ep_query_atomic, diff --git a/prov/shm/src/smr_domain.c b/prov/shm/src/smr_domain.c index 188cea31410..479678e3e4b 100644 --- a/prov/shm/src/smr_domain.c +++ b/prov/shm/src/smr_domain.c @@ -42,7 +42,7 @@ static struct fi_ops_domain smr_domain_ops = { .endpoint = smr_endpoint, .scalable_ep = fi_no_scalable_ep, .cntr_open = smr_cntr_open, - .poll_open = fi_poll_create, + .poll_open = ofi_poll_create, .stx_ctx = fi_no_stx_context, .srx_ctx = smr_srx_context, .query_atomic = smr_query_atomic, diff --git a/prov/sm2/src/sm2_domain.c b/prov/sm2/src/sm2_domain.c index 11deff319f6..c7fb10f9c49 100644 --- a/prov/sm2/src/sm2_domain.c +++ 
b/prov/sm2/src/sm2_domain.c @@ -43,7 +43,7 @@ static struct fi_ops_domain sm2_domain_ops = { .endpoint = sm2_endpoint, .scalable_ep = fi_no_scalable_ep, .cntr_open = sm2_cntr_open, - .poll_open = fi_poll_create, + .poll_open = ofi_poll_create, .stx_ctx = fi_no_stx_context, .srx_ctx = sm2_srx_context, .query_atomic = sm2_query_atomic, diff --git a/prov/tcp/src/xnet_domain.c b/prov/tcp/src/xnet_domain.c index e2a7fae66e9..f1f0256b57c 100644 --- a/prov/tcp/src/xnet_domain.c +++ b/prov/tcp/src/xnet_domain.c @@ -174,7 +174,7 @@ static struct fi_ops_domain xnet_domain_ops = { .endpoint = xnet_open_ep, .scalable_ep = fi_no_scalable_ep, .cntr_open = xnet_cntr_open, - .poll_open = fi_poll_create, + .poll_open = ofi_poll_create, .stx_ctx = fi_no_stx_context, .srx_ctx = xnet_srx_context, .query_atomic = xnet_query_atomic, diff --git a/prov/ucx/src/ucx_domain.c b/prov/ucx/src/ucx_domain.c index 9549c2bf27e..761b37d0328 100644 --- a/prov/ucx/src/ucx_domain.c +++ b/prov/ucx/src/ucx_domain.c @@ -115,7 +115,7 @@ struct fi_ops_domain ucx_domain_ops = { .endpoint = ucx_ep_open, .scalable_ep = fi_no_scalable_ep, .cntr_open = ucx_cntr_open, - .poll_open = fi_poll_create, + .poll_open = ofi_poll_create, .stx_ctx = fi_no_stx_context, .srx_ctx = fi_no_srx_context, .query_atomic = fi_no_query_atomic, diff --git a/prov/udp/src/udpx_domain.c b/prov/udp/src/udpx_domain.c index d62247a4d1c..36ad0d02389 100644 --- a/prov/udp/src/udpx_domain.c +++ b/prov/udp/src/udpx_domain.c @@ -43,7 +43,7 @@ static struct fi_ops_domain udpx_domain_ops = { .endpoint = udpx_endpoint, .scalable_ep = fi_no_scalable_ep, .cntr_open = fi_no_cntr_open, - .poll_open = fi_poll_create, + .poll_open = ofi_poll_create, .stx_ctx = fi_no_stx_context, .srx_ctx = fi_no_srx_context, .query_atomic = fi_no_query_atomic, diff --git a/prov/util/src/util_cntr.c b/prov/util/src/util_cntr.c index 8f4fb48554f..b46510d0e02 100644 --- a/prov/util/src/util_cntr.c +++ b/prov/util/src/util_cntr.c @@ -247,7 +247,7 @@ int 
ofi_cntr_cleanup(struct util_cntr *cntr) fi_close(&cntr->peer_cntr->fid); if (cntr->wait) { - fi_poll_del(&cntr->wait->pollset->poll_fid, + ofi_poll_del(&cntr->wait->pollset->poll_fid, &cntr->cntr_fid.fid, 0); if (cntr->internal_wait) fi_close(&cntr->wait->wait_fid.fid); @@ -389,7 +389,7 @@ int ofi_cntr_init(const struct fi_provider *prov, struct fid_domain *domain, /* CNTR must be fully operational before adding to wait set */ if (wait) { cntr->wait = container_of(wait, struct util_wait, wait_fid); - ret = fi_poll_add(&cntr->wait->pollset->poll_fid, + ret = ofi_poll_add(&cntr->wait->pollset->poll_fid, &cntr->cntr_fid.fid, 0); if (ret) { ofi_cntr_cleanup(cntr); diff --git a/prov/util/src/util_cq.c b/prov/util/src/util_cq.c index 99ad02077f7..0669c20565a 100644 --- a/prov/util/src/util_cq.c +++ b/prov/util/src/util_cq.c @@ -408,7 +408,7 @@ int ofi_cq_cleanup(struct util_cq *cq) util_peer_cq_cleanup(cq); if (cq->wait) { - fi_poll_del(&cq->wait->pollset->poll_fid, + ofi_poll_del(&cq->wait->pollset->poll_fid, &cq->cq_fid.fid, 0); if (cq->internal_wait) fi_close(&cq->wait->wait_fid.fid); @@ -744,7 +744,7 @@ int ofi_cq_init(const struct fi_provider *prov, struct fid_domain *domain, /* CQ must be fully operational before adding to wait set */ if (wait) { cq->wait = container_of(wait, struct util_wait, wait_fid); - ret = fi_poll_add(&cq->wait->pollset->poll_fid, + ret = ofi_poll_add(&cq->wait->pollset->poll_fid, &cq->cq_fid.fid, 0); if (ret) { if (cq->internal_wait) { diff --git a/prov/util/src/util_eq.c b/prov/util/src/util_eq.c index 66366fd795a..f060e724e0c 100644 --- a/prov/util/src/util_eq.c +++ b/prov/util/src/util_eq.c @@ -240,7 +240,7 @@ int ofi_eq_cleanup(struct fid *fid) } if (eq->wait) { - fi_poll_del(&eq->wait->pollset->poll_fid, + ofi_poll_del(&eq->wait->pollset->poll_fid, &eq->eq_fid.fid, 0); if (eq->internal_wait) fi_close(&eq->wait->wait_fid.fid); @@ -421,7 +421,7 @@ int ofi_eq_init(struct fid_fabric *fabric_fid, struct fi_eq_attr *attr, /* EQ must be fully 
operational before adding to wait set */ if (eq->wait) { - ret = fi_poll_add(&eq->wait->pollset->poll_fid, + ret = ofi_poll_add(&eq->wait->pollset->poll_fid, &eq->eq_fid.fid, 0); if (ret) { ofi_eq_cleanup(&eq->eq_fid.fid); diff --git a/prov/util/src/util_poll.c b/prov/util/src/util_poll.c index 8a010e11618..1977a357db6 100644 --- a/prov/util/src/util_poll.c +++ b/prov/util/src/util_poll.c @@ -148,8 +148,8 @@ static int util_poll_close(struct fid *fid) return 0; } -static struct fi_ops_poll util_poll_ops = { - .size = sizeof(struct fi_ops_poll), +static struct ofi_ops_poll util_poll_ops = { + .size = sizeof(struct ofi_ops_poll), .poll = util_poll_run, .poll_add = util_poll_add, .poll_del = util_poll_del, @@ -174,7 +174,7 @@ static int util_verify_poll_attr(const struct fi_provider *prov, return 0; } -int fi_poll_create_(const struct fi_provider *prov, struct fid_domain *domain, +int ofi_poll_create_(const struct fi_provider *prov, struct fid_domain *domain, struct fi_poll_attr *attr, struct fid_poll **poll_fid) { struct util_poll *pollset; @@ -208,11 +208,11 @@ int fi_poll_create_(const struct fi_provider *prov, struct fid_domain *domain, return 0; } -int fi_poll_create(struct fid_domain *domain_fid, struct fi_poll_attr *attr, +int ofi_poll_create(struct fid_domain *domain_fid, struct fi_poll_attr *attr, struct fid_poll **poll_fid) { struct util_domain *domain; domain = container_of(domain_fid, struct util_domain, domain_fid); - return fi_poll_create_(domain->prov, domain_fid, attr, poll_fid); + return ofi_poll_create_(domain->prov, domain_fid, attr, poll_fid); } diff --git a/prov/util/src/util_wait.c b/prov/util/src/util_wait.c index 40e3fcc6b5f..6887936513d 100644 --- a/prov/util/src/util_wait.c +++ b/prov/util/src/util_wait.c @@ -150,7 +150,7 @@ int ofi_wait_init(struct util_fabric *fabric, struct fi_wait_attr *attr, } memset(&poll_attr, 0, sizeof poll_attr); - ret = fi_poll_create_(fabric->prov, NULL, &poll_attr, &poll_fid); + ret = 
ofi_poll_create_(fabric->prov, NULL, &poll_attr, &poll_fid); if (ret) return ret; @@ -363,7 +363,7 @@ static int util_wait_fd_try(struct util_wait *wait) } ofi_mutex_unlock(&wait->lock); - ret = fi_poll(&wait->pollset->poll_fid, &context, 1); + ret = ofi_poll(&wait->pollset->poll_fid, &context, 1); return (ret > 0) ? -FI_EAGAIN : (ret == -FI_EAGAIN) ? FI_SUCCESS : ret; release: diff --git a/src/enosys.c b/src/enosys.c index e1d79d53192..aceada0e8b7 100644 --- a/src/enosys.c +++ b/src/enosys.c @@ -414,10 +414,6 @@ ssize_t fi_no_msg_injectdata(struct fid_ep *ep, const void *buf, size_t len, return -FI_ENOSYS; } -/* - * struct fi_ops_poll - */ - /* * struct fi_ops_eq */ From ed2f32e4b10ccb985c14e5b6673f1ad97e15396a Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Tue, 26 Sep 2023 19:01:40 -0700 Subject: [PATCH 24/34] core: Remove FI_WAIT_MUTEX_COND support from API There's never been an implementation. Signed-off-by: Sean Hefty --- fabtests/man/fabtests.7.md | 6 +++--- fabtests/test_configs/eq_cq.test | 3 --- fabtests/ubertest/config.c | 1 - fabtests/ubertest/cq.c | 1 - fabtests/ubertest/domain.c | 1 - fabtests/ubertest/uber.c | 2 -- include/ofi.h | 8 -------- include/rdma/fi_eq.h | 2 +- man/fi_cntr.3.md | 7 +------ man/fi_cq.3.md | 6 +----- man/fi_eq.3.md | 7 +------ prov/psm2/src/psmx2_cntr.c | 4 ---- prov/psm2/src/psmx2_cq.c | 4 ---- prov/psm3/src/psmx3_cntr.c | 4 ---- prov/psm3/src/psmx3_cq.c | 4 ---- prov/util/src/util_cntr.c | 1 - prov/util/src/util_cq.c | 1 - prov/util/src/util_eq.c | 2 -- prov/util/src/util_wait.c | 2 -- src/fi_tostr.c | 1 - 20 files changed, 7 insertions(+), 60 deletions(-) diff --git a/fabtests/man/fabtests.7.md b/fabtests/man/fabtests.7.md index 63f51ec624d..7760e7f7a9a 100644 --- a/fabtests/man/fabtests.7.md +++ b/fabtests/man/fabtests.7.md @@ -345,13 +345,13 @@ The following keys and respective key values may be used in the config file. 
: FI_AV_MAP, FI_AV_TABLE *eq_wait_obj* -: FI_WAIT_NONE, FI_WAIT_UNSPEC, FI_WAIT_FD, FI_WAIT_MUTEX_COND +: FI_WAIT_NONE, FI_WAIT_UNSPEC, FI_WAIT_FD *cq_wait_obj* -: FI_WAIT_NONE, FI_WAIT_UNSPEC, FI_WAIT_FD, FI_WAIT_MUTEX_COND +: FI_WAIT_NONE, FI_WAIT_UNSPEC, FI_WAIT_FD *cntr_wait_obj* -: FI_WAIT_NONE, FI_WAIT_UNSPEC, FI_WAIT_FD, FI_WAIT_MUTEX_COND +: FI_WAIT_NONE, FI_WAIT_UNSPEC, FI_WAIT_FD *threading* : FI_THREAD_UNSPEC, FI_THREAD_SAFE, FI_THREAD_DOMAIN, FI_THREAD_COMPLETION diff --git a/fabtests/test_configs/eq_cq.test b/fabtests/test_configs/eq_cq.test index 1641eef2154..82ca4cce692 100644 --- a/fabtests/test_configs/eq_cq.test +++ b/fabtests/test_configs/eq_cq.test @@ -20,7 +20,6 @@ FI_WAIT_NONE, FI_WAIT_UNSPEC, FI_WAIT_FD, - FI_WAIT_MUTEX_COND, ], cq_wait_obj: [ FI_WAIT_NONE, @@ -56,8 +55,6 @@ FI_WAIT_NONE, FI_WAIT_UNSPEC, FI_WAIT_FD, - FI_WAIT_MUTEX_COND, - ], test_class: [ FT_CAP_MSG, ], diff --git a/fabtests/ubertest/config.c b/fabtests/ubertest/config.c index 7ab97fc482f..2ac339186e3 100644 --- a/fabtests/ubertest/config.c +++ b/fabtests/ubertest/config.c @@ -313,7 +313,6 @@ static int ft_parse_num(char *str, int len, struct key_t *key, void *buf) TEST_ENUM_SET_N_RETURN(str, len, FI_WAIT_NONE, enum fi_wait_obj, buf); TEST_ENUM_SET_N_RETURN(str, len, FI_WAIT_UNSPEC, enum fi_wait_obj, buf); TEST_ENUM_SET_N_RETURN(str, len, FI_WAIT_FD, enum fi_wait_obj, buf); - TEST_ENUM_SET_N_RETURN(str, len, FI_WAIT_MUTEX_COND, enum fi_wait_obj, buf); FT_ERR("Unknown wait_obj"); } else if (!strncmp(key->str, "op", strlen("op"))) { TEST_ENUM_SET_N_RETURN(str, len, FI_MIN, enum fi_op, buf); diff --git a/fabtests/ubertest/cq.c b/fabtests/ubertest/cq.c index f5117f51f60..1e87772a99f 100644 --- a/fabtests/ubertest/cq.c +++ b/fabtests/ubertest/cq.c @@ -315,7 +315,6 @@ static size_t ft_comp_x(struct fid_cq *cq, struct ft_xcontrol *ft_x, break; case FI_WAIT_UNSPEC: case FI_WAIT_FD: - case FI_WAIT_MUTEX_COND: ft_cq_read(fi_cq_sread, cq, buf, ft_x->cq_format, ft_x->credits, 
completions, x_str, ret, verify, NULL, timeout); break; diff --git a/fabtests/ubertest/domain.c b/fabtests/ubertest/domain.c index d3eb03f42bb..ccc8669b29b 100644 --- a/fabtests/ubertest/domain.c +++ b/fabtests/ubertest/domain.c @@ -104,7 +104,6 @@ ssize_t ft_get_event(uint32_t *event, void *buf, size_t len, break; case FI_WAIT_UNSPEC: case FI_WAIT_FD: - case FI_WAIT_MUTEX_COND: ret = fi_eq_sread(eq, event, buf, len, FT_SREAD_TO, 0); if (ret == -FI_EAVAIL) { return ft_eq_readerr(); diff --git a/fabtests/ubertest/uber.c b/fabtests/ubertest/uber.c index 3139fdda06f..bc7a937bc81 100644 --- a/fabtests/ubertest/uber.c +++ b/fabtests/ubertest/uber.c @@ -161,8 +161,6 @@ static char *ft_wait_obj_str(enum fi_wait_obj enum_str) return "wait_unspec"; case FI_WAIT_FD: return "wait_fd"; - case FI_WAIT_MUTEX_COND: - return "wait_mutex_cond"; default: return ""; } diff --git a/include/ofi.h b/include/ofi.h index 181db0e0b3c..a13730d4911 100644 --- a/include/ofi.h +++ b/include/ofi.h @@ -156,14 +156,6 @@ struct fid_wait { struct ofi_ops_wait *ops; }; -#ifndef _WIN32 -// TODO: Remove -struct ofi_mutex_cond { - pthread_mutex_t *mutex; - pthread_cond_t *cond; -}; -#endif /* _WIN32 */ - static inline int ofi_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr, struct fid_wait **waitset) diff --git a/include/rdma/fi_eq.h b/include/rdma/fi_eq.h index aed91242306..a21d69c3d6d 100644 --- a/include/rdma/fi_eq.h +++ b/include/rdma/fi_eq.h @@ -60,7 +60,7 @@ enum fi_wait_obj { FI_WAIT_UNSPEC, FI_WAIT_SET, FI_WAIT_FD, - FI_WAIT_MUTEX_COND, /* pthread mutex & cond */ // 'remove' + FI_WAIT_MUTEX_COND, FI_WAIT_YIELD, FI_WAIT_POLLFD, }; diff --git a/man/fi_cntr.3.md b/man/fi_cntr.3.md index e3335a19db8..065caaa82fd 100644 --- a/man/fi_cntr.3.md +++ b/man/fi_cntr.3.md @@ -131,8 +131,7 @@ struct fi_cntr_attr { object associated with a counter, in order to use it in other system calls. 
The following values may be used to specify the type of wait object associated with a counter: FI_WAIT_NONE, FI_WAIT_UNSPEC, - FI_WAIT_FD, FI_WAIT_MUTEX_COND, and FI_WAIT_YIELD. - The default is FI_WAIT_NONE. + FI_WAIT_FD, and FI_WAIT_YIELD. The default is FI_WAIT_NONE. - *FI_WAIT_NONE* : Used to indicate that the user will not block (wait) for events on @@ -152,10 +151,6 @@ struct fi_cntr_attr { poll, and epoll routines. However, a provider may signal an FD wait object by marking it as readable, writable, or with an error. -- *FI_WAIT_MUTEX_COND* -: Specifies that the counter should use a pthread mutex and cond - variable as a wait object. - - *FI_WAIT_YIELD* : Indicates that the counter will wait without a wait object but instead yield on every wait. Allows usage of fi_cntr_wait through a spin. diff --git a/man/fi_cq.3.md b/man/fi_cq.3.md index a7b3301e7f0..ef151c13675 100644 --- a/man/fi_cq.3.md +++ b/man/fi_cq.3.md @@ -222,7 +222,7 @@ struct fi_cq_tagged_entry { CQ, in order to use it in other system calls. The following values may be used to specify the type of wait object associated with a CQ: FI_WAIT_NONE, FI_WAIT_UNSPEC, FI_WAIT_FD, - FI_WAIT_MUTEX_COND, and FI_WAIT_YIELD. The default is FI_WAIT_NONE. + and FI_WAIT_YIELD. The default is FI_WAIT_NONE. - *FI_WAIT_NONE* : Used to indicate that the user will not block (wait) for completions @@ -243,10 +243,6 @@ struct fi_cq_tagged_entry { poll, and epoll routines. However, a provider may signal an FD wait object by marking it as readable, writable, or with an error. -- *FI_WAIT_MUTEX_COND* -: Specifies that the CQ should use a pthread mutex and cond variable - as a wait object. - - *FI_WAIT_YIELD* : Indicates that the CQ will wait without a wait object but instead yield on every wait. 
Allows usage of fi_cq_sread and fi_cq_sreadfrom diff --git a/man/fi_eq.3.md b/man/fi_eq.3.md index ea84b9659a7..d5465466d15 100644 --- a/man/fi_eq.3.md +++ b/man/fi_eq.3.md @@ -167,10 +167,6 @@ struct fi_eq_attr { poll, and epoll routines. However, a provider may signal an FD wait object by marking it as readable or with an error. -- *FI_WAIT_MUTEX_COND* -: Specifies that the EQ should use a pthread mutex and cond variable - as a wait object. - - *FI_WAIT_YIELD* : Indicates that the EQ will wait without a wait object but instead yield on every wait. Allows usage of fi_eq_sread through a spin. @@ -205,8 +201,7 @@ commands are usable with an EQ. associated with the EQ. The format of the wait-object is specified during EQ creation, through the EQ attributes. The fi_control arg parameter should be an address where a pointer to the returned wait - object will be written. This should be an 'int *' for FI_WAIT_FD, - or 'struct fi_mutex_cond' for FI_WAIT_MUTEX_COND. + object will be written. This should be an 'int *' for FI_WAIT_FD. 
```c struct fi_mutex_cond { diff --git a/prov/psm2/src/psmx2_cntr.c b/prov/psm2/src/psmx2_cntr.c index 2112cd8b0f7..c2618706d72 100644 --- a/prov/psm2/src/psmx2_cntr.c +++ b/prov/psm2/src/psmx2_cntr.c @@ -379,7 +379,6 @@ int psmx2_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, break; case FI_WAIT_FD: - case FI_WAIT_MUTEX_COND: wait_attr.wait_obj = attr->wait_obj; wait_attr.flags = 0; err = ofi_wait_open(&domain_priv->fabric->util_fabric.fabric_fid, @@ -390,9 +389,6 @@ int psmx2_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, break; default: - FI_INFO(&psmx2_prov, FI_LOG_CQ, - "attr->wait_obj=%d, supported=%d...%d\n", - attr->wait_obj, FI_WAIT_NONE, FI_WAIT_MUTEX_COND); return -FI_EINVAL; } diff --git a/prov/psm2/src/psmx2_cq.c b/prov/psm2/src/psmx2_cq.c index d3c8bd60a80..555a2fd0d77 100644 --- a/prov/psm2/src/psmx2_cq.c +++ b/prov/psm2/src/psmx2_cq.c @@ -1943,7 +1943,6 @@ int psmx2_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, case FI_WAIT_UNSPEC: case FI_WAIT_FD: - case FI_WAIT_MUTEX_COND: wait_attr.wait_obj = attr->wait_obj; wait_attr.flags = 0; err = ofi_wait_open(&domain_priv->fabric->util_fabric.fabric_fid, @@ -1954,9 +1953,6 @@ int psmx2_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, break; default: - FI_INFO(&psmx2_prov, FI_LOG_CQ, - "attr->wait_obj=%d, supported=%d...%d\n", attr->wait_obj, - FI_WAIT_NONE, FI_WAIT_MUTEX_COND); return -FI_EINVAL; } diff --git a/prov/psm3/src/psmx3_cntr.c b/prov/psm3/src/psmx3_cntr.c index 6712fd4b862..2676d41e15b 100644 --- a/prov/psm3/src/psmx3_cntr.c +++ b/prov/psm3/src/psmx3_cntr.c @@ -384,7 +384,6 @@ int psmx3_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, break; case FI_WAIT_FD: - case FI_WAIT_MUTEX_COND: wait_attr.wait_obj = attr->wait_obj; wait_attr.flags = 0; err = ofi_wait_open(&domain_priv->fabric->util_fabric.fabric_fid, @@ -395,9 +394,6 @@ int psmx3_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr, break; default: - 
PSMX3_INFO(&psmx3_prov, FI_LOG_CQ, - "attr->wait_obj=%d, supported=%d...%d\n", - attr->wait_obj, FI_WAIT_NONE, FI_WAIT_MUTEX_COND); return -FI_EINVAL; } diff --git a/prov/psm3/src/psmx3_cq.c b/prov/psm3/src/psmx3_cq.c index 614e3f6bdcb..58edd72c16a 100644 --- a/prov/psm3/src/psmx3_cq.c +++ b/prov/psm3/src/psmx3_cq.c @@ -1221,7 +1221,6 @@ int psmx3_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, case FI_WAIT_UNSPEC: case FI_WAIT_FD: - case FI_WAIT_MUTEX_COND: wait_attr.wait_obj = attr->wait_obj; wait_attr.flags = 0; err = ofi_wait_open(&domain_priv->fabric->util_fabric.fabric_fid, @@ -1232,9 +1231,6 @@ int psmx3_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, break; default: - PSMX3_INFO(&psmx3_prov, FI_LOG_CQ, - "attr->wait_obj=%d, supported=%d...%d\n", attr->wait_obj, - FI_WAIT_NONE, FI_WAIT_MUTEX_COND); return -FI_EINVAL; } diff --git a/prov/util/src/util_cntr.c b/prov/util/src/util_cntr.c index b46510d0e02..f12e10c2734 100644 --- a/prov/util/src/util_cntr.c +++ b/prov/util/src/util_cntr.c @@ -356,7 +356,6 @@ int ofi_cntr_init(const struct fi_provider *prov, struct fid_domain *domain, case FI_WAIT_UNSPEC: case FI_WAIT_FD: case FI_WAIT_POLLFD: - case FI_WAIT_MUTEX_COND: case FI_WAIT_YIELD: memset(&wait_attr, 0, sizeof wait_attr); wait_attr.wait_obj = attr->wait_obj; diff --git a/prov/util/src/util_cq.c b/prov/util/src/util_cq.c index 0669c20565a..71313a981fe 100644 --- a/prov/util/src/util_cq.c +++ b/prov/util/src/util_cq.c @@ -726,7 +726,6 @@ int ofi_cq_init(const struct fi_provider *prov, struct fid_domain *domain, case FI_WAIT_UNSPEC: case FI_WAIT_FD: case FI_WAIT_POLLFD: - case FI_WAIT_MUTEX_COND: case FI_WAIT_YIELD: memset(&wait_attr, 0, sizeof wait_attr); wait_attr.wait_obj = attr->wait_obj; diff --git a/prov/util/src/util_eq.c b/prov/util/src/util_eq.c index f060e724e0c..1474e43bbaa 100644 --- a/prov/util/src/util_eq.c +++ b/prov/util/src/util_eq.c @@ -301,7 +301,6 @@ static int util_eq_init(struct fid_fabric *fabric, struct util_eq 
*eq, case FI_WAIT_UNSPEC: case FI_WAIT_FD: case FI_WAIT_POLLFD: - case FI_WAIT_MUTEX_COND: case FI_WAIT_YIELD: memset(&wait_attr, 0, sizeof wait_attr); wait_attr.wait_obj = attr->wait_obj; @@ -372,7 +371,6 @@ static int util_verify_eq_attr(const struct fi_provider *prov, case FI_WAIT_UNSPEC: case FI_WAIT_FD: case FI_WAIT_POLLFD: - case FI_WAIT_MUTEX_COND: case FI_WAIT_YIELD: break; default: diff --git a/prov/util/src/util_wait.c b/prov/util/src/util_wait.c index 6887936513d..e28fbbfc02a 100644 --- a/prov/util/src/util_wait.c +++ b/prov/util/src/util_wait.c @@ -83,7 +83,6 @@ int ofi_check_wait_attr(const struct fi_provider *prov, case FI_WAIT_UNSPEC: case FI_WAIT_FD: case FI_WAIT_POLLFD: - case FI_WAIT_MUTEX_COND: case FI_WAIT_YIELD: break; default: @@ -140,7 +139,6 @@ int ofi_wait_init(struct util_fabric *fabric, struct fi_wait_attr *attr, break; case FI_WAIT_FD: case FI_WAIT_POLLFD: - case FI_WAIT_MUTEX_COND: case FI_WAIT_YIELD: wait->wait_obj = attr->wait_obj; break; diff --git a/src/fi_tostr.c b/src/fi_tostr.c index 11dcc4428a8..e47f3704daa 100644 --- a/src/fi_tostr.c +++ b/src/fi_tostr.c @@ -770,7 +770,6 @@ ofi_tostr_wait_obj(char *buf, size_t len, enum fi_wait_obj obj) CASEENUMSTRN(FI_WAIT_NONE, len); CASEENUMSTRN(FI_WAIT_UNSPEC, len); CASEENUMSTRN(FI_WAIT_FD, len); - CASEENUMSTRN(FI_WAIT_MUTEX_COND, len); CASEENUMSTRN(FI_WAIT_YIELD, len); CASEENUMSTRN(FI_WAIT_POLLFD, len); default: From 4e4041879e735f8cf9dea6b9633e47d0d08cc1f0 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 27 Sep 2023 14:55:21 -0700 Subject: [PATCH 25/34] core: Remove deprecated MR mode options Remove FI_MR_BASIC/SCALABLE/UNSPEC. These were deprecated in version 1.5. Remove FI_LOCAL_MR, which was an earlier version of FI_MR_LOCAL and also deprecated. 
Signed-off-by: Sean Hefty --- fabtests/common/check_hmem.c | 2 +- fabtests/common/shared.c | 6 +- fabtests/functional/rdm_atomic.c | 3 +- fabtests/unit/av_test.c | 2 +- fabtests/unit/cntr_test.c | 4 +- fabtests/unit/cq_test.c | 2 +- fabtests/unit/dom_test.c | 2 +- fabtests/unit/eq_test.c | 2 +- fabtests/unit/getinfo_test.c | 65 +------------------ fabtests/unit/mr_cache_evict.c | 2 +- fabtests/unit/mr_test.c | 2 +- fabtests/unit/setopt_test.c | 2 +- include/ofi_mr.h | 19 +----- include/rdma/fabric.h | 8 +-- man/fi_direct.7.md | 4 -- man/fi_domain.3.md | 23 +------ man/fi_getinfo.3.md | 11 ---- man/fi_mr.3.md | 24 ------- man/fi_netdir.7.md | 10 +-- man/fi_rma.3.md | 4 +- man/fi_setup.7.md | 2 +- man/fi_verbs.7.md | 4 +- prov/efa/src/efa_prov_info.c | 2 +- prov/efa/test/efa_unit_test_ep.c | 6 +- prov/efa/test/efa_unit_test_info.c | 58 ----------------- prov/efa/test/efa_unit_tests.c | 1 - prov/efa/test/efa_unit_tests.h | 1 - prov/mrail/src/mrail_attr.c | 2 +- prov/mrail/src/mrail_init.c | 2 - prov/opx/configure.m4 | 8 --- prov/opx/include/rdma/opx/fi_opx_domain.h | 2 +- prov/opx/include/rdma/opx/fi_opx_endpoint.h | 4 -- prov/opx/src/fi_opx_domain.c | 8 +-- prov/opx/src/fi_opx_info.c | 2 +- prov/opx/src/fi_opx_mr.c | 4 +- prov/opx/src/fi_opx_tagged.c | 4 -- prov/psm2/src/psmx2.h | 2 +- prov/psm2/src/psmx2_attr.c | 5 +- prov/psm2/src/psmx2_domain.c | 2 +- prov/psm2/src/psmx2_mr.c | 20 ++---- prov/psm3/src/psmx3.h | 2 +- prov/psm3/src/psmx3_attr.c | 5 +- prov/psm3/src/psmx3_domain.c | 2 +- prov/psm3/src/psmx3_mr.c | 20 ++---- prov/rxd/src/rxd_attr.c | 2 +- prov/rxd/src/rxd_init.c | 18 +----- prov/rxm/src/rxm.h | 6 +- prov/rxm/src/rxm_attr.c | 12 +--- prov/rxm/src/rxm_init.c | 40 +++++------- prov/shm/src/smr_attr.c | 2 +- prov/sm2/src/sm2_attr.c | 2 +- prov/tcp/src/xnet_attr.c | 4 +- prov/ucx/src/ucx_init.c | 2 +- prov/udp/src/udpx_attr.c | 2 +- prov/util/src/util_attr.c | 69 ++------------------- prov/util/src/util_mr_map.c | 15 +---- prov/verbs/src/verbs_info.c | 2 
+- src/abi_1_0.c | 2 +- src/fi_tostr.c | 3 - util/info.c | 3 +- 60 files changed, 88 insertions(+), 461 deletions(-) diff --git a/fabtests/common/check_hmem.c b/fabtests/common/check_hmem.c index b7a97bc8e79..f45e4ee9226 100644 --- a/fabtests/common/check_hmem.c +++ b/fabtests/common/check_hmem.c @@ -44,7 +44,7 @@ int main(int argc, char** argv) return EXIT_FAILURE; hints->mode = ~0; hints->domain_attr->mode = ~0; - hints->domain_attr->mr_mode = ~(FI_MR_BASIC | FI_MR_SCALABLE); + hints->domain_attr->mr_mode = ~0; while ((op = getopt(argc, argv, "p:h")) != -1) { switch (op) { case 'p': diff --git a/fabtests/common/shared.c b/fabtests/common/shared.c index ecc5a3e92d7..e915368bb5f 100644 --- a/fabtests/common/shared.c +++ b/fabtests/common/shared.c @@ -278,8 +278,7 @@ static inline int ft_rma_write_target_allowed(uint64_t caps) static inline int ft_check_mr_local_flag(struct fi_info *info) { - return ((info->mode & FI_LOCAL_MR) || - (info->domain_attr->mr_mode & FI_MR_LOCAL)); + return (info->domain_attr->mr_mode & FI_MR_LOCAL); } uint64_t ft_info_to_mr_access(struct fi_info *info) @@ -1635,8 +1634,7 @@ int ft_exchange_keys(struct fi_rma_iov *peer_iov) len = sizeof(*rma_iov); } - if ((fi->domain_attr->mr_mode == FI_MR_BASIC) || - (fi->domain_attr->mr_mode & FI_MR_VIRT_ADDR)) { + if (fi->domain_attr->mr_mode & FI_MR_VIRT_ADDR) { rma_iov->addr = (uintptr_t) rx_buf + ft_rx_prefix_size(); } else { rma_iov->addr = 0; diff --git a/fabtests/functional/rdm_atomic.c b/fabtests/functional/rdm_atomic.c index 638b9e1148b..5f1a2c6b6fc 100644 --- a/fabtests/functional/rdm_atomic.c +++ b/fabtests/functional/rdm_atomic.c @@ -376,8 +376,7 @@ static uint64_t get_mr_key() { static uint64_t user_key = FT_MR_KEY + 1; - return ((fi->domain_attr->mr_mode == FI_MR_BASIC) || - (fi->domain_attr->mr_mode & FI_MR_PROV_KEY)) ? + return (fi->domain_attr->mr_mode & FI_MR_PROV_KEY) ? 
0 : user_key++; } diff --git a/fabtests/unit/av_test.c b/fabtests/unit/av_test.c index 3c4f06ce773..c940797cbf0 100644 --- a/fabtests/unit/av_test.c +++ b/fabtests/unit/av_test.c @@ -718,7 +718,7 @@ int main(int argc, char **argv) hints->mode = ~0; hints->domain_attr->mode = ~0; - hints->domain_attr->mr_mode = ~(FI_MR_BASIC | FI_MR_SCALABLE); + hints->domain_attr->mr_mode = ~0; hints->addr_format = FI_SOCKADDR; ret = fi_getinfo(FT_FIVERSION, opts.src_addr, 0, FI_SOURCE, hints, &fi); diff --git a/fabtests/unit/cntr_test.c b/fabtests/unit/cntr_test.c index 45306040f7b..5f8542b0f45 100644 --- a/fabtests/unit/cntr_test.c +++ b/fabtests/unit/cntr_test.c @@ -134,7 +134,7 @@ static int cntr_loop() } } - if (i < cntr_cnt) + if (i < cntr_cnt) testret = FAIL; free(cntrs); @@ -174,7 +174,7 @@ int main(int argc, char **argv) hints->mode = ~0; hints->domain_attr->mode = ~0; - hints->domain_attr->mr_mode = ~(FI_MR_BASIC | FI_MR_SCALABLE); + hints->domain_attr->mr_mode = ~0; ret = fi_getinfo(FT_FIVERSION, NULL, 0, 0, hints, &fi); if (ret) { diff --git a/fabtests/unit/cq_test.c b/fabtests/unit/cq_test.c index 19bd108cf41..023f6b39628 100644 --- a/fabtests/unit/cq_test.c +++ b/fabtests/unit/cq_test.c @@ -247,7 +247,7 @@ int main(int argc, char **argv) hints->mode = ~0; hints->domain_attr->mode = ~0; - hints->domain_attr->mr_mode = ~(FI_MR_BASIC | FI_MR_SCALABLE); + hints->domain_attr->mr_mode = ~0; ret = fi_getinfo(FT_FIVERSION, NULL, 0, 0, hints, &fi); if (ret) { diff --git a/fabtests/unit/dom_test.c b/fabtests/unit/dom_test.c index 7116b78e282..7b1352ea94a 100644 --- a/fabtests/unit/dom_test.c +++ b/fabtests/unit/dom_test.c @@ -90,7 +90,7 @@ int main(int argc, char **argv) hints->mode = ~0; hints->domain_attr->mode = ~0; - hints->domain_attr->mr_mode = ~(FI_MR_BASIC | FI_MR_SCALABLE); + hints->domain_attr->mr_mode = ~0; ret = fi_getinfo(FT_FIVERSION, NULL, 0, 0, hints, &fi); if (ret) { diff --git a/fabtests/unit/eq_test.c b/fabtests/unit/eq_test.c index 80cfeb4a720..11b29b30f6e 
100644 --- a/fabtests/unit/eq_test.c +++ b/fabtests/unit/eq_test.c @@ -611,7 +611,7 @@ int main(int argc, char **argv) hints->mode = FI_CONTEXT | FI_CONTEXT2 | FI_MSG_PREFIX | FI_ASYNC_IOV | FI_RX_CQ_DATA; - hints->domain_attr->mr_mode = ~(FI_MR_BASIC | FI_MR_SCALABLE); + hints->domain_attr->mr_mode = ~0; ret = fi_getinfo(FT_FIVERSION, NULL, 0, 0, hints, &fi); if (ret) { diff --git a/fabtests/unit/getinfo_test.c b/fabtests/unit/getinfo_test.c index 5cce2289f1e..6fb8d01afde 100644 --- a/fabtests/unit/getinfo_test.c +++ b/fabtests/unit/getinfo_test.c @@ -540,53 +540,6 @@ static int init_invalid_rma_WAW_ordering_size(struct fi_info *hints) /* * MR mode checks */ -static int init_mr_basic(struct fi_info *hints) -{ - hints->caps |= FI_RMA; - hints->domain_attr->mr_mode = FI_MR_BASIC; - return 0; -} - -static int check_mr_basic(struct fi_info *info) -{ - return (info->domain_attr->mr_mode != FI_MR_BASIC) ? - EXIT_FAILURE : 0; -} - -static int init_mr_scalable(struct fi_info *hints) -{ - hints->caps |= FI_RMA; - hints->domain_attr->mr_mode = FI_MR_SCALABLE; - return 0; -} - -static int check_mr_scalable(struct fi_info *info) -{ - return (info->domain_attr->mr_mode != FI_MR_SCALABLE) ? - EXIT_FAILURE : 0; -} - -static int init_mr_unspec(struct fi_info *hints) -{ - hints->caps |= FI_RMA; - hints->domain_attr->mr_mode = FI_MR_UNSPEC; - return 0; -} - -static int test_mr_v1_0(char *node, char *service, uint64_t flags, - struct fi_info *test_hints, struct fi_info **info) -{ - return fi_getinfo(FI_VERSION(1, 0), node, service, flags, - test_hints, info); -} - -static int check_mr_unspec(struct fi_info *info) -{ - return (info->domain_attr->mr_mode != FI_MR_BASIC && - info->domain_attr->mr_mode != FI_MR_SCALABLE) ? 
- EXIT_FAILURE : 0; -} - static int init_mr_mode(struct fi_info *hints, uint64_t mode) { hints->domain_attr->mr_mode = (uint32_t) mode; @@ -903,18 +856,7 @@ getinfo_test(bad_waw_ordering, 1, "Test invalid rma WAW ordering size", NULL, NULL, -FI_ENODATA) /* MR mode tests */ -getinfo_test(mr_mode, 1, "Test FI_MR_BASIC", NULL, NULL, 0, - hints, init_mr_basic, NULL, check_mr_basic, -FI_ENODATA) -getinfo_test(mr_mode, 2, "Test FI_MR_SCALABLE", NULL, NULL, 0, - hints, init_mr_scalable, NULL, check_mr_scalable, -FI_ENODATA) -getinfo_test(mr_mode, 3, "Test FI_MR_UNSPEC (v1.0)", NULL, NULL, 0, - hints, init_mr_unspec, test_mr_v1_0, check_mr_unspec, -FI_ENODATA) -getinfo_test(mr_mode, 4, "Test FI_MR_BASIC (v1.0)", NULL, NULL, 0, - hints, init_mr_basic, test_mr_v1_0, check_mr_basic, -FI_ENODATA) -getinfo_test(mr_mode, 5, "Test FI_MR_SCALABLE (v1.0)", NULL, NULL, 0, - hints, init_mr_scalable, test_mr_v1_0, check_mr_scalable, - -FI_ENODATA) -getinfo_test(mr_mode, 6, "Test mr_mode bits", NULL, NULL, 0, +getinfo_test(mr_mode, 1, "Test mr_mode bits", NULL, NULL, 0, hints, NULL, validate_mr_modes, NULL, 0) /* Progress tests */ @@ -1005,11 +947,6 @@ int main(int argc, char **argv) TEST_ENTRY_GETINFO(bad_waw_ordering1), TEST_ENTRY_GETINFO(neg1), TEST_ENTRY_GETINFO(mr_mode1), - TEST_ENTRY_GETINFO(mr_mode2), - TEST_ENTRY_GETINFO(mr_mode3), - TEST_ENTRY_GETINFO(mr_mode4), - TEST_ENTRY_GETINFO(mr_mode5), - TEST_ENTRY_GETINFO(mr_mode6), TEST_ENTRY_GETINFO(progress1), TEST_ENTRY_GETINFO(progress2), TEST_ENTRY_GETINFO(caps1), diff --git a/fabtests/unit/mr_cache_evict.c b/fabtests/unit/mr_cache_evict.c index 11435c7ebfc..42c25a5d5db 100644 --- a/fabtests/unit/mr_cache_evict.c +++ b/fabtests/unit/mr_cache_evict.c @@ -806,7 +806,7 @@ int main(int argc, char **argv) hints->mode = ~0; hints->domain_attr->mode = ~0; - hints->domain_attr->mr_mode = ~(FI_MR_BASIC | FI_MR_SCALABLE); + hints->domain_attr->mr_mode = ~0; hints->caps |= FI_MSG | FI_RMA; if (opts.options & FT_OPT_ENABLE_HMEM) diff --git 
a/fabtests/unit/mr_test.c b/fabtests/unit/mr_test.c index 41a8929d6c6..9375c78b61e 100644 --- a/fabtests/unit/mr_test.c +++ b/fabtests/unit/mr_test.c @@ -294,7 +294,7 @@ int main(int argc, char **argv) hints->mode = ~0; hints->domain_attr->mode = ~0; - hints->domain_attr->mr_mode = ~(FI_MR_BASIC | FI_MR_SCALABLE | FI_MR_LOCAL); + hints->domain_attr->mr_mode = ~FI_MR_LOCAL; hints->caps |= FI_MSG | FI_RMA; if (opts.options & FT_OPT_ENABLE_HMEM) diff --git a/fabtests/unit/setopt_test.c b/fabtests/unit/setopt_test.c index 40487aaa3de..85f2c5cd025 100644 --- a/fabtests/unit/setopt_test.c +++ b/fabtests/unit/setopt_test.c @@ -152,7 +152,7 @@ int main(int argc, char **argv) hints->mode = ~0; hints->domain_attr->mode = ~0; - hints->domain_attr->mr_mode = ~(FI_MR_BASIC | FI_MR_SCALABLE); + hints->domain_attr->mr_mode = ~0; hints->caps |= FI_MSG; failed = run_tests(test_array, err_buf); diff --git a/include/ofi_mr.h b/include/ofi_mr.h index 80069b3f9fa..99669703f1c 100644 --- a/include/ofi_mr.h +++ b/include/ofi_mr.h @@ -65,8 +65,6 @@ struct ofi_mr_info { #define OFI_MR_BASIC_MAP (FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_VIRT_ADDR) -/* FI_LOCAL_MR is valid in pre-libfaric-1.5 and can be valid in - * post-libfabric-1.5 */ static inline int ofi_mr_local(const struct fi_info *info) { if (!info) @@ -78,32 +76,21 @@ static inline int ofi_mr_local(const struct fi_info *info) if (info->domain_attr->mr_mode & FI_MR_LOCAL) return 1; - if (info->domain_attr->mr_mode & ~(FI_MR_BASIC | FI_MR_SCALABLE)) + if (info->domain_attr->mr_mode) return 0; check_local_mr: - return (info->mode & FI_LOCAL_MR) ? 1 : 0; + return 0; } #define OFI_MR_MODE_RMA_TARGET (FI_MR_RAW | FI_MR_VIRT_ADDR | \ FI_MR_PROV_KEY | FI_MR_RMA_EVENT) -/* If the app sets FI_MR_LOCAL, we ignore FI_LOCAL_MR. So, if the - * app doesn't set FI_MR_LOCAL, we need to check for FI_LOCAL_MR. - * The provider is assumed only to set FI_MR_LOCAL correctly. 
- */ static inline uint64_t ofi_mr_get_prov_mode(uint32_t version, const struct fi_info *user_info, const struct fi_info *prov_info) { - if (FI_VERSION_LT(version, FI_VERSION(1, 5)) || - (user_info->domain_attr && - !(user_info->domain_attr->mr_mode & FI_MR_LOCAL))) { - return (prov_info->domain_attr->mr_mode & FI_MR_LOCAL) ? - prov_info->mode | FI_LOCAL_MR : prov_info->mode; - } else { - return prov_info->mode; - } + return prov_info->mode; } diff --git a/include/rdma/fabric.h b/include/rdma/fabric.h index c114a5defd0..681aa990783 100644 --- a/include/rdma/fabric.h +++ b/include/rdma/fabric.h @@ -227,12 +227,6 @@ enum fi_av_type { FI_AV_TABLE }; -/* Named enum for backwards compatibility */ -enum fi_mr_mode { - FI_MR_UNSPEC, - FI_MR_BASIC, /* (1 << 0) */ - FI_MR_SCALABLE, /* (1 << 1) */ -}; #define FI_MR_LOCAL (1 << 2) #define FI_MR_RAW (1 << 3) #define FI_MR_VIRT_ADDR (1 << 4) @@ -366,7 +360,7 @@ static inline uint8_t fi_tc_dscp_get(uint32_t tclass) #define FI_MSG_PREFIX (1ULL << 58) #define FI_ASYNC_IOV (1ULL << 57) #define FI_RX_CQ_DATA (1ULL << 56) -#define FI_LOCAL_MR (1ULL << 55) +/* #define FI_LOCAL_MR (1ULL << 55) */ /* #define FI_NOTIFY_FLAGS_ONLY (1ULL << 54) */ /* #define FI_RESTRICTED_COMP (1ULL << 53) */ #define FI_CONTEXT2 (1ULL << 52) diff --git a/man/fi_direct.7.md b/man/fi_direct.7.md index de80ca0502a..82f18868638 100644 --- a/man/fi_direct.7.md +++ b/man/fi_direct.7.md @@ -65,10 +65,6 @@ supported features. for additional details. When FI_DIRECT_CONTEXT is defined, applications should use struct fi_context in their definitions, even if FI_CONTEXT2 is set. -*FI_DIRECT_LOCAL_MR* -: The provider sets FI_LOCAL_MR for fi_info:mode. See fi_getinfo - for additional details. - # SEE ALSO [`fi_getinfo`(3)](fi_getinfo.3.html), diff --git a/man/fi_domain.3.md b/man/fi_domain.3.md index 6d82e520b9f..8fad9bfde2b 100644 --- a/man/fi_domain.3.md +++ b/man/fi_domain.3.md @@ -545,37 +545,16 @@ The following values may be specified. 
: Indicates that the memory regions associated with completion counters must be explicitly enabled after being bound to any counter. -*FI_MR_UNSPEC* -: Defined for compatibility -- library versions 1.4 and earlier. Setting - mr_mode to 0 indicates that FI_MR_BASIC or FI_MR_SCALABLE are requested - and supported. - *FI_MR_VIRT_ADDR* : Registered memory regions are referenced by peers using the virtual address of the registered memory region, rather than a 0-based offset. -*FI_MR_BASIC* -: Defined for compatibility -- library versions 1.4 and earlier. Only - basic memory registration operations are requested or supported. - This mode is equivalent to the FI_MR_VIRT_ADDR, FI_MR_ALLOCATED, and - FI_MR_PROV_KEY flags being set in later library versions. This flag - may not be used in conjunction with other mr_mode bits. - -*FI_MR_SCALABLE* -: Defined for compatibility -- library versions 1.4 and earlier. - Only scalable memory registration operations - are requested or supported. Scalable registration uses offset based - addressing, with application selectable memory keys. For library versions - 1.5 and later, this is the default if no mr_mode bits are set. This - flag may not be used in conjunction with other mr_mode bits. - Buffers used in data transfer operations may require notifying the provider of their use before a data transfer can occur. The mr_mode field indicates the type of memory registration that is required, and when registration is necessary. Applications that require the use of a specific registration mode should set the domain attribute mr_mode to the necessary value when calling -fi_getinfo. The value FI_MR_UNSPEC may be used to indicate support for any -registration mode. +fi_getinfo. ## MR Key Size (mr_key_size) diff --git a/man/fi_getinfo.3.md b/man/fi_getinfo.3.md index d217c27fcd2..a78a118f6c6 100644 --- a/man/fi_getinfo.3.md +++ b/man/fi_getinfo.3.md @@ -524,17 +524,6 @@ supported set of modes will be returned in the info structure(s). 
The requirements for using struct fi_context2 are identical as defined for FI_CONTEXT above. -*FI_LOCAL_MR* -: The provider is optimized around having applications register memory - for locally accessed data buffers. Data buffers used in send and - receive operations and as the source buffer for RMA and atomic - operations must be registered by the application for access domains - opened with this capability. This flag is defined for compatibility - and is ignored if the application version is 1.5 or later and the - domain mr_mode is set to anything other than FI_MR_BASIC or FI_MR_SCALABLE. - See the domain attribute mr_mode [`fi_domain`(3)](fi_domain.3.html) - and [`fi_mr`(3)](fi_mr.3.html). - *FI_MSG_PREFIX* : Message prefix mode indicates that an application will provide buffer space in front of all message send and receive buffers for diff --git a/man/fi_mr.3.md b/man/fi_mr.3.md index 312f6ce942c..1b22926153b 100644 --- a/man/fi_mr.3.md +++ b/man/fi_mr.3.md @@ -147,11 +147,6 @@ The following apply to memory registration. scalable memory registration. The default requirements are outlined below, followed by definitions of how each mr_mode bit alters the definition. - Compatibility: For library versions 1.4 and earlier, this was indicated by - setting mr_mode to FI_MR_SCALABLE and the fi_info mode bit FI_LOCAL_MR to 0. - FI_MR_SCALABLE and FI_LOCAL_MR were deprecated in libfabric version 1.5, - though they are supported for backwards compatibility purposes. - For security, memory registration is required for data buffers that are accessed directly by a peer process. For example, registration is required for RMA target buffers (read or written to), and those accessed @@ -216,9 +211,6 @@ The following apply to memory registration. manage their own local memory registrations may do so by using the memory registration calls. - Note: the FI_MR_LOCAL mr_mode bit replaces the FI_LOCAL_MR fi_info mode bit. - When FI_MR_LOCAL is set, FI_LOCAL_MR is ignored. 
- *FI_MR_RAW* : Raw memory regions are used to support providers with keys larger than 64-bits or require setup at the peer. When the FI_MR_RAW bit is set, @@ -310,22 +302,6 @@ The following apply to memory registration. requires registering regions passed to collective calls using the FI_COLLECTIVE flag. -*Basic Memory Registration* -: Basic memory registration was deprecated in libfabric version 1.5, but - is supported for backwards compatibility. Basic memory registration - is indicated by setting mr_mode equal to FI_MR_BASIC. - FI_MR_BASIC must be set alone and not paired with mr_mode bits. - Unlike other mr_mode bits, if FI_MR_BASIC is set on input to fi_getinfo(), - it will not be cleared by the provider. That is, setting mr_mode equal to - FI_MR_BASIC forces basic registration if the provider supports it. - - The behavior of basic registration is equivalent - to requiring the following mr_mode bits: FI_MR_VIRT_ADDR, - FI_MR_ALLOCATED, and FI_MR_PROV_KEY. Additionally, providers that - support basic registration usually require the (deprecated) fi_info mode - bit FI_LOCAL_MR, which was incorporated into the FI_MR_LOCAL mr_mode - bit. - The registrations functions -- fi_mr_reg, fi_mr_regv, and fi_mr_regattr -- are used to register one or more memory regions with fabric resources. The main difference between registration functions diff --git a/man/fi_netdir.7.md b/man/fi_netdir.7.md index dccf4c72ec3..31174bc4014 100644 --- a/man/fi_netdir.7.md +++ b/man/fi_netdir.7.md @@ -31,19 +31,11 @@ libfabric API: *Endpoint types* : The provider support the FI_EP_MSG endpoint types. -*Memory registration modes* -: The provider implements the *FI_MR_BASIC* memory registration mode. - *Data transfer operations* : The following data transfer interfaces are supported for the following endpoint types: *FI_MSG*, *FI_RMA*. See DATA TRANSFER OPERATIONS below for more details. 
-*Modes* -: The Network Direct provider requires applications to support - the following modes: - * FI_LOCAL_MR for all applications. - *Addressing Formats* : Supported addressing formats include FI_SOCKADDR, FI_SOCKADDR_IN, FI_SOCKADDR_IN6 @@ -65,7 +57,7 @@ libfabric API: # LIMITATIONS *Memory Regions* -: Only FI_MR_BASIC mode is supported. Adding regions via s/g list is +: Adding regions via s/g list is supported only up to a s/g list size of 1. No support for binding memory regions to a counter. diff --git a/man/fi_rma.3.md b/man/fi_rma.3.md index 156780834c3..2a550a58fa8 100644 --- a/man/fi_rma.3.md +++ b/man/fi_rma.3.md @@ -74,8 +74,8 @@ ssize_t fi_inject_writedata(struct fid_ep *ep, const void *buf, size_t len, *addr* : Address of remote memory to access. This will be the virtual - address of the remote region in the case of FI_MR_BASIC, or the - offset from the starting address in the case of FI_MR_SCALABLE. + address of the remote region in the case of FI_MR_VIRT_ADDR, or the + offset from the starting address. *key* : Protection key associated with the remote memory. diff --git a/man/fi_setup.7.md b/man/fi_setup.7.md index 8964ea15863..9f3736475a3 100644 --- a/man/fi_setup.7.md +++ b/man/fi_setup.7.md @@ -374,7 +374,7 @@ struct fi_domain_attr { enum fi_progress progress; enum fi_resource_mgmt resource_mgmt; enum fi_av_type av_type; - enum fi_mr_mode mr_mode; + int mr_mode; size_t mr_key_size; size_t cq_data_size; size_t cq_cnt; diff --git a/man/fi_verbs.7.md b/man/fi_verbs.7.md index 14ebc1faf33..462a0f387b4 100644 --- a/man/fi_verbs.7.md +++ b/man/fi_verbs.7.md @@ -58,8 +58,6 @@ Verbs provider requires applications to support the following modes: #### FI_EP_MSG endpoint type - * FI_LOCAL_MR / FI_MR_LOCAL mr mode. - * FI_RX_CQ_DATA for applications that want to use RMA. Applications must take responsibility of posting receives for any incoming CQ data. @@ -115,7 +113,7 @@ refer to fi_mr(3): Memory Registration Cache section for more details. 
# LIMITATIONS ### Memory Regions -Only FI_MR_BASIC mode is supported. Adding regions via s/g list is supported only +Adding regions via s/g list is supported only up to a s/g list size of 1. No support for binding memory regions to a counter. ### Wait objects diff --git a/prov/efa/src/efa_prov_info.c b/prov/efa/src/efa_prov_info.c index 827cb29146b..c8186bfbcc4 100644 --- a/prov/efa/src/efa_prov_info.c +++ b/prov/efa/src/efa_prov_info.c @@ -106,7 +106,7 @@ const struct fi_domain_attr efa_domain_attr = { .control_progress = FI_PROGRESS_AUTO, .data_progress = FI_PROGRESS_AUTO, .resource_mgmt = FI_RM_DISABLED, - .mr_mode = OFI_MR_BASIC_MAP | FI_MR_LOCAL | FI_MR_BASIC, + .mr_mode = OFI_MR_BASIC_MAP | FI_MR_LOCAL, .mr_key_size = sizeof_field(struct ibv_sge, lkey), .cq_data_size = 0, .tx_ctx_cnt = 1024, diff --git a/prov/efa/test/efa_unit_test_ep.c b/prov/efa/test/efa_unit_test_ep.c index 988c49b28aa..424054b6016 100644 --- a/prov/efa/test/efa_unit_test_ep.c +++ b/prov/efa/test/efa_unit_test_ep.c @@ -84,7 +84,7 @@ void test_efa_rdm_ep_ignore_non_hex_host_id(struct efa_resource **state) * the packet header and set the peer host id if HOST_ID_HDR is turned on. * Then the endpoint should respond with a handshake packet, and include the local host id * if and only if it is non-zero. - * + * * @param[in] state cmocka state variable * @param[in] local_host_id The local host id * @param[in] peer_host_id The remote peer host id @@ -445,7 +445,7 @@ void test_efa_rdm_ep_rma_without_caps(struct efa_resource **state) resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); resource->hints->caps |= FI_MSG | FI_TAGGED; resource->hints->caps &= ~FI_RMA; - resource->hints->domain_attr->mr_mode = FI_MR_BASIC; + resource->hints->domain_attr->mr_mode = 0; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, resource->hints); /* ensure we don't have RMA capability. 
*/ @@ -495,7 +495,7 @@ void test_efa_rdm_ep_atomic_without_caps(struct efa_resource **state) resource->hints = efa_unit_test_alloc_hints(FI_EP_RDM); resource->hints->caps |= FI_MSG | FI_TAGGED; resource->hints->caps &= ~FI_ATOMIC; - resource->hints->domain_attr->mr_mode = FI_MR_BASIC; + resource->hints->domain_attr->mr_mode = 0; efa_unit_test_resource_construct_with_hints(resource, FI_EP_RDM, resource->hints); /* ensure we don't have ATOMIC capability. */ diff --git a/prov/efa/test/efa_unit_test_info.c b/prov/efa/test/efa_unit_test_info.c index bec039a6462..72262adcea5 100644 --- a/prov/efa/test/efa_unit_test_info.c +++ b/prov/efa/test/efa_unit_test_info.c @@ -41,64 +41,6 @@ void test_info_open_ep_with_wrong_info() assert_int_equal(err, 0); } -/** - * @brief test that we support older version of libfabric API version 1.1 - */ -void test_info_open_ep_with_api_1_1_info() -{ - struct fi_info *hints, *info; - struct fid_fabric *fabric = NULL; - struct fid_domain *domain = NULL; - struct fid_ep *ep = NULL; - int err; - - hints = calloc(sizeof(struct fi_info), 1); - assert_non_null(hints); - - hints->domain_attr = calloc(sizeof(struct fi_domain_attr), 1); - assert_non_null(hints->domain_attr); - - hints->fabric_attr = calloc(sizeof(struct fi_fabric_attr), 1); - assert_non_null(hints->fabric_attr); - - hints->ep_attr = calloc(sizeof(struct fi_ep_attr), 1); - assert_non_null(hints->ep_attr); - - hints->fabric_attr->prov_name = "efa"; - hints->ep_attr->type = FI_EP_RDM; - - /* in libfabric API < 1.5, domain_attr->mr_mode is an enum with - * two options: FI_MR_BASIC or FI_MR_SCALABLE, (EFA does not support FI_MR_SCALABLE). - * - * Additional information about memory registration is specified as bits in - * "mode". For example, the requirement of local memory registration - * is specified as FI_LOCAL_MR. 
- */ - hints->mode = FI_LOCAL_MR; - hints->domain_attr->mr_mode = FI_MR_BASIC; - - err = fi_getinfo(FI_VERSION(1, 1), NULL, NULL, 0ULL, hints, &info); - assert_int_equal(err, 0); - - err = fi_fabric(info->fabric_attr, &fabric, NULL); - assert_int_equal(err, 0); - - err = fi_domain(fabric, info, &domain, NULL); - assert_int_equal(err, 0); - - err = fi_endpoint(domain, info, &ep, NULL); - assert_int_equal(err, 0); - - err = fi_close(&ep->fid); - assert_int_equal(err, 0); - - err = fi_close(&domain->fid); - assert_int_equal(err, 0); - - err = fi_close(&fabric->fid); - assert_int_equal(err, 0); -} - static void test_info_check_shm_info_from_hints(struct fi_info *hints) { struct fi_info *info; diff --git a/prov/efa/test/efa_unit_tests.c b/prov/efa/test/efa_unit_tests.c index 1091d4540ff..6598ad7744d 100644 --- a/prov/efa/test/efa_unit_tests.c +++ b/prov/efa/test/efa_unit_tests.c @@ -102,7 +102,6 @@ int main(void) cmocka_unit_test_setup_teardown(test_ibv_cq_ex_read_ignore_removed_peer, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_rdm_fallback_to_ibv_create_cq_ex_cq_read_ignore_forgotton_peer, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_info_open_ep_with_wrong_info, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), - cmocka_unit_test_setup_teardown(test_info_open_ep_with_api_1_1_info, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_info_check_shm_info_hmem, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_info_check_shm_info_op_flags, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), cmocka_unit_test_setup_teardown(test_info_check_shm_info_threading, efa_unit_test_mocks_setup, efa_unit_test_mocks_teardown), diff --git a/prov/efa/test/efa_unit_tests.h b/prov/efa/test/efa_unit_tests.h index dc5b01c664d..4e3b891d8b8 100644 --- a/prov/efa/test/efa_unit_tests.h +++ 
b/prov/efa/test/efa_unit_tests.h @@ -111,7 +111,6 @@ void test_ibv_cq_ex_read_recover_forgotten_peer_ah(); void test_rdm_fallback_to_ibv_create_cq_ex_cq_read_ignore_forgotton_peer(); void test_ibv_cq_ex_read_ignore_removed_peer(); void test_info_open_ep_with_wrong_info(); -void test_info_open_ep_with_api_1_1_info(); void test_info_check_shm_info_hmem(); void test_info_check_shm_info_op_flags(); void test_info_check_shm_info_threading(); diff --git a/prov/mrail/src/mrail_attr.c b/prov/mrail/src/mrail_attr.c index e8fda11fe2b..c3401ccf8b0 100644 --- a/prov/mrail/src/mrail_attr.c +++ b/prov/mrail/src/mrail_attr.c @@ -71,7 +71,7 @@ struct fi_domain_attr mrail_domain_attr = { .data_progress = FI_PROGRESS_AUTO, .resource_mgmt = FI_RM_ENABLED, .av_type = FI_AV_UNSPEC, - .mr_mode = FI_MR_BASIC | FI_MR_SCALABLE | FI_MR_RAW, + .mr_mode = FI_MR_RAW, .mr_key_size = SIZE_MAX, .cq_data_size = SIZE_MAX, .cq_cnt = SIZE_MAX, diff --git a/prov/mrail/src/mrail_init.c b/prov/mrail/src/mrail_init.c index 94c4b1acb1f..9b0f001db7d 100644 --- a/prov/mrail/src/mrail_init.c +++ b/prov/mrail/src/mrail_init.c @@ -159,8 +159,6 @@ static struct fi_info *mrail_create_core_hints(const struct fi_info *hints) core_hints->mode &= MRAIL_PASSTHRU_MODES; if (core_hints->domain_attr) { - if (core_hints->domain_attr->mr_mode == FI_MR_BASIC) - core_hints->domain_attr->mr_mode = OFI_MR_BASIC_MAP; removed_mr_mode = core_hints->domain_attr->mr_mode & ~MRAIL_PASSTHRU_MR_MODES; if (removed_mr_mode) { diff --git a/prov/opx/configure.m4 b/prov/opx/configure.m4 index 3da849ffd45..edd62bb5276 100644 --- a/prov/opx/configure.m4 +++ b/prov/opx/configure.m4 @@ -73,14 +73,6 @@ AC_DEFUN([FI_OPX_CONFIGURE],[ AC_SUBST(opx_av, [$OPX_AV_MODE]) AC_DEFINE_UNQUOTED(OPX_AV, [$OPX_AV_MODE], [fabric direct address vector]) - AS_CASE([x$OPX_MR], - [xscalable], [OPX_MR_MODE=FI_MR_SCALABLE], - [xbasic], [OPX_MR_MODE=FI_MR_BASIC], - [OPX_MR_MODE=FI_MR_SCALABLE]) - - AC_SUBST(opx_mr, [$OPX_MR_MODE]) - AC_DEFINE_UNQUOTED(OPX_MR, 
[$OPX_MR_MODE], [fabric direct memory region]) - dnl Only FI_THREAD_DOMAIN is supported by the opx provider OPX_THREAD_MODE=FI_THREAD_DOMAIN diff --git a/prov/opx/include/rdma/opx/fi_opx_domain.h b/prov/opx/include/rdma/opx/fi_opx_domain.h index a4da1838ba7..fc37fa38aa6 100644 --- a/prov/opx/include/rdma/opx/fi_opx_domain.h +++ b/prov/opx/include/rdma/opx/fi_opx_domain.h @@ -104,7 +104,7 @@ struct fi_opx_domain { enum fi_threading threading; enum fi_resource_mgmt resource_mgmt; - enum fi_mr_mode mr_mode; + int mr_mode; enum fi_progress data_progress; uuid_t unique_job_key; diff --git a/prov/opx/include/rdma/opx/fi_opx_endpoint.h b/prov/opx/include/rdma/opx/fi_opx_endpoint.h index 9977bd7f47c..3dd87cd8a55 100644 --- a/prov/opx/include/rdma/opx/fi_opx_endpoint.h +++ b/prov/opx/include/rdma/opx/fi_opx_endpoint.h @@ -2986,10 +2986,6 @@ ssize_t fi_opx_ep_rx_recv_internal (struct fi_opx_ep *opx_ep, * * - 'FI_ASYNC_IOV' mode bit which requires the application to maintain * the 'msg->msg_iov' iovec array until the operation completes - * - * - 'FI_LOCAL_MR' mode bit which allows the provider to ignore the 'desc' - * parameter .. 
no memory regions are required to access the local - * memory */ static inline ssize_t fi_opx_ep_rx_recvmsg_internal (struct fi_opx_ep *opx_ep, diff --git a/prov/opx/src/fi_opx_domain.c b/prov/opx/src/fi_opx_domain.c index 48eb6f0cf22..68972085df3 100644 --- a/prov/opx/src/fi_opx_domain.c +++ b/prov/opx/src/fi_opx_domain.c @@ -129,7 +129,7 @@ int fi_opx_alloc_default_domain_attr(struct fi_domain_attr **domain_attr) attr->data_progress = FI_PROGRESS_MANUAL; attr->resource_mgmt = FI_RM_DISABLED; attr->av_type = OPX_AV; - attr->mr_mode = OPX_MR; + attr->mr_mode = 0; attr->mr_key_size = sizeof(uint64_t); attr->cq_data_size = FI_OPX_REMOTE_CQ_DATA_SIZE; attr->cq_cnt = (size_t)-1; @@ -173,7 +173,7 @@ int fi_opx_choose_domain(uint64_t caps, struct fi_domain_attr *domain_attr, stru * Ignore any setting by the application - the checkinfo should have verified * it was set to the same setting. */ - domain_attr->mr_mode = OPX_MR; + domain_attr->mr_mode = 0; #endif if (hints) { @@ -230,10 +230,6 @@ int fi_opx_check_domain_attr(struct fi_domain_attr *attr) goto err; } - if (attr->mr_mode == FI_MR_UNSPEC) { - attr->mr_mode = OPX_MR == FI_MR_UNSPEC ? 
FI_MR_BASIC : OPX_MR; - } - if (attr->mr_key_size) { if (attr->mr_key_size > sizeof(uint64_t)) { FI_DBG(fi_opx_global.prov, FI_LOG_DOMAIN, diff --git a/prov/opx/src/fi_opx_info.c b/prov/opx/src/fi_opx_info.c index 90473e9a7e0..c5abf5f733f 100644 --- a/prov/opx/src/fi_opx_info.c +++ b/prov/opx/src/fi_opx_info.c @@ -86,7 +86,7 @@ void fi_opx_set_info(struct fi_info *fi, enum fi_progress progress) .data_progress = progress, .resource_mgmt = FI_RM_ENABLED, .av_type = OPX_AV, - .mr_mode = OPX_MR, + .mr_mode = 0, .mr_key_size = 2, .cq_data_size = FI_OPX_REMOTE_CQ_DATA_SIZE, .cq_cnt = SIZE_MAX, diff --git a/prov/opx/src/fi_opx_mr.c b/prov/opx/src/fi_opx_mr.c index b484c44c86a..7bd273b4aea 100644 --- a/prov/opx/src/fi_opx_mr.c +++ b/prov/opx/src/fi_opx_mr.c @@ -122,7 +122,7 @@ static int fi_opx_mr_regv(struct fid *fid, if (ret) return ret; FI_LOG(fi_opx_global.prov, FI_LOG_DEBUG, FI_LOG_MR, - "buf=%p, len=%lu, access=%lu, offset=%lu, requested_key=%lu, flags=%lu, context=%p\n", + "buf=%p, len=%lu, access=%lu, offset=%lu, requested_key=%lu, flags=%lu, context=%p\n", iov->iov_base, iov->iov_len, access, offset, requested_key, flags, context); opx_domain = (struct fi_opx_domain *) container_of(fid, struct fid_domain, fid); @@ -227,8 +227,6 @@ int fi_opx_init_mr_ops(struct fid_domain *domain, struct fi_info *info) struct fi_opx_domain *opx_domain = container_of(domain, struct fi_opx_domain, domain_fid); - if (info->domain_attr->mr_mode == FI_MR_UNSPEC) goto err; - opx_domain->domain_fid.mr = &fi_opx_mr_ops; opx_domain->mr_mode = info->domain_attr->mr_mode; diff --git a/prov/opx/src/fi_opx_tagged.c b/prov/opx/src/fi_opx_tagged.c index a1a0300e33b..96f2d61a1c5 100644 --- a/prov/opx/src/fi_opx_tagged.c +++ b/prov/opx/src/fi_opx_tagged.c @@ -45,10 +45,6 @@ * * - 'FI_ASYNC_IOV' mode bit which requires the application to maintain * the 'msg->msg_iov' iovec array until the operation completes - * - * - 'FI_LOCAL_MR' mode bit which allows the provider to ignore the 'desc' - * 
parameter .. no memory regions are required to access the local - * memory */ ssize_t fi_opx_trecvmsg_generic (struct fid_ep *ep, const struct fi_msg_tagged *msg, diff --git a/prov/psm2/src/psmx2.h b/prov/psm2/src/psmx2.h index cea411aaabf..396f04ef801 100644 --- a/prov/psm2/src/psmx2.h +++ b/prov/psm2/src/psmx2.h @@ -543,7 +543,7 @@ struct psmx2_fid_domain { uint64_t caps; psm2_uuid_t uuid; - enum fi_mr_mode mr_mode; + int mr_mode; ofi_spin_t mr_lock; uint64_t mr_reserved_key; RbtHandle mr_map; diff --git a/prov/psm2/src/psmx2_attr.c b/prov/psm2/src/psmx2_attr.c index 02f1a33d854..4dfb9c39e8a 100644 --- a/prov/psm2/src/psmx2_attr.c +++ b/prov/psm2/src/psmx2_attr.c @@ -89,7 +89,7 @@ static struct fi_domain_attr psmx2_domain_attr = { .data_progress = FI_PROGRESS_AUTO, .resource_mgmt = FI_RM_ENABLED, .av_type = FI_AV_UNSPEC, - .mr_mode = FI_MR_SCALABLE | FI_MR_BASIC, + .mr_mode = 0, .mr_key_size = sizeof(uint64_t), .cq_data_size = 0, /* 4, 8 */ .cq_cnt = 65535, @@ -420,9 +420,6 @@ void psmx2_alter_prov_info(uint32_t api_version, info->domain_attr->data_progress = FI_PROGRESS_MANUAL; - if (info->domain_attr->mr_mode == (FI_MR_BASIC | FI_MR_SCALABLE)) - info->domain_attr->mr_mode = FI_MR_SCALABLE; - /* * Avoid automatically adding secondary caps that may negatively * impact performance. diff --git a/prov/psm2/src/psmx2_domain.c b/prov/psm2/src/psmx2_domain.c index 0f4ca31a3ea..4558b59fb43 100644 --- a/prov/psm2/src/psmx2_domain.c +++ b/prov/psm2/src/psmx2_domain.c @@ -350,7 +350,7 @@ int psmx2_domain_open(struct fid_fabric *fabric, struct fi_info *info, struct psmx2_fid_fabric *fabric_priv; struct psmx2_fid_domain *domain_priv; struct psmx2_ep_name *src_addr = info->src_addr; - int mr_mode = (info->domain_attr->mr_mode & FI_MR_BASIC) ? 
FI_MR_BASIC : 0; + int mr_mode = 0; int err, tmp; FI_INFO(&psmx2_prov, FI_LOG_DOMAIN, "\n"); diff --git a/prov/psm2/src/psmx2_mr.c b/prov/psm2/src/psmx2_mr.c index 750721a399b..dcb72f9c902 100644 --- a/prov/psm2/src/psmx2_mr.c +++ b/prov/psm2/src/psmx2_mr.c @@ -73,19 +73,12 @@ static int psmx2_mr_reserve_key(struct psmx2_fid_domain *domain, domain->mr_lock_fn(&domain->mr_lock, 1); - if (domain->mr_mode == FI_MR_BASIC) { - key = domain->mr_reserved_key; - try_count = 10000; /* large enough */ - } else { - key = requested_key; - try_count = 1; - } + key = requested_key; + try_count = 1; for (i=0; imr_map, (void *)key)) { if (!rbtInsert(domain->mr_map, (void *)key, mr)) { - if (domain->mr_mode == FI_MR_BASIC) - domain->mr_reserved_key = key + 1; *assigned_key = key; err = 0; } @@ -314,8 +307,7 @@ STATIC int psmx2_mr_reg(struct fid *fid, const void *buf, size_t len, mr_priv->iov_count = 1; mr_priv->iov[0].iov_base = (void *)buf; mr_priv->iov[0].iov_len = len; - mr_priv->offset = (domain_priv->mr_mode == FI_MR_BASIC) ? 0 : - ((uint64_t)mr_priv->iov[0].iov_base - offset); + mr_priv->offset = ((uint64_t)mr_priv->iov[0].iov_base - offset); *mr = &mr_priv->mr; return 0; @@ -369,8 +361,7 @@ STATIC int psmx2_mr_regv(struct fid *fid, for (i=0; iiov[i] = iov[i]; psmx2_mr_normalize_iov(mr_priv->iov, &mr_priv->iov_count); - mr_priv->offset = (domain_priv->mr_mode == FI_MR_BASIC) ? 0 : - ((uint64_t)mr_priv->iov[0].iov_base - offset); + mr_priv->offset = ((uint64_t)mr_priv->iov[0].iov_base - offset); *mr = &mr_priv->mr; return 0; @@ -422,8 +413,7 @@ STATIC int psmx2_mr_regattr(struct fid *fid, const struct fi_mr_attr *attr, for (i=0; iiov_count; i++) mr_priv->iov[i] = attr->mr_iov[i]; psmx2_mr_normalize_iov(mr_priv->iov, &mr_priv->iov_count); - mr_priv->offset = (domain_priv->mr_mode == FI_MR_BASIC) ? 
0 : - ((uint64_t)mr_priv->iov[0].iov_base - attr->offset); + mr_priv->offset = ((uint64_t)mr_priv->iov[0].iov_base - attr->offset); *mr = &mr_priv->mr; return 0; diff --git a/prov/psm3/src/psmx3.h b/prov/psm3/src/psmx3.h index 5209f138d5f..44eb6688fa6 100644 --- a/prov/psm3/src/psmx3.h +++ b/prov/psm3/src/psmx3.h @@ -558,7 +558,7 @@ struct psmx3_fid_domain { uint64_t mode; uint64_t caps; - enum fi_mr_mode mr_mode; + int mr_mode; ofi_spin_t mr_lock; uint64_t mr_reserved_key; RbtHandle mr_map; diff --git a/prov/psm3/src/psmx3_attr.c b/prov/psm3/src/psmx3_attr.c index 7c5a61a8031..04d6ce20e9d 100644 --- a/prov/psm3/src/psmx3_attr.c +++ b/prov/psm3/src/psmx3_attr.c @@ -92,7 +92,7 @@ static struct fi_domain_attr psmx3_domain_attr = { .data_progress = FI_PROGRESS_AUTO, .resource_mgmt = FI_RM_ENABLED, .av_type = FI_AV_UNSPEC, - .mr_mode = FI_MR_SCALABLE | FI_MR_BASIC, + .mr_mode = 0, .mr_key_size = sizeof(uint64_t), .cq_data_size = 0, /* 4, 8 */ .cq_cnt = 65535, @@ -673,9 +673,6 @@ void psmx3_alter_prov_info(uint32_t api_version, info->domain_attr->data_progress = FI_PROGRESS_MANUAL; - if (info->domain_attr->mr_mode == (FI_MR_BASIC | FI_MR_SCALABLE)) - info->domain_attr->mr_mode = FI_MR_SCALABLE; - /* * Avoid automatically adding secondary caps that may negatively * impact performance. diff --git a/prov/psm3/src/psmx3_domain.c b/prov/psm3/src/psmx3_domain.c index 74e9de2d9c4..e06720a7918 100644 --- a/prov/psm3/src/psmx3_domain.c +++ b/prov/psm3/src/psmx3_domain.c @@ -301,7 +301,7 @@ int psmx3_domain_open(struct fid_fabric *fabric, struct fi_info *info, struct psmx3_fid_fabric *fabric_priv; struct psmx3_fid_domain *domain_priv; struct psmx3_ep_name *src_addr = info->src_addr; - int mr_mode = (info->domain_attr->mr_mode & FI_MR_BASIC) ? 
FI_MR_BASIC : 0; + int mr_mode = 0; int err; PSMX3_INFO(&psmx3_prov, FI_LOG_DOMAIN, "\n"); diff --git a/prov/psm3/src/psmx3_mr.c b/prov/psm3/src/psmx3_mr.c index cc6533062ee..5611a9165da 100644 --- a/prov/psm3/src/psmx3_mr.c +++ b/prov/psm3/src/psmx3_mr.c @@ -73,19 +73,12 @@ static int psmx3_mr_reserve_key(struct psmx3_fid_domain *domain, domain->mr_lock_fn(&domain->mr_lock, 1); - if (domain->mr_mode == FI_MR_BASIC) { - key = domain->mr_reserved_key; - try_count = 10000; /* large enough */ - } else { - key = requested_key; - try_count = 1; - } + key = requested_key; + try_count = 1; for (i=0; imr_map, (void *)key)) { if (!rbtInsert(domain->mr_map, (void *)key, mr)) { - if (domain->mr_mode == FI_MR_BASIC) - domain->mr_reserved_key = key + 1; *assigned_key = key; err = 0; } @@ -314,8 +307,7 @@ STATIC int psmx3_mr_reg(struct fid *fid, const void *buf, size_t len, mr_priv->iov_count = 1; mr_priv->iov[0].iov_base = (void *)buf; mr_priv->iov[0].iov_len = len; - mr_priv->offset = (domain_priv->mr_mode == FI_MR_BASIC) ? 0 : - ((uint64_t)mr_priv->iov[0].iov_base - offset); + mr_priv->offset = ((uint64_t)mr_priv->iov[0].iov_base - offset); *mr = &mr_priv->mr; return 0; @@ -369,8 +361,7 @@ STATIC int psmx3_mr_regv(struct fid *fid, for (i=0; iiov[i] = iov[i]; psmx3_mr_normalize_iov(mr_priv->iov, &mr_priv->iov_count); - mr_priv->offset = (domain_priv->mr_mode == FI_MR_BASIC) ? 0 : - ((uint64_t)mr_priv->iov[0].iov_base - offset); + mr_priv->offset = ((uint64_t)mr_priv->iov[0].iov_base - offset); *mr = &mr_priv->mr; return 0; @@ -422,8 +413,7 @@ STATIC int psmx3_mr_regattr(struct fid *fid, const struct fi_mr_attr *attr, for (i=0; iiov_count; i++) mr_priv->iov[i] = attr->mr_iov[i]; psmx3_mr_normalize_iov(mr_priv->iov, &mr_priv->iov_count); - mr_priv->offset = (domain_priv->mr_mode == FI_MR_BASIC) ? 
0 : - ((uint64_t)mr_priv->iov[0].iov_base - attr->offset); + mr_priv->offset = ((uint64_t)mr_priv->iov[0].iov_base - attr->offset); *mr = &mr_priv->mr; return 0; diff --git a/prov/rxd/src/rxd_attr.c b/prov/rxd/src/rxd_attr.c index d06515bec98..76f83802352 100644 --- a/prov/rxd/src/rxd_attr.c +++ b/prov/rxd/src/rxd_attr.c @@ -84,7 +84,7 @@ struct fi_domain_attr rxd_domain_attr = { .data_progress = FI_PROGRESS_MANUAL, .resource_mgmt = FI_RM_ENABLED, .av_type = FI_AV_UNSPEC, - .mr_mode = FI_MR_BASIC | FI_MR_SCALABLE, + .mr_mode = 0, .cq_data_size = sizeof_field(struct rxd_data_hdr, cq_data), .mr_key_size = sizeof(uint64_t), .cq_cnt = 128, diff --git a/prov/rxd/src/rxd_init.c b/prov/rxd/src/rxd_init.c index 6bdae1423c2..bd6b091a008 100644 --- a/prov/rxd/src/rxd_init.c +++ b/prov/rxd/src/rxd_init.c @@ -60,20 +60,8 @@ static void rxd_init_env(void) void rxd_info_to_core_mr_modes(uint32_t version, const struct fi_info *hints, struct fi_info *core_info) { - /* We handle FI_MR_BASIC and FI_MR_SCALABLE irrespective of version */ - if (hints && hints->domain_attr && - (hints->domain_attr->mr_mode & (FI_MR_SCALABLE | FI_MR_BASIC))) { - core_info->mode = FI_LOCAL_MR; - core_info->domain_attr->mr_mode = hints->domain_attr->mr_mode; - } else if (FI_VERSION_LT(version, FI_VERSION(1, 5))) { - core_info->mode |= FI_LOCAL_MR; - /* Specify FI_MR_UNSPEC (instead of FI_MR_BASIC) so that - * providers that support only FI_MR_SCALABLE aren't dropped */ - core_info->domain_attr->mr_mode = FI_MR_UNSPEC; - } else { - core_info->domain_attr->mr_mode |= FI_MR_LOCAL; - core_info->domain_attr->mr_mode |= OFI_MR_BASIC_MAP; - } + core_info->domain_attr->mr_mode |= FI_MR_LOCAL; + core_info->domain_attr->mr_mode |= OFI_MR_BASIC_MAP; } int rxd_info_to_core(uint32_t version, const struct fi_info *rxd_info_in, @@ -81,7 +69,7 @@ int rxd_info_to_core(uint32_t version, const struct fi_info *rxd_info_in, { rxd_info_to_core_mr_modes(version, rxd_info_in, core_info); core_info->caps = FI_MSG; - 
core_info->mode = FI_LOCAL_MR | FI_CONTEXT | FI_MSG_PREFIX; + core_info->mode = FI_CONTEXT | FI_MSG_PREFIX; core_info->ep_attr->type = FI_EP_DGRAM; return 0; diff --git a/prov/rxm/src/rxm.h b/prov/rxm/src/rxm.h index e6942b14725..ece87c6f7e3 100644 --- a/prov/rxm/src/rxm.h +++ b/prov/rxm/src/rxm.h @@ -139,11 +139,9 @@ extern size_t rxm_packet_size; FI_DELIVERY_COMPLETE | FI_COMPLETION) #define RXM_RX_OP_FLAGS (FI_MULTI_RECV | FI_COMPLETION) -#define RXM_MR_VIRT_ADDR(info) ((info->domain_attr->mr_mode == FI_MR_BASIC) ||\ - info->domain_attr->mr_mode & FI_MR_VIRT_ADDR) +#define RXM_MR_VIRT_ADDR(info) (info->domain_attr->mr_mode & FI_MR_VIRT_ADDR) -#define RXM_MR_PROV_KEY(info) ((info->domain_attr->mr_mode == FI_MR_BASIC) ||\ - info->domain_attr->mr_mode & FI_MR_PROV_KEY) +#define RXM_MR_PROV_KEY(info) (info->domain_attr->mr_mode & FI_MR_PROV_KEY) #define RXM_UPDATE_STATE(subsystem, buf, new_state) \ do { \ diff --git a/prov/rxm/src/rxm_attr.c b/prov/rxm/src/rxm_attr.c index 7d20bd84ec0..ada3ea24b43 100644 --- a/prov/rxm/src/rxm_attr.c +++ b/prov/rxm/src/rxm_attr.c @@ -115,11 +115,7 @@ static struct fi_domain_attr rxm_domain_attr = { .data_progress = FI_PROGRESS_AUTO, .resource_mgmt = FI_RM_ENABLED, .av_type = FI_AV_UNSPEC, - /* Advertise support for FI_MR_BASIC so that ofi_check_info call - * doesn't fail at RxM level. If an app requires FI_MR_BASIC, it - * would be passed down to core provider. - */ - .mr_mode = FI_MR_BASIC | FI_MR_SCALABLE, + .mr_mode = 0, .cq_data_size = sizeof_field(struct ofi_op_hdr, data), .cq_cnt = (1 << 16), .ep_cnt = (1 << 15), @@ -185,11 +181,7 @@ static struct fi_domain_attr rxm_domain_thru_attr = { .data_progress = FI_PROGRESS_AUTO, .resource_mgmt = FI_RM_ENABLED, .av_type = FI_AV_UNSPEC, - /* Advertise support for FI_MR_BASIC so that ofi_check_info call - * doesn't fail at RxM level. If an app requires FI_MR_BASIC, it - * would be passed down to core provider. 
- */ - .mr_mode = FI_MR_BASIC | FI_MR_SCALABLE, + .mr_mode = 0, .cq_data_size = sizeof(uint64_t), .cq_cnt = (1 << 16), .ep_cnt = (1 << 15), diff --git a/prov/rxm/src/rxm_init.c b/prov/rxm/src/rxm_init.c index cfd8c150116..ad3af0ba63c 100644 --- a/prov/rxm/src/rxm_init.c +++ b/prov/rxm/src/rxm_init.c @@ -71,7 +71,7 @@ bool rxm_passthru_info(const struct fi_info *info) } /* - * - Support FI_MR_LOCAL/FI_LOCAL_MR as ofi_rxm can handle it. + * - Support FI_MR_LOCAL as ofi_rxm can handle it. * - The RxM FI_RMA implementation is pass-through but the provider can handle * FI_MR_PROV_KEY and FI_MR_VIRT_ADDR in its large message transfer rendezvous * protocol. We can set FI_MR_PROV_KEY and FI_MR_VIRT_ADDR only if the app @@ -82,28 +82,19 @@ bool rxm_passthru_info(const struct fi_info *info) void rxm_info_to_core_mr_modes(uint32_t version, const struct fi_info *hints, struct fi_info *core_info) { - if (hints && hints->domain_attr && - (hints->domain_attr->mr_mode & (FI_MR_SCALABLE | FI_MR_BASIC))) { - core_info->mode |= FI_LOCAL_MR; - core_info->domain_attr->mr_mode = hints->domain_attr->mr_mode; - } else if (FI_VERSION_LT(version, FI_VERSION(1, 5))) { - core_info->mode |= FI_LOCAL_MR; - core_info->domain_attr->mr_mode = FI_MR_UNSPEC; - } else { - core_info->domain_attr->mr_mode |= FI_MR_LOCAL; - if (!hints || !hints->domain_attr || - !ofi_rma_target_allowed(hints->caps)) - core_info->domain_attr->mr_mode |= OFI_MR_BASIC_MAP; - else - core_info->domain_attr->mr_mode |= - hints->domain_attr->mr_mode; - - /* RxM is setup to support FI_HMEM with the core provider requiring - * FI_MR_HMEM. Always set this MR mode bit. 
- */ - if (hints && hints->caps & FI_HMEM) - core_info->domain_attr->mr_mode |= FI_MR_HMEM; - } + core_info->domain_attr->mr_mode |= FI_MR_LOCAL; + if (!hints || !hints->domain_attr || + !ofi_rma_target_allowed(hints->caps)) + core_info->domain_attr->mr_mode |= OFI_MR_BASIC_MAP; + else + core_info->domain_attr->mr_mode |= + hints->domain_attr->mr_mode; + + /* RxM is setup to support FI_HMEM with the core provider requiring + * FI_MR_HMEM. Always set this MR mode bit. + */ + if (hints && hints->caps & FI_HMEM) + core_info->domain_attr->mr_mode |= FI_MR_HMEM; } static bool rxm_use_srx(const struct fi_info *hints, @@ -473,9 +464,6 @@ static void rxm_alter_info(const struct fi_info *hints, struct fi_info *info) } if (!ofi_mr_local(hints)) { - cur->mode &= ~FI_LOCAL_MR; - cur->tx_attr->mode &= ~FI_LOCAL_MR; - cur->rx_attr->mode &= ~FI_LOCAL_MR; cur->domain_attr->mr_mode &= ~FI_MR_LOCAL; } diff --git a/prov/shm/src/smr_attr.c b/prov/shm/src/smr_attr.c index 70ebee0aa4f..0ff25937470 100644 --- a/prov/shm/src/smr_attr.c +++ b/prov/shm/src/smr_attr.c @@ -98,7 +98,7 @@ struct fi_domain_attr smr_domain_attr = { .data_progress = FI_PROGRESS_MANUAL, .resource_mgmt = FI_RM_ENABLED, .av_type = FI_AV_UNSPEC, - .mr_mode = FI_MR_BASIC | FI_MR_SCALABLE, + .mr_mode = 0, .mr_key_size = sizeof_field(struct fi_rma_iov, key), .cq_data_size = sizeof_field(struct smr_msg_hdr, data), .cq_cnt = (1 << 10), diff --git a/prov/sm2/src/sm2_attr.c b/prov/sm2/src/sm2_attr.c index 148bff5792a..f9404acda06 100644 --- a/prov/sm2/src/sm2_attr.c +++ b/prov/sm2/src/sm2_attr.c @@ -102,7 +102,7 @@ struct fi_domain_attr sm2_domain_attr = { .data_progress = FI_PROGRESS_MANUAL, .resource_mgmt = FI_RM_ENABLED, .av_type = FI_AV_UNSPEC, - .mr_mode = FI_MR_BASIC | FI_MR_SCALABLE, + .mr_mode = 0, .mr_key_size = sizeof_field(struct fi_rma_iov, key), .cq_data_size = sizeof_field(struct sm2_xfer_hdr, cq_data), .cq_cnt = (1 << 10), diff --git a/prov/tcp/src/xnet_attr.c b/prov/tcp/src/xnet_attr.c index 
2ae79529cc5..25b9a9d3815 100644 --- a/prov/tcp/src/xnet_attr.c +++ b/prov/tcp/src/xnet_attr.c @@ -151,7 +151,7 @@ static struct fi_domain_attr xnet_domain_attr = { .control_progress = FI_PROGRESS_AUTO, .data_progress = FI_PROGRESS_AUTO, .resource_mgmt = FI_RM_ENABLED, - .mr_mode = FI_MR_SCALABLE | FI_MR_BASIC, + .mr_mode = 0, .mr_key_size = sizeof(uint64_t), .av_type = FI_AV_UNSPEC, .cq_data_size = sizeof(uint64_t), @@ -172,7 +172,7 @@ static struct fi_domain_attr xnet_rdm_domain_attr = { .control_progress = FI_PROGRESS_AUTO, .data_progress = FI_PROGRESS_AUTO, .resource_mgmt = FI_RM_ENABLED, - .mr_mode = FI_MR_SCALABLE | FI_MR_BASIC, + .mr_mode = 0, .mr_key_size = sizeof(uint64_t), .av_type = FI_AV_UNSPEC, .cq_data_size = sizeof(uint64_t), diff --git a/prov/ucx/src/ucx_init.c b/prov/ucx/src/ucx_init.c index ebd02a898ee..4da89c6597e 100644 --- a/prov/ucx/src/ucx_init.c +++ b/prov/ucx/src/ucx_init.c @@ -93,7 +93,7 @@ static struct fi_domain_attr ucx_domain_attrs = { .data_progress = FI_PROGRESS_MANUAL, .resource_mgmt = FI_RM_ENABLED, .av_type = FI_AV_UNSPEC, - .mr_mode = OFI_MR_BASIC_MAP | FI_MR_BASIC | FI_MR_RAW, + .mr_mode = OFI_MR_BASIC_MAP | FI_MR_RAW, .mr_key_size = FI_UCX_MAX_KEY_SIZE, .tx_ctx_cnt = 1, .rx_ctx_cnt = 1, diff --git a/prov/udp/src/udpx_attr.c b/prov/udp/src/udpx_attr.c index c6ad7df46fe..a187df09d20 100644 --- a/prov/udp/src/udpx_attr.c +++ b/prov/udp/src/udpx_attr.c @@ -66,7 +66,7 @@ struct fi_domain_attr udpx_domain_attr = { .data_progress = FI_PROGRESS_AUTO, .resource_mgmt = FI_RM_ENABLED, .av_type = FI_AV_UNSPEC, - .mr_mode = FI_MR_BASIC | FI_MR_SCALABLE, + .mr_mode = 0, .mr_key_size = sizeof(uint64_t), .cq_cnt = 256, .ep_cnt = 256, diff --git a/prov/util/src/util_attr.c b/prov/util/src/util_attr.c index de9a7335c7d..f6b7875b531 100644 --- a/prov/util/src/util_attr.c +++ b/prov/util/src/util_attr.c @@ -489,57 +489,19 @@ static int ofi_cap_mr_mode(uint64_t info_caps, int mr_mode) mr_mode &= ~OFI_MR_MODE_RMA_TARGET; } - return mr_mode & 
~(FI_MR_BASIC | FI_MR_SCALABLE); + return mr_mode; } -/* - * Providers should set v1.0 registration modes (FI_MR_BASIC and - * FI_MR_SCALABLE) that they support, along with all required modes. - */ int ofi_check_mr_mode(const struct fi_provider *prov, uint32_t api_version, int prov_mode, const struct fi_info *user_info) { int user_mode = user_info->domain_attr->mr_mode; int ret = -FI_ENODATA; - if ((prov_mode & FI_MR_LOCAL) && - !((user_info->mode & FI_LOCAL_MR) || (user_mode & FI_MR_LOCAL))) + prov_mode = ofi_cap_mr_mode(user_info->caps, prov_mode); + if ((user_mode & prov_mode) != prov_mode) goto out; - if (FI_VERSION_LT(api_version, FI_VERSION(1, 5))) { - switch (user_mode) { - case FI_MR_UNSPEC: - if (!(prov_mode & (FI_MR_SCALABLE | FI_MR_BASIC))) - goto out; - break; - case FI_MR_BASIC: - if (!(prov_mode & FI_MR_BASIC)) - goto out; - break; - case FI_MR_SCALABLE: - if (!(prov_mode & FI_MR_SCALABLE)) - goto out; - break; - default: - goto out; - } - } else { - if (user_mode & FI_MR_BASIC) { - if ((user_mode & ~FI_MR_BASIC) || - !(prov_mode & FI_MR_BASIC)) - goto out; - } else if (user_mode & FI_MR_SCALABLE) { - if ((user_mode & ~FI_MR_SCALABLE) || - !(prov_mode & FI_MR_SCALABLE)) - goto out; - } else { - prov_mode = ofi_cap_mr_mode(user_info->caps, prov_mode); - if (user_mode != FI_MR_UNSPEC && - (user_mode & prov_mode) != prov_mode) - goto out; - } - } - ret = 0; out: if (ret) { @@ -1121,19 +1083,9 @@ static void fi_alter_domain_attr(struct fi_domain_attr *attr, int hints_mr_mode; hints_mr_mode = hints ? hints->mr_mode : 0; - if (hints_mr_mode & (FI_MR_BASIC | FI_MR_SCALABLE)) { - attr->mr_mode = hints_mr_mode; - } else if (FI_VERSION_LT(api_version, FI_VERSION(1, 5))) { - attr->mr_mode = (attr->mr_mode && attr->mr_mode != FI_MR_SCALABLE) ? 
- FI_MR_BASIC : FI_MR_SCALABLE; - } else { - attr->mr_mode &= ~(FI_MR_BASIC | FI_MR_SCALABLE); - - if (hints && - ((hints_mr_mode & attr->mr_mode) != attr->mr_mode)) { - attr->mr_mode = ofi_cap_mr_mode(info_caps, + if (hints && ((hints_mr_mode & attr->mr_mode) != attr->mr_mode)) { + attr->mr_mode = ofi_cap_mr_mode(info_caps, attr->mr_mode & hints_mr_mode); - } } attr->caps = ofi_get_caps(info_caps, hints ? hints->caps : 0, attr->caps); @@ -1226,10 +1178,7 @@ static uint64_t ofi_get_info_caps(const struct fi_info *prov_info, user_mode = user_info->domain_attr->mr_mode; - if ((FI_VERSION_LT(api_version, FI_VERSION(1,5)) && - (user_mode == FI_MR_UNSPEC)) || - (user_mode == FI_MR_BASIC) || - ((user_mode & prov_mode & OFI_MR_MODE_RMA_TARGET) == + if (((user_mode & prov_mode & OFI_MR_MODE_RMA_TARGET) == (prov_mode & OFI_MR_MODE_RMA_TARGET))) return caps; @@ -1250,12 +1199,6 @@ void ofi_alter_info(struct fi_info *info, const struct fi_info *hints, * the checks depend on unmodified provider mr_mode attr */ info->caps = ofi_get_info_caps(info, hints, api_version); - if ((info->domain_attr->mr_mode & FI_MR_LOCAL) && - (FI_VERSION_LT(api_version, FI_VERSION(1, 5)) || - (hints && hints->domain_attr && - (hints->domain_attr->mr_mode & (FI_MR_BASIC | FI_MR_SCALABLE))))) - info->mode |= FI_LOCAL_MR; - if (hints) info->handle = hints->handle; diff --git a/prov/util/src/util_mr_map.c b/prov/util/src/util_mr_map.c index f827bcbe9ea..b143135435b 100644 --- a/prov/util/src/util_mr_map.c +++ b/prov/util/src/util_mr_map.c @@ -177,10 +177,6 @@ static int compare_mr_keys(struct ofi_rbmap *rbtree, } -/* - * If a provider or app whose version is < 1.5, calls this function and passes - * FI_MR_UNSPEC as mode, it would be treated as MR scalable. 
- */ int ofi_mr_map_init(const struct fi_provider *prov, int mode, struct ofi_mr_map *map) { @@ -188,16 +184,7 @@ int ofi_mr_map_init(const struct fi_provider *prov, int mode, if (!map->rbtree) return -FI_ENOMEM; - switch (mode) { - case FI_MR_BASIC: - map->mode = OFI_MR_BASIC_MAP; - break; - case FI_MR_SCALABLE: - map->mode = 0; - break; - default: - map->mode = mode; - } + map->mode = mode; map->prov = prov; map->key = 1; diff --git a/prov/verbs/src/verbs_info.c b/prov/verbs/src/verbs_info.c index f6bc3b42e11..f22f9d3e4ab 100644 --- a/prov/verbs/src/verbs_info.c +++ b/prov/verbs/src/verbs_info.c @@ -82,7 +82,7 @@ const struct fi_domain_attr verbs_domain_attr = { .control_progress = FI_PROGRESS_AUTO, .data_progress = FI_PROGRESS_AUTO, .resource_mgmt = FI_RM_ENABLED, - .mr_mode = OFI_MR_BASIC_MAP | FI_MR_LOCAL | FI_MR_BASIC, + .mr_mode = OFI_MR_BASIC_MAP | FI_MR_LOCAL, .mr_key_size = sizeof_field(struct ibv_sge, lkey), .cq_data_size = sizeof_field(struct ibv_send_wr, imm_data), .tx_ctx_cnt = 1024, diff --git a/src/abi_1_0.c b/src/abi_1_0.c index 34d8e605b6b..4cf3b04d8bc 100644 --- a/src/abi_1_0.c +++ b/src/abi_1_0.c @@ -61,7 +61,7 @@ struct fi_domain_attr_1_0 { enum fi_progress data_progress; enum fi_resource_mgmt resource_mgmt; enum fi_av_type av_type; - enum fi_mr_mode mr_mode; + int mr_mode; size_t mr_key_size; size_t cq_data_size; size_t cq_cnt; diff --git a/src/fi_tostr.c b/src/fi_tostr.c index e47f3704daa..d1446eb77b6 100644 --- a/src/fi_tostr.c +++ b/src/fi_tostr.c @@ -266,7 +266,6 @@ static void ofi_tostr_mode(char *buf, size_t len, uint64_t mode) IFFLAGSTRN(mode, FI_MSG_PREFIX, len); IFFLAGSTRN(mode, FI_ASYNC_IOV, len); IFFLAGSTRN(mode, FI_RX_CQ_DATA, len); - IFFLAGSTRN(mode, FI_LOCAL_MR, len); IFFLAGSTRN(mode, FI_CONTEXT2, len); ofi_remove_comma(buf); @@ -429,8 +428,6 @@ static void ofi_tostr_av_type(char *buf, size_t len, enum fi_av_type type) static void ofi_tostr_mr_mode(char *buf, size_t len, int mr_mode) { - IFFLAGSTRN(mr_mode, FI_MR_BASIC, len); - 
IFFLAGSTRN(mr_mode, FI_MR_SCALABLE, len); IFFLAGSTRN(mr_mode, FI_MR_LOCAL, len); IFFLAGSTRN(mr_mode, FI_MR_RAW, len); IFFLAGSTRN(mr_mode, FI_MR_VIRT_ADDR, len); diff --git a/util/info.c b/util/info.c index 698b340c38f..e2786fc6c3c 100644 --- a/util/info.c +++ b/util/info.c @@ -161,7 +161,6 @@ static int str2mode(char *inputstr, uint64_t *value) ORCASE(FI_MSG_PREFIX); ORCASE(FI_ASYNC_IOV); ORCASE(FI_RX_CQ_DATA); - ORCASE(FI_LOCAL_MR); ORCASE(FI_CONTEXT2); fprintf(stderr, "error: Unrecognized mode: %s\n", inputstr); @@ -346,7 +345,7 @@ int main(int argc, char **argv) hints->mode = ~0; hints->domain_attr->mode = ~0; - hints->domain_attr->mr_mode = ~(FI_MR_BASIC | FI_MR_SCALABLE); + hints->domain_attr->mr_mode = ~0; while ((op = getopt_long(argc, argv, "s:n:P:c:m:t:a:p:d:f:eg:i:lhv", longopts, &option_index)) != -1) { From e0652c1019bce86bcb5a0815582dc5d625064020 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 27 Sep 2023 15:54:20 -0700 Subject: [PATCH 26/34] core: Remove support for async memory registration Feature is not implemented natively by providers. Signed-off-by: Sean Hefty --- include/rdma/fi_domain.h | 2 +- man/fi_domain.3.md | 7 ------- man/fi_mr.3.md | 11 ----------- prov/verbs/src/verbs_mr.c | 13 ------------- 4 files changed, 1 insertion(+), 32 deletions(-) diff --git a/include/rdma/fi_domain.h b/include/rdma/fi_domain.h index 0bce6853f9b..73eeedd6b64 100644 --- a/include/rdma/fi_domain.h +++ b/include/rdma/fi_domain.h @@ -315,7 +315,7 @@ struct fi_ops_mr { }; /* Domain bind flags */ -#define FI_REG_MR (1ULL << 59) +/* #define FI_REG_MR (1ULL << 59) */ struct fid_domain { struct fid fid; diff --git a/man/fi_domain.3.md b/man/fi_domain.3.md index 8fad9bfde2b..bb9e48ee00b 100644 --- a/man/fi_domain.3.md +++ b/man/fi_domain.3.md @@ -161,15 +161,8 @@ events that occur on the domain or active endpoints allocated on a domain. This includes CM events. Endpoints may direct their control events to alternate EQs by binding directly with the EQ. 
-Binding an event queue to a domain with the FI_REG_MR flag indicates -that the provider should perform all memory registration operations -asynchronously, with the completion reported through the event queue. -If an event queue is not bound to the domain with the FI_REG_MR flag, -then memory registration requests complete synchronously. - See [`fi_ep_bind`(3)](fi_ep_bind.3.html), -[`fi_mr_bind`(3)](fi_mr_bind.3.html), [`fi_pep_bind`(3)](fi_pep_bind.3.html), and [`fi_scalable_ep_bind`(3)](fi_scalable_ep_bind.3.html) for more information. diff --git a/man/fi_mr.3.md b/man/fi_mr.3.md index 1b22926153b..142c9d8bebe 100644 --- a/man/fi_mr.3.md +++ b/man/fi_mr.3.md @@ -308,17 +308,6 @@ fabric resources. The main difference between registration functions are the number and type of parameters that they accept as input. Otherwise, they perform the same general function. -By default, memory registration completes synchronously. I.e. the -registration call will not return until the registration has -completed. Memory registration can complete asynchronous by binding -the resource domain to an event queue using the FI_REG_MR flag. See -fi_domain_bind. When memory registration is asynchronous, in order to -avoid a race condition between the registration call returning and the -corresponding reading of the event from the EQ, the mr output -parameter will be written before any event associated with the -operation may be read by the application. An asynchronous event will -not be generated unless the registration call returns success (0). 
- ## fi_mr_reg The fi_mr_reg call registers the user-specified memory buffer with diff --git a/prov/verbs/src/verbs_mr.c b/prov/verbs/src/verbs_mr.c index 4c7fd26497d..188d2757caa 100644 --- a/prov/verbs/src/verbs_mr.c +++ b/prov/verbs/src/verbs_mr.c @@ -159,19 +159,6 @@ vrb_mr_reg_common(struct vrb_mem_desc *md, int vrb_access, const void *buf, md->lkey = md->mr->lkey; } - if (md->domain->eq_flags & FI_REG_MR) { - struct fi_eq_entry entry = { - .fid = &md->mr_fid.fid, - .context = context, - }; - if (md->domain->eq) - vrb_eq_write_event(md->domain->eq, FI_MR_COMPLETE, - &entry, sizeof(entry)); - else if (md->domain->util_domain.eq) - /* This branch is taken for the verbs/DGRAM */ - fi_eq_write(&md->domain->util_domain.eq->eq_fid, - FI_MR_COMPLETE, &entry, sizeof(entry), 0); - } return FI_SUCCESS; } From 8cf5f5aa892fcba3036b87a19c1d3a42a7a28d91 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 27 Sep 2023 16:10:01 -0700 Subject: [PATCH 27/34] core: Cleanup FI_ORDER flags Remove FI_ORDER_NONE (flag that's 0) and FI_ORDER_STRICT (which isn't a flag, and covers only a portion of the valid flags). Remove FI_ORDER_DATA, which will not be supported by version 2 in order to allow for greater provider optimization. 
Signed-off-by: Sean Hefty --- fabtests/unit/getinfo_test.c | 6 ++---- include/rdma/fabric.h | 6 +++--- man/fi_endpoint.3.md | 9 ++------- man/fi_netdir.7.md | 3 --- man/fi_verbs.7.md | 5 ----- prov/efa/src/efa_prov_info.c | 2 +- prov/psm3/src/psmx3.h | 2 +- 7 files changed, 9 insertions(+), 24 deletions(-) diff --git a/fabtests/unit/getinfo_test.c b/fabtests/unit/getinfo_test.c index 6fb8d01afde..70ffda87cd9 100644 --- a/fabtests/unit/getinfo_test.c +++ b/fabtests/unit/getinfo_test.c @@ -181,8 +181,7 @@ static int validate_tx_ordering_bits(char *node, char *service, uint64_t flags, struct fi_info *hints, struct fi_info **info) { return validate_bit_combos(node, service, flags, hints, info, - FI_ORDER_STRICT | FI_ORDER_DATA, - init_tx_order, check_tx_order); + ~0ULL, init_tx_order, check_tx_order); } static int init_rx_order(struct fi_info *hints, uint64_t order) @@ -200,8 +199,7 @@ static int validate_rx_ordering_bits(char *node, char *service, uint64_t flags, struct fi_info *hints, struct fi_info **info) { return validate_bit_combos(node, service, flags, hints, info, - FI_ORDER_STRICT | FI_ORDER_DATA, - init_rx_order, check_rx_order); + ~0ULL, init_rx_order, check_rx_order); } static int init_caps(struct fi_info *hints, uint64_t bits) diff --git a/include/rdma/fabric.h b/include/rdma/fabric.h index 681aa990783..ad2b2edcef4 100644 --- a/include/rdma/fabric.h +++ b/include/rdma/fabric.h @@ -259,7 +259,7 @@ enum fi_resource_mgmt { FI_RM_ENABLED }; -#define FI_ORDER_NONE 0ULL +/* #define FI_ORDER_NONE 0ULL */ #define FI_ORDER_RAR (1ULL << 0) #define FI_ORDER_RAW (1ULL << 1) #define FI_ORDER_RAS (1ULL << 2) @@ -269,7 +269,7 @@ enum fi_resource_mgmt { #define FI_ORDER_SAR (1ULL << 6) #define FI_ORDER_SAW (1ULL << 7) #define FI_ORDER_SAS (1ULL << 8) -#define FI_ORDER_STRICT 0x1FF +/* #define FI_ORDER_STRICT 0x1FF */ #define FI_ORDER_RMA_RAR (1ULL << 32) #define FI_ORDER_RMA_RAW (1ULL << 33) @@ -280,7 +280,7 @@ enum fi_resource_mgmt { #define FI_ORDER_ATOMIC_WAR (1ULL << 
38) #define FI_ORDER_ATOMIC_WAW (1ULL << 39) -#define FI_ORDER_DATA (1ULL << 16) +/* #define FI_ORDER_DATA (1ULL << 16) */ enum fi_ep_type { FI_EP_UNSPEC, diff --git a/man/fi_endpoint.3.md b/man/fi_endpoint.3.md index aa6892138b9..172eb9537c7 100644 --- a/man/fi_endpoint.3.md +++ b/man/fi_endpoint.3.md @@ -964,11 +964,6 @@ transfer operation in order to guarantee that ordering is met. update operations. If not atomic updates may be transmitted out of order from their submission. -*FI_ORDER_NONE* -: No ordering is specified. This value may be used as input in order - to obtain the default message order supported by the provider. FI_ORDER_NONE - is an alias for the value 0. - *FI_ORDER_RAR* : Read after read. If set, RMA and atomic read operations are transmitted in the order submitted relative to other @@ -1210,7 +1205,7 @@ that messages will be handled in order based on a message level sequence number. The following ordering flags, as defined for transmit ordering, also -apply to the processing of received operations: FI_ORDER_NONE, +apply to the processing of received operations: FI_ORDER_RAR, FI_ORDER_RAW, FI_ORDER_RAS, FI_ORDER_WAR, FI_ORDER_WAW, FI_ORDER_WAS, FI_ORDER_SAR, FI_ORDER_SAW, FI_ORDER_SAS, FI_ORDER_RMA_RAR, FI_ORDER_RMA_RAW, FI_ORDER_RMA_WAR, FI_ORDER_RMA_WAW, FI_ORDER_ATOMIC_RAR, @@ -1506,7 +1501,7 @@ can return provider info structures that can support the minimal set of requirements (such that the application maintains correctness). However, it can also return provider info structures that exceed application requirements. As an example, consider an application -requesting msg_order as FI_ORDER_NONE. The resulting output from +requesting no msg_order. The resulting output from fi_getinfo may have all the ordering bits set. The application can reset the ordering bits it does not require before creating the endpoint. 
The provider is free to implement a stricter ordering than is diff --git a/man/fi_netdir.7.md b/man/fi_netdir.7.md index 31174bc4014..c64c2e682af 100644 --- a/man/fi_netdir.7.md +++ b/man/fi_netdir.7.md @@ -47,9 +47,6 @@ libfabric API: : The provider supports FI_INJECT, FI_COMPLETION, FI_TRANSMIT_COMPLETE, FI_INJECT_COMPLETE, FI_DELIVERY_COMPLETE, FI_SELECTIVE_COMPLETION -*Completion ordering* -: RX/TX contexts: FI_ORDER_STRICT - *Other supported features* : Multiple input/output vector (IOV) is supported for FI_RMA read/write and FI_MSG receive/transmit operations. diff --git a/man/fi_verbs.7.md b/man/fi_verbs.7.md index 462a0f387b4..6497fdda10a 100644 --- a/man/fi_verbs.7.md +++ b/man/fi_verbs.7.md @@ -93,11 +93,6 @@ Verbs provider support the following message ordering: * Send after Send -and the following completion ordering: - - * TX contexts: FI_ORDER_STRICT - * RX contexts: FI_ORDER_DATA - ### Fork Verbs provider does not provide fork safety by default. Fork safety can be requested by setting IBV_FORK_SAFE, or RDMAV_FORK_SAFE. 
If the system configuration supports diff --git a/prov/efa/src/efa_prov_info.c b/prov/efa/src/efa_prov_info.c index c8186bfbcc4..bf6a7bfdab7 100644 --- a/prov/efa/src/efa_prov_info.c +++ b/prov/efa/src/efa_prov_info.c @@ -59,7 +59,7 @@ #define EFA_RX_RDM_OP_FLAGS (0) #define EFA_RX_DGRM_OP_FLAGS (0) -#define EFA_MSG_ORDER (FI_ORDER_NONE) +#define EFA_MSG_ORDER (0) #define EFA_NO_DEFAULT -1 diff --git a/prov/psm3/src/psmx3.h b/prov/psm3/src/psmx3.h index 44eb6688fa6..974ab3260ed 100644 --- a/prov/psm3/src/psmx3.h +++ b/prov/psm3/src/psmx3.h @@ -129,7 +129,7 @@ extern struct fi_provider psmx3_prov; #define PSMX3_RMA_ORDER_SIZE (4096) #define PSMX3_MSG_ORDER (FI_ORDER_SAS | OFI_ORDER_RAR_SET | OFI_ORDER_RAW_SET | \ OFI_ORDER_WAR_SET | OFI_ORDER_WAW_SET) -#define PSMX3_COMP_ORDER FI_ORDER_NONE +#define PSMX3_COMP_ORDER 0 /* * Four bits are reserved from the 64-bit tag space as a flags to identify the From 7ab212a4b280f67d42e8b3266303cfac0b1b7495 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 27 Sep 2023 16:56:49 -0700 Subject: [PATCH 28/34] core: Restrict endpoints to a single CQ Remove the option of directing transmit and receive completions to separate CQs for the same endpoint. The option adds complexity at the provider and application levels. This is largely the result of needing SW based protocols for certain operations, such as tag matching. This either makes it necessary for the app to drive progress across multiple CQs, or the provider emulates the application's CQs in SW. This change updates the man page only. Provider developers are left to update their code bases separately. 
Signed-off-by: Sean Hefty --- man/fi_endpoint.3.md | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/man/fi_endpoint.3.md b/man/fi_endpoint.3.md index 172eb9537c7..2468ed63bef 100644 --- a/man/fi_endpoint.3.md +++ b/man/fi_endpoint.3.md @@ -282,24 +282,11 @@ and/or receive context, the shared contexts must be bound to the endpoint. CQs, counters, AV, and shared contexts must be bound to endpoints before they are enabled either explicitly or implicitly. -An endpoint must be bound with CQs capable of reporting completions for any -asynchronous operation initiated on the endpoint. For example, if the -endpoint supports any outbound transfers (sends, RMA, atomics, etc.), then -it must be bound to a completion queue that can report transmit completions. -This is true even if the endpoint is configured to suppress successful -completions, in order that operations that complete in error may be reported -to the user. - -An active endpoint may direct asynchronous completions to different -CQs, based on the type of operation. This is specified using -fi_ep_bind flags. The following flags may be OR'ed together when -binding an endpoint to a completion domain CQ. +Active endpoints, transmit contexts (from a scalable endpoint), and receive +contexts (from a scalable endpoint) must be bound to completion queues. An +endpoint or context may only be bound to a single completion queue. -*FI_RECV* -: Directs the notification of inbound data transfers to the specified - completion queue. This includes received messages. This binding - automatically includes FI_REMOTE_WRITE, if applicable to the - endpoint. +The following flag may be specified when binding to a CQ. *FI_SELECTIVE_COMPLETION* : By default, data transfer operations write CQ completion entries @@ -322,9 +309,14 @@ binding an endpoint to a completion domain CQ. interacts with the FI_CONTEXT and FI_CONTEXT2 mode bits. 
*FI_TRANSMIT* -: Directs the completion of outbound data transfer requests to the - specified completion queue. This includes send message, RMA, and - atomic operations. +: For compatibility with libfabric version 1, this flag may optionally be + specified when binding an endpoint with transmit capabilities (e.g. + FI_SEND, FI_WRITE, FI_READ, etc.) to a CQ. + +*FI_RECV* +: For compatibility with libfabric version 1, this flag may optionally be + specified when binding an endpoint with receive capabilities (e.g. + FI_RECV, FI_REMOTE_WRITE, etc.) to a CQ. An endpoint may optionally be bound to a completion counter. Associating an endpoint with a counter is in addition to binding the EP with a CQ. When @@ -361,7 +353,7 @@ binding an endpoint to a counter, the following flags may be specified. : Increments the specified counter whenever an RMA write or base atomic operation initiated from the endpoint has completed successfully or in error. -An endpoint may only be bound to a single CQ or counter for a given +An endpoint may only be bound to a single counter for a given type of operation. For example, a EP may not bind to two counters both using FI_WRITE. Furthermore, providers may limit CQ and counter bindings to endpoints of the same endpoint type (DGRAM, MSG, RDM, etc.). From 39a01fef7321107b61ebfd9312c9a9a4f33e1819 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 27 Sep 2023 17:51:29 -0700 Subject: [PATCH 29/34] core: Require using libfabric APIs to allocate fi_info structures Disallow users hand-crafting or hand-copying their own fi_info structs. This allows the library to allocate hidden fields for internal use. Plus, there's no need for apps to do this, given that the API call is way easier to use. 
Signed-off-by: Sean Hefty --- man/fi_domain.3.md | 2 ++ man/fi_endpoint.3.md | 5 +++-- man/fi_getinfo.3.md | 9 ++++++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/man/fi_domain.3.md b/man/fi_domain.3.md index bb9e48ee00b..d6cf62bfff0 100644 --- a/man/fi_domain.3.md +++ b/man/fi_domain.3.md @@ -41,6 +41,8 @@ int fi_set_ops(struct fid *domain, const char *name, uint64_t flags, *info* : Fabric information, including domain capabilities and attributes. + The struct fi_info must have been obtained using either fi_getinfo() + or fi_dupinfo(). *domain* : An opened access domain. diff --git a/man/fi_endpoint.3.md b/man/fi_endpoint.3.md index 2468ed63bef..eee80e0a2c1 100644 --- a/man/fi_endpoint.3.md +++ b/man/fi_endpoint.3.md @@ -124,8 +124,9 @@ DEPRECATED ssize_t fi_tx_size_left(struct fid_ep *ep); associated resource. *info* -: Details about the fabric interface endpoint to be opened, obtained - from fi_getinfo. +: Details about the fabric interface endpoint to be opened. + The struct fi_info must have been obtained using either fi_getinfo() + or fi_dupinfo(). *ep* : A fabric endpoint. diff --git a/man/fi_getinfo.3.md b/man/fi_getinfo.3.md index a78a118f6c6..fb27a3959db 100644 --- a/man/fi_getinfo.3.md +++ b/man/fi_getinfo.3.md @@ -42,7 +42,8 @@ struct fi_info *fi_dupinfo(const struct fi_info *info); *hints* : Reference to an fi_info structure that specifies criteria for - selecting the returned fabric information. + selecting the returned fabric information. The fi_info hints + structure must be allocated using either fi_allocinfo() or fi_dupinfo(). *info* : A pointer to a linked list of fi_info structures containing response @@ -687,6 +688,12 @@ via fi_freeinfo(). # NOTES +Various libfabric calls, including fi_getinfo, take a struct fi_info as +input. Applications must use libfabric allocated fi_info structures. +A zeroed struct fi_info can be allocated using fi_allocinfo, which may +then be initialized by the user. 
A struct fi_info may be copied for +modification using the fi_dupinfo() call. + If hints are provided, the operation will be controlled by the values that are supplied in the various fields (see section on _fi_info_). Applications that require specific communication interfaces, domains, From 5fb413c19dde16731e2f350b63bab7506e3a724c Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 27 Sep 2023 18:01:10 -0700 Subject: [PATCH 30/34] core: Add fi_fabric2() API Add a new call that takes fi_info as input, which provides consistency with the domain and endpoint open calls. Signed-off-by: Sean Hefty --- Makefile.am | 1 + include/rdma/fabric.h | 2 ++ libfabric.def | 1 + libfabric.map.in | 4 +++- man/man3/fi_fabric2.3 | 1 + src/fabric.c | 14 ++++++++++++++ 6 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 man/man3/fi_fabric2.3 diff --git a/Makefile.am b/Makefile.am index bfc9068e6af..4717c0a4c8b 100644 --- a/Makefile.am +++ b/Makefile.am @@ -349,6 +349,7 @@ dummy_man_pages = \ man/man3/fi_eq_sread.3 \ man/man3/fi_eq_strerror.3 \ man/man3/fi_eq_write.3 \ + man/man3/fi_fabric2.3 \ man/man3/fi_fetch_atomic.3 \ man/man3/fi_fetch_atomic_valid.3 \ man/man3/fi_fetch_atomicmsg.3 \ diff --git a/include/rdma/fabric.h b/include/rdma/fabric.h index ad2b2edcef4..aab042b24d8 100644 --- a/include/rdma/fabric.h +++ b/include/rdma/fabric.h @@ -600,6 +600,8 @@ struct fid_fabric { uint32_t api_version; }; +int fi_fabric2(struct fi_info *info, struct fid_fabric **fabric, + uint64_t flags, void *context); int fi_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric, void *context); int fi_open(uint32_t version, const char *name, void *attr, size_t attr_len, diff --git a/libfabric.def b/libfabric.def index 62ec295436a..5d982145b82 100644 --- a/libfabric.def +++ b/libfabric.def @@ -16,3 +16,4 @@ EXPORTS fi_tostr_r = fi_tostr_r fi_open = fi_open fi_log_ready = fi_log_ready + fi_fabric2 = fi_fabric2 diff --git a/libfabric.map.in b/libfabric.map.in index 
342216c57bf..48156eafb05 100644 --- a/libfabric.map.in +++ b/libfabric.map.in @@ -52,4 +52,6 @@ FABRIC_1.5 { FABRIC_1.6 { global: fi_log_ready; -} FABRIC_1.5; \ No newline at end of file + + fi_fabric2; +} FABRIC_1.5; diff --git a/man/man3/fi_fabric2.3 b/man/man3/fi_fabric2.3 new file mode 100644 index 00000000000..d03cd480b77 --- /dev/null +++ b/man/man3/fi_fabric2.3 @@ -0,0 +1 @@ +.so man3/fi_fabric.3 diff --git a/src/fabric.c b/src/fabric.c index e3284e9bc86..8d597f12657 100644 --- a/src/fabric.c +++ b/src/fabric.c @@ -1494,6 +1494,20 @@ int DEFAULT_SYMVER_PRE(fi_fabric)(struct fi_fabric_attr *attr, } DEFAULT_SYMVER(fi_fabric_, fi_fabric, FABRIC_1.1); +// TODO: THIS IS WRONG!!! The libfabric.map.in MUST be updated correctly. +// The SYMVER for all calls must also be updated. For now, this is simply +// trying to avoid a conflict with other API changes to verify compilation. +__attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) +int DEFAULT_SYMVER_PRE(fi_fabric2)(struct fi_info *info, + struct fid_fabric **fabric, uint64_t flags, void *context) +{ + if (flags || !info) + return -FI_EINVAL; + + return fi_fabric(info->fabric_attr, fabric, context); +} +CURRENT_SYMVER(fi_fabric2_, fi_fabric2); + __attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) uint32_t DEFAULT_SYMVER_PRE(fi_version)(void) { From 1feb6d890f52ba251b322a3e7fddd7315ab30eb7 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 27 Sep 2023 18:40:51 -0700 Subject: [PATCH 31/34] core/log: Replace fi_log_subsys with flags Subsystem filtering isn't useful (or likely ever used). Remove the enum fi_log_subsys. Instead convert the API to accept int flags. This maintains API compatibility. Update all current FI_LOG_xxx subsys values to 0. This avoids needing to update the providers to the new API, forcing them to pass in 0 for the flags. No actual flag values are defined. Those become a placeholder for future options. The logging checks are simplified by this change. 
Signed-off-by: Sean Hefty --- include/ofi.h | 1 - include/ofi_net.h | 8 +- include/rdma/fi_ext.h | 6 +- include/rdma/providers/fi_log.h | 75 ++++++++++--------- man/fi_fabric.3.md | 3 - man/fi_provider.3.md | 12 +-- prov/efa/src/efa_prov.h | 18 ++--- prov/hook/hook_debug/src/hook_debug.c | 10 +-- prov/opx/src/test/test_hfi_select.c | 4 +- prov/psm3/src/psmx3.h | 32 ++++---- prov/verbs/src/verbs_ofi.h | 18 ++--- src/common.c | 8 +- src/fi_tostr.c | 3 - src/log.c | 104 ++++++-------------------- 14 files changed, 119 insertions(+), 183 deletions(-) diff --git a/include/ofi.h b/include/ofi.h index a13730d4911..70cec20c60b 100644 --- a/include/ofi.h +++ b/include/ofi.h @@ -552,7 +552,6 @@ size_t ofi_vrb_speed(uint8_t speed, uint8_t width); int ofi_open_log(uint32_t version, void *attr, size_t attr_len, uint64_t flags, struct fid **fid, void *context); void ofi_tostr_log_level(char *buf, size_t len, enum fi_log_level level); -void ofi_tostr_log_subsys(char *buf, size_t len, enum fi_log_subsys subsys); #ifdef __cplusplus } diff --git a/include/ofi_net.h b/include/ofi_net.h index 1eeaea980d7..e65e14d9542 100644 --- a/include/ofi_net.h +++ b/include/ofi_net.h @@ -906,17 +906,17 @@ int ofi_str_toaddr(const char *str, uint32_t *addr_format, void ofi_straddr_log_internal(const char *func, int line, const struct fi_provider *prov, enum fi_log_level level, - enum fi_log_subsys subsys, char *log_str, + int flags, char *log_str, const void *addr); #define ofi_straddr_log(...) \ ofi_straddr_log_internal(__func__, __LINE__, __VA_ARGS__) #if ENABLE_DEBUG -#define ofi_straddr_dbg(prov, subsystem, ...) \ - ofi_straddr_log(prov, FI_LOG_DEBUG, subsystem, __VA_ARGS__) +#define ofi_straddr_dbg(prov, flags, ...) \ + ofi_straddr_log(prov, FI_LOG_DEBUG, flags, __VA_ARGS__) #else -#define ofi_straddr_dbg(prov, subsystem, ...) do {} while(0) +#define ofi_straddr_dbg(prov, flags, ...) 
do {} while(0) #endif diff --git a/include/rdma/fi_ext.h b/include/rdma/fi_ext.h index a17288dae5a..5265313057d 100644 --- a/include/rdma/fi_ext.h +++ b/include/rdma/fi_ext.h @@ -145,11 +145,11 @@ struct fid_mem_monitor { struct fi_ops_log { size_t size; int (*enabled)(const struct fi_provider *prov, enum fi_log_level level, - enum fi_log_subsys subsys, uint64_t flags); + int prov_flags, uint64_t flags); int (*ready)(const struct fi_provider *prov, enum fi_log_level level, - enum fi_log_subsys subsys, uint64_t flags, uint64_t *showtime); + int prov_flags, uint64_t flags, uint64_t *showtime); void (*log)(const struct fi_provider *prov, enum fi_log_level level, - enum fi_log_subsys subsys, const char *func, int line, + int prov_flags, const char *func, int line, const char *msg); }; diff --git a/include/rdma/providers/fi_log.h b/include/rdma/providers/fi_log.h index 9268a2ee86c..10d645282d5 100644 --- a/include/rdma/providers/fi_log.h +++ b/include/rdma/providers/fi_log.h @@ -42,17 +42,18 @@ extern "C" { #endif -enum fi_log_subsys { - FI_LOG_CORE, - FI_LOG_FABRIC, - FI_LOG_DOMAIN, - FI_LOG_EP_CTRL, - FI_LOG_EP_DATA, - FI_LOG_AV, - FI_LOG_CQ, - FI_LOG_EQ, - FI_LOG_MR, - FI_LOG_CNTR, +/* Compatibility with version 1 */ +enum { + FI_LOG_CORE = 0, + FI_LOG_FABRIC = 0, + FI_LOG_DOMAIN = 0, + FI_LOG_EP_CTRL = 0, + FI_LOG_EP_DATA = 0, + FI_LOG_AV = 0, + FI_LOG_CQ = 0, + FI_LOG_EQ = 0, + FI_LOG_MR = 0, + FI_LOG_CNTR = 0, }; enum fi_log_level { @@ -63,64 +64,64 @@ enum fi_log_level { }; int fi_log_enabled(const struct fi_provider *prov, enum fi_log_level level, - enum fi_log_subsys subsys); + int flags); int fi_log_ready(const struct fi_provider *prov, enum fi_log_level level, - enum fi_log_subsys subsys, uint64_t *showtime); + int flags, uint64_t *showtime); void fi_log(const struct fi_provider *prov, enum fi_log_level level, - enum fi_log_subsys subsys, const char *func, int line, + int flags, const char *func, int line, const char *fmt, ...) 
FI_FORMAT_PRINTF(6, 7); -#define FI_LOG(prov, level, subsystem, ...) \ +#define FI_LOG(prov, level, flags, ...) \ do { \ - if (fi_log_enabled(prov, level, subsystem)) { \ + if (fi_log_enabled(prov, level, flags)) { \ int saved_errno = errno; \ - fi_log(prov, level, subsystem, \ + fi_log(prov, level, flags, \ __func__, __LINE__, __VA_ARGS__); \ errno = saved_errno; \ } \ } while (0) -#define FI_LOG_SPARSE(prov, level, subsystem, ...) \ +#define FI_LOG_SPARSE(prov, level, flags, ...) \ do { \ static uint64_t showtime; \ - if (fi_log_ready(prov, level, subsystem, &showtime)) { \ + if (fi_log_ready(prov, level, flags, &showtime)) { \ int saved_errno = errno; \ - fi_log(prov, level, subsystem, \ + fi_log(prov, level, flags, \ __func__, __LINE__, __VA_ARGS__); \ errno = saved_errno; \ } \ } while (0) -#define FI_WARN(prov, subsystem, ...) \ - FI_LOG(prov, FI_LOG_WARN, subsystem, __VA_ARGS__) -#define FI_WARN_SPARSE(prov, subsystem, ...) \ - FI_LOG_SPARSE(prov, FI_LOG_WARN, subsystem, __VA_ARGS__) +#define FI_WARN(prov, flags, ...) \ + FI_LOG(prov, FI_LOG_WARN, flags, __VA_ARGS__) +#define FI_WARN_SPARSE(prov, flags, ...) \ + FI_LOG_SPARSE(prov, FI_LOG_WARN, flags, __VA_ARGS__) -#define FI_TRACE(prov, subsystem, ...) \ - FI_LOG(prov, FI_LOG_TRACE, subsystem, __VA_ARGS__) +#define FI_TRACE(prov, flags, ...) \ + FI_LOG(prov, FI_LOG_TRACE, flags, __VA_ARGS__) -#define FI_INFO(prov, subsystem, ...) \ - FI_LOG(prov, FI_LOG_INFO, subsystem, __VA_ARGS__) +#define FI_INFO(prov, flags, ...) \ + FI_LOG(prov, FI_LOG_INFO, flags, __VA_ARGS__) #if defined(ENABLE_DEBUG) && ENABLE_DEBUG -#define FI_DBG(prov, subsystem, ...) \ - FI_LOG(prov, FI_LOG_DEBUG, subsystem, __VA_ARGS__) -#define FI_DBG_TRACE(prov, subsystem, ...) \ - FI_LOG(prov, FI_LOG_TRACE, subsystem, __VA_ARGS__) +#define FI_DBG(prov, flags, ...) \ + FI_LOG(prov, FI_LOG_DEBUG, flags, __VA_ARGS__) +#define FI_DBG_TRACE(prov, flags, ...) 
\ + FI_LOG(prov, FI_LOG_TRACE, flags, __VA_ARGS__) #else -#define FI_DBG(prov_name, subsystem, ...) \ +#define FI_DBG(prov_name, flags, ...) \ do {} while (0) -#define FI_DBG_TRACE(prov, subsystem, ...) \ +#define FI_DBG_TRACE(prov, flags, ...) \ do {} while (0) #endif -#define FI_WARN_ONCE(prov, subsystem, ...) \ +#define FI_WARN_ONCE(prov, flags, ...) \ do { \ static int warned = 0; \ if (!warned && \ - fi_log_enabled(prov, FI_LOG_WARN, subsystem)) { \ + fi_log_enabled(prov, FI_LOG_WARN, flags)) { \ int saved_errno = errno; \ - fi_log(prov, FI_LOG_WARN, subsystem, \ + fi_log(prov, FI_LOG_WARN, flags, \ __func__, __LINE__, __VA_ARGS__); \ warned = 1; \ errno = saved_errno; \ diff --git a/man/fi_fabric.3.md b/man/fi_fabric.3.md index 0d899ff9137..71d5d64993f 100644 --- a/man/fi_fabric.3.md +++ b/man/fi_fabric.3.md @@ -174,9 +174,6 @@ datatype or field value. *FI_TYPE_LOG_LEVEL* : enum fi_log_level -*FI_TYPE_LOG_SUBSYS* -: enum fi_log_subsys - fi_tostr() will return a pointer to an internal libfabric buffer that should not be modified, and will be overwritten the next time fi_tostr() is invoked. fi_tostr() is not thread safe. diff --git a/man/fi_provider.3.md b/man/fi_provider.3.md index cb4edb4e605..d7cd4dd7df7 100644 --- a/man/fi_provider.3.md +++ b/man/fi_provider.3.md @@ -51,13 +51,13 @@ int fi_param_get_size_t(struct fi_provider *provider, const char *param_name, #include int fi_log_enabled(const struct fi_provider *prov, enum fi_log_level level, - enum fi_log_subsys subsys); + int flags); int fi_log_ready(const struct fi_provider *prov, enum fi_log_level level, - enum fi_log_subsys subsys, uint64_t *showtime); + int flags, uint64_t *showtime); void fi_log(const struct fi_provider *prov, enum fi_log_level level, - enum fi_log_subsys subsys, const char *func, int line, + int flags, const char *func, int line, const char *fmt, ...); ``` @@ -208,11 +208,11 @@ new callback functions. 
struct fi_ops_log { size_t size; int (*enabled)(const struct fi_provider *prov, enum fi_log_level level, - enum fi_log_subsys subsys, uint64_t flags); + int prov_flags, uint64_t flags); int (*ready)(const struct fi_provider *prov, enum fi_log_level level, - enum fi_log_subsys subsys, uint64_t flags, uint64_t *showtime); + int prov_flags, uint64_t flags, uint64_t *showtime); void (*log)(const struct fi_provider *prov, enum fi_log_level level, - enum fi_log_subsys subsys, const char *func, int line, + int prov_flags, const char *func, int line, const char *msg); }; diff --git a/prov/efa/src/efa_prov.h b/prov/efa/src/efa_prov.h index 456f02ed9cb..2aa25907429 100644 --- a/prov/efa/src/efa_prov.h +++ b/prov/efa/src/efa_prov.h @@ -39,14 +39,14 @@ extern struct fi_provider efa_prov; extern struct util_prov efa_util_prov; -#define EFA_WARN(subsys, ...) FI_WARN(&efa_prov, subsys, __VA_ARGS__) -#define EFA_WARN_ONCE(subsys, ...) FI_WARN_ONCE(&efa_prov, subsys, __VA_ARGS__) -#define EFA_TRACE(subsys, ...) FI_TRACE(&efa_prov, subsys, __VA_ARGS__) -#define EFA_INFO(subsys, ...) FI_INFO(&efa_prov, subsys, __VA_ARGS__) -#define EFA_INFO_ERRNO(subsys, fn, errno) \ - EFA_INFO(subsys, fn ": %s(%d)\n", strerror(errno), errno) -#define EFA_WARN_ERRNO(subsys, fn, errno) \ - EFA_WARN(subsys, fn ": %s(%d)\n", strerror(errno), errno) -#define EFA_DBG(subsys, ...) FI_DBG(&efa_prov, subsys, __VA_ARGS__) +#define EFA_WARN(flags, ...) FI_WARN(&efa_prov, flags, __VA_ARGS__) +#define EFA_WARN_ONCE(flags, ...) FI_WARN_ONCE(&efa_prov, flags, __VA_ARGS__) +#define EFA_TRACE(flags, ...) FI_TRACE(&efa_prov, flags, __VA_ARGS__) +#define EFA_INFO(flags, ...) FI_INFO(&efa_prov, flags, __VA_ARGS__) +#define EFA_INFO_ERRNO(flags, fn, errno) \ + EFA_INFO(flags, fn ": %s(%d)\n", strerror(errno), errno) +#define EFA_WARN_ERRNO(flags, fn, errno) \ + EFA_WARN(flags, fn ": %s(%d)\n", strerror(errno), errno) +#define EFA_DBG(flags, ...) 
FI_DBG(&efa_prov, flags, __VA_ARGS__) #endif \ No newline at end of file diff --git a/prov/hook/hook_debug/src/hook_debug.c b/prov/hook/hook_debug/src/hook_debug.c index da19a119dc1..eb8f5bc0caa 100644 --- a/prov/hook/hook_debug/src/hook_debug.c +++ b/prov/hook/hook_debug/src/hook_debug.c @@ -82,21 +82,21 @@ hook_debug_get_rx_entry(struct hook_debug_ep *myep, void *context, } static void hook_debug_trace_exit(struct fid *fid, struct fid *hfid, - enum fi_log_subsys subsys, const char *fn, + int flags, const char *fn, ssize_t ret, size_t *eagain_count) { if (!config.trace_exit) return; if (ret > 0) { - FI_TRACE(hook_to_hprov(fid), subsys, "%s (fid: %p) returned: " + FI_TRACE(hook_to_hprov(fid), flags, "%s (fid: %p) returned: " "%zd\n", fn, (void *) hfid, ret); goto out; } if (ret != -FI_EAGAIN || !eagain_count || !((*eagain_count)++ % HOOK_DEBUG_EAGAIN_LOG)) - FI_TRACE(hook_to_hprov(fid), subsys, "%s (fid: %p) returned: " + FI_TRACE(hook_to_hprov(fid), flags, "%s (fid: %p) returned: " "%zd (%s)\n", fn, (void *) hfid, ret, fi_strerror(-ret)); out: if (eagain_count && ret != -FI_EAGAIN) @@ -472,8 +472,8 @@ hook_debug_tinjectdata(struct fid_ep *ep, const void *buf, size_t len, return ret; } -#define HOOK_DEBUG_TRACE(fabric, subsys, ...) \ - FI_TRACE(hook_fabric_to_hprov(fabric), subsys, __VA_ARGS__) +#define HOOK_DEBUG_TRACE(fabric, flags, ...) \ + FI_TRACE(hook_fabric_to_hprov(fabric), flags, __VA_ARGS__) #define HOOK_DEBUG_CQ_TRACE(cq, ...) \ HOOK_DEBUG_TRACE(cq->hook_cq.domain->fabric, FI_LOG_CQ, __VA_ARGS__) diff --git a/prov/opx/src/test/test_hfi_select.c b/prov/opx/src/test/test_hfi_select.c index f07d962c2da..e3da0792c4c 100644 --- a/prov/opx/src/test/test_hfi_select.c +++ b/prov/opx/src/test/test_hfi_select.c @@ -43,7 +43,7 @@ struct fi_provider *fi_opx_provider = NULL; void fi_log(const struct fi_provider *prov, enum fi_log_level level, - enum fi_log_subsys subsys, const char *func, int line, + int flags, const char *func, int line, const char *fmt, ...) 
{ va_list ap; @@ -53,7 +53,7 @@ void fi_log(const struct fi_provider *prov, enum fi_log_level level, } int fi_log_enabled(const struct fi_provider *prov, enum fi_log_level level, - enum fi_log_subsys subsys) + int flags) { return 1; } diff --git a/prov/psm3/src/psmx3.h b/prov/psm3/src/psmx3.h index 974ab3260ed..45309c71ecf 100644 --- a/prov/psm3/src/psmx3.h +++ b/prov/psm3/src/psmx3.h @@ -85,22 +85,22 @@ extern "C" { #endif /* wrapper for logging macros so we can add our process label */ -#define PSMX3_INFO(prov, subsys, format, ...) \ - FI_INFO(prov, subsys, "%s: " format, psm3_get_mylabel(), ##__VA_ARGS__) -#define PSMX3_DBG_TRACE(prov, subsys, format, ...) \ - FI_DBG_TRACE(prov, subsys, "%s: " format, psm3_get_mylabel(), ##__VA_ARGS__) -#define PSMX3_TRACE(prov, subsys, format, ...) \ - FI_TRACE(prov, subsys, "%s: " format, psm3_get_mylabel(), ##__VA_ARGS__) -#define PSMX3_WARN(prov, subsys, format, ...) \ - FI_WARN(prov, subsys, "%s: " format, psm3_get_mylabel(), ##__VA_ARGS__) -#define PSMX3_WARN_SPARSE(prov, subsys, format, ...) \ - FI_WARN_SPARSE(prov, subsys, "%s: " format, psm3_get_mylabel(), ##__VA_ARGS__) -#define PSMX3_WARN_ONCE(prov, subsys, format, ...) \ - FI_WARN_ONCE(prov, subsys, "%s: " format, psm3_get_mylabel(), ##__VA_ARGS__) -#define PSMX3_DBG(prov, subsys, func, line, format, ...) \ - FI_DBG(prov, subsys, "%s: " format, psm3_get_mylabel(), ##__VA_ARGS__) -#define psmx3_log(prov, level, subsys, func, line, format, ...) \ - fi_log(prov, level, subsys, func, line, "%s: " format, psm3_get_mylabel(), ##__VA_ARGS__) +#define PSMX3_INFO(prov, flags, format, ...) \ + FI_INFO(prov, flags, "%s: " format, psm3_get_mylabel(), ##__VA_ARGS__) +#define PSMX3_DBG_TRACE(prov, flags, format, ...) \ + FI_DBG_TRACE(prov, flags, "%s: " format, psm3_get_mylabel(), ##__VA_ARGS__) +#define PSMX3_TRACE(prov, flags, format, ...) \ + FI_TRACE(prov, flags, "%s: " format, psm3_get_mylabel(), ##__VA_ARGS__) +#define PSMX3_WARN(prov, flags, format, ...) 
\ + FI_WARN(prov, flags, "%s: " format, psm3_get_mylabel(), ##__VA_ARGS__) +#define PSMX3_WARN_SPARSE(prov, flags, format, ...) \ + FI_WARN_SPARSE(prov, flags, "%s: " format, psm3_get_mylabel(), ##__VA_ARGS__) +#define PSMX3_WARN_ONCE(prov, flags, format, ...) \ + FI_WARN_ONCE(prov, flags, "%s: " format, psm3_get_mylabel(), ##__VA_ARGS__) +#define PSMX3_DBG(prov, flags, func, line, format, ...) \ + FI_DBG(prov, flags, "%s: " format, psm3_get_mylabel(), ##__VA_ARGS__) +#define psmx3_log(prov, level, flags, func, line, format, ...) \ + fi_log(prov, level, flags, func, line, "%s: " format, psm3_get_mylabel(), ##__VA_ARGS__) extern struct fi_provider psmx3_prov; diff --git a/prov/verbs/src/verbs_ofi.h b/prov/verbs/src/verbs_ofi.h index 78b8c40369e..25551aafa4d 100644 --- a/prov/verbs/src/verbs_ofi.h +++ b/prov/verbs/src/verbs_ofi.h @@ -92,15 +92,15 @@ #define VERBS_PROV_NAME "verbs" -#define VRB_DBG(subsys, ...) FI_DBG(&vrb_prov, subsys, __VA_ARGS__) -#define VRB_INFO(subsys, ...) FI_INFO(&vrb_prov, subsys, __VA_ARGS__) -#define VRB_TRACE(subsys, ...) FI_TRACE(&vrb_prov, subsys, __VA_ARGS__) -#define VRB_WARN(subsys, ...) FI_WARN(&vrb_prov, subsys, __VA_ARGS__) - -#define VRB_WARN_ERRNO(subsys, fn) \ - VRB_WARN(subsys, fn ": %s (%d)\n", strerror(errno), errno) -#define VRB_WARN_ERR(subsys, fn, err) \ - VRB_WARN(subsys, fn ": %s (%d)\n", fi_strerror((int) -(err)), (int) err) +#define VRB_DBG(flags, ...) FI_DBG(&vrb_prov, flags, __VA_ARGS__) +#define VRB_INFO(flags, ...) FI_INFO(&vrb_prov, flags, __VA_ARGS__) +#define VRB_TRACE(flags, ...) FI_TRACE(&vrb_prov, flags, __VA_ARGS__) +#define VRB_WARN(flags, ...) 
FI_WARN(&vrb_prov, flags, __VA_ARGS__) + +#define VRB_WARN_ERRNO(flags, fn) \ + VRB_WARN(flags, fn ": %s (%d)\n", strerror(errno), errno) +#define VRB_WARN_ERR(flags, fn, err) \ + VRB_WARN(flags, fn ": %s (%d)\n", fi_strerror((int) -(err)), (int) err) #define VERBS_INJECT_FLAGS(ep, len, flags, desc) \ diff --git a/src/common.c b/src/common.c index 8c6db788afd..9c145fd7855 100644 --- a/src/common.c +++ b/src/common.c @@ -1038,20 +1038,20 @@ size_t ofi_mask_addr(struct sockaddr *maskaddr, const struct sockaddr *srcaddr, void ofi_straddr_log_internal(const char *func, int line, const struct fi_provider *prov, enum fi_log_level level, - enum fi_log_subsys subsys, char *log_str, + int flags, char *log_str, const void *addr) { char buf[OFI_ADDRSTRLEN]; uint32_t addr_format; size_t len = sizeof(buf); - if (fi_log_enabled(prov, level, subsys)) { + if (fi_log_enabled(prov, level, flags)) { if (addr) { addr_format = ofi_translate_addr_format(ofi_sa_family(addr)); - fi_log(prov, level, subsys, func, line, "%s: %s\n", log_str, + fi_log(prov, level, flags, func, line, "%s: %s\n", log_str, ofi_straddr(buf, &len, addr_format, addr)); } else { - fi_log(prov, level, subsys, func, line, "%s: (null)\n", log_str); + fi_log(prov, level, flags, func, line, "%s: (null)\n", log_str); } } } diff --git a/src/fi_tostr.c b/src/fi_tostr.c index d1446eb77b6..942a993f520 100644 --- a/src/fi_tostr.c +++ b/src/fi_tostr.c @@ -986,9 +986,6 @@ char *DEFAULT_SYMVER_PRE(fi_tostr_r)(char *buf, size_t len, case FI_TYPE_LOG_LEVEL: ofi_tostr_log_level(buf, len, *enumval); break; - case FI_TYPE_LOG_SUBSYS: - ofi_tostr_log_subsys(buf, len, *enumval); - break; case FI_TYPE_AV_ATTR: ofi_tostr_av_attr(buf, len, data); break; diff --git a/src/log.c b/src/log.c index 951bad48198..6f416971d21 100644 --- a/src/log.c +++ b/src/log.c @@ -46,24 +46,6 @@ #include "ofi_util.h" -enum { - OFI_LOG_SUBSYS_MAX = 10, - OFI_LOG_MAX = 4 -}; - -static const char * const log_subsys[] = { - [FI_LOG_CORE] = "core", - 
[FI_LOG_FABRIC] = "fabric", - [FI_LOG_DOMAIN] = "domain", - [FI_LOG_EP_CTRL] = "ep_ctrl", - [FI_LOG_EP_DATA] = "ep_data", - [FI_LOG_AV] = "av", - [FI_LOG_CQ] = "cq", - [FI_LOG_EQ] = "eq", - [FI_LOG_MR] = "mr", - [FI_LOG_CNTR] = "cntr", -}; - static const char * const log_levels[] = { [FI_LOG_WARN] = "warn", [FI_LOG_TRACE] = "trace", @@ -71,23 +53,8 @@ static const char * const log_levels[] = { [FI_LOG_DEBUG] = "debug", }; -enum { - FI_LOG_SUBSYS_OFFSET = OFI_LOG_MAX, - FI_LOG_PROV_OFFSET = FI_LOG_SUBSYS_OFFSET + OFI_LOG_SUBSYS_MAX, - FI_LOG_LEVEL_MASK = ((1 << OFI_LOG_MAX) - 1), - FI_LOG_SUBSYS_MASK = (((1 << OFI_LOG_SUBSYS_MAX) - 1) << - FI_LOG_SUBSYS_OFFSET), -// FI_LOG_PROV_MASK = (((1 << (64 - FI_LOG_PROV_OFFSET)) - 1) << -// FI_LOG_PROV_OFFSET) -}; - -#define FI_LOG_TAG(prov, level, subsys) \ - (((uint64_t) prov << FI_LOG_PROV_OFFSET) | \ - ((uint64_t) (1 << (subsys + FI_LOG_SUBSYS_OFFSET))) | \ - ((uint64_t) (1 << level))) - static int log_interval = 2000; -uint64_t log_mask; +static int log_level = -1; struct ofi_filter prov_log_filter; extern struct ofi_common_locks common_locks; @@ -109,9 +76,7 @@ static int fi_convert_log_str(const char *value) void fi_log_init(void) { - struct ofi_filter subsys_filter; - int level, i; - char *levelstr = NULL, *provstr = NULL, *subsysstr = NULL; + char *levelstr = NULL, *provstr = NULL; fi_param_define(NULL, "log_interval", FI_PARAM_INT, "Delay in ms between rate limited log messages " @@ -121,49 +86,34 @@ void fi_log_init(void) fi_param_define(NULL, "log_level", FI_PARAM_STRING, "Specify logging level: warn, trace, info, debug (default: warn)"); fi_param_get_str(NULL, "log_level", &levelstr); - level = fi_convert_log_str(levelstr); - if (level >= 0) - log_mask = ((1 << (level + 1)) - 1); + log_level = fi_convert_log_str(levelstr); fi_param_define(NULL, "log_prov", FI_PARAM_STRING, "Specify specific provider to log (default: all)"); fi_param_get_str(NULL, "log_prov", &provstr); ofi_create_filter(&prov_log_filter, 
provstr); - fi_param_define(NULL, "log_subsys", FI_PARAM_STRING, - "Specify specific subsystem to log (default: all)"); - fi_param_get_str(NULL, "log_subsys", &subsysstr); - ofi_create_filter(&subsys_filter, subsysstr); - for (i = 0; i < OFI_LOG_SUBSYS_MAX; i++) { - if (!ofi_apply_filter(&subsys_filter, log_subsys[i])) - log_mask |= (1ULL << (i + FI_LOG_SUBSYS_OFFSET)); - } - ofi_free_filter(&subsys_filter); pid = getpid(); } static int ofi_log_enabled(const struct fi_provider *prov, - enum fi_log_level level, enum fi_log_subsys subsys, + enum fi_log_level level, int prov_flags, uint64_t flags) { - int prov_filtered = (flags & FI_LOG_PROV_FILTERED); - - return ((FI_LOG_TAG(prov_filtered, level, subsys) & log_mask) == - FI_LOG_TAG(prov_filtered, level, subsys)); + return (level <= log_level) && !(flags & FI_LOG_PROV_FILTERED); } static void ofi_log(const struct fi_provider *prov, enum fi_log_level level, - enum fi_log_subsys subsys, const char *func, int line, + int flags, const char *func, int line, const char *msg) { - fprintf(stderr, "%s:%d:%ld:%s:%s:%s:%s():%d<%s> %s", + fprintf(stderr, "%s:%d:%ld:%s:%s:%s():%d<%s> %s", PACKAGE, pid, (unsigned long) time(NULL), log_prefix, - prov->name, log_subsys[subsys], func, line, - log_levels[level], msg); + prov->name, func, line, log_levels[level], msg); } static int ofi_log_ready(const struct fi_provider *prov, - enum fi_log_level level, enum fi_log_subsys subsys, + enum fi_log_level level, int prov_flags, uint64_t flags, uint64_t *showtime); static struct fi_ops_log ofi_import_log_ops = { @@ -254,12 +204,12 @@ static int ofi_bind_logging_fid(struct fid *fid, struct fid *bfid, } static int ofi_log_ready(const struct fi_provider *prov, - enum fi_log_level level, enum fi_log_subsys subsys, + enum fi_log_level level, int prov_flags, uint64_t flags, uint64_t *showtime) { uint64_t cur; - if (log_fid.ops->enabled(prov, level, subsys, flags)) { + if (log_fid.ops->enabled(prov, level, prov_flags, flags)) { cur = 
ofi_gettime_ms(); if (cur >= *showtime) { *showtime = cur + (uint64_t) log_interval; @@ -307,14 +257,6 @@ void ofi_tostr_log_level(char *buf, size_t len, enum fi_log_level level) ofi_strncatf(buf, len, log_levels[level]); } -void ofi_tostr_log_subsys(char *buf, size_t len, enum fi_log_subsys subsys) -{ - if (subsys > FI_LOG_CNTR) - ofi_strncatf(buf, len, "Unknown"); - else - ofi_strncatf(buf, len, log_subsys[subsys]); -} - void fi_log_fini(void) { ofi_free_filter(&prov_log_filter); @@ -322,35 +264,35 @@ void fi_log_fini(void) __attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) int DEFAULT_SYMVER_PRE(fi_log_enabled)(const struct fi_provider *prov, - enum fi_log_level level, - enum fi_log_subsys subsys) + enum fi_log_level level, int flags) { - uint64_t flags = 0; + uint64_t enable_flags = 0; if (ofi_prov_ctx(prov)->disable_logging) - flags |= FI_LOG_PROV_FILTERED; + enable_flags |= FI_LOG_PROV_FILTERED; - return log_fid.ops->enabled(prov, level, subsys, flags); + return log_fid.ops->enabled(prov, level, flags, enable_flags); } DEFAULT_SYMVER(fi_log_enabled_, fi_log_enabled, FABRIC_1.0); __attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) int DEFAULT_SYMVER_PRE(fi_log_ready)(const struct fi_provider *prov, - enum fi_log_level level, enum fi_log_subsys subsys, + enum fi_log_level level, int flags, uint64_t *showtime) { - uint64_t flags = 0; + uint64_t ready_flags = 0; if (ofi_prov_ctx(prov)->disable_logging) - flags |= FI_LOG_PROV_FILTERED; + ready_flags |= FI_LOG_PROV_FILTERED; - return log_fid.ops->ready(prov, level, subsys, flags, showtime); + return log_fid.ops->ready(prov, level, flags, ready_flags, showtime); } CURRENT_SYMVER(fi_log_ready_, fi_log_ready); __attribute__((visibility ("default"),EXTERNALLY_VISIBLE)) -void DEFAULT_SYMVER_PRE(fi_log)(const struct fi_provider *prov, enum fi_log_level level, - enum fi_log_subsys subsys, const char *func, int line, +void DEFAULT_SYMVER_PRE(fi_log)(const struct fi_provider *prov, + enum fi_log_level level, 
+ int flags, const char *func, int line, const char *fmt, ...) { char msg[1024]; @@ -361,6 +303,6 @@ void DEFAULT_SYMVER_PRE(fi_log)(const struct fi_provider *prov, enum fi_log_leve vsnprintf(msg + size, sizeof(msg) - size, fmt, vargs); va_end(vargs); - log_fid.ops->log(prov, level, subsys, func, line, msg); + log_fid.ops->log(prov, level, flags, func, line, msg); } DEFAULT_SYMVER(fi_log_, fi_log, FABRIC_1.0); From ac80575c8cfcdd911639bb820630b21e2a045582 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 28 Sep 2023 11:18:33 -0700 Subject: [PATCH 32/34] docs: Add information on porting applications between v1 and v2 Signed-off-by: Sean Hefty --- man/fabric.7.md | 208 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 208 insertions(+) diff --git a/man/fabric.7.md b/man/fabric.7.md index 93344c08da8..1a3a471d7f2 100644 --- a/man/fabric.7.md +++ b/man/fabric.7.md @@ -19,6 +19,9 @@ Libfabric is a high-performance fabric software library designed to provide low-latency interfaces to fabric hardware. For an in-depth discussion of the motivation and design see [`fi_guide`(7)](fi_guide.7.html). +For information on differences and portability between the libfabric version 1 +series and version 2 series, see the Version 2 section below. + # OVERVIEW Libfabric provides 'process direct I/O' to application software communicating @@ -389,6 +392,211 @@ call. ABI version starting with libfabric 1.14. Added fi_log_ready for providers. +# VERSION 2 + +The version 2 series focuses on streamlining the libfabric API. It removes +features from the API to enable simplifying and optimizing provider +implementations, plus provide greater design guidance to users. Version 2 is +ABI compatible with version 1, but not API compatible. Data structures and +APIs that are common between version 1 and version 2 share the same layouts +and signatures. 
Applications, once updated to the version 2 APIs, should be +able to support both libfabric version 1 and 2 with minimal to no differences +to their code bases. The following sections describe the significant changes +between version 1 and 2 which may impact applications. New features being +introduced in version 2 are not included, as the impact is the same as if +they were added into a newer version 1 release. + +## Address Vector Simplified + +Support for asynchronous insertion of addresses into an AV is removed. The +feature was never natively supported by a provider. Support for FI_AV_MAP +is removed. Providers are required to support FI_AV_TABLE. The expected +impact to applications with these changes is minimal. Applications that use +FI_AV_MAP can use the same logic with FI_AV_TABLE. By limiting fi_addr_t +values to indices, additional features and optimizations become available +which would otherwise require more significant changes to the API. + +## Completion Queue Restriction + +Endpoints may only be associated with a single completion queue. Support +for transmit and receive completions to be written to separate CQs is +removed. In many cases, providers are unable to completely decouple transmit +operations from receives and must always drive progress across both CQs. +This is required by providers that implement portions of the API in +software, such as tag matching or support for advanced completion levels +(e.g. FI_DELIVERY_COMPLETE). Limiting an endpoint to a single completion +queue simplifies and optimizes the provider implementation and allows for +a more direct mapping between a libfabric CQ and hardware. + +Note that this restriction applies to endpoints that are bound to +completion queues. This includes receive and transmit contexts that are +part of a scalable endpoint. When scalable endpoints are involved, +separate receive contexts and transmit contexts may still be associated +with separate completion queues.
+ +The expected impact of this change on applications is none to moderate. +Applications that use a single CQ per endpoint are not impacted. For +applications that bind an endpoint to separate send and receive CQs, +they must update to use a single CQ. Completion flags can then be used +to determine if a CQ entry is for a send or receive completion. See the +Threading Model section for more information on possible impacts to +serialization. + +## Completions Simplified + +The completion options FI_ORDER_STRICT and FI_ORDER_DATA are removed, along +with FI_ORDER_NONE. The latter is a flag that was defined as 0, making its +use confusing. There are no guarantees on completion order, including how +the data is written to target memory. This gives providers greater +flexibility on implementation and wire protocols. + +For applications that use unconnected endpoints, there is no expected impact +with these changes. Most providers did not support these ordering requirements +over unconnected endpoints. However, applications that use connected endpoints +might be impacted. Some providers did implement these options over connected +endpoints. Applications which are designed around these features will +require updates to their design. + +## Endpoint Type Removal + +Experimental endpoint types that were added for exploratory purposes +are removed. These are FI_EP_SOCK_DGRAM and FI_EP_SOCK_STREAM. There +is no expected impact to applications with this change. + +## Header File Cleanup + +Definitions which should have been kept internal to libfabric were exposed +directly to applications by being included in the publicly installed +header files. Such definitions are moved into internal headers. +Applications using these definitions will fail to compile against the +version 2 header files and need to define equivalent values and +macros in their own code bases. Removed definitions were not documented +in libfabric man pages. 
+ +## Feature (Deprecated) Removal + +Features and attributes that were marked as deprecated in version 1 +releases were removed from the API. This includes +fi_rx_attr::total_buffered_recv, FI_LOCAL_MR, FI_MR_BASIC, and +FI_MR_SCALABLE. Most applications are not expected to be impacted by these +changes. + +## Feature (Unimplemented) Removal + +Features which were never implemented by an upstream provider, and are not known +to be implemented by an out of tree provider, are removed. This includes +FI_VARIABLE_MSG, FI_XPU_TRIGGER, FI_RESTRICTED_COMP, FI_WAIT_MUTEX_COND, +and FI_NOTIFY_FLAGS_ONLY. There is no expected impact to applications +with this change. + +## fi_info Requirements + +The APIs now require that all fi_info structures passed to any function +call be allocated by libfabric. Applications are no longer permitted to +hand-craft an fi_info structure directly. This enables libfabric to +allocate and access hidden and provider specific fields, which can be passed +between API calls. For most applications, we expect no impact by this +change. Most applications use the fi_allocinfo() and fi_dupinfo() calls +to allocate and duplicate fi_info structures prior to modifying them. + +## Logging Simplified + +The fi_log related calls are updated to replace the enum fi_log_subsys with +int flags. Subsystem based filtering has been of limited use, while +complicating the logging implementation. Additionally, provider developers +can easily select the wrong subsystem for a given log message. This change +mostly impacts providers, but also impacts users displaying logging data. +Applications that intercept logging messages should be inspected to see +if the subsys parameters are interpreted. + +Version 2 providers will pass in a flags value of 0 instead, which +corresponded with the FI_LOG_CORE subsystem. The flags parameter provides +a mechanism by which the logging API can be extended in a compatible manner.
+ +## Memory Registration Simplified + +Support for asynchronous memory registration is removed. The feature was +never natively supported by a provider. Deprecated memory +options: FI_LOCAL_MR, FI_MR_BASIC, and FI_MR_SCALABLE, are also removed. Most +applications are not expected to be impacted by these changes. Applications +still using either FI_MR_BASIC or FI_MR_SCALABLE must be updated to use the +modernized mr_mode bits, which can provide equivalent functionality. + +## Poll Sets Removal + +The poll set (fid_poll) object and poll set APIs are removed. This includes +the calls fi_poll(), fi_poll_add(), and fi_poll_del(). Poll sets are +implemented as simple iterators used to drive progress over an array of CQs +and counters. Such iteration is trivial for an application to implement, +but the possibility of needing to support poll sets adds undue complexity +to providers. Poll sets also introduce inefficiencies by driving progress +without retrieving completions directly. This results in applications +needing to recheck the CQs and counters, which drives progress an additional +time. + +It is believed that poll sets are not widely used by applications. However, +for applications that do use poll sets, the poll set calls would need to be +replaced with equivalent functionality. It is expected that such functionality +would be straightforward to implement for most applications. + +## Progress Model Simplified + +The separate fi_domain_attr::control_progress and data_progress options are +combined under a single fi_domain_attr::progress field. The new field is an +alias for data_progress. There is minimal expected impact to applications +with this change. In most situations, control and data progress +operations are closely coupled, such that the data progress option took +precedence. + +However, applications using FI_EP_MSG may see an impact if they +rely on FI_PROGRESS_AUTO for control operations, such as handling connections, +but FI_PROGRESS_MANUAL for data operations.
Depending on the provider +implementation, manual progress may require more frequent checking for +asynchronous events (i.e. reading an EQ) to prevent stalls or timeouts +when handling connection events. + +Providers are requested to consider such impacts when migrating to +a single progress model. + +## Provider Removal + +Several providers that are not actively updated have been removed. +Applications that use these providers will need to use a version 1 library. +Removed providers are: bgq, gni, netdir, rstream, sockets, and usnic. +Applications using the sockets provider may be able to use the tcp provider as +an alternative. Windows Network Direct support is available through the +verbs provider. + +## Threading Model Simplified + +To help align application design with provider implementation for lockless +operation, the threading models align on FI_THREAD_DOMAIN for standard endpoints +and FI_THREAD_COMPLETION for scalable endpoints. The threading models for +FI_THREAD_FID and FI_THREAD_ENDPOINT are removed. Multi-threaded applications +using those threading models that desire lockless operation should consider +mapping libfabric resources around the domain or completion structures, +based on their endpoint type. Requests for FI_THREAD_FID or FI_THREAD_ENDPOINT +will be treated as FI_THREAD_SAFE. + +There is minimal to no expected impact to applications with this change, as +most providers did not optimize for the removed threading models. + +## Wait Sets Removal + +The wait set (fid_wait) object and wait set APIs are removed. This includes +the fi_wait() call and the FI_WAIT_SET enum fi_wait_obj. Wait sets add +complexity and inefficiency to the provider implementations by restricting +the wait object to which completion events must be reported. The removal of +wait sets does not impact support for blocking reads (e.g. fi_cq_sread) or +the use of other wait objects (e.g. FI_WAIT_FD). + +It is believed that wait sets are not widely used by applications.
However, +for applications that use wait sets, wait set functionality would need to +be replaced with equivalent functionality. This can be accomplished by +requesting native operating system wait objects (e.g. FI_WAIT_FD), using +fi_control() operations to retrieve the wait object, and using a native +operating system call (e.g. poll() or epoll()). + # SEE ALSO [`fi_info`(1)](fi_info.1.html), From 5f3c619ee967c6a8801366ca4dc7ad99a3b1b021 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 2 Oct 2023 16:23:34 -0700 Subject: [PATCH 33/34] core: Add new peer group feature Introduce the concept of peer groups. A peer group is a set of peers that are communicating together for some specific set of tasks. Peer groups provide a lower-level mapping of HPC and AI communicators. Signed-off-by: Sean Hefty --- include/rdma/fabric.h | 1 + include/rdma/fi_domain.h | 6 ++++++ man/fi_av.3.md | 29 +++++++++++++++++++++++++++++ man/fi_domain.3.md | 10 ++++++++++ 4 files changed, 46 insertions(+) diff --git a/include/rdma/fabric.h b/include/rdma/fabric.h index aab042b24d8..f9a7ef197d9 100644 --- a/include/rdma/fabric.h +++ b/include/rdma/fabric.h @@ -438,6 +438,7 @@ struct fi_domain_attr { size_t max_err_data; size_t mr_cnt; uint32_t tclass; + uint32_t max_group_id; }; struct fi_fabric_attr { diff --git a/include/rdma/fi_domain.h b/include/rdma/fi_domain.h index 73eeedd6b64..9a1a47b2082 100644 --- a/include/rdma/fi_domain.h +++ b/include/rdma/fi_domain.h @@ -512,6 +512,12 @@ fi_rx_addr(fi_addr_t fi_addr, int rx_index, int rx_ctx_bits) return (fi_addr_t) (((uint64_t) rx_index << (64 - rx_ctx_bits)) | fi_addr); } +static inline fi_addr_t +fi_group_addr(fi_addr_t fi_addr, uint32_t group_id) +{ + return (fi_addr_t) (((uint64_t) group_id << 32) | fi_addr); +} + #endif #ifdef __cplusplus diff --git a/man/fi_av.3.md b/man/fi_av.3.md index 142b4c58eb0..1ae0fea19d7 100644 --- a/man/fi_av.3.md +++ b/man/fi_av.3.md @@ -51,6 +51,8 @@ int fi_av_lookup(struct fid_av *av, fi_addr_t fi_addr, 
fi_addr_t fi_rx_addr(fi_addr_t fi_addr, int rx_index, int rx_ctx_bits); +fi_addr_t fi_group_addr(fi_addr_t fi_addr, uint32_t group_id); + const char * fi_av_straddr(struct fid_av *av, const void *addr, char *buf, size_t *len); ``` @@ -423,6 +425,33 @@ that the output from the AV insertion call is unchanged. The provider will return an fi_addr_t value that maps to each address, and that value must be used for all data transfer operations. +# PEER GROUPS + +Peer groups provide a direct mapping to HPC and AI communicator constructs. + +The addresses in an AV represent the full set of peers that a local process +may communicate with. A peer group conceptually represents a subset of +those peers. A peer group may be used to identify peers working on a common +task, which need their communication logically separated from other traffic. +Peer groups are not a security mechanism, but instead help separate data. +A given peer may belong to 0 or more peer groups, +with no limit placed on how many peers can belong to a single peer group. + +Peer groups are identified using an integer value, known as a group id. +Group ids are selected by the user and conveyed as part of an fi_addr_t +value. The management of a group id and its relationship to addresses +inserted into an AV is directly controlled by the user. When enabled, +sent messages are marked as belonging to a specific peer group, and posted +receive buffers must have a matching group id to receive the data. + +Users are responsible for selecting a valid peer group id, subject to the +limitation negotiated using the domain attribute max_group_id. The group +id of an fi_addr_t may be set using the fi_group_addr() function. + +## fi_group_addr + +This function is used to set the group ID portion of an fi_addr_t. 
+ # RETURN VALUES Insertion calls will return the number of addresses that were successfully diff --git a/man/fi_domain.3.md b/man/fi_domain.3.md index d6cf62bfff0..d64d58c9254 100644 --- a/man/fi_domain.3.md +++ b/man/fi_domain.3.md @@ -208,6 +208,7 @@ struct fi_domain_attr { size_t max_err_data; size_t mr_cnt; uint32_t tclass; + uint32_t max_group_id; }; ``` @@ -710,6 +711,15 @@ This specifies the default traffic class that will be associated any endpoints created within the domain. See [`fi_endpoint`(3)](fi_endpoint.3.html) for additional information. +## Maximum Peer Group Id (max_group_id) + +The maximum value that a peer group may be assigned, inclusive. Valid peer +group id's must be between 0 and max_group_id. See [`fi_av`(3)](fi_av.3.html) +for additional information on peer groups and their use. Users may request +support for peer groups by setting this to a non-zero value. Providers that +cannot meet the requested max_group_id will fail fi_getinfo(). On output, +providers may return a value higher than that requested by the application. + # RETURN VALUE Returns 0 on success. On error, a negative value corresponding to fabric From 44246f8677cdf5f72c1e12903fad31a5f360cba3 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 2 Oct 2023 16:23:41 -0700 Subject: [PATCH 34/34] core: Define new tag formats Allow specifying precise tag formatting options. The mem_tag_format takes as input a set of bit fields. In practice, this ends up being unusable to implement, resulting in the entire tag simply being masked with ignore bits. When the mem_tag_format value only has the lower bits set (< 256), interpret the format as specific options. Two new options are defined, one aligned with MPI and the other with CCLs. This information can be used by providers to optimize for the separate use cases. 
Signed-off-by: Sean Hefty --- include/rdma/fabric.h | 7 ++ include/rdma/fi_tagged.h | 10 +++ man/fi_endpoint.3.md | 138 +++++++++++++++++++++++++++++---------- 3 files changed, 121 insertions(+), 34 deletions(-) diff --git a/include/rdma/fabric.h b/include/rdma/fabric.h index f9a7ef197d9..c26455fc9d9 100644 --- a/include/rdma/fabric.h +++ b/include/rdma/fabric.h @@ -333,6 +333,13 @@ enum { FI_PROTO_SM2, }; +enum { + FI_TAG_BITS, + FI_TAG_HPC, + FI_TAG_AI, + FI_TAG_MAX_FORMAT = (1ULL << 16), +}; + enum { FI_TC_UNSPEC = 0, FI_TC_DSCP = 0x100, diff --git a/include/rdma/fi_tagged.h b/include/rdma/fi_tagged.h index 61eba4e860a..a8525162d82 100644 --- a/include/rdma/fi_tagged.h +++ b/include/rdma/fi_tagged.h @@ -42,6 +42,16 @@ extern "C" { #endif +#define FI_HPC_IGNORE_TAG ((uint64_t) UINT32_MAX) +#define FI_HPC_IGNORE_PAYLOAD (((uint64_t) UINT8_MAX) << 32) + + +static inline uint64_t +fi_tag_hpc(int tag, uint8_t payload_id) +{ + return (((uint64_t) payload_id) << 32) | ((uint64_t) (uint32_t) tag); +} + struct fi_msg_tagged { const struct iovec *msg_iov; void **desc; diff --git a/man/fi_endpoint.3.md b/man/fi_endpoint.3.md index eee80e0a2c1..5d2dd7b28b8 100644 --- a/man/fi_endpoint.3.md +++ b/man/fi_endpoint.3.md @@ -766,40 +766,110 @@ A value of -1 guarantees ordering for any data size. ## mem_tag_format - Memory Tag Format -The memory tag format is a bit array used to convey the number of -tagged bits supported by a provider. Additionally, it may be used to -divide the bit array into separate fields. The mem_tag_format -optionally begins with a series of bits set to 0, to signify bits -which are ignored by the provider. Following the initial prefix of -ignored bits, the array will consist of alternating groups of bits set -to all 1's or all 0's. Each group of bits corresponds to a tagged -field. 
The implication of defining a tagged field is that when a mask -is applied to the tagged bit array, all bits belonging to a single -field will either be set to 1 or 0, collectively. - -For example, a mem_tag_format of 0x30FF indicates support for 14 -tagged bits, separated into 3 fields. The first field consists of -2-bits, the second field 4-bits, and the final field 8-bits. Valid -masks for such a tagged field would be a bitwise OR'ing of zero or -more of the following values: 0x3000, 0x0F00, and 0x00FF. The provider -may not validate the mask provided by the application for performance -reasons. - -By identifying fields within a tag, a provider may be able to optimize -their search routines. An application which requests tag fields must -provide tag masks that either set all mask bits corresponding to a -field to all 0 or all 1. When negotiating tag fields, an application -can request a specific number of fields of a given size. A provider -must return a tag format that supports the requested number of fields, -with each field being at least the size requested, or fail the -request. A provider may increase the size of the fields. When reporting -completions (see FI_CQ_FORMAT_TAGGED), it is not guaranteed that the -provider would clear out any unsupported tag bits in the tag field of -the completion entry. - -It is recommended that field sizes be ordered from smallest to -largest. A generic, unstructured tag and mask can be achieved by -requesting a bit array consisting of alternating 1's and 0's. +The memory tag format field is used to convey information on +the use of the tag and ignore parameters in the fi_tagged API calls, +as well as matching criteria. This information is used by the +provider to optimize tag matching support, including alignment with +wire protocols. 
The following tag formats are defined: + +*FI_TAG_BITS* + +: If specified on input to fi_getinfo, this indicates that tags + contain up to 64-bits of data, and the receiver must apply ignore_bits + to tags when matching receive buffers with sends. The output of + fi_getinfo will set 0 or more upper bits of mem_tag_format to 0 to + indicate those tag bits which are ignored or reserved by the provider. + Applications must check the number of upper bits which are 0 and + set them to 0 on all tag and ignore bits. + + The value of FI_TAG_BITS is 0, making this the default behavior if + the hints are left uninitialized after being allocated by fi_allocinfo(). + This format provides the most flexibility to applications, but limits + provider optimization options. FI_TAG_BITS aligns with the behavior + defined for libfabric versions 1.x. + +*FI_TAG_HPC* + +: FI_TAG_HPC is a constrained usage of FI_TAG_BITS. When selected, applications + treat the tag as fields of data, rather than bits, with the ability to + wildcard each field. The HPC tag format specifically targets MPI based + implementations and applications. An HPC formatted tag consists of 2 fields: + a message tag and a payload identifier. The message tag is a 32-bit searchable + tag. Matching on a message tag requires searching through a list of posted + buffers at the receiver, which we refer to as a searchable tag. + The integer tag in MPI point-to-point messages can map directly to + the libfabric message tag field. + + The second field is an identifier that corresponds to the operation or + data being carried in the message payload. For example, this field may + be used to identify the type of collective operation associated with a + message payload. Note that only the size and behavior for + the HPC tag formats are defined. Described uses of the fields are only + suggestions. + + Applications that use the HPC format should initialize their tags using + the fi_tag_hpc() function. 
Ignore bits should be specified as + FI_HPC_IGNORE_TAG, FI_HPC_IGNORE_PAYLOAD, or their bitwise OR'ing. + +*FI_TAG_AI* + +: The FI_TAG_AI format further restricts the FI_TAG_HPC format. When used, + only a single tag field may be set, which must match exactly at the target. + The field may not be wildcarded. The AI tag format targets collective + communication libraries and applications. The AI format consists of a single + field: a payload identifier. The identifier corresponds to the operation or + data being carried in the message payload. For example, this field may be + used to identify whether a message is for point-to-point communication or + part of a collective operation, and in the latter case, the type of + collective operation. + + The AI tag format does not require searching for matching receive + buffers, only directing the message to the correct virtual message queue + based on the payload identifier. + + Applications that use the AI format pass in the payload identifier + directly as the tag and set ignore bits to 0. + +*FI_TAG_MAX_FORMAT* +: If the value of mem_tag_format is >= FI_TAG_MAX_FORMAT, the tag format + is treated as a set of bit fields. The behavior is functionally the same + as FI_TAG_BITS. The following description is for backwards compatibility + and describes how the provider may interpret the mem_tag_format field + if the value is >= FI_TAG_MAX_FORMAT. + + The memory tag format may be used to + divide the bit array into separate fields. The mem_tag_format + optionally begins with a series of bits set to 0, to signify bits + which are ignored by the provider. Following the initial prefix of + ignored bits, the array will consist of alternating groups of bits set + to all 1's or all 0's. Each group of bits corresponds to a tagged + field. The implication of defining a tagged field is that when a mask + is applied to the tagged bit array, all bits belonging to a single + field will either be set to 1 or 0, collectively. 
+ + For example, a mem_tag_format of 0x30FF indicates support for 14 + tagged bits, separated into 3 fields. The first field consists of + 2-bits, the second field 4-bits, and the final field 8-bits. Valid + masks for such a tagged field would be a bitwise OR'ing of zero or + more of the following values: 0x3000, 0x0F00, and 0x00FF. The provider + may not validate the mask provided by the application for performance + reasons. + + By identifying fields within a tag, a provider may be able to optimize + their search routines. An application which requests tag fields must + provide tag masks that either set all mask bits corresponding to a + field to all 0 or all 1. When negotiating tag fields, an application + can request a specific number of fields of a given size. A provider + must return a tag format that supports the requested number of fields, + with each field being at least the size requested, or fail the + request. A provider may increase the size of the fields. When reporting + completions (see FI_CQ_FORMAT_TAGGED), it is not guaranteed that the + provider would clear out any unsupported tag bits in the tag field of + the completion entry. + + It is recommended that field sizes be ordered from smallest to + largest. A generic, unstructured tag and mask can be achieved by + requesting a bit array consisting of alternating 1's and 0's. ## tx_ctx_cnt - Transmit Context Count