diff --git a/opal/mca/btl/base/btl_base_frame.c b/opal/mca/btl/base/btl_base_frame.c index 6cb49e5f49c..f5f15c86544 100644 --- a/opal/mca/btl/base/btl_base_frame.c +++ b/opal/mca/btl/base/btl_base_frame.c @@ -61,6 +61,12 @@ mca_base_var_enum_value_flag_t mca_btl_base_atomic_enum_flags[] = { {MCA_BTL_ATOMIC_SUPPORTS_AND, "and", 0}, {MCA_BTL_ATOMIC_SUPPORTS_OR, "or", 0}, {MCA_BTL_ATOMIC_SUPPORTS_XOR, "xor", 0}, + {MCA_BTL_ATOMIC_SUPPORTS_LAND, "land", 0}, + {MCA_BTL_ATOMIC_SUPPORTS_LOR, "lor", 0}, + {MCA_BTL_ATOMIC_SUPPORTS_LXOR, "lxor", 0}, + {MCA_BTL_ATOMIC_SUPPORTS_SWAP, "swap", 0}, + {MCA_BTL_ATOMIC_SUPPORTS_MIN, "min", 0}, + {MCA_BTL_ATOMIC_SUPPORTS_MAX, "max", 0}, {MCA_BTL_ATOMIC_SUPPORTS_CSWAP, "compare-and-swap", 0}, {MCA_BTL_ATOMIC_SUPPORTS_GLOB, "global"}, {0, NULL, 0} diff --git a/opal/mca/btl/btl.h b/opal/mca/btl/btl.h index fb23a095506..4253c157872 100644 --- a/opal/mca/btl/btl.h +++ b/opal/mca/btl/btl.h @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012-2013 NVIDIA Corporation. All rights reserved. 
@@ -293,10 +293,44 @@ enum { MCA_BTL_ATOMIC_SUPPORTS_OR = 0x00000400, /** The btl supports atomic bitwise exclusive or */ MCA_BTL_ATOMIC_SUPPORTS_XOR = 0x00000800, + + /** The btl supports logical and */ + MCA_BTL_ATOMIC_SUPPORTS_LAND = 0x00001000, + /** The btl supports logical or */ + MCA_BTL_ATOMIC_SUPPORTS_LOR = 0x00002000, + /** The btl supports logical exclusive or */ + MCA_BTL_ATOMIC_SUPPORTS_LXOR = 0x00004000, + + /** The btl supports atomic swap */ + MCA_BTL_ATOMIC_SUPPORTS_SWAP = 0x00010000, + + /** The btl supports atomic min */ + MCA_BTL_ATOMIC_SUPPORTS_MIN = 0x00100000, + /** The btl supports atomic max */ + MCA_BTL_ATOMIC_SUPPORTS_MAX = 0x00200000, + /** The btl supports atomic compare-and-swap */ MCA_BTL_ATOMIC_SUPPORTS_CSWAP = 0x10000000, + /** The btl guarantees global atomicity (can mix btl atomics with cpu atomics) */ MCA_BTL_ATOMIC_SUPPORTS_GLOB = 0x20000000, + + + /** The btl supports 32-bit integer operations. Keep in mind the btl may + * support only a subset of the available atomics. */ + MCA_BTL_ATOMIC_SUPPORTS_32BIT = 0x40000000, + + /** The btl supports floating-point operations. Keep in mind the btl may + * support only a subset of the available atomics and may not support + * both 64 and 32-bit floating point. 
*/ + MCA_BTL_ATOMIC_SUPPORTS_FLOAT = 0x80000000, +}; + +enum { + /** Use 32-bit atomics */ + MCA_BTL_ATOMIC_FLAG_32BIT = 0x00000001, + /** Use floating-point atomics */ + MCA_BTL_ATOMIC_FLAG_FLOAT = 0x00000002, }; enum mca_btl_base_atomic_op_t { @@ -308,6 +342,20 @@ enum mca_btl_base_atomic_op_t { MCA_BTL_ATOMIC_OR = 0x0012, /** Atomic xor: (*remote_address) = (*remote_address) ^ operand */ MCA_BTL_ATOMIC_XOR = 0x0014, + /** Atomic logical and: (*remote_address) = (*remote_address) && operand */ + MCA_BTL_ATOMIC_LAND = 0x0015, + /** Atomic logical or: (*remote_address) = (*remote_address) || operand */ + MCA_BTL_ATOMIC_LOR = 0x0016, + /** Atomic logical xor: (*remote_address) = (*remote_address) != operand */ + MCA_BTL_ATOMIC_LXOR = 0x0017, + /** Atomic swap: (*remote_address) = operand */ + MCA_BTL_ATOMIC_SWAP = 0x001a, + /** Atomic min */ + MCA_BTL_ATOMIC_MIN = 0x0020, + /** Atomic max */ + MCA_BTL_ATOMIC_MAX = 0x0021, + + MCA_BTL_ATOMIC_LAST, }; typedef enum mca_btl_base_atomic_op_t mca_btl_base_atomic_op_t; @@ -977,7 +1025,7 @@ typedef int (*mca_btl_base_module_get_fn_t) (struct mca_btl_base_module_t *btl, * (remote_address, remote_address + 8) * @param op (IN) Operation to perform * @param operand (IN) Operand for the operation - * @param flags (IN) Flags for this put operation + * @param flags (IN) Flags for this atomic operation * @param order (IN) Ordering * @param cbfunc (IN) Function to call on completion (if queued) * @param cbcontext (IN) Context for the callback @@ -1021,7 +1069,7 @@ typedef int (*mca_btl_base_module_atomic_op64_fn_t) (struct mca_btl_base_module_ * (remote_address, remote_address + 8) * @param op (IN) Operation to perform * @param operand (IN) Operand for the operation - * @param flags (IN) Flags for this put operation + * @param flags (IN) Flags for this atomic operation * @param order (IN) Ordering * @param cbfunc (IN) Function to call on completion (if queued) * @param cbcontext (IN) Context for the callback @@ -1067,7 +1115,7 @@ 
typedef int (*mca_btl_base_module_atomic_fop64_fn_t) (struct mca_btl_base_module * (remote_address, remote_address + 8) * @param compare (IN) Operand for the operation * @param value (IN) Value to store on success - * @param flags (IN) Flags for this put operation + * @param flags (IN) Flags for this atomic operation * @param order (IN) Ordering * @param cbfunc (IN) Function to call on completion (if queued) * @param cbcontext (IN) Context for the callback diff --git a/opal/mca/btl/openib/btl_openib_atomic.c b/opal/mca/btl/openib/btl_openib_atomic.c index 0c6460f2cf3..ec0eb644f1a 100644 --- a/opal/mca/btl/openib/btl_openib_atomic.c +++ b/opal/mca/btl/openib/btl_openib_atomic.c @@ -112,7 +112,7 @@ int mca_btl_openib_atomic_fop (struct mca_btl_base_module_t *btl, struct mca_btl void *cbcontext, void *cbdata) { - if (OPAL_UNLIKELY(MCA_BTL_ATOMIC_ADD != op)) { + if (OPAL_UNLIKELY(MCA_BTL_ATOMIC_ADD != op || ((MCA_BTL_ATOMIC_FLAG_32BIT | MCA_BTL_ATOMIC_FLAG_FLOAT) & flags))) { /* only 64-bit integer fetch-and-add is offloaded here: reject 32-bit and floating-point requests instead of running them as an integer add */ return OPAL_ERR_NOT_SUPPORTED; } @@ -128,6 +128,10 @@ int mca_btl_openib_atomic_cswap (struct mca_btl_base_module_t *btl, struct mca_b uint64_t value, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) { + if (OPAL_UNLIKELY(MCA_BTL_ATOMIC_FLAG_32BIT & flags)) { + return OPAL_ERR_NOT_SUPPORTED; + } + return mca_btl_openib_atomic_internal (btl, endpoint, local_address, remote_address, local_handle, remote_handle, IBV_WR_ATOMIC_CMP_AND_SWP, compare, value, flags, order, cbfunc, cbcontext, cbdata); diff --git a/opal/mca/btl/ugni/btl_ugni_atomic.c b/opal/mca/btl/ugni/btl_ugni_atomic.c index 981bc759ee9..3c62670da89 100644 --- a/opal/mca/btl/ugni/btl_ugni_atomic.c +++ b/opal/mca/btl/ugni/btl_ugni_atomic.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * @@ -11,18 +11,66 @@ #include "btl_ugni_rdma.h" -static gni_fma_cmd_type_t famo_cmds[] = { - [MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC_FADD, - [MCA_BTL_ATOMIC_AND] = GNI_FMA_ATOMIC_FAND, - [MCA_BTL_ATOMIC_OR] = GNI_FMA_ATOMIC_FOR, - [MCA_BTL_ATOMIC_XOR] = GNI_FMA_ATOMIC_FXOR, +static gni_fma_cmd_type_t amo_cmds[][MCA_BTL_ATOMIC_LAST] = { /* GNI commands used by mca_btl_ugni_aop, indexed [OPAL datatype][btl atomic op]; an entry left at 0 is reported as OPAL_ERR_NOT_SUPPORTED */ + [OPAL_INT32] = { /* NOTE(review): the 32-bit rows fill LAND/LOR/LXOR with the GNI AND/OR/XOR commands and leave the AND/OR/XOR ops unset -- confirm this is intended */ + [MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_IADD_S, + [MCA_BTL_ATOMIC_LAND] = GNI_FMA_ATOMIC2_AND_S, + [MCA_BTL_ATOMIC_LOR] = GNI_FMA_ATOMIC2_OR_S, + [MCA_BTL_ATOMIC_LXOR] = GNI_FMA_ATOMIC2_XOR_S, + [MCA_BTL_ATOMIC_SWAP] = GNI_FMA_ATOMIC2_SWAP_S, + [MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_IMIN_S, + [MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_IMAX_S, + }, + [OPAL_INT64] = { + [MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC_ADD, + [MCA_BTL_ATOMIC_AND] = GNI_FMA_ATOMIC_AND, + [MCA_BTL_ATOMIC_OR] = GNI_FMA_ATOMIC_OR, + [MCA_BTL_ATOMIC_XOR] = GNI_FMA_ATOMIC_XOR, + [MCA_BTL_ATOMIC_SWAP] = GNI_FMA_ATOMIC2_SWAP, + [MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_IMIN, + [MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_IMAX, + }, + [OPAL_FLOAT] = { + [MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FPADD_S, + [MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FPMIN_S, + [MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FPMAX_S, + }, + [OPAL_DOUBLE] = { + [MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FPADD, + [MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FPMIN, + [MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FPMAX, + }, }; -static gni_fma_cmd_type_t amo_cmds[] = { - [MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC_ADD, - [MCA_BTL_ATOMIC_AND] = GNI_FMA_ATOMIC_AND, - [MCA_BTL_ATOMIC_OR] = GNI_FMA_ATOMIC_OR, - [MCA_BTL_ATOMIC_XOR] = GNI_FMA_ATOMIC_XOR, +static gni_fma_cmd_type_t famo_cmds[][MCA_BTL_ATOMIC_LAST] = { /* GNI commands used by mca_btl_ugni_afop, indexed [OPAL datatype][btl atomic op]; an entry left at 0 is reported as OPAL_ERR_NOT_SUPPORTED */ + [OPAL_INT32] = { + [MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FIADD_S, + [MCA_BTL_ATOMIC_LAND] = GNI_FMA_ATOMIC2_FAND_S, + [MCA_BTL_ATOMIC_LOR] = GNI_FMA_ATOMIC2_FOR_S, + [MCA_BTL_ATOMIC_LXOR] = GNI_FMA_ATOMIC2_FXOR_S, + [MCA_BTL_ATOMIC_SWAP] = GNI_FMA_ATOMIC2_FSWAP_S, + [MCA_BTL_ATOMIC_MIN] = 
GNI_FMA_ATOMIC2_FIMIN_S, + [MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FIMAX_S, + }, + [OPAL_INT64] = { + [MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC_FADD, + [MCA_BTL_ATOMIC_AND] = GNI_FMA_ATOMIC_FAND, + [MCA_BTL_ATOMIC_OR] = GNI_FMA_ATOMIC_FOR, + [MCA_BTL_ATOMIC_XOR] = GNI_FMA_ATOMIC_FXOR, + [MCA_BTL_ATOMIC_SWAP] = GNI_FMA_ATOMIC2_FSWAP, + [MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FIMIN, + [MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FIMAX, + }, + [OPAL_FLOAT] = { + [MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FFPADD_S, + [MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FFPMIN_S, + [MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FFPMAX_S, + }, + [OPAL_DOUBLE] = { + [MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FFPADD, + [MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FFPMIN, + [MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FFPMAX, + }, }; int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, @@ -32,7 +80,20 @@ int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end { gni_mem_handle_t dummy = {0, 0}; mca_btl_ugni_post_descriptor_t *post_desc; - int rc; + int gni_op, rc, type; + size_t size; + + size = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? 4 : 8; + if (MCA_BTL_ATOMIC_FLAG_FLOAT & flags) { + type = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? OPAL_FLOAT : OPAL_DOUBLE; + } else { + type = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? 
OPAL_INT32 : OPAL_INT64; + } + + gni_op = amo_cmds[type][op]; + if (0 == gni_op) { + return OPAL_ERR_NOT_SUPPORTED; + } rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { @@ -45,8 +106,8 @@ int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end } init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, 0, dummy, remote_address, - remote_handle->gni_handle, 8, 0); - post_desc->desc.base.amo_cmd = amo_cmds[op]; + remote_handle->gni_handle, size, 0); + post_desc->desc.base.amo_cmd = gni_op; post_desc->desc.base.first_operand = operand; @@ -54,6 +115,10 @@ int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end rc = GNI_PostFma (endpoint->rdma_ep_handle, &post_desc->desc.base); OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock); if (GNI_RC_SUCCESS != rc) { + mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc); + if (GNI_RC_ILLEGAL_OP == rc) { + return OPAL_ERR_NOT_SUPPORTED; + } return OPAL_ERR_OUT_OF_RESOURCE; } @@ -67,7 +132,20 @@ int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_en void *cbcontext, void *cbdata) { mca_btl_ugni_post_descriptor_t *post_desc; - int rc; + int gni_op, rc, type; + size_t size; + + size = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? 4 : 8; + if (MCA_BTL_ATOMIC_FLAG_FLOAT & flags) { + type = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? OPAL_FLOAT : OPAL_DOUBLE; + } else { + type = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? 
OPAL_INT32 : OPAL_INT64; + } + + gni_op = famo_cmds[type][op]; + if (0 == gni_op) { + return OPAL_ERR_NOT_SUPPORTED; + } rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { @@ -81,8 +159,8 @@ int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_en init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle, - remote_address, remote_handle->gni_handle, 8, 0); - post_desc->desc.base.amo_cmd = famo_cmds[op]; + remote_address, remote_handle->gni_handle, size, 0); + post_desc->desc.base.amo_cmd = gni_op; post_desc->desc.base.first_operand = operand; @@ -91,6 +169,9 @@ int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_en OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock); if (GNI_RC_SUCCESS != rc) { mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc); + if (GNI_RC_ILLEGAL_OP == rc) { + return OPAL_ERR_NOT_SUPPORTED; + } return OPAL_ERR_OUT_OF_RESOURCE; } @@ -103,7 +184,11 @@ int mca_btl_ugni_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_ int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata) { mca_btl_ugni_post_descriptor_t *post_desc; - int rc; + int gni_op, rc; + size_t size; + + gni_op = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? GNI_FMA_ATOMIC2_CSWAP_S : GNI_FMA_ATOMIC_CSWAP; + size = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? 
4 : 8; rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { @@ -117,8 +202,8 @@ int mca_btl_ugni_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_ init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle, - remote_address, remote_handle->gni_handle, 8, 0); - post_desc->desc.base.amo_cmd = GNI_FMA_ATOMIC_CSWAP; + remote_address, remote_handle->gni_handle, size, 0); + post_desc->desc.base.amo_cmd = gni_op; post_desc->desc.base.first_operand = compare; post_desc->desc.base.second_operand = value; diff --git a/opal/mca/btl/ugni/btl_ugni_component.c b/opal/mca/btl/ugni/btl_ugni_component.c index 04076a16d6b..145401692fe 100644 --- a/opal/mca/btl/ugni/btl_ugni_component.c +++ b/opal/mca/btl/ugni/btl_ugni_component.c @@ -275,11 +275,8 @@ btl_ugni_component_register(void) /* * see def. of ALIGNMENT_MASK to figure this one out */ - if (GNI_DEVICE_GEMINI == device_type) { - mca_btl_ugni_module.super.btl_get_alignment = 4; - } else { - mca_btl_ugni_module.super.btl_get_alignment = 0; - } + /* both gemini and aries have a 4-byte alignment requirement on remote addresses */ + mca_btl_ugni_module.super.btl_get_alignment = 4; /* threshold for put */ mca_btl_ugni_module.super.btl_min_rdma_pipeline_size = 8 * 1024; @@ -291,6 +288,13 @@ btl_ugni_component_register(void) MCA_BTL_ATOMIC_SUPPORTS_AND | MCA_BTL_ATOMIC_SUPPORTS_OR | MCA_BTL_ATOMIC_SUPPORTS_XOR | MCA_BTL_ATOMIC_SUPPORTS_CSWAP; + if (GNI_DEVICE_ARIES == device_type) { + /* aries supports additional atomic operations */ + mca_btl_ugni_module.super.btl_atomic_flags |= MCA_BTL_ATOMIC_SUPPORTS_MIN | MCA_BTL_ATOMIC_SUPPORTS_MAX | + MCA_BTL_ATOMIC_SUPPORTS_LAND | MCA_BTL_ATOMIC_SUPPORTS_LOR | MCA_BTL_ATOMIC_SUPPORTS_LXOR | + MCA_BTL_ATOMIC_SUPPORTS_32BIT | MCA_BTL_ATOMIC_SUPPORTS_FLOAT; + } + mca_btl_ugni_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t); 
mca_btl_ugni_module.super.btl_bandwidth = 40000; /* Mbs */