Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions opal/mca/btl/base/btl_base_frame.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@ mca_base_var_enum_value_flag_t mca_btl_base_atomic_enum_flags[] = {
{MCA_BTL_ATOMIC_SUPPORTS_AND, "and", 0},
{MCA_BTL_ATOMIC_SUPPORTS_OR, "or", 0},
{MCA_BTL_ATOMIC_SUPPORTS_XOR, "xor", 0},
{MCA_BTL_ATOMIC_SUPPORTS_LAND, "land", 0},
{MCA_BTL_ATOMIC_SUPPORTS_LOR, "lor", 0},
{MCA_BTL_ATOMIC_SUPPORTS_LXOR, "lxor", 0},
{MCA_BTL_ATOMIC_SUPPORTS_SWAP, "swap", 0},
{MCA_BTL_ATOMIC_SUPPORTS_MIN, "min", 0},
{MCA_BTL_ATOMIC_SUPPORTS_MAX, "max", 0},
{MCA_BTL_ATOMIC_SUPPORTS_CSWAP, "compare-and-swap", 0},
{MCA_BTL_ATOMIC_SUPPORTS_GLOB, "global"},
{0, NULL, 0}
Expand Down
56 changes: 52 additions & 4 deletions opal/mca/btl/btl.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012-2013 NVIDIA Corporation. All rights reserved.
Expand Down Expand Up @@ -293,10 +293,44 @@ enum {
MCA_BTL_ATOMIC_SUPPORTS_OR = 0x00000400,
/** The btl supports atomic bitwise exclusive or */
MCA_BTL_ATOMIC_SUPPORTS_XOR = 0x00000800,

/** The btl supports logical and */
MCA_BTL_ATOMIC_SUPPORTS_LAND = 0x00001000,
/** The btl supports logical or */
MCA_BTL_ATOMIC_SUPPORTS_LOR = 0x00002000,
/** The btl supports logical exclusive or */
MCA_BTL_ATOMIC_SUPPORTS_LXOR = 0x00004000,

/** The btl supports atomic swap */
MCA_BTL_ATOMIC_SUPPORTS_SWAP = 0x00010000,

/** The btl supports atomic min */
MCA_BTL_ATOMIC_SUPPORTS_MIN = 0x00100000,
/** The btl supports atomic max */
MCA_BTL_ATOMIC_SUPPORTS_MAX = 0x00200000,

/** The btl supports atomic compare-and-swap */
MCA_BTL_ATOMIC_SUPPORTS_CSWAP = 0x10000000,

/** The btl guarantees global atomicity (can mix btl atomics with cpu atomics) */
MCA_BTL_ATOMIC_SUPPORTS_GLOB = 0x20000000,


/** The btl supports 32-bit integer operations. Keep in mind the btl may
* support only a subset of the available atomics. */
MCA_BTL_ATOMIC_SUPPORTS_32BIT = 0x40000000,

/** The btl supports floating-point operations. Keep in mind the btl may
* support only a subset of the available atomics and may not support
* both 64-bit and 32-bit floating point. */
MCA_BTL_ATOMIC_SUPPORTS_FLOAT = 0x80000000,
};

/** Per-operation flag bits passed in the "flags" argument of the btl atomic calls. */
enum {
    /** Operate on a 32-bit operand */
    MCA_BTL_ATOMIC_FLAG_32BIT = 1 << 0,
    /** Operate on a floating-point operand */
    MCA_BTL_ATOMIC_FLAG_FLOAT = 1 << 1,
};

enum mca_btl_base_atomic_op_t {
Expand All @@ -308,6 +342,20 @@ enum mca_btl_base_atomic_op_t {
MCA_BTL_ATOMIC_OR = 0x0012,
/** Atomic xor: (*remote_address) = (*remote_address) ^ operand */
MCA_BTL_ATOMIC_XOR = 0x0014,
/** Atomic logical and: (*remote_address) = (*remote_address) && operand */
MCA_BTL_ATOMIC_LAND = 0x0015,
/** Atomic logical or: (*remote_address) = (*remote_address) || operand */
MCA_BTL_ATOMIC_LOR = 0x0016,
/** Atomic logical xor: (*remote_address) = !(*remote_address) != !operand */
MCA_BTL_ATOMIC_LXOR = 0x0017,
/** Atomic swap: (*remote_address) = operand */
MCA_BTL_ATOMIC_SWAP = 0x001a,
/** Atomic min */
MCA_BTL_ATOMIC_MIN = 0x0020,
/** Atomic max */
MCA_BTL_ATOMIC_MAX = 0x0021,

MCA_BTL_ATOMIC_LAST,
};
typedef enum mca_btl_base_atomic_op_t mca_btl_base_atomic_op_t;

Expand Down Expand Up @@ -977,7 +1025,7 @@ typedef int (*mca_btl_base_module_get_fn_t) (struct mca_btl_base_module_t *btl,
* (remote_address, remote_address + 8)
* @param op (IN) Operation to perform
* @param operand (IN) Operand for the operation
* @param flags (IN) Flags for this put operation
* @param flags (IN) Flags for this atomic operation
* @param order (IN) Ordering
* @param cbfunc (IN) Function to call on completion (if queued)
* @param cbcontext (IN) Context for the callback
Expand Down Expand Up @@ -1021,7 +1069,7 @@ typedef int (*mca_btl_base_module_atomic_op64_fn_t) (struct mca_btl_base_module_
* (remote_address, remote_address + 8)
* @param op (IN) Operation to perform
* @param operand (IN) Operand for the operation
* @param flags (IN) Flags for this put operation
* @param flags (IN) Flags for this atomic operation
* @param order (IN) Ordering
* @param cbfunc (IN) Function to call on completion (if queued)
* @param cbcontext (IN) Context for the callback
Expand Down Expand Up @@ -1067,7 +1115,7 @@ typedef int (*mca_btl_base_module_atomic_fop64_fn_t) (struct mca_btl_base_module
* (remote_address, remote_address + 8)
* @param compare (IN) Operand for the operation
* @param value (IN) Value to store on success
* @param flags (IN) Flags for this put operation
* @param flags (IN) Flags for this atomic operation
* @param order (IN) Ordering
* @param cbfunc (IN) Function to call on completion (if queued)
* @param cbcontext (IN) Context for the callback
Expand Down
6 changes: 5 additions & 1 deletion opal/mca/btl/openib/btl_openib_atomic.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ int mca_btl_openib_atomic_fop (struct mca_btl_base_module_t *btl, struct mca_btl
void *cbcontext, void *cbdata)
{

if (OPAL_UNLIKELY(MCA_BTL_ATOMIC_ADD != op)) {
if (OPAL_UNLIKELY(MCA_BTL_ATOMIC_ADD != op || (MCA_BTL_ATOMIC_FLAG_32BIT & flags))) {
return OPAL_ERR_NOT_SUPPORTED;
}

Expand All @@ -128,6 +128,10 @@ int mca_btl_openib_atomic_cswap (struct mca_btl_base_module_t *btl, struct mca_b
uint64_t value, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext, void *cbdata)
{
if (OPAL_UNLIKELY(MCA_BTL_ATOMIC_FLAG_32BIT & flags)) {
return OPAL_ERR_NOT_SUPPORTED;
}

return mca_btl_openib_atomic_internal (btl, endpoint, local_address, remote_address, local_handle,
remote_handle, IBV_WR_ATOMIC_CMP_AND_SWP, compare, value,
flags, order, cbfunc, cbcontext, cbdata);
Expand Down
125 changes: 105 additions & 20 deletions opal/mca/btl/ugni/btl_ugni_atomic.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
Expand All @@ -11,18 +11,66 @@

#include "btl_ugni_rdma.h"

static gni_fma_cmd_type_t famo_cmds[] = {
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC_FADD,
[MCA_BTL_ATOMIC_AND] = GNI_FMA_ATOMIC_FAND,
[MCA_BTL_ATOMIC_OR] = GNI_FMA_ATOMIC_FOR,
[MCA_BTL_ATOMIC_XOR] = GNI_FMA_ATOMIC_FXOR,
/*
 * GNI FMA command lookup table used by mca_btl_ugni_aop.
 *
 * Indexed as amo_cmds[type][op], where type is one of OPAL_INT32, OPAL_INT64,
 * OPAL_FLOAT or OPAL_DOUBLE and op is an mca_btl_base_atomic_op_t value.
 * Entries that are not listed here are zero-initialized; the lookup code
 * treats a zero entry as "operation not supported for this type".
 *
 * NOTE(review): the OPAL_INT32 row only maps the logical ops (LAND/LOR/LXOR)
 * to the GNI *_S AND/OR/XOR commands, while the OPAL_INT64 row only maps the
 * bitwise ops (AND/OR/XOR) -- confirm this asymmetry is intended.
 */
static gni_fma_cmd_type_t amo_cmds[][MCA_BTL_ATOMIC_LAST] = {
/* 32-bit integer operands */
[OPAL_INT32] = {
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_IADD_S,
[MCA_BTL_ATOMIC_LAND] = GNI_FMA_ATOMIC2_AND_S,
[MCA_BTL_ATOMIC_LOR] = GNI_FMA_ATOMIC2_OR_S,
[MCA_BTL_ATOMIC_LXOR] = GNI_FMA_ATOMIC2_XOR_S,
[MCA_BTL_ATOMIC_SWAP] = GNI_FMA_ATOMIC2_SWAP_S,
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_IMIN_S,
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_IMAX_S,
},
/* 64-bit integer operands */
[OPAL_INT64] = {
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC_ADD,
[MCA_BTL_ATOMIC_AND] = GNI_FMA_ATOMIC_AND,
[MCA_BTL_ATOMIC_OR] = GNI_FMA_ATOMIC_OR,
[MCA_BTL_ATOMIC_XOR] = GNI_FMA_ATOMIC_XOR,
[MCA_BTL_ATOMIC_SWAP] = GNI_FMA_ATOMIC2_SWAP,
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_IMIN,
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_IMAX,
},
/* 32-bit floating-point operands: only add/min/max are mapped */
[OPAL_FLOAT] = {
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FPADD_S,
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FPMIN_S,
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FPMAX_S,
},
/* 64-bit floating-point operands: only add/min/max are mapped */
[OPAL_DOUBLE] = {
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FPADD,
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FPMIN,
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FPMAX,
},
};

static gni_fma_cmd_type_t amo_cmds[] = {
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC_ADD,
[MCA_BTL_ATOMIC_AND] = GNI_FMA_ATOMIC_AND,
[MCA_BTL_ATOMIC_OR] = GNI_FMA_ATOMIC_OR,
[MCA_BTL_ATOMIC_XOR] = GNI_FMA_ATOMIC_XOR,
/*
 * GNI FMA command lookup table used by mca_btl_ugni_afop (the atomic
 * operation that also takes a local address/handle for a result).
 *
 * Indexed as famo_cmds[type][op], where type is one of OPAL_INT32,
 * OPAL_INT64, OPAL_FLOAT or OPAL_DOUBLE and op is an
 * mca_btl_base_atomic_op_t value. Entries that are not listed here are
 * zero-initialized; the lookup code treats a zero entry as "operation not
 * supported for this type".
 *
 * NOTE(review): as in the non-fetching table, the OPAL_INT32 row maps only
 * the logical ops (LAND/LOR/LXOR) and the OPAL_INT64 row maps only the
 * bitwise ops (AND/OR/XOR) -- confirm this asymmetry is intended.
 */
static gni_fma_cmd_type_t famo_cmds[][MCA_BTL_ATOMIC_LAST] = {
/* 32-bit integer operands */
[OPAL_INT32] = {
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FIADD_S,
[MCA_BTL_ATOMIC_LAND] = GNI_FMA_ATOMIC2_FAND_S,
[MCA_BTL_ATOMIC_LOR] = GNI_FMA_ATOMIC2_FOR_S,
[MCA_BTL_ATOMIC_LXOR] = GNI_FMA_ATOMIC2_FXOR_S,
[MCA_BTL_ATOMIC_SWAP] = GNI_FMA_ATOMIC2_FSWAP_S,
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FIMIN_S,
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FIMAX_S,
},
/* 64-bit integer operands */
[OPAL_INT64] = {
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC_FADD,
[MCA_BTL_ATOMIC_AND] = GNI_FMA_ATOMIC_FAND,
[MCA_BTL_ATOMIC_OR] = GNI_FMA_ATOMIC_FOR,
[MCA_BTL_ATOMIC_XOR] = GNI_FMA_ATOMIC_FXOR,
[MCA_BTL_ATOMIC_SWAP] = GNI_FMA_ATOMIC2_FSWAP,
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FIMIN,
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FIMAX,
},
/* 32-bit floating-point operands: only add/min/max are mapped */
[OPAL_FLOAT] = {
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FFPADD_S,
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FFPMIN_S,
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FFPMAX_S,
},
/* 64-bit floating-point operands: only add/min/max are mapped */
[OPAL_DOUBLE] = {
[MCA_BTL_ATOMIC_ADD] = GNI_FMA_ATOMIC2_FFPADD,
[MCA_BTL_ATOMIC_MIN] = GNI_FMA_ATOMIC2_FFPMIN,
[MCA_BTL_ATOMIC_MAX] = GNI_FMA_ATOMIC2_FFPMAX,
},
};

int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
Expand All @@ -32,7 +80,20 @@ int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end
{
gni_mem_handle_t dummy = {0, 0};
mca_btl_ugni_post_descriptor_t *post_desc;
int rc;
int gni_op, rc, type;
size_t size;

size = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? 4 : 8;
if (MCA_BTL_ATOMIC_FLAG_FLOAT & flags) {
type = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? OPAL_FLOAT : OPAL_DOUBLE;
} else {
type = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? OPAL_INT32 : OPAL_INT64;
}

gni_op = amo_cmds[type][op];
if (0 == gni_op) {
return OPAL_ERR_NOT_SUPPORTED;
}

rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
Expand All @@ -45,15 +106,19 @@ int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end
}

init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, 0, dummy, remote_address,
remote_handle->gni_handle, 8, 0);
post_desc->desc.base.amo_cmd = amo_cmds[op];
remote_handle->gni_handle, size, 0);
post_desc->desc.base.amo_cmd = gni_op;

post_desc->desc.base.first_operand = operand;

OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
rc = GNI_PostFma (endpoint->rdma_ep_handle, &post_desc->desc.base);
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
if (GNI_RC_ILLEGAL_OP == rc) {
return OPAL_ERR_NOT_SUPPORTED;
}
return OPAL_ERR_OUT_OF_RESOURCE;
}

Expand All @@ -67,7 +132,20 @@ int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_en
void *cbcontext, void *cbdata)
{
mca_btl_ugni_post_descriptor_t *post_desc;
int rc;
int gni_op, rc, type;
size_t size;

size = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? 4 : 8;
if (MCA_BTL_ATOMIC_FLAG_FLOAT & flags) {
type = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? OPAL_FLOAT : OPAL_DOUBLE;
} else {
type = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? OPAL_INT32 : OPAL_INT64;
}

gni_op = famo_cmds[type][op];
if (0 == gni_op) {
return OPAL_ERR_NOT_SUPPORTED;
}

rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
Expand All @@ -81,8 +159,8 @@ int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_en


init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle,
remote_address, remote_handle->gni_handle, 8, 0);
post_desc->desc.base.amo_cmd = famo_cmds[op];
remote_address, remote_handle->gni_handle, size, 0);
post_desc->desc.base.amo_cmd = gni_op;

post_desc->desc.base.first_operand = operand;

Expand All @@ -91,6 +169,9 @@ int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_en
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
if (GNI_RC_ILLEGAL_OP == rc) {
return OPAL_ERR_NOT_SUPPORTED;
}
return OPAL_ERR_OUT_OF_RESOURCE;
}

Expand All @@ -103,7 +184,11 @@ int mca_btl_ugni_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
mca_btl_ugni_post_descriptor_t *post_desc;
int rc;
int gni_op, rc;
size_t size;

gni_op = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? GNI_FMA_ATOMIC2_CSWAP_S : GNI_FMA_ATOMIC_CSWAP;
size = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? 4 : 8;

rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
Expand All @@ -117,8 +202,8 @@ int mca_btl_ugni_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_


init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle,
remote_address, remote_handle->gni_handle, 8, 0);
post_desc->desc.base.amo_cmd = GNI_FMA_ATOMIC_CSWAP;
remote_address, remote_handle->gni_handle, size, 0);
post_desc->desc.base.amo_cmd = gni_op;

post_desc->desc.base.first_operand = compare;
post_desc->desc.base.second_operand = value;
Expand Down
14 changes: 9 additions & 5 deletions opal/mca/btl/ugni/btl_ugni_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -275,11 +275,8 @@ btl_ugni_component_register(void)
/*
* see def. of ALIGNMENT_MASK to figure this one out
*/
if (GNI_DEVICE_GEMINI == device_type) {
mca_btl_ugni_module.super.btl_get_alignment = 4;
} else {
mca_btl_ugni_module.super.btl_get_alignment = 0;
}
/* both gemini and aries have a 4-byte alignment requirement on remote addresses */
mca_btl_ugni_module.super.btl_get_alignment = 4;

/* threshold for put */
mca_btl_ugni_module.super.btl_min_rdma_pipeline_size = 8 * 1024;
Expand All @@ -291,6 +288,13 @@ btl_ugni_component_register(void)
MCA_BTL_ATOMIC_SUPPORTS_AND | MCA_BTL_ATOMIC_SUPPORTS_OR | MCA_BTL_ATOMIC_SUPPORTS_XOR |
MCA_BTL_ATOMIC_SUPPORTS_CSWAP;

if (GNI_DEVICE_ARIES == device_type) {
/* aries supports additional atomic operations */
mca_btl_ugni_module.super.btl_atomic_flags |= MCA_BTL_ATOMIC_SUPPORTS_MIN | MCA_BTL_ATOMIC_SUPPORTS_MAX |
MCA_BTL_ATOMIC_SUPPORTS_LAND | MCA_BTL_ATOMIC_SUPPORTS_LOR | MCA_BTL_ATOMIC_SUPPORTS_LXOR |
MCA_BTL_ATOMIC_SUPPORTS_32BIT | MCA_BTL_ATOMIC_SUPPORTS_FLOAT;
}

mca_btl_ugni_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);

mca_btl_ugni_module.super.btl_bandwidth = 40000; /* Mbs */
Expand Down