Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 48 additions & 45 deletions opal/mca/btl/portals4/btl_portals4.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,17 @@
#include "btl_portals4.h"
#include "btl_portals4_recv.h"


/*
 * Forward declaration of the memory-registration entry point.  It is
 * needed because the module initializer below stores this function in
 * .btl_register_mem before the definition appears later in this file.
 *
 * Returns a pointer to a registration handle, or NULL on failure.
 */
mca_btl_base_registration_handle_t *
mca_btl_portals4_register_mem(mca_btl_base_module_t *btl,
mca_btl_base_endpoint_t *endpoint,
void *base,
size_t size,
uint32_t flags);

/*
 * Forward declaration of the memory-deregistration entry point, stored in
 * .btl_deregister_mem by the module initializer below.  Releases a handle
 * obtained from mca_btl_portals4_register_mem().
 */
int mca_btl_portals4_deregister_mem(mca_btl_base_module_t *btl,
mca_btl_base_registration_handle_t *handle);

mca_btl_portals4_module_t mca_btl_portals4_module = {
.super = {
.btl_component = &mca_btl_portals4_component.super,
Expand All @@ -52,7 +63,8 @@ mca_btl_portals4_module_t mca_btl_portals4_module = {
.btl_alloc = mca_btl_portals4_alloc,
.btl_free = mca_btl_portals4_free,
.btl_prepare_src = mca_btl_portals4_prepare_src,
.btl_prepare_dst = mca_btl_portals4_prepare_dst,
.btl_register_mem = mca_btl_portals4_register_mem,
.btl_deregister_mem = mca_btl_portals4_deregister_mem,
.btl_send = mca_btl_portals4_send,
.btl_get = mca_btl_portals4_get,
.btl_dump = mca_btl_base_dump,
Expand Down Expand Up @@ -222,7 +234,7 @@ mca_btl_portals4_alloc(struct mca_btl_base_module_t* btl_base,
}

frag->md_h = PTL_INVALID_HANDLE;
frag->base.des_local_count = 1;
frag->base.des_segment_count = 1;
frag->base.des_flags = flags | MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
frag->base.order = MCA_BTL_NO_ORDER;

Expand Down Expand Up @@ -274,7 +286,6 @@ mca_btl_portals4_free(struct mca_btl_base_module_t* btl_base,
mca_btl_base_descriptor_t*
mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* peer,
mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor,
uint8_t order,
size_t reserve,
Expand Down Expand Up @@ -312,7 +323,7 @@ mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base,
}

frag->segments[0].base.seg_len = max_data + reserve;
frag->base.des_local_count = 1;
frag->base.des_segment_count = 1;

} else {
/* no need to pack - rdma operation out of user's buffer */
Expand Down Expand Up @@ -347,7 +358,7 @@ mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base,
frag->segments[0].base.seg_len = max_data;
frag->segments[0].base.seg_addr.pval = iov.iov_base;
frag->segments[0].key = OPAL_THREAD_ADD64(&(portals4_btl->portals_rdma_key), 1);
frag->base.des_local_count = 1;
frag->base.des_segment_count = 1;

/* either a put or get. figure out which later */
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
Expand Down Expand Up @@ -398,58 +409,50 @@ mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base,
(void *)frag, frag->me_h, me.start, me.length,
me.match_id.rank, me.match_id.phys.nid, me.match_id.phys.pid, me.match_bits));
}
frag->base.des_local = &frag->segments[0].base;
frag->base.des_remote = NULL;
frag->base.des_remote_count = 0;

frag->base.des_segments = &frag->segments[0].base;
frag->base.des_flags = flags | MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
frag->base.order = MCA_BTL_NO_ORDER;
return &frag->base;
}

mca_btl_base_descriptor_t*
mca_btl_portals4_prepare_dst(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* peer,
mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor,
uint8_t order,
size_t reserve,
size_t* size,
uint32_t flags)
/**
 * Create a registration handle for a memory region.
 *
 * Nothing is bound to the Portals4 interface here: the handle only carries
 * a fresh match-bits key taken from the module's portals_rdma_key counter.
 *
 * @param btl_base  BTL module; must be a mca_btl_portals4_module_t
 * @param endpoint  peer endpoint (not used by this implementation)
 * @param base      start address of the region (only reported in the log)
 * @param size      length of the region in bytes (only reported in the log)
 * @param flags     registration flags (not used by this implementation)
 *
 * @return a heap-allocated handle that must be released with
 *         mca_btl_portals4_deregister_mem(), or NULL if allocation fails
 */
mca_btl_base_registration_handle_t *
mca_btl_portals4_register_mem(mca_btl_base_module_t *btl_base,
                              mca_btl_base_endpoint_t *endpoint,
                              void *base,
                              size_t size,
                              uint32_t flags)
{
    struct mca_btl_portals4_module_t *portals4_btl = (struct mca_btl_portals4_module_t*) btl_base;
    mca_btl_base_registration_handle_t *handle = malloc(sizeof(*handle));

    if (NULL == handle) {
        return NULL;
    }

    /* every registration gets its own key so a remote get can match it */
    handle->key = OPAL_THREAD_ADD64(&(portals4_btl->portals_rdma_key), 1);

    /* size_t needs %zu; the 64-bit key is cast so the specifier is portable */
    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                         "mca_btl_portals4_register_mem NI=%d base=%p size=%zu handle=%p key=%llu\n",
                         portals4_btl->interface_num, base, size, (void *)handle,
                         (unsigned long long)handle->key));

    return handle;
}

/**
 * Release a registration handle created by mca_btl_portals4_register_mem().
 *
 * @param btl_base  BTL module; must be a mca_btl_portals4_module_t
 * @param handle    handle to release; NULL is accepted and ignored
 *
 * @return OPAL_SUCCESS always
 */
int
mca_btl_portals4_deregister_mem(mca_btl_base_module_t *btl_base,
                                mca_btl_base_registration_handle_t *handle)
{
    struct mca_btl_portals4_module_t *portals4_btl = (struct mca_btl_portals4_module_t*) btl_base;

    /* the log call below reads handle->key, so bail out before touching it */
    if (NULL == handle) {
        return OPAL_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                         "mca_btl_portals4_deregister_mem NI=%d handle=%p key=%llu\n",
                         portals4_btl->interface_num, (void *)handle,
                         (unsigned long long)handle->key));

    free(handle);

    return OPAL_SUCCESS;
}

int
Expand Down
27 changes: 15 additions & 12 deletions opal/mca/btl/portals4/btl_portals4.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,23 +238,12 @@ int mca_btl_portals4_free(struct mca_btl_base_module_t* btl_base,
mca_btl_base_descriptor_t*
mca_btl_portals4_prepare_src(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* peer,
mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor,
uint8_t order,
size_t reserve,
size_t* size,
uint32_t flags);

mca_btl_base_descriptor_t*
mca_btl_portals4_prepare_dst(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* peer,
mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor,
uint8_t order,
size_t reserve,
size_t* size,
uint32_t flags);

int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* descriptor,
Expand All @@ -279,10 +268,24 @@ int mca_btl_portals4_put(struct mca_btl_base_module_t* btl_base,

int mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* decriptor);
void *local_address,
uint64_t remote_address,
struct mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_registration_handle_t *remote_handle,
size_t size,
int flags,
int order,
mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext,
void *cbdata);

int mca_btl_portals4_get_error(int ptl_error);

/**
 * Portals4 definition of the BTL registration handle.
 *
 * The handle is created by mca_btl_portals4_register_mem(), which fills
 * in a key drawn from the module's portals_rdma_key counter.  The get path
 * in btl_portals4_rdma.c copies the remote handle's key into the fragment's
 * match_bits.  The component reports sizeof this structure as
 * btl_registration_handle_size.
 */
struct mca_btl_base_registration_handle_t {
/** Portals4 match bits identifying the registered region */
ptl_match_bits_t key;
};

/*
* global structures
*/
Expand Down
35 changes: 26 additions & 9 deletions opal/mca/btl/portals4/btl_portals4_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,17 @@ mca_btl_portals4_component_open(void)
mca_btl_portals4_module.super.btl_flags =
MCA_BTL_FLAGS_RDMA |
MCA_BTL_FLAGS_RDMA_MATCHED;
mca_btl_portals4_module.super.btl_seg_size = sizeof (mca_btl_portals4_segment_t);

mca_btl_portals4_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);

mca_btl_portals4_module.super.btl_get_limit = SIZE_MAX;
mca_btl_portals4_module.super.btl_put_limit = 0; /* not implemented */
mca_btl_portals4_module.super.btl_get_alignment = 0;
mca_btl_portals4_module.super.btl_put_alignment = 0;

mca_btl_portals4_module.super.btl_get_local_registration_threshold = 0;
mca_btl_portals4_module.super.btl_put_local_registration_threshold = 0;

mca_btl_portals4_module.super.btl_bandwidth = 1000;
mca_btl_portals4_module.super.btl_latency = 0;

Expand Down Expand Up @@ -770,8 +780,8 @@ mca_btl_portals4_component_progress(void)

tag = (unsigned char) (ev.hdr_data);

btl_base_descriptor.des_local = seg;
btl_base_descriptor.des_local_count = 1;
btl_base_descriptor.des_segments = seg;
btl_base_descriptor.des_segment_count = 1;
seg[0].seg_addr.pval = ev.start;
seg[0].seg_len = ev.mlength;

Expand All @@ -785,6 +795,8 @@ mca_btl_portals4_component_progress(void)

case PTL_EVENT_PUT_OVERFLOW:
/* */
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"PTL_EVENT_OVERFLOW received\n"));
goto done;
break;

Expand All @@ -810,8 +822,10 @@ mca_btl_portals4_component_progress(void)
goto done;
break;

case PTL_EVENT_GET:
case PTL_EVENT_GET: /* Generated on source (target) when a get from memory ends */
/* */
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"PTL_EVENT_GET received at target rlength=%ld mlength=%ld\n", ev.rlength, ev.mlength));
goto done;
break;

Expand Down Expand Up @@ -849,11 +863,14 @@ mca_btl_portals4_component_progress(void)
}
else {
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"PTL_EVENT_REPLY: Call to des_cbfunc: %lx\n", (uint64_t)frag->base.des_cbfunc));
frag->base.des_cbfunc(&portals4_btl->super,
frag->endpoint,
&frag->base,
OPAL_SUCCESS);
"PTL_EVENT_REPLY: Call to rdma_cbfunc=%p\n", (void *)frag->rdma_cb.func));
frag->rdma_cb.func(&portals4_btl->super,
frag->endpoint,
ev.start,
frag->rdma_cb.local_handle,
frag->rdma_cb.context,
frag->rdma_cb.data,
OPAL_SUCCESS);
PtlMDRelease(frag->md_h);
frag->md_h = PTL_INVALID_HANDLE;

Expand Down
4 changes: 2 additions & 2 deletions opal/mca/btl/portals4/btl_portals4_frag.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ static void
mca_btl_portals4_frag_common_send_constructor(mca_btl_portals4_frag_t* frag)
{
frag->base.des_flags = 0;
frag->base.des_local = &frag->segments[0].base;
frag->base.des_local_count = 2;
frag->base.des_segments = &frag->segments[0].base;
frag->base.des_segment_count = 2;

frag->segments[0].base.seg_addr.pval = frag + 1;
frag->segments[0].base.seg_len = frag->size;
Expand Down
8 changes: 8 additions & 0 deletions opal/mca/btl/portals4/btl_portals4_frag.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,14 @@ struct mca_btl_portals4_frag_t {
/* length for retransmit case */
ptl_process_t peer_proc;

/* the callback and context to complete an RDMA operation */
struct {
mca_btl_base_rdma_completion_fn_t func;
void *context;
void *data;
mca_btl_base_registration_handle_t *local_handle;
} rdma_cb;

enum { BTL_PORTALS4_FRAG_TYPE_EAGER,
BTL_PORTALS4_FRAG_TYPE_MAX,
BTL_PORTALS4_FRAG_TYPE_USER } type;
Expand Down
43 changes: 36 additions & 7 deletions opal/mca/btl/portals4/btl_portals4_rdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,23 +37,52 @@ mca_btl_portals4_put(struct mca_btl_base_module_t* btl_base,
int
mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* descriptor)
void *local_address,
uint64_t remote_address,
struct mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_registration_handle_t *remote_handle,
size_t size,
int flags,
int order,
mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext,
void *cbdata)
{
mca_btl_portals4_module_t *portals4_btl = (mca_btl_portals4_module_t *) btl_base;
mca_btl_portals4_segment_t *src_seg = (mca_btl_portals4_segment_t *) descriptor->des_remote;
mca_btl_portals4_frag_t *frag = (mca_btl_portals4_frag_t*) descriptor;
mca_btl_portals4_frag_t *frag = NULL;
ptl_md_t md;
int ret;

/* reserve space in the event queue for rdma operations immediately */
while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) >
portals4_btl->portals_max_outstanding_ops) {
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "Call to mca_btl_portals4_component_progress (1)\n"));
mca_btl_portals4_component_progress();
}

OPAL_BTL_PORTALS4_FRAG_ALLOC_USER(portals4_btl, frag);
if (NULL == frag){
OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
return OPAL_ERROR;
}
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"mca_btl_portals4_prepare_src: Incrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops));

OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
"mca_btl_portals4_get frag=%p src_seg=%p frag->md_h=%d\n", (void *)frag, (void *)src_seg, frag->md_h));
"mca_btl_portals4_get frag=%p\n", (void *)frag));

frag->rdma_cb.func = cbfunc;
frag->rdma_cb.context = cbcontext;
frag->rdma_cb.data = cbdata;
frag->rdma_cb.local_handle = local_handle;

frag->endpoint = btl_peer;
frag->hdr.tag = MCA_BTL_TAG_MAX;

/* Bind the memory */
md.start = (void *)frag->segments[0].base.seg_addr.pval;
md.length = frag->segments[0].base.seg_len;
md.start = (void *)local_address;
md.length = size;
md.options = 0;
md.eq_handle = portals4_btl->recv_eq_h;
md.ct_handle = PTL_CT_NONE;
Expand All @@ -69,7 +98,7 @@ mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
return OPAL_ERROR;
}

frag->match_bits = src_seg->key;
frag->match_bits = remote_handle->key;
frag->length = md.length;
frag->peer_proc = btl_peer->ptl_proc;
ret = PtlGet(frag->md_h,
Expand Down