From cc750b00a6adc0c31a5359cfef0474bb40e52e7d Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Thu, 19 Feb 2015 16:13:37 -0700 Subject: [PATCH] btl: export local registration thresholds Some BTLs do not require local registration for some RDMA transactions. For example: inline put on openib, FMA put on ugni. This commit adds code to expose the local registration thresholds to BTL users. Optimized code can take advantage of this information to improve RDMA performance. --- opal/mca/btl/btl.h | 4 ++++ opal/mca/btl/openib/btl_openib_component.c | 3 +++ opal/mca/btl/openib/btl_openib_put.c | 20 ++++++++++++++------ opal/mca/btl/ugni/btl_ugni_component.c | 5 +++++ opal/mca/btl/ugni/btl_ugni_rdma.h | 7 ++++++- 5 files changed, 32 insertions(+), 7 deletions(-) diff --git a/opal/mca/btl/btl.h b/opal/mca/btl/btl.h index bdd267c6f5e..ae44d714cfb 100644 --- a/opal/mca/btl/btl.h +++ b/opal/mca/btl/btl.h @@ -1119,6 +1119,10 @@ struct mca_btl_base_module_t { size_t btl_put_limit; /**< maximum size supported by the btl_put function */ size_t btl_put_alignment; /**< minimum alignment/size needed by btl_put (power of 2) */ + /* local memory must be registered for get/put transactions larger than these sizes */ + size_t btl_get_local_registration_threshold; + size_t btl_put_local_registration_threshold; + /* BTL function table */ mca_btl_base_module_add_procs_fn_t btl_add_procs; mca_btl_base_module_del_procs_fn_t btl_del_procs; diff --git a/opal/mca/btl/openib/btl_openib_component.c b/opal/mca/btl/openib/btl_openib_component.c index 5a3d75b5944..b892544b786 100644 --- a/opal/mca/btl/openib/btl_openib_component.c +++ b/opal/mca/btl/openib/btl_openib_component.c @@ -816,6 +816,9 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device, openib_btl->super.btl_put_limit = openib_btl->ib_port_attr.max_msg_sz; } + openib_btl->super.btl_put_local_registration_threshold = openib_btl->device->max_inline_data; + openib_btl->super.btl_get_local_registration_threshold = 
0; + #if HAVE_DECL_IBV_ATOMIC_HCA if (openib_btl->device->ib_dev_attr.atomic_cap == IBV_ATOMIC_NONE) { openib_btl->super.btl_flags &= ~MCA_BTL_FLAGS_ATOMIC_FOPS; diff --git a/opal/mca/btl/openib/btl_openib_put.c b/opal/mca/btl/openib/btl_openib_put.c index cc2cadcf73b..1814fbcd0b6 100644 --- a/opal/mca/btl/openib/btl_openib_put.c +++ b/opal/mca/btl/openib/btl_openib_put.c @@ -45,7 +45,12 @@ int mca_btl_openib_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint mca_btl_openib_put_frag_t *frag = NULL; int rc, qp = order; - if (OPAL_UNLIKELY(size > btl->btl_put_limit)) { + if (MCA_BTL_NO_ORDER == qp) { + qp = mca_btl_openib_component.rdma_qp; + } + + if (OPAL_UNLIKELY((ep->qps[qp].ib_inline_max < size && !local_handle) || !remote_handle || + size > btl->btl_put_limit)) { return OPAL_ERR_BAD_PARAM; } @@ -54,10 +59,6 @@ int mca_btl_openib_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint return OPAL_ERR_OUT_OF_RESOURCE; } - if (MCA_BTL_NO_ORDER == qp) { - qp = mca_btl_openib_component.rdma_qp; - } - /* set base descriptor flags */ to_base_frag(frag)->base.order = qp; /* free this descriptor when the operation is complete */ @@ -65,7 +66,14 @@ int mca_btl_openib_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint /* set up scatter-gather entry */ to_com_frag(frag)->sg_entry.length = size; - to_com_frag(frag)->sg_entry.lkey = local_handle->lkey; + + if (local_handle) { + to_com_frag(frag)->sg_entry.lkey = local_handle->lkey; + } else { + /* lkey is not required for inline RDMA write */ + to_com_frag(frag)->sg_entry.lkey = 0; + } + to_com_frag(frag)->sg_entry.addr = (uint64_t)(intptr_t) local_address; to_com_frag(frag)->endpoint = ep; diff --git a/opal/mca/btl/ugni/btl_ugni_component.c b/opal/mca/btl/ugni/btl_ugni_component.c index 9338c1fa217..1e0fc91555d 100644 --- a/opal/mca/btl/ugni/btl_ugni_component.c +++ b/opal/mca/btl/ugni/btl_ugni_component.c @@ -251,6 +251,9 @@ btl_ugni_component_register(void) 
mca_btl_ugni_module.super.btl_bandwidth = 40000; /* Mbs */ mca_btl_ugni_module.super.btl_latency = 2; /* Microsecs */ + mca_btl_ugni_module.super.btl_get_local_registration_threshold = 0; + mca_btl_ugni_module.super.btl_put_local_registration_threshold = mca_btl_ugni_component.ugni_fma_limit; + /* Call the BTL based to register its MCA params */ mca_btl_base_param_register(&mca_btl_ugni_component.super.btl_version, &mca_btl_ugni_module.super); @@ -321,6 +324,8 @@ mca_btl_ugni_component_init (int *num_btl_modules, mca_btl_ugni_component.ugni_fma_limit = 65536; } + mca_btl_ugni_module.super.btl_put_local_registration_threshold = mca_btl_ugni_component.ugni_fma_limit; + if (enable_mpi_threads && mca_btl_ugni_component.progress_thread_requested) { mca_btl_ugni_component.progress_thread_enabled = 1; } diff --git a/opal/mca/btl/ugni/btl_ugni_rdma.h b/opal/mca/btl/ugni/btl_ugni_rdma.h index bcc8a5f33c7..970feabd34c 100644 --- a/opal/mca/btl/ugni/btl_ugni_rdma.h +++ b/opal/mca/btl/ugni/btl_ugni_rdma.h @@ -53,8 +53,13 @@ static inline int mca_btl_ugni_post_fma (struct mca_btl_base_endpoint_t *endpoin void *cbcontext, void *cbdata) { mca_btl_ugni_post_descriptor_t *post_desc; + gni_mem_handle_t local_gni_handle = {0, 0}; gni_return_t grc; + if (local_handle) { + local_gni_handle = local_handle->gni_handle; + } + mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata, &post_desc); if (OPAL_UNLIKELY(NULL == post_desc)) { return OPAL_ERR_OUT_OF_RESOURCE; @@ -62,7 +67,7 @@ static inline int mca_btl_ugni_post_fma (struct mca_btl_base_endpoint_t *endpoin /* Post descriptor (CQ is ignored for FMA transactions) -- The CQ associated with the endpoint * is used. 
*/ - init_gni_post_desc (&post_desc->desc, order, op_type, (intptr_t) local_address, local_handle->gni_handle, + init_gni_post_desc (&post_desc->desc, order, op_type, (intptr_t) local_address, local_gni_handle, remote_address, remote_handle->gni_handle, size, 0); OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);