Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions opal/mca/btl/uct/btl_uct.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
* All rights reserved.
* Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2019 Google, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -85,6 +86,10 @@ struct mca_btl_uct_module_t {
/** array containing the am_tl and rdma_tl */
mca_btl_uct_tl_t *comm_tls[2];

#if UCT_API >= UCT_VERSION(1, 7)
uct_component_h uct_component;
#endif

/** registration cache */
mca_rcache_base_module_t *rcache;

Expand Down
4 changes: 2 additions & 2 deletions opal/mca/btl/uct/btl_uct_amo.c
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ int mca_btl_uct_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end
mca_btl_uct_uct_completion_release (comp);
}

uct_rkey_release (&rkey);
mca_btl_uct_rkey_release (uct_btl, &rkey);

return rc;
}
Expand Down Expand Up @@ -184,7 +184,7 @@ int mca_btl_uct_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_e
mca_btl_uct_uct_completion_release (comp);
}

uct_rkey_release (&rkey);
mca_btl_uct_rkey_release (uct_btl, &rkey);

return rc;
}
82 changes: 79 additions & 3 deletions opal/mca/btl/uct/btl_uct_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,12 @@ ucs_status_t mca_btl_uct_am_handler (void *arg, void *data, size_t length, unsig
return UCS_OK;
}

#if UCT_API >= UCT_VERSION(1, 7)
static int mca_btl_uct_component_process_uct_md (uct_component_h component, uct_md_resource_desc_t *md_desc,
char **allowed_ifaces)
#else
static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc, char **allowed_ifaces)
#endif
{
mca_rcache_base_resources_t rcache_resources;
uct_tl_resource_desc_t *tl_desc;
Expand Down Expand Up @@ -348,8 +353,14 @@ static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc

md = OBJ_NEW(mca_btl_uct_md_t);


#if UCT_API >= UCT_VERSION(1, 7)
uct_md_config_read (component, NULL, NULL, &uct_config);
uct_md_open (component, md_desc->md_name, uct_config, &md->uct_md);
#else
uct_md_config_read (md_desc->md_name, NULL, NULL, &uct_config);
uct_md_open (md_desc->md_name, uct_config, &md->uct_md);
#endif
uct_config_release (uct_config);

uct_md_query (md->uct_md, &md_attr);
Expand All @@ -375,6 +386,10 @@ static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc
return OPAL_ERR_NOT_AVAILABLE;
}

#if UCT_API >= UCT_VERSION(1, 7)
module->uct_component = component;
#endif

mca_btl_uct_component.modules[mca_btl_uct_component.module_count++] = module;

/* NTH: a registration cache shouldn't be necessary when using UCT but there are measurable
Expand All @@ -400,6 +415,42 @@ static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc
return OPAL_SUCCESS;
}

#if UCT_API >= UCT_VERSION(1, 7)
static int mca_btl_uct_component_process_uct_component (uct_component_h component, char **allowed_ifaces)
{
uct_component_attr_t attr = {.field_mask = UCT_COMPONENT_ATTR_FIELD_NAME |
UCT_COMPONENT_ATTR_FIELD_MD_RESOURCE_COUNT};
ucs_status_t ucs_status;
int rc;

ucs_status = uct_component_query (component, &attr);
if (UCS_OK != ucs_status) {
return OPAL_ERROR;
}

BTL_VERBOSE(("processing uct component %s", attr.name));

attr.md_resources = calloc (attr.md_resource_count, sizeof (*attr.md_resources));
attr.field_mask |= UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES;
ucs_status = uct_component_query (component, &attr);
if (UCS_OK != ucs_status) {
return OPAL_ERROR;
}

for (int i = 0 ; i < attr.md_resource_count ; ++i) {
rc = mca_btl_uct_component_process_uct_md (component, attr.md_resources + i,
allowed_ifaces);
if (OPAL_SUCCESS != rc) {
break;
}
}

free (attr.md_resources);

return OPAL_SUCCESS;
}
#endif /* UCT_API >= UCT_VERSION(1, 7) */

/*
* UCT component initialization:
* (1) read interface list from kernel and compare against component parameters
Expand All @@ -415,6 +466,7 @@ static mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules,
struct mca_btl_base_module_t **base_modules;
uct_md_resource_desc_t *resources;
unsigned resource_count;
ucs_status_t ucs_status;
char **allowed_ifaces;
int rc;

Expand All @@ -431,10 +483,32 @@ static mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules,
return NULL;
}

uct_query_md_resources (&resources, &resource_count);

mca_btl_uct_component.module_count = 0;

#if UCT_API >= UCT_VERSION(1, 7)
uct_component_h *components;
unsigned num_components;

ucs_status = uct_query_components(&components, &num_components);
if (UCS_OK != ucs_status) {
BTL_ERROR(("could not query UCT components"));
return NULL;
}

/* generate all suitable btl modules */
for (unsigned i = 0 ; i < num_components ; ++i) {
rc = mca_btl_uct_component_process_uct_component (components[i], allowed_ifaces);
if (OPAL_SUCCESS != rc) {
break;
}
}

uct_release_component_list (components);

#else /* UCT 1.6 and older */

uct_query_md_resources (&resources, &resource_count);

/* generate all suitable btl modules */
for (unsigned i = 0 ; i < resource_count ; ++i) {
rc = mca_btl_uct_component_process_uct_md (resources + i, allowed_ifaces);
Expand All @@ -443,9 +517,11 @@ static mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules,
}
}

opal_argv_free (allowed_ifaces);
uct_release_md_resource_list (resources);

#endif /* UCT_API >= UCT_VERSION(1, 7) */

opal_argv_free (allowed_ifaces);
mca_btl_uct_modex_send ();

/* pass module array back to caller */
Expand Down
4 changes: 2 additions & 2 deletions opal/mca/btl/uct/btl_uct_rdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ int mca_btl_uct_get (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi

BTL_VERBOSE(("get issued. status = %d", ucs_status));

uct_rkey_release (&rkey);
mca_btl_uct_rkey_release (uct_btl, &rkey);

return OPAL_LIKELY(UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERR_RESOURCE_BUSY;
}
Expand Down Expand Up @@ -237,7 +237,7 @@ int mca_btl_uct_put (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi
mca_btl_uct_uct_completion_release (comp);
}

uct_rkey_release (&rkey);
mca_btl_uct_rkey_release (uct_btl, &rkey);

return OPAL_LIKELY(UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERR_RESOURCE_BUSY;
}
Expand Down
14 changes: 14 additions & 0 deletions opal/mca/btl/uct/btl_uct_rdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,22 @@ static inline int mca_btl_uct_get_rkey (mca_btl_uct_module_t *module,
return rc;
}

#if UCT_API >= UCT_VERSION(1, 7)
ucs_status = uct_rkey_unpack (module->uct_component, (void *) remote_handle, rkey);
#else
ucs_status = uct_rkey_unpack ((void *) remote_handle, rkey);
#endif
return (UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERROR;
}

static inline void mca_btl_uct_rkey_release (mca_btl_uct_module_t *uct_btl, uct_rkey_bundle_t *rkey)
{
#if UCT_API >= UCT_VERSION(1, 7)
uct_rkey_release (uct_btl->uct_component, rkey);
#else
(void) uct_btl;
uct_rkey_release (rkey);
#endif
}

#endif /* !defined(BTL_UCT_RDMA_H) */
6 changes: 6 additions & 0 deletions opal/mca/btl/uct/btl_uct_tl.c
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,13 @@ static int mca_btl_uct_evaluate_tl (mca_btl_uct_module_t *module, mca_btl_uct_tl
* come up with a better estimate. */

/* UCT bandwidth is in bytes/sec, BTL is in MB/sec */
#if UCT_API >= UCT_VERSION(1, 7)
module->super.btl_bandwidth = (uint32_t) ((MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth.dedicated +
MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth.shared /
(opal_process_info.num_local_peers + 1)) / 1048576.0);
#else
module->super.btl_bandwidth = (uint32_t) (MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth / 1048576.0);
#endif
/* TODO -- figure out how to translate UCT latency to us */
module->super.btl_latency = 1;
}
Expand Down
37 changes: 37 additions & 0 deletions opal/mca/btl/uct/configure.m4
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
# All rights reserved.
# Copyright (c) 2018 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
# Copyright (c) 2019 Triad National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
Expand All @@ -35,6 +37,41 @@ AC_DEFUN([MCA_opal_btl_uct_CONFIG],[
OMPI_CHECK_UCX([btl_uct],
[btl_uct_happy="yes"],
[btl_uct_happy="no"])
dnl
dnl check UCT version. UCT API can change at any given release
dnl so we only allow compiling against ones we know work.
dnl
AC_ARG_ENABLE([uct-version-check],
[AC_HELP_STRING([--enable-uct-version-check],
[enable UCT version check (default: enabled)])])
AC_MSG_CHECKING([check uct version])
if test "$enable_uct_version_check" != "no"; then
AC_MSG_RESULT([yes])
else
AC_MSG_RESULT([no])
fi

max_allowed_uct_major=1
max_allowed_uct_minor=7
if test "$btl_uct_happy" = "yes" && test "$enable_uct_version_check" != "no"; then
AC_MSG_CHECKING([UCT version compatibility])
OPAL_VAR_SCOPE_PUSH([CPPFLAGS_save])
CPPFLAGS_save="$CPPFLAGS"
CPPFLAGS="$CPPFLAGS $btl_uct_CPPFLAGS"
AC_PREPROC_IFELSE([AC_LANG_PROGRAM([#include <uct/api/version.h>
#if (UCT_VERNO_MAJOR > $max_allowed_uct_major)
#error "UCT MAJOR VERNO > $max_allowed_uct_major"
#endif
#if (UCT_VERNO_MINOR > $max_allowed_uct_minor)
#error "UCT MINOR VERNO > $max_allowed_uct_minor"
#endif], [])],
[AC_MSG_RESULT([UCT version compatible])],
[AC_MSG_RESULT([UCT version not compatible - need UCX $max_allowed_uct_major.$max_allowed_uct_minor or older])
btl_uct_happy="no"])
CPPFLAGS="$CPPFLAGS_save"
OPAL_VAR_SCOPE_POP
fi

if test "$btl_uct_happy" = "yes" ; then
OPAL_VAR_SCOPE_PUSH([CPPFLAGS_save])

Expand Down