Skip to content

Commit 3c7b2dd

Browse files
committed
UCP/WIREUP/IB: Fix error message when FLID is not available
1 parent d1cf46f commit 3c7b2dd

File tree

17 files changed

+299
-128
lines changed

17 files changed

+299
-128
lines changed

src/ucp/wireup/select.c

Lines changed: 26 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -386,8 +386,7 @@ static UCS_F_NOINLINE ucs_status_t ucp_wireup_select_transport(
386386
const ucp_wireup_criteria_t *criteria, ucp_tl_bitmap_t tl_bitmap,
387387
uint64_t remote_md_map, uint64_t local_dev_bitmap,
388388
uint64_t remote_dev_bitmap, int show_error,
389-
ucp_wireup_select_info_t *select_info, char *info_str,
390-
size_t info_str_size)
389+
ucp_wireup_select_info_t *select_info)
391390
{
392391
UCS_STRING_BUFFER_ONSTACK(missing_flags_str,
393392
UCP_WIREUP_MAX_FLAGS_STRING_SIZE);
@@ -410,6 +409,7 @@ static UCS_F_NOINLINE ucs_status_t ucp_wireup_select_transport(
410409
ucp_rsc_index_t dev_index;
411410
ucp_lane_index_t lane;
412411
char tls_info[256];
412+
char uct_info[256];
413413
char *p, *endp;
414414
uct_iface_attr_t *iface_attr;
415415
uct_md_attr_v2_t *md_attr;
@@ -600,13 +600,15 @@ static UCS_F_NOINLINE ucs_status_t ucp_wireup_select_transport(
600600
UCS_STATIC_BITMAP_AND_INPLACE(&rsc_addr_index_map, addr_index_map);
601601
}
602602

603+
/* ucp_wireup_is_reachable() can fail without filling uct_info string if
604+
* none of the remote transports match the local one */
605+
snprintf(uct_info, sizeof(uct_info), "not available");
603606
is_reachable = 0;
604-
605607
UCS_STATIC_BITMAP_FOR_EACH_BIT(addr_index, &rsc_addr_index_map) {
606608
ae = &address->address_list[addr_index];
607609
if (!ucp_wireup_is_reachable(ep, select_params->ep_init_flags,
608-
rsc_index, ae, info_str,
609-
info_str_size)) {
610+
rsc_index, ae, uct_info,
611+
sizeof(uct_info))) {
610612
/* Must be reachable device address, on same transport */
611613
continue;
612614
}
@@ -632,9 +634,8 @@ static UCS_F_NOINLINE ucs_status_t ucp_wireup_select_transport(
632634
/* If a local resource cannot reach any of the remote addresses,
633635
* generate debug message. */
634636
if (!is_reachable) {
635-
snprintf(p, endp - p, UCT_TL_RESOURCE_DESC_FMT" - %s, ",
636-
UCT_TL_RESOURCE_DESC_ARG(resource),
637-
ucs_status_string(UCS_ERR_UNREACHABLE));
637+
snprintf(p, endp - p, UCT_TL_RESOURCE_DESC_FMT " - %s, ",
638+
UCT_TL_RESOURCE_DESC_ARG(resource), uct_info);
638639
p += strlen(p);
639640
}
640641
}
@@ -929,7 +930,7 @@ static UCS_F_NOINLINE ucs_status_t ucp_wireup_add_memaccess_lanes(
929930
status = ucp_wireup_select_transport(select_ctx, select_params,
930931
&mem_criteria, mem_type_tl_bitmap,
931932
remote_md_map, UINT64_MAX, UINT64_MAX,
932-
!allow_am, &select_info, NULL, 0);
933+
!allow_am, &select_info);
933934
if (status == UCS_OK) {
934935
/* Add to the list of lanes */
935936
status = ucp_wireup_add_lane(select_params, &select_info, lane_type,
@@ -975,8 +976,7 @@ static UCS_F_NOINLINE ucs_status_t ucp_wireup_add_memaccess_lanes(
975976
status = ucp_wireup_select_transport(select_ctx, select_params,
976977
&mem_criteria, tl_bitmap,
977978
remote_md_map, UINT64_MAX,
978-
UINT64_MAX, 0, &select_info,
979-
NULL, 0);
979+
UINT64_MAX, 0, &select_info);
980980
/* Break if: */
981981
/* - transport selection wasn't OK */
982982
if ((status != UCS_OK) ||
@@ -1447,8 +1447,7 @@ ucp_wireup_is_am_required(const ucp_wireup_select_params_t *select_params,
14471447
static ucs_status_t
14481448
ucp_wireup_add_am_lane(const ucp_wireup_select_params_t *select_params,
14491449
ucp_wireup_select_info_t *am_info,
1450-
ucp_wireup_select_context_t *select_ctx,
1451-
char *info_string, size_t info_string_length)
1450+
ucp_wireup_select_context_t *select_ctx)
14521451
{
14531452
ucp_worker_h worker = select_params->ep->worker;
14541453
ucp_tl_bitmap_t tl_bitmap = select_params->tl_bitmap;
@@ -1488,8 +1487,7 @@ ucp_wireup_add_am_lane(const ucp_wireup_select_params_t *select_params,
14881487
status = ucp_wireup_select_transport(select_ctx, select_params,
14891488
&criteria, tl_bitmap, UINT64_MAX,
14901489
UINT64_MAX, UINT64_MAX, 1,
1491-
am_info, info_string,
1492-
info_string_length);
1490+
am_info);
14931491
if (status != UCS_OK) {
14941492
return status;
14951493
}
@@ -1681,11 +1679,11 @@ ucp_wireup_add_bw_lanes_a2a(const ucp_wireup_select_params_t *select_params,
16811679
ucs_for_each_bit(remote_dev_index, remote_dev_bitmap) {
16821680
sinfo = ucs_array_append(&sinfo_array, break);
16831681
status = ucp_wireup_select_transport(select_ctx, select_params,
1684-
&bw_info->criteria,
1685-
tl_bitmap, UINT64_MAX,
1682+
&bw_info->criteria, tl_bitmap,
1683+
UINT64_MAX,
16861684
UCS_BIT(local_dev_index),
1687-
UCS_BIT(remote_dev_index),
1688-
0, sinfo, NULL, 0);
1685+
UCS_BIT(remote_dev_index), 0,
1686+
sinfo);
16891687
if (status != UCS_OK) {
16901688
ucs_array_pop_back(&sinfo_array);
16911689
continue;
@@ -1781,8 +1779,7 @@ static int ucp_wireup_add_bw_lanes_pairwise(
17811779
status = ucp_wireup_select_transport(select_ctx, select_params,
17821780
&bw_info->criteria, tl_bitmap,
17831781
UINT64_MAX, local_dev_bitmap,
1784-
remote_dev_bitmap, 0, sinfo,
1785-
NULL, 0);
1782+
remote_dev_bitmap, 0, sinfo);
17861783
if (status != UCS_OK) {
17871784
ucs_array_pop_back(&sinfo_array);
17881785
break;
@@ -2262,7 +2259,7 @@ ucp_wireup_add_tag_lane(const ucp_wireup_select_params_t *select_params,
22622259
status = ucp_wireup_select_transport(select_ctx, select_params, &criteria,
22632260
ucp_tl_bitmap_max, UINT64_MAX,
22642261
UINT64_MAX, UINT64_MAX, 0,
2265-
&select_info, NULL, 0);
2262+
&select_info);
22662263
if ((status == UCS_OK) &&
22672264
(ucp_score_cmp(select_info.score,
22682265
am_info->score) >= 0)) {
@@ -2414,7 +2411,7 @@ ucp_wireup_add_keepalive_lane(const ucp_wireup_select_params_t *select_params,
24142411

24152412
status = ucp_wireup_select_transport(select_ctx, select_params, &criteria,
24162413
*tl_bitmap, UINT64_MAX, UINT64_MAX,
2417-
UINT64_MAX, 0, &select_info, NULL, 0);
2414+
UINT64_MAX, 0, &select_info);
24182415
if (status == UCS_OK) {
24192416
return ucp_wireup_add_lane(select_params, &select_info,
24202417
UCP_LANE_TYPE_KEEPALIVE, /* show error */ 1,
@@ -2436,8 +2433,7 @@ ucp_wireup_select_context_init(ucp_wireup_select_context_t *select_ctx)
24362433
static UCS_F_NOINLINE ucs_status_t
24372434
ucp_wireup_search_lanes(const ucp_wireup_select_params_t *select_params,
24382435
ucp_err_handling_mode_t err_mode,
2439-
ucp_wireup_select_context_t *select_ctx,
2440-
char *info_string, size_t info_string_length)
2436+
ucp_wireup_select_context_t *select_ctx)
24412437
{
24422438
ucp_wireup_select_info_t am_info;
24432439
ucs_status_t status;
@@ -2463,8 +2459,7 @@ ucp_wireup_search_lanes(const ucp_wireup_select_params_t *select_params,
24632459

24642460
/* Add AM lane only after RMA/AMO was selected to be aware
24652461
* about whether they need emulation over AM or not */
2466-
status = ucp_wireup_add_am_lane(select_params, &am_info, select_ctx,
2467-
info_string, info_string_length);
2462+
status = ucp_wireup_add_am_lane(select_params, &am_info, select_ctx);
24682463
if (status != UCS_OK) {
24692464
return status;
24702465
}
@@ -2731,7 +2726,6 @@ ucp_wireup_select_lanes(ucp_ep_h ep, unsigned ep_init_flags,
27312726
ucp_tl_bitmap_t scalable_tl_bitmap = worker->scalable_tl_bitmap;
27322727
/* TODO: remove initialization after all ucp_wireup_add_X_lanes functions
27332728
will support specifying a reason */
2734-
char wireup_info[256] = {0};
27352729
ucp_wireup_select_context_t select_ctx;
27362730
ucp_wireup_select_params_t select_params;
27372731
ucs_status_t status;
@@ -2742,8 +2736,7 @@ ucp_wireup_select_lanes(ucp_ep_h ep, unsigned ep_init_flags,
27422736
ucp_wireup_select_params_init(&select_params, ep, ep_init_flags,
27432737
remote_address, scalable_tl_bitmap, 0);
27442738
status = ucp_wireup_search_lanes(&select_params, key->err_mode,
2745-
&select_ctx, wireup_info,
2746-
sizeof(wireup_info));
2739+
&select_ctx);
27472740
if (status == UCS_OK) {
27482741
goto out;
27492742
}
@@ -2756,13 +2749,8 @@ ucp_wireup_select_lanes(ucp_ep_h ep, unsigned ep_init_flags,
27562749
ucp_wireup_select_params_init(&select_params, ep, ep_init_flags,
27572750
remote_address, tl_bitmap, show_error);
27582751
status = ucp_wireup_search_lanes(&select_params, key->err_mode,
2759-
&select_ctx, wireup_info,
2760-
sizeof(wireup_info));
2752+
&select_ctx);
27612753
if (status != UCS_OK) {
2762-
if (wireup_info[0] != '\0') {
2763-
ucs_diag("destination is unreachable [%s]", wireup_info);
2764-
}
2765-
27662754
return status;
27672755
}
27682756

@@ -2803,7 +2791,7 @@ ucp_wireup_select_aux_transport(ucp_ep_h ep, unsigned ep_init_flags,
28032791
status = ucp_wireup_select_transport(&select_ctx, &select_params, &criteria,
28042792
ucp_tl_bitmap_max, UINT64_MAX,
28052793
UINT64_MAX, UINT64_MAX, 0,
2806-
select_info, NULL, 0);
2794+
select_info);
28072795
if (status == UCS_OK) {
28082796
return UCS_OK;
28092797
}
@@ -2813,6 +2801,5 @@ ucp_wireup_select_aux_transport(ucp_ep_h ep, unsigned ep_init_flags,
28132801
ucp_wireup_fill_aux_criteria(&criteria, ep_init_flags, 0);
28142802
return ucp_wireup_select_transport(&select_ctx, &select_params, &criteria,
28152803
ucp_tl_bitmap_max, UINT64_MAX,
2816-
UINT64_MAX, UINT64_MAX, 1, select_info,
2817-
NULL, 0);
2804+
UINT64_MAX, UINT64_MAX, 1, select_info);
28182805
}

0 commit comments

Comments
 (0)