Skip to content

Commit fe9b012

Browse files
committed
UCT/IB: Support VRF tables for RoCE reachability check
Signed-off-by: Dmitrii Gabor <dmitryg1709@gmail.com>
1 parent 5e0979c commit fe9b012

File tree

6 files changed

+73
-15
lines changed

6 files changed

+73
-15
lines changed

AUTHORS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ Corey J. Nolet <cjnolet@gmail.com>
2323
David Wootton <dwootton@us.ibm.com>
2424
Devendar Bureddy <devendar@mellanox.com>
2525
Devesh Sharma <devesh.sharma@broadcom.com>
26+
Dmitrii Gabor <dmitryg1709@gmail.com>
2627
Dmitry Gladkov <dmitrygla@mellanox.com>
2728
Doug Jacobsen <dmjacobsen@lbl.gov>
2829
Edgar Gabriel <edgar.gabriel@amd.com>

src/ucs/sys/sys.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,18 @@ uint32_t ucs_file_checksum(const char *filename)
165165
return crc;
166166
}
167167

168+
ucs_status_t ucs_ifname_to_index(const char *ndev_name, unsigned *ndev_index_p)
169+
{
170+
unsigned ndev_index = if_nametoindex(ndev_name);
171+
if (ndev_index == 0) {
172+
ucs_error("failed to get interface index for %s: %m", ndev_name);
173+
return UCS_ERR_IO_ERROR;
174+
}
175+
176+
*ndev_index_p = ndev_index;
177+
return UCS_OK;
178+
}
179+
168180
static uint64_t ucs_get_mac_address()
169181
{
170182
static uint64_t mac_address = 0;

src/ucs/sys/sys.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,12 @@ const char *ucs_get_exe();
186186
uint32_t ucs_file_checksum(const char *filename);
187187

188188

189+
/**
190+
* Get interface index for a given interface name.
191+
*/
192+
ucs_status_t ucs_ifname_to_index(const char *ndev_name, unsigned *ndev_index_p);
193+
194+
189195
/**
190196
* Get a globally unique identifier of the machine running the current process.
191197
*/

src/uct/ib/base/ib_device.c

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
#include <rdma/rdma_netlink.h>
3131
#endif
3232

33+
#define UCT_IB_DEVICE_LOOPBACK_NDEV_INDEX_INVALID 0
34+
3335

3436
/* This table is according to "Encoding for RNR NAK Timer Field"
3537
* in IBTA specification */
@@ -109,7 +111,7 @@ uct_ib_device_to_ndev_cache_hash_equal(uct_ib_device_to_ndev_key_t key1,
109111
(key1.guid == key2.guid);
110112
}
111113

112-
KHASH_INIT(uct_ib_device_to_ndev, uct_ib_device_to_ndev_key_t, int, 1,
114+
KHASH_INIT(uct_ib_device_to_ndev, uct_ib_device_to_ndev_key_t, unsigned, 1,
113115
uct_ib_device_to_ndev_cache_hash_func,
114116
uct_ib_device_to_ndev_cache_hash_equal);
115117

@@ -1510,9 +1512,25 @@ uct_ib_device_get_roce_ndev_name(uct_ib_device_t *dev, uint8_t port_num,
15101512
return UCS_OK;
15111513
}
15121514

1515+
ucs_status_t uct_ib_iface_get_loopback_ndev_index(unsigned *ndev_index_p)
1516+
{
1517+
static unsigned loopback_ndev_index = UCT_IB_DEVICE_LOOPBACK_NDEV_INDEX_INVALID;
1518+
ucs_status_t status;
1519+
1520+
if (loopback_ndev_index == UCT_IB_DEVICE_LOOPBACK_NDEV_INDEX_INVALID) {
1521+
status = ucs_ifname_to_index("lo", &loopback_ndev_index);
1522+
if (status != UCS_OK) {
1523+
return status;
1524+
}
1525+
}
1526+
1527+
*ndev_index_p = loopback_ndev_index;
1528+
return UCS_OK;
1529+
}
1530+
15131531
ucs_status_t
15141532
uct_ib_device_get_roce_ndev_index(uct_ib_device_t *dev, uint8_t port_num,
1515-
uint8_t gid_index, int *ndev_index_p)
1533+
uint8_t gid_index, unsigned *ndev_index_p)
15161534
{
15171535
uct_ib_device_to_ndev_key_t ib_dev = {.guid = IBV_DEV_ATTR(dev, node_guid),
15181536
.port_num = port_num,
@@ -1521,9 +1539,9 @@ uct_ib_device_get_roce_ndev_index(uct_ib_device_t *dev, uint8_t port_num,
15211539
PTHREAD_MUTEX_INITIALIZER;
15221540
ucs_status_t status;
15231541
char ndev_name[IFNAMSIZ];
1524-
int ndev_index;
1542+
unsigned ndev_index;
15251543
khiter_t iter;
1526-
int khret;
1544+
unsigned khret;
15271545

15281546
pthread_mutex_lock(&uct_ib_device_to_ndev_cache_lock);
15291547
iter = kh_put(uct_ib_device_to_ndev, &ib_dev_to_ndev_map, ib_dev, &khret);
@@ -1539,11 +1557,8 @@ uct_ib_device_get_roce_ndev_index(uct_ib_device_t *dev, uint8_t port_num,
15391557
goto out_unlock;
15401558
}
15411559

1542-
ndev_index = if_nametoindex(ndev_name);
1543-
if (ndev_index == 0) {
1544-
ucs_error("failed to get interface index for %s (errno %d)",
1545-
ndev_name, errno);
1546-
status = UCS_ERR_IO_ERROR;
1560+
status = ucs_ifname_to_index(ndev_name, &ndev_index);
1561+
if (status != UCS_OK) {
15471562
goto out_unlock;
15481563
}
15491564

src/uct/ib/base/ib_device.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -409,9 +409,11 @@ ucs_status_t uct_ib_device_get_roce_ndev_name(uct_ib_device_t *dev,
409409
uint8_t gid_index,
410410
char *ndev_name, size_t max);
411411

412+
ucs_status_t uct_ib_iface_get_loopback_ndev_index(unsigned *ndev_index_p);
413+
412414
ucs_status_t
413415
uct_ib_device_get_roce_ndev_index(uct_ib_device_t *dev, uint8_t port_num,
414-
uint8_t gid_index, int *ndev_index_p);
416+
uint8_t gid_index, unsigned *ndev_index_p);
415417

416418
unsigned uct_ib_device_get_roce_lag_level(uct_ib_device_t *dev,
417419
uint8_t port_num,

src/uct/ib/base/ib_iface.c

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -692,18 +692,40 @@ uct_ib_iface_roce_is_routable(uct_ib_iface_t *iface, uint8_t gid_index,
692692
uct_ib_device_t *dev = uct_ib_iface_device(iface);
693693
uint8_t port_num = iface->config.port_num;
694694
char remote_str[128];
695-
int ndev_index;
695+
unsigned ndev_index, lo_ndev_index;
696696

697697
if (uct_ib_device_get_roce_ndev_index(dev, port_num, gid_index,
698698
&ndev_index) != UCS_OK) {
699-
uct_iface_fill_info_str_buf(params, "iface index is not found");
699+
uct_iface_fill_info_str_buf(params,
700+
"iface index is not found for "
701+
UCT_IB_IFACE_FMT ", gid index %u",
702+
UCT_IB_IFACE_ARG(iface), gid_index);
700703
return 0;
701704
}
702705

703706
if (!ucs_netlink_route_exists(ndev_index, sa_remote)) {
704-
uct_iface_fill_info_str_buf(params, "remote address %s is not routable",
705-
ucs_sockaddr_str(sa_remote, remote_str, 128));
706-
return 0;
707+
/* try to use loopback interface for reachability check, because it may
708+
* be used for routing when an interface with VRF is configured
709+
* and a RoCE IP interface uses this VRF table for routing.
710+
*/
711+
if (uct_ib_iface_get_loopback_ndev_index(&lo_ndev_index) != UCS_OK) {
712+
uct_iface_fill_info_str_buf(params,
713+
"loopback iface index is not found");
714+
return 0;
715+
}
716+
717+
if (!ucs_netlink_route_exists(lo_ndev_index, sa_remote)) {
718+
uct_iface_fill_info_str_buf(params,
719+
"remote address %s is not routable "
720+
"neither by interface "UCT_IB_IFACE_FMT
721+
" (ifname_index=%u) nor by loopback "
722+
"interface (ifname_index=%u)",
723+
ucs_sockaddr_str(sa_remote, remote_str,
724+
128),
725+
UCT_IB_IFACE_ARG(iface),
726+
ndev_index, lo_ndev_index);
727+
return 0;
728+
}
707729
}
708730

709731
return 1;

0 commit comments

Comments
 (0)