Skip to content

Commit 61e00bb

Browse files
committed
UCS/TOPO/TEST: Don't clean the global topo state during gtest
UCT tests that use resources rely on sys_device values created during test initialization.
1 parent d3ff211 commit 61e00bb

File tree

3 files changed

+107
-11
lines changed

3 files changed

+107
-11
lines changed

src/ucs/sys/topo/base/topo.c

Lines changed: 76 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,12 @@ typedef struct ucs_topo_global_ctx {
102102
} ucs_topo_global_ctx_t;
103103

104104

105+
struct ucs_global_state {
106+
unsigned num_devices;
107+
ucs_topo_sys_device_info_t devices[];
108+
};
109+
110+
105111
const ucs_sys_dev_distance_t ucs_topo_default_distance = {
106112
.latency = 0,
107113
.bandwidth = INFINITY
@@ -1000,6 +1006,75 @@ void ucs_topo_print_info(FILE *stream)
10001006
}
10011007
}
10021008

1009+
static void ucs_topo_release_devices()
1010+
{
1011+
ucs_topo_sys_device_info_t *device;
1012+
1013+
while (ucs_topo_global_ctx.num_devices-- > 0) {
1014+
device = &ucs_topo_global_ctx.devices[ucs_topo_global_ctx.num_devices];
1015+
ucs_free(device->name);
1016+
}
1017+
}
1018+
1019+
ucs_global_state_t *ucs_topo_extract_state(void)
1020+
{
1021+
ucs_global_state_t *state;
1022+
size_t devices_size;
1023+
1024+
devices_size = sizeof(ucs_topo_sys_device_info_t) *
1025+
ucs_topo_global_ctx.num_devices;
1026+
1027+
state = ucs_malloc(sizeof(*state) + devices_size, "ucs_global_state_t");
1028+
if (state == NULL) {
1029+
return NULL;
1030+
}
1031+
1032+
ucs_spin_lock(&ucs_topo_global_ctx.lock);
1033+
1034+
memcpy(state->devices, ucs_topo_global_ctx.devices, devices_size);
1035+
state->num_devices = ucs_topo_global_ctx.num_devices;
1036+
1037+
ucs_topo_global_ctx.num_devices = 0;
1038+
kh_clear(bus_to_sys_dev, &ucs_topo_global_ctx.bus_to_sys_dev_hash);
1039+
1040+
ucs_spin_unlock(&ucs_topo_global_ctx.lock);
1041+
1042+
return state;
1043+
}
1044+
1045+
/*
 * Replace the contents of the global topology context with a previously
 * extracted state.
 *
 * Devices currently registered in the global context are released first,
 * then the saved entries are copied back and the bus-id hash is rebuilt so
 * that each bus id maps to the index of its device. The state object itself
 * is freed before returning, so the caller must not use it afterwards.
 *
 * @param [in] state  State returned by ucs_topo_extract_state(). A NULL
 *                    pointer (e.g. from a failed extraction) is ignored and
 *                    the global context is left unchanged.
 */
void ucs_topo_restore_state(ucs_global_state_t *state)
{
    const ucs_topo_sys_device_info_t *device;
    ucs_sys_device_t sys_dev;
    int kh_put_status;
    khiter_t hash_it;

    if (state == NULL) {
        /* Nothing was saved, so there is nothing to restore */
        return;
    }

    ucs_spin_lock(&ucs_topo_global_ctx.lock);

    /* Drop whatever was registered since the state was extracted */
    ucs_topo_release_devices();

    memcpy(ucs_topo_global_ctx.devices, state->devices,
           sizeof(ucs_topo_sys_device_info_t) * state->num_devices);
    ucs_topo_global_ctx.num_devices = state->num_devices;

    /* Create the hash table */
    kh_clear(bus_to_sys_dev, &ucs_topo_global_ctx.bus_to_sys_dev_hash);
    for (sys_dev = 0; sys_dev < ucs_topo_global_ctx.num_devices; ++sys_dev) {
        device  = &ucs_topo_global_ctx.devices[sys_dev];
        hash_it = kh_put(bus_to_sys_dev,
                         &ucs_topo_global_ctx.bus_to_sys_dev_hash,
                         ucs_topo_get_bus_id_bit_repr(&device->bus_id),
                         &kh_put_status);
        /* Every bus id must be inserted as a new key */
        ucs_assert((kh_put_status == UCS_KH_PUT_BUCKET_EMPTY) ||
                   (kh_put_status == UCS_KH_PUT_BUCKET_CLEAR));
        kh_val(&ucs_topo_global_ctx.bus_to_sys_dev_hash, hash_it) = sys_dev;
    }

    ucs_spin_unlock(&ucs_topo_global_ctx.lock);

    /* The device entries are owned by the global context again; only the
     * container is released here */
    ucs_free(state);
}
1077+
10031078
static ucs_sys_topo_provider_t ucs_sys_topo_provider_sysfs = {
10041079
.name = "sysfs",
10051080
.ops = {
@@ -1021,16 +1096,10 @@ void ucs_topo_init()
10211096

10221097
void ucs_topo_cleanup()
10231098
{
1024-
ucs_topo_sys_device_info_t *device;
1025-
10261099
ucs_list_del(&ucs_sys_topo_provider_sysfs.list);
10271100
ucs_list_del(&ucs_sys_topo_provider_default.list);
10281101

1029-
while (ucs_topo_global_ctx.num_devices-- > 0) {
1030-
device = &ucs_topo_global_ctx.devices[ucs_topo_global_ctx.num_devices];
1031-
ucs_free(device->name);
1032-
}
1033-
1102+
ucs_topo_release_devices();
10341103
kh_destroy_inplace(bus_to_sys_dev,
10351104
&ucs_topo_global_ctx.bus_to_sys_dev_hash);
10361105
ucs_spinlock_destroy(&ucs_topo_global_ctx.lock);

src/ucs/sys/topo/base/topo.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,13 @@ typedef struct ucs_sys_bus_id {
4545
typedef uint8_t ucs_sys_device_t;
4646

4747

48+
/**
49+
* @ingroup UCS_RESOURCE
50+
* Global state of the topology subsystem.
51+
*/
52+
typedef struct ucs_global_state ucs_global_state_t;
53+
54+
4855
/*
4956
* Captures the estimated latency and bandwidth between two system devices
5057
* referred by ucs_sys_device_t handle.
@@ -323,6 +330,23 @@ unsigned ucs_topo_num_devices(void);
323330
void ucs_topo_print_info(FILE *stream);
324331

325332

333+
/**
334+
* Extract the state of the topology subsystem and clear the global context.
335+
*
336+
* @return A pointer to the saved state of the topology subsystem.
337+
*/
338+
ucs_global_state_t *ucs_topo_extract_state(void);
339+
340+
341+
/**
342+
* Restore the state of the topology subsystem, overriding the current global
343+
* context.
344+
*
345+
* @param [in] state A pointer to the saved state of the topology subsystem.
346+
*/
347+
void ucs_topo_restore_state(ucs_global_state_t *state);
348+
349+
326350
/**
327351
* Initialize UCS topology subsystem.
328352
*/

test/gtest/ucs/test_topo.cc

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ static std::string get_sysfs_device_path(const std::string &bdf)
2525
class test_topo : public ucs::test {
2626
protected:
2727
std::vector<std::string> m_hcas, m_gpus, m_dmas;
28+
ucs_global_state_t *m_topo_state;
2829

2930
ucs_sys_device_t
3031
register_device(const std::string &name, const std::string &bdf)
@@ -75,14 +76,12 @@ class test_topo : public ucs::test {
7576
virtual void init()
7677
{
7778
ucs::test::init();
78-
ucs_topo_cleanup();
79-
ucs_topo_init();
79+
m_topo_state = ucs_topo_extract_state();
8080
}
8181

8282
virtual void cleanup()
8383
{
84-
ucs_topo_cleanup();
85-
ucs_topo_init();
84+
ucs_topo_restore_state(m_topo_state);
8685
ucs::test::cleanup();
8786
}
8887
};
@@ -149,7 +148,11 @@ UCS_TEST_F(test_topo, get_distance) {
149148
}
150149

151150
UCS_TEST_F(test_topo, print_info) {
151+
// Restore the state to print the info
152+
ucs_topo_restore_state(m_topo_state);
152153
ucs_topo_print_info(stdout);
154+
// Extract the state again
155+
m_topo_state = ucs_topo_extract_state();
153156
}
154157

155158
UCS_TEST_F(test_topo, bdf_name) {

0 commit comments

Comments
 (0)