Skip to content

Commit

Permalink
linux: add HWLOC_USE_NUMA_DISTANCES envvar to disable new node locali…
Browse files Browse the repository at this point in the history
…ty heuristics

1) Some SLIT tables are buggy for NVDIMM nodes (asymmetric and wrong).

2) Current Linux kernels (at least up to 5.3) may create conflicting node
target/initiators when proximity domains and OS node indexes are different
(e.g. when PXM 0 and 1 in CPU0 are nodes 0 and 2 in Linux on dual-CLX platforms in SNC mode).

This envvar will allow working around such hardware/software bugs.
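By default, the envvar is 7, which means SLIT is gathered and both locality
heuristics are enabled: the distance-based locality of CPU-less nodes
(affected by (1)) and the use of node target/initiator information (affected by (2)).
Removing bit 0 disables SLIT gathering entirely, provided bit 1 is cleared as well (bit 1 implies bit 0).
Removing bit 1 disables the distance-based heuristic affected by (1).
Removing bit 2 disables the target/initiator heuristic affected by (2).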

Signed-off-by: Brice Goglin <Brice.Goglin@inria.fr>
  • Loading branch information
bgoglin committed Sep 18, 2019
1 parent d0cea3f commit d944f97
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 6 deletions.
11 changes: 11 additions & 0 deletions doc/hwloc.doxy
Expand Up @@ -1002,6 +1002,17 @@ following environment variables.
actual displaying of these error messages.
</dd>

<dt>HWLOC_USE_NUMA_DISTANCES=7</dt>
<dd>enables or disables the use of NUMA distances and of memory
target/initiator information.
Both may be used to improve the locality of NUMA nodes, especially
CPU-less nodes.
Each bit in the value of this environment variable enables a different feature:
Bit 0 enables the gathering of NUMA distances from the operating system.
Bit 1 further enables the use of NUMA distances to improve the
locality of CPU-less nodes (setting Bit 1 implies Bit 0).
Bit 2 enables the use of target/initiator information.
The default value 7 enables all of these features.
</dd>

<dt>HWLOC_GROUPING=1</dt>
<dd>enables or disables objects grouping based on distances.
By default, hwloc uses distance matrices between objects (either read
Expand Down
34 changes: 28 additions & 6 deletions hwloc/topology-linux.c
Expand Up @@ -57,6 +57,9 @@ struct hwloc_linux_backend_data_s {
int is_knl;
int is_amd_with_CU;
int use_dt;
int use_numa_distances;
int use_numa_distances_for_cpuless;
int use_numa_initiators;
struct utsname utsname; /* fields contain \0 when unknown */
int fallback_nbprocessors; /* only used in hwloc_linux_fallback_pu_level(), maybe be <= 0 (error) earlier */
unsigned pagesize;
Expand Down Expand Up @@ -4008,7 +4011,9 @@ annotate_sysfsnode(struct hwloc_topology *topology,
topology->support.discovery->numa_memory = 1;
topology->support.discovery->disallowed_numa = 1;

if (nbnodes >= 2 && !hwloc_parse_nodes_distances(path, nbnodes, indexes, distances, data->root_fd)) {
if (nbnodes >= 2
&& data->use_numa_distances
&& !hwloc_parse_nodes_distances(path, nbnodes, indexes, distances, data->root_fd)) {
hwloc_internal_distances_add(topology, "NUMALatency", nbnodes, nodes, distances,
HWLOC_DISTANCES_KIND_FROM_OS|HWLOC_DISTANCES_KIND_MEANS_LATENCY,
HWLOC_DISTANCES_ADD_FLAG_GROUP);
Expand Down Expand Up @@ -4188,6 +4193,10 @@ look_sysfsnode(struct hwloc_topology *topology,
/* failed to read/create some nodes, don't bother reading/fixing
* a distance matrix that would likely be wrong anyway.
*/
data->use_numa_distances = 0;
}

if (!data->use_numa_distances) {
free(distances);
distances = NULL;
}
Expand Down Expand Up @@ -4223,7 +4232,8 @@ look_sysfsnode(struct hwloc_topology *topology,
if (node && !hwloc_bitmap_iszero(node->cpuset)) {
hwloc_obj_t tree;
/* update from HMAT initiators if any */
read_node_initiators(data, node, nbnodes, nodes, path);
if (data->use_numa_initiators)
read_node_initiators(data, node, nbnodes, nodes, path);

tree = node;
if (need_memcaches)
Expand All @@ -4241,12 +4251,13 @@ look_sysfsnode(struct hwloc_topology *topology,
if (node && hwloc_bitmap_iszero(node->cpuset)) {
hwloc_obj_t tree;
/* update from HMAT initiators if any */
if (!read_node_initiators(data, node, nbnodes, nodes, path))
if (!hwloc_bitmap_iszero(node->cpuset))
goto fixed;
if (data->use_numa_initiators)
if (!read_node_initiators(data, node, nbnodes, nodes, path))
if (!hwloc_bitmap_iszero(node->cpuset))
goto fixed;

/* if HMAT didn't help, try to find locality of CPU-less NUMA nodes by looking at their distances */
if (distances)
if (distances && data->use_numa_distances_for_cpuless)
fixup_cpuless_node_locality_from_distances(i, nbnodes, nodes, distances);

fixed:
Expand Down Expand Up @@ -6884,6 +6895,17 @@ hwloc_linux_component_instantiate(struct hwloc_topology *topology,
if (!data->dumped_hwdata_dirname)
data->dumped_hwdata_dirname = (char *) RUNSTATEDIR "/hwloc/";

data->use_numa_distances = 1;
data->use_numa_distances_for_cpuless = 1;
data->use_numa_initiators = 1;
env = getenv("HWLOC_USE_NUMA_DISTANCES");
if (env) {
unsigned val = atoi(env);
data->use_numa_distances = !!(val & 3); /* 2 implies 1 */
data->use_numa_distances_for_cpuless = !!(val & 2);
data->use_numa_initiators = !!(val & 4);
}

env = getenv("HWLOC_USE_DT");
if (env)
data->use_dt = atoi(env);
Expand Down

0 comments on commit d944f97

Please sign in to comment.