Skip to content

Commit 0a963b6

Browse files
committed
8371701: Add ability to set NUMA-affinity for threads
Reviewed-by: aboldtch, ayang
1 parent 8ae4ea8 commit 0a963b6

File tree

6 files changed

+103
-0
lines changed

6 files changed

+103
-0
lines changed

src/hotspot/os/aix/os_aix.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1747,6 +1747,9 @@ size_t os::pd_pretouch_memory(void* first, void* last, size_t page_size) {
17471747
return page_size;
17481748
}
17491749

1750+
// No-op on this platform: both arguments are accepted and ignored.
void os::numa_set_thread_affinity(Thread* thread, int node) {
  // Nothing to do.
}
1752+
17501753
// No-op on this platform: addr and bytes are accepted and ignored.
void os::numa_make_global(char* addr, size_t bytes) {
  // Nothing to do.
}
17521755

src/hotspot/os/bsd/os_bsd.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1581,6 +1581,9 @@ size_t os::pd_pretouch_memory(void* first, void* last, size_t page_size) {
15811581
return page_size;
15821582
}
15831583

1584+
// No-op on this platform: both arguments are accepted and ignored.
void os::numa_set_thread_affinity(Thread* thread, int node) {
  // Nothing to do.
}
1586+
15841587
// No-op on this platform: addr and bytes are accepted and ignored.
void os::numa_make_global(char* addr, size_t bytes) {
  // Nothing to do.
}
15861589

src/hotspot/os/linux/os_linux.cpp

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2995,6 +2995,10 @@ size_t os::pd_pretouch_memory(void* first, void* last, size_t page_size) {
29952995
return page_size;
29962996
}
29972997

2998+
// Platform-independent entry point for NUMA thread affinity.
// Looks up the OS-level thread id of 'thread' and forwards it, together with
// 'node', to os::Linux::numa_set_thread_affinity().
void os::numa_set_thread_affinity(Thread* thread, int node) {
  const pid_t tid = thread->osthread()->thread_id();
  os::Linux::numa_set_thread_affinity(tid, node);
}
3001+
29983002
// Forwards addr and bytes unchanged to os::Linux::numa_interleave_memory().
void os::numa_make_global(char* addr, size_t bytes) {
  os::Linux::numa_interleave_memory(addr, bytes);
}
@@ -3177,6 +3181,8 @@ bool os::Linux::libnuma_init() {
31773181
libnuma_dlsym(handle, "numa_set_bind_policy")));
31783182
set_numa_bitmask_isbitset(CAST_TO_FN_PTR(numa_bitmask_isbitset_func_t,
31793183
libnuma_dlsym(handle, "numa_bitmask_isbitset")));
3184+
set_numa_bitmask_clearbit(CAST_TO_FN_PTR(numa_bitmask_clearbit_func_t,
3185+
libnuma_dlsym(handle, "numa_bitmask_clearbit")));
31803186
set_numa_bitmask_equal(CAST_TO_FN_PTR(numa_bitmask_equal_func_t,
31813187
libnuma_dlsym(handle, "numa_bitmask_equal")));
31823188
set_numa_distance(CAST_TO_FN_PTR(numa_distance_func_t,
@@ -3191,20 +3197,32 @@ bool os::Linux::libnuma_init() {
31913197
libnuma_dlsym(handle, "numa_set_preferred")));
31923198
set_numa_get_run_node_mask(CAST_TO_FN_PTR(numa_get_run_node_mask_func_t,
31933199
libnuma_v2_dlsym(handle, "numa_get_run_node_mask")));
3200+
set_numa_sched_setaffinity(CAST_TO_FN_PTR(numa_sched_setaffinity_func_t,
3201+
libnuma_v2_dlsym(handle, "numa_sched_setaffinity")));
3202+
set_numa_allocate_cpumask(CAST_TO_FN_PTR(numa_allocate_cpumask_func_t,
3203+
libnuma_v2_dlsym(handle, "numa_allocate_cpumask")));
31943204

31953205
if (numa_available() != -1) {
31963206
set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle, "numa_all_nodes"));
31973207
set_numa_all_nodes_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_all_nodes_ptr"));
31983208
set_numa_nodes_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_nodes_ptr"));
3209+
set_numa_all_cpus_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_all_cpus_ptr"));
31993210
set_numa_interleave_bitmask(_numa_get_interleave_mask());
32003211
set_numa_membind_bitmask(_numa_get_membind());
32013212
set_numa_cpunodebind_bitmask(_numa_get_run_node_mask());
3213+
32023214
// Create an index -> node mapping, since nodes are not always consecutive
32033215
_nindex_to_node = new (mtInternal) GrowableArray<int>(0, mtInternal);
32043216
rebuild_nindex_to_node_map();
3217+
32053218
// Create a cpu -> node mapping
32063219
_cpu_to_node = new (mtInternal) GrowableArray<int>(0, mtInternal);
32073220
rebuild_cpu_to_node_map();
3221+
3222+
// Create a node -> CPUs mapping
3223+
_numa_affinity_masks = new (mtInternal) GrowableArray<struct bitmask*>(0, mtInternal);
3224+
build_numa_affinity_masks();
3225+
32083226
return true;
32093227
}
32103228
}
@@ -3240,6 +3258,42 @@ size_t os::Linux::default_guard_size(os::ThreadType thr_type) {
32403258
return ((thr_type == java_thread || thr_type == compiler_thread) ? 0 : os::vm_page_size());
32413259
}
32423260

3261+
void os::Linux::build_numa_affinity_masks() {
3262+
// We only build the affinity masks if running libnuma v2 (_numa_node_to_cpus_v2
3263+
// is available) and we have the affinity mask of the process when it started.
3264+
if (_numa_node_to_cpus_v2 == nullptr || _numa_all_cpus_ptr == nullptr) {
3265+
return;
3266+
}
3267+
3268+
// It's important that we respect any user configuration by removing the
3269+
// CPUs we're not allowed to run on from the affinity mask. For example,
3270+
// if the user runs the JVM with "numactl -C 0-1,4-5" on a machine with
3271+
// the following NUMA setup:
3272+
// NUMA 0: CPUs 0-3, NUMA 1: CPUs 4-7
3273+
// We expect to get the following affinity masks:
3274+
// Affinity masks: idx 0 = (0, 1), idx 1 = (4, 5)
3275+
3276+
const int num_nodes = get_existing_num_nodes();
3277+
const unsigned num_cpus = (unsigned)os::processor_count();
3278+
3279+
for (int i = 0; i < num_nodes; i++) {
3280+
struct bitmask* affinity_mask = _numa_allocate_cpumask();
3281+
3282+
// Fill the affinity mask with all CPUs belonging to NUMA node i
3283+
_numa_node_to_cpus_v2(i, affinity_mask);
3284+
3285+
// Clear the bits of all CPUs that the process is not allowed to
3286+
// execute tasks on
3287+
for (unsigned j = 0; j < num_cpus; j++) {
3288+
if (!_numa_bitmask_isbitset(_numa_all_cpus_ptr, j)) {
3289+
_numa_bitmask_clearbit(affinity_mask, j);
3290+
}
3291+
}
3292+
3293+
_numa_affinity_masks->push(affinity_mask);
3294+
}
3295+
}
3296+
32433297
void os::Linux::rebuild_nindex_to_node_map() {
32443298
int highest_node_number = Linux::numa_max_node();
32453299

@@ -3355,6 +3409,25 @@ int os::Linux::numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen)
33553409
return -1;
33563410
}
33573411

3412+
// Sets the CPU affinity of the thread identified by 'tid'.
//
//   tid  - OS-level thread id handed to libnuma's numa_sched_setaffinity.
//   node - index into _numa_affinity_masks, or -1 to restore the mask held in
//          _numa_all_cpus_ptr.
//
// This is best effort: the call is silently skipped when the required libnuma
// support is missing, when no affinity masks were built, or when 'node' does
// not index an existing mask. The result of numa_sched_setaffinity is not
// inspected.
void os::Linux::numa_set_thread_affinity(pid_t tid, int node) {
  // We only set affinity if running libnuma v2 (_numa_sched_setaffinity
  // is available) and the affinity masks have been built.
  if (_numa_sched_setaffinity == nullptr ||
      _numa_all_cpus_ptr == nullptr ||
      _numa_affinity_masks == nullptr ||
      _numa_affinity_masks->is_empty()) {
    return;
  }

  if (node == -1) {
    // If the node is -1, the affinity is reverted to the original affinity
    // of the thread when the VM was started
    _numa_sched_setaffinity(tid, _numa_all_cpus_ptr);
    return;
  }

  // Reject indices that do not refer to a built mask instead of indexing
  // past the end of the array.
  if (node < 0 || node >= _numa_affinity_masks->length()) {
    return;
  }

  // Normal case, set the affinity to the corresponding affinity mask
  _numa_sched_setaffinity(tid, _numa_affinity_masks->at(node));
}
3430+
33583431
int os::Linux::get_node_by_cpu(int cpu_id) {
33593432
if (cpu_to_node() != nullptr && cpu_id >= 0 && cpu_id < cpu_to_node()->length()) {
33603433
return cpu_to_node()->at(cpu_id);
@@ -3364,6 +3437,7 @@ int os::Linux::get_node_by_cpu(int cpu_id) {
33643437

33653438
GrowableArray<int>* os::Linux::_cpu_to_node;
33663439
GrowableArray<int>* os::Linux::_nindex_to_node;
3440+
GrowableArray<struct bitmask*>* os::Linux::_numa_affinity_masks;
33673441
os::Linux::sched_getcpu_func_t os::Linux::_sched_getcpu;
33683442
os::Linux::numa_node_to_cpus_func_t os::Linux::_numa_node_to_cpus;
33693443
os::Linux::numa_node_to_cpus_v2_func_t os::Linux::_numa_node_to_cpus_v2;
@@ -3375,17 +3449,21 @@ os::Linux::numa_interleave_memory_func_t os::Linux::_numa_interleave_memory;
33753449
os::Linux::numa_interleave_memory_v2_func_t os::Linux::_numa_interleave_memory_v2;
33763450
os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy;
33773451
os::Linux::numa_bitmask_isbitset_func_t os::Linux::_numa_bitmask_isbitset;
3452+
os::Linux::numa_bitmask_clearbit_func_t os::Linux::_numa_bitmask_clearbit;
33783453
os::Linux::numa_bitmask_equal_func_t os::Linux::_numa_bitmask_equal;
33793454
os::Linux::numa_distance_func_t os::Linux::_numa_distance;
33803455
os::Linux::numa_get_membind_func_t os::Linux::_numa_get_membind;
33813456
os::Linux::numa_get_interleave_mask_func_t os::Linux::_numa_get_interleave_mask;
33823457
os::Linux::numa_get_run_node_mask_func_t os::Linux::_numa_get_run_node_mask;
3458+
os::Linux::numa_sched_setaffinity_func_t os::Linux::_numa_sched_setaffinity;
3459+
os::Linux::numa_allocate_cpumask_func_t os::Linux::_numa_allocate_cpumask;
33833460
os::Linux::numa_move_pages_func_t os::Linux::_numa_move_pages;
33843461
os::Linux::numa_set_preferred_func_t os::Linux::_numa_set_preferred;
33853462
os::Linux::NumaAllocationPolicy os::Linux::_current_numa_policy;
33863463
unsigned long* os::Linux::_numa_all_nodes;
33873464
struct bitmask* os::Linux::_numa_all_nodes_ptr;
33883465
struct bitmask* os::Linux::_numa_nodes_ptr;
3466+
struct bitmask* os::Linux::_numa_all_cpus_ptr;
33893467
struct bitmask* os::Linux::_numa_interleave_bitmask;
33903468
struct bitmask* os::Linux::_numa_membind_bitmask;
33913469
struct bitmask* os::Linux::_numa_cpunodebind_bitmask;

src/hotspot/os/linux/os_linux.hpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ class os::Linux {
4545
static GrowableArray<int>* _cpu_to_node;
4646
static GrowableArray<int>* _nindex_to_node;
4747

48+
static GrowableArray<struct bitmask*>* _numa_affinity_masks;
49+
50+
static void build_numa_affinity_masks();
51+
4852
protected:
4953

5054
static physical_memory_size_type _physical_memory;
@@ -230,8 +234,11 @@ class os::Linux {
230234
typedef void (*numa_set_preferred_func_t)(int node);
231235
typedef void (*numa_set_bind_policy_func_t)(int policy);
232236
typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp, unsigned int n);
237+
typedef int (*numa_bitmask_clearbit_func_t)(struct bitmask *bmp, unsigned int n);
233238
typedef int (*numa_bitmask_equal_func_t)(struct bitmask *bmp1, struct bitmask *bmp2);
234239
typedef int (*numa_distance_func_t)(int node1, int node2);
240+
typedef int (*numa_sched_setaffinity_func_t)(pid_t pid, struct bitmask* mask);
241+
typedef struct bitmask* (*numa_allocate_cpumask_func_t)(void);
235242

236243
static sched_getcpu_func_t _sched_getcpu;
237244
static numa_node_to_cpus_func_t _numa_node_to_cpus;
@@ -244,16 +251,20 @@ class os::Linux {
244251
static numa_interleave_memory_v2_func_t _numa_interleave_memory_v2;
245252
static numa_set_bind_policy_func_t _numa_set_bind_policy;
246253
static numa_bitmask_isbitset_func_t _numa_bitmask_isbitset;
254+
static numa_bitmask_clearbit_func_t _numa_bitmask_clearbit;
247255
static numa_bitmask_equal_func_t _numa_bitmask_equal;
248256
static numa_distance_func_t _numa_distance;
249257
static numa_get_membind_func_t _numa_get_membind;
250258
static numa_get_run_node_mask_func_t _numa_get_run_node_mask;
251259
static numa_get_interleave_mask_func_t _numa_get_interleave_mask;
252260
static numa_move_pages_func_t _numa_move_pages;
253261
static numa_set_preferred_func_t _numa_set_preferred;
262+
static numa_sched_setaffinity_func_t _numa_sched_setaffinity;
263+
static numa_allocate_cpumask_func_t _numa_allocate_cpumask;
254264
static unsigned long* _numa_all_nodes;
255265
static struct bitmask* _numa_all_nodes_ptr;
256266
static struct bitmask* _numa_nodes_ptr;
267+
static struct bitmask* _numa_all_cpus_ptr;
257268
static struct bitmask* _numa_interleave_bitmask;
258269
static struct bitmask* _numa_membind_bitmask;
259270
static struct bitmask* _numa_cpunodebind_bitmask;
@@ -269,6 +280,7 @@ class os::Linux {
269280
static void set_numa_interleave_memory_v2(numa_interleave_memory_v2_func_t func) { _numa_interleave_memory_v2 = func; }
270281
static void set_numa_set_bind_policy(numa_set_bind_policy_func_t func) { _numa_set_bind_policy = func; }
271282
static void set_numa_bitmask_isbitset(numa_bitmask_isbitset_func_t func) { _numa_bitmask_isbitset = func; }
283+
static void set_numa_bitmask_clearbit(numa_bitmask_clearbit_func_t func) { _numa_bitmask_clearbit = func; }
272284
static void set_numa_bitmask_equal(numa_bitmask_equal_func_t func) { _numa_bitmask_equal = func; }
273285
static void set_numa_distance(numa_distance_func_t func) { _numa_distance = func; }
274286
static void set_numa_get_membind(numa_get_membind_func_t func) { _numa_get_membind = func; }
@@ -279,9 +291,12 @@ class os::Linux {
279291
static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; }
280292
static void set_numa_all_nodes_ptr(struct bitmask **ptr) { _numa_all_nodes_ptr = (ptr == nullptr ? nullptr : *ptr); }
281293
static void set_numa_nodes_ptr(struct bitmask **ptr) { _numa_nodes_ptr = (ptr == nullptr ? nullptr : *ptr); }
294+
static void set_numa_all_cpus_ptr(struct bitmask **ptr) { _numa_all_cpus_ptr = (ptr == nullptr ? nullptr : *ptr); }
282295
static void set_numa_interleave_bitmask(struct bitmask* ptr) { _numa_interleave_bitmask = ptr ; }
283296
static void set_numa_membind_bitmask(struct bitmask* ptr) { _numa_membind_bitmask = ptr ; }
284297
static void set_numa_cpunodebind_bitmask(struct bitmask* ptr) { _numa_cpunodebind_bitmask = ptr ; }
298+
static void set_numa_sched_setaffinity(numa_sched_setaffinity_func_t func) { _numa_sched_setaffinity = func; }
299+
static void set_numa_allocate_cpumask(numa_allocate_cpumask_func_t func) { _numa_allocate_cpumask = func; }
285300
static int sched_getcpu_syscall(void);
286301

287302
enum NumaAllocationPolicy{
@@ -292,6 +307,8 @@ class os::Linux {
292307
static NumaAllocationPolicy _current_numa_policy;
293308

294309
public:
310+
static void numa_set_thread_affinity(pid_t tid, int node);
311+
295312
static int sched_getcpu() { return _sched_getcpu != nullptr ? _sched_getcpu() : -1; }
296313
static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen);
297314
static int numa_max_node() { return _numa_max_node != nullptr ? _numa_max_node() : -1; }

src/hotspot/os/windows/os_windows.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3752,6 +3752,7 @@ size_t os::pd_pretouch_memory(void* first, void* last, size_t page_size) {
37523752
return page_size;
37533753
}
37543754

3755+
// No-op on this platform: both arguments are accepted and ignored.
void os::numa_set_thread_affinity(Thread* thread, int node) {
  // Nothing to do.
}
37553756
// No-op on this platform: addr and bytes are accepted and ignored.
void os::numa_make_global(char* addr, size_t bytes) {
  // Nothing to do.
}
37563757
// No-op on this platform: addr, bytes and lgrp_hint are accepted and ignored.
void os::numa_make_local(char* addr, size_t bytes, int lgrp_hint) {
  // Nothing to do.
}
37573758
// Returns the count held by numa_node_list_holder, but never less than 1.
size_t os::numa_get_groups_num() {
  return MAX2(numa_node_list_holder.get_count(), 1);
}

src/hotspot/share/runtime/os.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,7 @@ class os: AllStatic {
534534
static void realign_memory(char *addr, size_t bytes, size_t alignment_hint);
535535

536536
// NUMA-specific interface
537+
static void numa_set_thread_affinity(Thread* thread, int node);
537538
static void numa_make_local(char *addr, size_t bytes, int lgrp_hint);
538539
static void numa_make_global(char *addr, size_t bytes);
539540
static size_t numa_get_groups_num();

0 commit comments

Comments
 (0)