@@ -2995,6 +2995,10 @@ size_t os::pd_pretouch_memory(void* first, void* last, size_t page_size) {
29952995 return page_size;
29962996}
29972997
2998+ void os::numa_set_thread_affinity (Thread* thread, int node) {
2999+ Linux::numa_set_thread_affinity (thread->osthread ()->thread_id (), node);
3000+ }
3001+
29983002void os::numa_make_global (char *addr, size_t bytes) {
29993003 Linux::numa_interleave_memory (addr, bytes);
30003004}
@@ -3177,6 +3181,8 @@ bool os::Linux::libnuma_init() {
31773181 libnuma_dlsym (handle, " numa_set_bind_policy" )));
31783182 set_numa_bitmask_isbitset (CAST_TO_FN_PTR (numa_bitmask_isbitset_func_t ,
31793183 libnuma_dlsym (handle, " numa_bitmask_isbitset" )));
3184+ set_numa_bitmask_clearbit (CAST_TO_FN_PTR (numa_bitmask_clearbit_func_t ,
3185+ libnuma_dlsym (handle, " numa_bitmask_clearbit" )));
31803186 set_numa_bitmask_equal (CAST_TO_FN_PTR (numa_bitmask_equal_func_t ,
31813187 libnuma_dlsym (handle, " numa_bitmask_equal" )));
31823188 set_numa_distance (CAST_TO_FN_PTR (numa_distance_func_t ,
@@ -3191,20 +3197,32 @@ bool os::Linux::libnuma_init() {
31913197 libnuma_dlsym (handle, " numa_set_preferred" )));
31923198 set_numa_get_run_node_mask (CAST_TO_FN_PTR (numa_get_run_node_mask_func_t ,
31933199 libnuma_v2_dlsym (handle, " numa_get_run_node_mask" )));
3200+ set_numa_sched_setaffinity (CAST_TO_FN_PTR (numa_sched_setaffinity_func_t ,
3201+ libnuma_v2_dlsym (handle, " numa_sched_setaffinity" )));
3202+ set_numa_allocate_cpumask (CAST_TO_FN_PTR (numa_allocate_cpumask_func_t ,
3203+ libnuma_v2_dlsym (handle, " numa_allocate_cpumask" )));
31943204
31953205 if (numa_available () != -1 ) {
31963206 set_numa_all_nodes ((unsigned long *)libnuma_dlsym (handle, " numa_all_nodes" ));
31973207 set_numa_all_nodes_ptr ((struct bitmask **)libnuma_dlsym (handle, " numa_all_nodes_ptr" ));
31983208 set_numa_nodes_ptr ((struct bitmask **)libnuma_dlsym (handle, " numa_nodes_ptr" ));
3209+ set_numa_all_cpus_ptr ((struct bitmask **)libnuma_dlsym (handle, " numa_all_cpus_ptr" ));
31993210 set_numa_interleave_bitmask (_numa_get_interleave_mask ());
32003211 set_numa_membind_bitmask (_numa_get_membind ());
32013212 set_numa_cpunodebind_bitmask (_numa_get_run_node_mask ());
3213+
32023214 // Create an index -> node mapping, since nodes are not always consecutive
32033215 _nindex_to_node = new (mtInternal) GrowableArray<int >(0 , mtInternal);
32043216 rebuild_nindex_to_node_map ();
3217+
32053218 // Create a cpu -> node mapping
32063219 _cpu_to_node = new (mtInternal) GrowableArray<int >(0 , mtInternal);
32073220 rebuild_cpu_to_node_map ();
3221+
3222+ // Create a node -> CPUs mapping
3223+ _numa_affinity_masks = new (mtInternal) GrowableArray<struct bitmask *>(0 , mtInternal);
3224+ build_numa_affinity_masks ();
3225+
32083226 return true ;
32093227 }
32103228 }
@@ -3240,6 +3258,42 @@ size_t os::Linux::default_guard_size(os::ThreadType thr_type) {
32403258 return ((thr_type == java_thread || thr_type == compiler_thread) ? 0 : os::vm_page_size ());
32413259}
32423260
3261+ void os::Linux::build_numa_affinity_masks () {
3262+ // We only build the affinity masks if running libnuma v2 (_numa_node_to_cpus_v2
3263+ // is available) and we have the affinity mask of the process when it started.
3264+ if (_numa_node_to_cpus_v2 == nullptr || _numa_all_cpus_ptr == nullptr ) {
3265+ return ;
3266+ }
3267+
3268+ // It's important that we respect any user configuration by removing the
3269+ // CPUs we're not allowed to run on from the affinity mask. For example,
3270+ // if the user runs the JVM with "numactl -C 0-1,4-5" on a machine with
3271+ // the following NUMA setup:
3272+ // NUMA 0: CPUs 0-3, NUMA 1: CPUs 4-7
3273+ // We expect to get the following affinity masks:
3274+ // Affinity masks: idx 0 = (0, 1), idx 1 = (4, 5)
3275+
3276+ const int num_nodes = get_existing_num_nodes ();
3277+ const unsigned num_cpus = (unsigned )os::processor_count ();
3278+
3279+ for (int i = 0 ; i < num_nodes; i++) {
3280+ struct bitmask * affinity_mask = _numa_allocate_cpumask ();
3281+
3282+ // Fill the affinity mask with all CPUs belonging to NUMA node i
3283+ _numa_node_to_cpus_v2 (i, affinity_mask);
3284+
3285+ // Clear the bits of all CPUs that the process is not allowed to
3286+ // execute tasks on
3287+ for (unsigned j = 0 ; j < num_cpus; j++) {
3288+ if (!_numa_bitmask_isbitset (_numa_all_cpus_ptr, j)) {
3289+ _numa_bitmask_clearbit (affinity_mask, j);
3290+ }
3291+ }
3292+
3293+ _numa_affinity_masks->push (affinity_mask);
3294+ }
3295+ }
3296+
32433297void os::Linux::rebuild_nindex_to_node_map () {
32443298 int highest_node_number = Linux::numa_max_node ();
32453299
@@ -3355,6 +3409,25 @@ int os::Linux::numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen)
33553409 return -1 ;
33563410}
33573411
3412+ void os::Linux::numa_set_thread_affinity (pid_t tid, int node) {
3413+ // We only set affinity if running libnuma v2 (_numa_sched_setaffinity
3414+ // is available) and we have all affinity mask
3415+ if (_numa_sched_setaffinity == nullptr ||
3416+ _numa_all_cpus_ptr == nullptr ||
3417+ _numa_affinity_masks->is_empty ()) {
3418+ return ;
3419+ }
3420+
3421+ if (node == -1 ) {
3422+ // If the node is -1, the affinity is reverted to the original affinity
3423+ // of the thread when the VM was started
3424+ _numa_sched_setaffinity (tid, _numa_all_cpus_ptr);
3425+ } else {
3426+ // Normal case, set the affinity to the corresponding affinity mask
3427+ _numa_sched_setaffinity (tid, _numa_affinity_masks->at (node));
3428+ }
3429+ }
3430+
33583431int os::Linux::get_node_by_cpu (int cpu_id) {
33593432 if (cpu_to_node () != nullptr && cpu_id >= 0 && cpu_id < cpu_to_node ()->length ()) {
33603433 return cpu_to_node ()->at (cpu_id);
@@ -3364,6 +3437,7 @@ int os::Linux::get_node_by_cpu(int cpu_id) {
33643437
// Definitions of static state declared in os::Linux.
// cpu id -> node id mapping (built by rebuild_cpu_to_node_map()).
GrowableArray<int>* os::Linux::_cpu_to_node;
// node index -> node id mapping (nodes are not always consecutive).
GrowableArray<int>* os::Linux::_nindex_to_node;
// node index -> CPU affinity mask (built by build_numa_affinity_masks()).
GrowableArray<struct bitmask*>* os::Linux::_numa_affinity_masks;
// Function pointers resolved from libc/libnuma during libnuma_init().
os::Linux::sched_getcpu_func_t os::Linux::_sched_getcpu;
os::Linux::numa_node_to_cpus_func_t os::Linux::_numa_node_to_cpus;
os::Linux::numa_node_to_cpus_v2_func_t os::Linux::_numa_node_to_cpus_v2;
@@ -3375,17 +3449,21 @@ os::Linux::numa_interleave_memory_func_t os::Linux::_numa_interleave_memory;
// Function pointers resolved from libnuma (v1/v2) during libnuma_init().
os::Linux::numa_interleave_memory_v2_func_t os::Linux::_numa_interleave_memory_v2;
os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy;
os::Linux::numa_bitmask_isbitset_func_t os::Linux::_numa_bitmask_isbitset;
os::Linux::numa_bitmask_clearbit_func_t os::Linux::_numa_bitmask_clearbit;
os::Linux::numa_bitmask_equal_func_t os::Linux::_numa_bitmask_equal;
os::Linux::numa_distance_func_t os::Linux::_numa_distance;
os::Linux::numa_get_membind_func_t os::Linux::_numa_get_membind;
os::Linux::numa_get_interleave_mask_func_t os::Linux::_numa_get_interleave_mask;
os::Linux::numa_get_run_node_mask_func_t os::Linux::_numa_get_run_node_mask;
os::Linux::numa_sched_setaffinity_func_t os::Linux::_numa_sched_setaffinity;
os::Linux::numa_allocate_cpumask_func_t os::Linux::_numa_allocate_cpumask;
os::Linux::numa_move_pages_func_t os::Linux::_numa_move_pages;
os::Linux::numa_set_preferred_func_t os::Linux::_numa_set_preferred;
// Current NUMA allocation policy of the VM.
os::Linux::NumaAllocationPolicy os::Linux::_current_numa_policy;
// Node/CPU bitmasks captured from libnuma at VM start.
unsigned long* os::Linux::_numa_all_nodes;
struct bitmask* os::Linux::_numa_all_nodes_ptr;
struct bitmask* os::Linux::_numa_nodes_ptr;
// CPUs the process was allowed to run on when the VM started.
struct bitmask* os::Linux::_numa_all_cpus_ptr;
struct bitmask* os::Linux::_numa_interleave_bitmask;
struct bitmask* os::Linux::_numa_membind_bitmask;
struct bitmask* os::Linux::_numa_cpunodebind_bitmask;
0 commit comments