Skip to content

Commit 91d58b8

Browse files
committed
8237649: ZGC: Improved NUMA support when using small pages
Reviewed-by: eosterlund, smonteith
1 parent 06456a9 commit 91d58b8

File tree

6 files changed

+66
-5
lines changed

6 files changed

+66
-5
lines changed

src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,15 @@
2727
#include "gc/z/zGlobals.hpp"
2828
#include "gc/z/zLargePages.inline.hpp"
2929
#include "gc/z/zMountPoint_linux.hpp"
30+
#include "gc/z/zNUMA.inline.hpp"
3031
#include "gc/z/zPhysicalMemoryBacking_linux.hpp"
3132
#include "gc/z/zSyscall_linux.hpp"
3233
#include "logging/log.hpp"
3334
#include "runtime/init.hpp"
3435
#include "runtime/os.hpp"
3536
#include "utilities/align.hpp"
3637
#include "utilities/debug.hpp"
38+
#include "utilities/growableArray.hpp"
3739

3840
#include <fcntl.h>
3941
#include <stdio.h>
@@ -596,7 +598,38 @@ bool ZPhysicalMemoryBacking::commit_inner(size_t offset, size_t length) {
596598
return true;
597599
}
598600

599-
size_t ZPhysicalMemoryBacking::commit(size_t offset, size_t length) {
601+
// Picks the NUMA node that the granule containing 'offset' should be
// allocated from. The granule index (offset >> ZGranuleSizeShift) is
// wrapped, modulo the number of entries, into the table returned by
// os::Linux::numa_nindex_to_node(), and the table entry at that
// position is returned. Consecutive granules therefore cycle through
// the table entries in order.
// NOTE(review): the table pointer is dereferenced and its length is
// used as a divisor without any check here - this assumes the table is
// non-NULL and non-empty whenever this function is reached; confirm
// against the caller's NUMA-enabled guard.
static int offset_to_node(size_t offset) {
602+
const GrowableArray<int>* mapping = os::Linux::numa_nindex_to_node();
603+
const size_t nindex = (offset >> ZGranuleSizeShift) % mapping->length();
604+
return mapping->at((int)nindex);
605+
}
606+
607+
// Commits the range [offset, offset + length) one ZGranuleSize step at
// a time, setting the preferred NUMA node (via offset_to_node()) before
// each step. Stops at the first step for which commit_inner() fails.
// Returns the number of bytes committed, which is always a multiple of
// ZGranuleSize and is less than 'length' if a step failed.
// NOTE(review): every step commits a full granule, so a 'length' that
// is not a multiple of ZGranuleSize would commit (and report) more than
// requested - presumably callers always pass granule-aligned values;
// confirm.
size_t ZPhysicalMemoryBacking::commit_numa_interleaved(size_t offset, size_t length) {
608+
size_t committed = 0;
609+
610+
// Commit one granule at a time, so that each granule
611+
// can be allocated from a different preferred node.
612+
while (committed < length) {
613+
const size_t granule_offset = offset + committed;
614+
615+
// Setup NUMA policy to allocate memory from a preferred node
616+
os::Linux::numa_set_preferred(offset_to_node(granule_offset));
617+
618+
if (!commit_inner(granule_offset, ZGranuleSize)) {
619+
// Failed
620+
break;
621+
}
622+
623+
committed += ZGranuleSize;
624+
}
625+
626+
// Restore NUMA policy. This runs on both the success path and the
// early-exit (failed commit) path.
// NOTE(review): -1 is passed unconditionally rather than a saved
// previous value; per libnuma this selects local allocation - confirm
// that this is the policy the thread had before the loop.
627+
os::Linux::numa_set_preferred(-1);
628+
629+
return committed;
630+
}
631+
632+
size_t ZPhysicalMemoryBacking::commit_default(size_t offset, size_t length) {
600633
// Try to commit the whole region
601634
if (commit_inner(offset, length)) {
602635
// Success
@@ -624,6 +657,16 @@ size_t ZPhysicalMemoryBacking::commit(size_t offset, size_t length) {
624657
}
625658
}
626659

660+
// Entry point for committing the range [offset, offset + length).
// Selects the commit strategy: when ZNUMA is enabled and explicit large
// pages are not in use, the range is committed granule by granule with
// commit_numa_interleaved(); in every other case commit_default() is
// used. Returns whatever the selected strategy returns (a size_t byte
// count).
size_t ZPhysicalMemoryBacking::commit(size_t offset, size_t length) {
661+
if (ZNUMA::is_enabled() && !ZLargePages::is_explicit()) {
662+
// To get granule-level NUMA interleaving when using non-large pages,
663+
// we must explicitly interleave the memory at commit/fallocate time.
664+
return commit_numa_interleaved(offset, length);
665+
}
666+
667+
return commit_default(offset, length);
668+
}
669+
627670
size_t ZPhysicalMemoryBacking::uncommit(size_t offset, size_t length) {
628671
log_trace(gc, heap)("Uncommitting memory: " SIZE_FORMAT "M-" SIZE_FORMAT "M (" SIZE_FORMAT "M)",
629672
offset / M, (offset + length) / M, length / M);

src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ class ZPhysicalMemoryBacking {
5757
ZErrno fallocate(bool punch_hole, size_t offset, size_t length);
5858

5959
bool commit_inner(size_t offset, size_t length);
60+
size_t commit_numa_interleaved(size_t offset, size_t length);
61+
size_t commit_default(size_t offset, size_t length);
6062

6163
public:
6264
ZPhysicalMemoryBacking();

src/hotspot/os/linux/os_linux.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3163,6 +3163,8 @@ bool os::Linux::libnuma_init() {
31633163
libnuma_v2_dlsym(handle, "numa_get_interleave_mask")));
31643164
set_numa_move_pages(CAST_TO_FN_PTR(numa_move_pages_func_t,
31653165
libnuma_dlsym(handle, "numa_move_pages")));
3166+
set_numa_set_preferred(CAST_TO_FN_PTR(numa_set_preferred_func_t,
3167+
libnuma_dlsym(handle, "numa_set_preferred")));
31663168

31673169
if (numa_available() != -1) {
31683170
set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle, "numa_all_nodes"));
@@ -3298,6 +3300,7 @@ os::Linux::numa_distance_func_t os::Linux::_numa_distance;
32983300
os::Linux::numa_get_membind_func_t os::Linux::_numa_get_membind;
32993301
os::Linux::numa_get_interleave_mask_func_t os::Linux::_numa_get_interleave_mask;
33003302
os::Linux::numa_move_pages_func_t os::Linux::_numa_move_pages;
3303+
os::Linux::numa_set_preferred_func_t os::Linux::_numa_set_preferred;
33013304
os::Linux::NumaAllocationPolicy os::Linux::_current_numa_policy;
33023305
unsigned long* os::Linux::_numa_all_nodes;
33033306
struct bitmask* os::Linux::_numa_all_nodes_ptr;

src/hotspot/os/linux/os_linux.hpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ class Linux {
219219
typedef struct bitmask* (*numa_get_membind_func_t)(void);
220220
typedef struct bitmask* (*numa_get_interleave_mask_func_t)(void);
221221
typedef long (*numa_move_pages_func_t)(int pid, unsigned long count, void **pages, const int *nodes, int *status, int flags);
222-
222+
typedef void (*numa_set_preferred_func_t)(int node);
223223
typedef void (*numa_set_bind_policy_func_t)(int policy);
224224
typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp, unsigned int n);
225225
typedef int (*numa_distance_func_t)(int node1, int node2);
@@ -238,6 +238,7 @@ class Linux {
238238
static numa_get_membind_func_t _numa_get_membind;
239239
static numa_get_interleave_mask_func_t _numa_get_interleave_mask;
240240
static numa_move_pages_func_t _numa_move_pages;
241+
static numa_set_preferred_func_t _numa_set_preferred;
241242
static unsigned long* _numa_all_nodes;
242243
static struct bitmask* _numa_all_nodes_ptr;
243244
static struct bitmask* _numa_nodes_ptr;
@@ -258,6 +259,7 @@ class Linux {
258259
static void set_numa_get_membind(numa_get_membind_func_t func) { _numa_get_membind = func; }
259260
static void set_numa_get_interleave_mask(numa_get_interleave_mask_func_t func) { _numa_get_interleave_mask = func; }
260261
static void set_numa_move_pages(numa_move_pages_func_t func) { _numa_move_pages = func; }
262+
static void set_numa_set_preferred(numa_set_preferred_func_t func) { _numa_set_preferred = func; }
261263
static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; }
262264
static void set_numa_all_nodes_ptr(struct bitmask **ptr) { _numa_all_nodes_ptr = (ptr == NULL ? NULL : *ptr); }
263265
static void set_numa_nodes_ptr(struct bitmask **ptr) { _numa_nodes_ptr = (ptr == NULL ? NULL : *ptr); }
@@ -315,6 +317,11 @@ class Linux {
315317
_numa_interleave_memory(start, size, _numa_all_nodes);
316318
}
317319
}
320+
// Forwards 'node' to the function stored in _numa_set_preferred.
// If that function pointer is NULL the call is silently ignored, so
// callers get no indication that the request had no effect.
static void numa_set_preferred(int node) {
321+
if (_numa_set_preferred != NULL) {
322+
_numa_set_preferred(node);
323+
}
324+
}
318325
static void numa_set_bind_policy(int policy) {
319326
if (_numa_set_bind_policy != NULL) {
320327
_numa_set_bind_policy(policy);
@@ -392,6 +399,10 @@ class Linux {
392399
return false;
393400
}
394401
}
402+
403+
// Read-only accessor for the _nindex_to_node array (declared outside
// this view). Returns the stored pointer as-is, with no NULL check.
static const GrowableArray<int>* numa_nindex_to_node() {
404+
return _nindex_to_node;
405+
}
395406
};
396407

397408
#endif // OS_LINUX_OS_LINUX_HPP

src/hotspot/share/gc/z/zNUMA.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
#include "precompiled.hpp"
2525
#include "gc/z/zNUMA.hpp"
2626
#include "logging/log.hpp"
27-
#include "runtime/os.hpp"
2827

2928
bool ZNUMA::_enabled;
3029

src/hotspot/share/gc/z/zPhysicalMemory.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -277,8 +277,11 @@ void ZPhysicalMemoryManager::map_view(const ZPhysicalMemory& pmem, uintptr_t add
277277
size += segment.size();
278278
}
279279

280-
// Setup NUMA interleaving
281-
if (ZNUMA::is_enabled()) {
280+
// Setup NUMA interleaving for large pages
281+
if (ZNUMA::is_enabled() && ZLargePages::is_explicit()) {
282+
// To get granule-level NUMA interleaving when using large pages,
283+
// we simply let the kernel interleave the memory for us at page
284+
// fault time.
282285
os::numa_make_global((char*)addr, size);
283286
}
284287

0 commit comments

Comments
 (0)