From 91d58b8f962201861eceefd227bbc9d007a9fb1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Per=20Lid=C3=A9n?= Date: Thu, 30 Jan 2020 12:41:26 +0100 Subject: [PATCH] 8237649: ZGC: Improved NUMA support when using small pages Reviewed-by: eosterlund, smonteith --- .../gc/z/zPhysicalMemoryBacking_linux.cpp | 45 ++++++++++++++++++- .../gc/z/zPhysicalMemoryBacking_linux.hpp | 2 + src/hotspot/os/linux/os_linux.cpp | 3 ++ src/hotspot/os/linux/os_linux.hpp | 13 +++++- src/hotspot/share/gc/z/zNUMA.cpp | 1 - src/hotspot/share/gc/z/zPhysicalMemory.cpp | 7 ++- 6 files changed, 66 insertions(+), 5 deletions(-) diff --git a/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp b/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp index 824f082e643..e6e389e9b7f 100644 --- a/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp +++ b/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp @@ -27,6 +27,7 @@ #include "gc/z/zGlobals.hpp" #include "gc/z/zLargePages.inline.hpp" #include "gc/z/zMountPoint_linux.hpp" +#include "gc/z/zNUMA.inline.hpp" #include "gc/z/zPhysicalMemoryBacking_linux.hpp" #include "gc/z/zSyscall_linux.hpp" #include "logging/log.hpp" @@ -34,6 +35,7 @@ #include "runtime/os.hpp" #include "utilities/align.hpp" #include "utilities/debug.hpp" +#include "utilities/growableArray.hpp" #include #include @@ -596,7 +598,38 @@ bool ZPhysicalMemoryBacking::commit_inner(size_t offset, size_t length) { return true; } -size_t ZPhysicalMemoryBacking::commit(size_t offset, size_t length) { +static int offset_to_node(size_t offset) { + const GrowableArray<int>* mapping = os::Linux::numa_nindex_to_node(); + const size_t nindex = (offset >> ZGranuleSizeShift) % mapping->length(); + return mapping->at((int)nindex); +} + +size_t ZPhysicalMemoryBacking::commit_numa_interleaved(size_t offset, size_t length) { + size_t committed = 0; + + // Commit one granule at a time, so that each granule + // can be allocated from a different preferred node. 
+ while (committed < length) { + const size_t granule_offset = offset + committed; + + // Setup NUMA policy to allocate memory from a preferred node + os::Linux::numa_set_preferred(offset_to_node(granule_offset)); + + if (!commit_inner(granule_offset, ZGranuleSize)) { + // Failed + break; + } + + committed += ZGranuleSize; + } + + // Restore NUMA policy + os::Linux::numa_set_preferred(-1); + + return committed; +} + +size_t ZPhysicalMemoryBacking::commit_default(size_t offset, size_t length) { // Try to commit the whole region if (commit_inner(offset, length)) { // Success @@ -624,6 +657,16 @@ size_t ZPhysicalMemoryBacking::commit(size_t offset, size_t length) { } } +size_t ZPhysicalMemoryBacking::commit(size_t offset, size_t length) { + if (ZNUMA::is_enabled() && !ZLargePages::is_explicit()) { + // To get granule-level NUMA interleaving when using non-large pages, + // we must explicitly interleave the memory at commit/fallocate time. + return commit_numa_interleaved(offset, length); + } + + return commit_default(offset, length); +} + size_t ZPhysicalMemoryBacking::uncommit(size_t offset, size_t length) { log_trace(gc, heap)("Uncommitting memory: " SIZE_FORMAT "M-" SIZE_FORMAT "M (" SIZE_FORMAT "M)", offset / M, (offset + length) / M, length / M); diff --git a/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.hpp b/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.hpp index 7644a86c8bf..c5f1583a424 100644 --- a/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.hpp +++ b/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.hpp @@ -57,6 +57,8 @@ class ZPhysicalMemoryBacking { ZErrno fallocate(bool punch_hole, size_t offset, size_t length); bool commit_inner(size_t offset, size_t length); + size_t commit_numa_interleaved(size_t offset, size_t length); + size_t commit_default(size_t offset, size_t length); public: ZPhysicalMemoryBacking(); diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp index 814df213785..3eeb58f0e8e 
100644 --- a/src/hotspot/os/linux/os_linux.cpp +++ b/src/hotspot/os/linux/os_linux.cpp @@ -3163,6 +3163,8 @@ bool os::Linux::libnuma_init() { libnuma_v2_dlsym(handle, "numa_get_interleave_mask"))); set_numa_move_pages(CAST_TO_FN_PTR(numa_move_pages_func_t, libnuma_dlsym(handle, "numa_move_pages"))); + set_numa_set_preferred(CAST_TO_FN_PTR(numa_set_preferred_func_t, + libnuma_dlsym(handle, "numa_set_preferred"))); if (numa_available() != -1) { set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle, "numa_all_nodes")); @@ -3298,6 +3300,7 @@ os::Linux::numa_distance_func_t os::Linux::_numa_distance; os::Linux::numa_get_membind_func_t os::Linux::_numa_get_membind; os::Linux::numa_get_interleave_mask_func_t os::Linux::_numa_get_interleave_mask; os::Linux::numa_move_pages_func_t os::Linux::_numa_move_pages; +os::Linux::numa_set_preferred_func_t os::Linux::_numa_set_preferred; os::Linux::NumaAllocationPolicy os::Linux::_current_numa_policy; unsigned long* os::Linux::_numa_all_nodes; struct bitmask* os::Linux::_numa_all_nodes_ptr; diff --git a/src/hotspot/os/linux/os_linux.hpp b/src/hotspot/os/linux/os_linux.hpp index 8d4b7f94713..9b98ba98581 100644 --- a/src/hotspot/os/linux/os_linux.hpp +++ b/src/hotspot/os/linux/os_linux.hpp @@ -219,7 +219,7 @@ class Linux { typedef struct bitmask* (*numa_get_membind_func_t)(void); typedef struct bitmask* (*numa_get_interleave_mask_func_t)(void); typedef long (*numa_move_pages_func_t)(int pid, unsigned long count, void **pages, const int *nodes, int *status, int flags); - + typedef void (*numa_set_preferred_func_t)(int node); typedef void (*numa_set_bind_policy_func_t)(int policy); typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp, unsigned int n); typedef int (*numa_distance_func_t)(int node1, int node2); @@ -238,6 +238,7 @@ class Linux { static numa_get_membind_func_t _numa_get_membind; static numa_get_interleave_mask_func_t _numa_get_interleave_mask; static numa_move_pages_func_t _numa_move_pages; + static 
numa_set_preferred_func_t _numa_set_preferred; static unsigned long* _numa_all_nodes; static struct bitmask* _numa_all_nodes_ptr; static struct bitmask* _numa_nodes_ptr; @@ -258,6 +259,7 @@ class Linux { static void set_numa_get_membind(numa_get_membind_func_t func) { _numa_get_membind = func; } static void set_numa_get_interleave_mask(numa_get_interleave_mask_func_t func) { _numa_get_interleave_mask = func; } static void set_numa_move_pages(numa_move_pages_func_t func) { _numa_move_pages = func; } + static void set_numa_set_preferred(numa_set_preferred_func_t func) { _numa_set_preferred = func; } static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; } static void set_numa_all_nodes_ptr(struct bitmask **ptr) { _numa_all_nodes_ptr = (ptr == NULL ? NULL : *ptr); } static void set_numa_nodes_ptr(struct bitmask **ptr) { _numa_nodes_ptr = (ptr == NULL ? NULL : *ptr); } @@ -315,6 +317,11 @@ class Linux { _numa_interleave_memory(start, size, _numa_all_nodes); } } + static void numa_set_preferred(int node) { + if (_numa_set_preferred != NULL) { + _numa_set_preferred(node); + } + } static void numa_set_bind_policy(int policy) { if (_numa_set_bind_policy != NULL) { _numa_set_bind_policy(policy); @@ -392,6 +399,10 @@ class Linux { return false; } } + + static const GrowableArray<int>* numa_nindex_to_node() { + return _nindex_to_node; + } }; #endif // OS_LINUX_OS_LINUX_HPP diff --git a/src/hotspot/share/gc/z/zNUMA.cpp b/src/hotspot/share/gc/z/zNUMA.cpp index bf3e8fd4d5e..51a0012ba83 100644 --- a/src/hotspot/share/gc/z/zNUMA.cpp +++ b/src/hotspot/share/gc/z/zNUMA.cpp @@ -24,7 +24,6 @@ #include "precompiled.hpp" #include "gc/z/zNUMA.hpp" #include "logging/log.hpp" -#include "runtime/os.hpp" bool ZNUMA::_enabled; diff --git a/src/hotspot/share/gc/z/zPhysicalMemory.cpp b/src/hotspot/share/gc/z/zPhysicalMemory.cpp index cf7268f2f2e..350d0065063 100644 --- a/src/hotspot/share/gc/z/zPhysicalMemory.cpp +++ b/src/hotspot/share/gc/z/zPhysicalMemory.cpp @@ -277,8 +277,11 
@@ void ZPhysicalMemoryManager::map_view(const ZPhysicalMemory& pmem, uintptr_t add size += segment.size(); } - // Setup NUMA interleaving - if (ZNUMA::is_enabled()) { + // Setup NUMA interleaving for large pages + if (ZNUMA::is_enabled() && ZLargePages::is_explicit()) { + // To get granule-level NUMA interleaving when using large pages, + // we simply let the kernel interleave the memory for us at page + // fault time. os::numa_make_global((char*)addr, size); }