Skip to content

Commit

Permalink
8312182: THPs cause huge RSS due to thread start timing issue
Browse files Browse the repository at this point in the history
8310687: JDK-8303215 is incomplete

Reviewed-by: dholmes, poonam
  • Loading branch information
tstuefe committed Jul 21, 2023
1 parent 842d632 commit 84b325b
Show file tree
Hide file tree
Showing 3 changed files with 301 additions and 7 deletions.
9 changes: 9 additions & 0 deletions src/hotspot/os/linux/globals_linux.hpp
Expand Up @@ -89,6 +89,15 @@
"to disable both the override and the printouts." \
"See prctl(PR_SET_TIMERSLACK) for more info.") \
\
product(bool, DisableTHPStackMitigation, false, DIAGNOSTIC, \
"If THPs are unconditionally enabled on the system (mode " \
"\"always\"), the JVM will prevent THP from forming in " \
"thread stacks. This switch disables that mitigation and " \
"allows THPs to form in thread stacks.") \
\
develop(bool, DelayThreadStartALot, false, \
"Artificially delay thread starts randomly for testing.") \
\


// end of RUNTIME_OS_FLAGS
Expand Down
58 changes: 51 additions & 7 deletions src/hotspot/os/linux/os_linux.cpp
Expand Up @@ -775,6 +775,10 @@ static void *thread_native_entry(Thread *thread) {

assert(osthread->pthread_id() != 0, "pthread_id was not set as expected");

if (DelayThreadStartALot) {
os::naked_short_sleep(100);
}

// call one more level start routine
thread->call_run();

Expand Down Expand Up @@ -911,6 +915,7 @@ bool os::create_thread(Thread* thread, ThreadType thr_type,
// Calculate stack size if it's not specified by caller.
size_t stack_size = os::Posix::get_initial_stack_size(thr_type, req_stack_size);
size_t guard_size = os::Linux::default_guard_size(thr_type);

// Configure glibc guard page. Must happen before calling
// get_static_tls_area_size(), which uses the guard_size.
pthread_attr_setguardsize(&attr, guard_size);
Expand All @@ -931,13 +936,16 @@ bool os::create_thread(Thread* thread, ThreadType thr_type,
}
assert(is_aligned(stack_size, os::vm_page_size()), "stack_size not aligned");

// Add an additional page to the stack size to reduce its chances of getting large page aligned
// so that the stack does not get backed by a transparent huge page.
size_t default_large_page_size = HugePages::default_static_hugepage_size();
if (default_large_page_size != 0 &&
stack_size >= default_large_page_size &&
is_aligned(stack_size, default_large_page_size)) {
stack_size += os::vm_page_size();
if (!DisableTHPStackMitigation) {
// In addition to the glibc guard page that prevents inter-thread-stack hugepage
// coalescing (see comment in os::Linux::default_guard_size()), we also make
// sure the stack size itself is not huge-page-size aligned; that makes it much
// more likely for thread stack boundaries to be unaligned as well and hence
// protects thread stacks from being targeted by khugepaged.
if (HugePages::thp_pagesize() > 0 &&
is_aligned(stack_size, HugePages::thp_pagesize())) {
stack_size += os::vm_page_size();
}
}

int status = pthread_attr_setstacksize(&attr, stack_size);
Expand Down Expand Up @@ -3077,6 +3085,27 @@ bool os::Linux::libnuma_init() {
}

size_t os::Linux::default_guard_size(os::ThreadType thr_type) {

if (!DisableTHPStackMitigation) {
// If THPs are unconditionally enabled, the following scenario can lead to huge RSS
// - parent thread spawns, in quick succession, multiple child threads
// - child threads are slow to start
// - thread stacks of future child threads are adjacent and get merged into one large VMA
// by the kernel, and subsequently transformed into huge pages by khugepaged
// - child threads come up, place JVM guard pages, thus splinter the large VMA, splinter
// the huge pages into many (still paged-in) small pages.
// The result of that sequence are thread stacks that are fully paged-in even though the
// threads did not even start yet.
// We prevent that by letting the glibc allocate a guard page, which causes a VMA with different
// permission bits to separate two ajacent thread stacks and therefore prevent merging stacks
// into one VMA.
//
// Yes, this means we have two guard sections - the glibc and the JVM one - per thread. But the
// cost for that one extra protected page is dwarfed from a large win in performance and memory
// that avoiding interference by khugepaged buys us.
return os::vm_page_size();
}

// Creating guard page is very expensive. Java thread has HotSpot
// guard pages, only enable glibc guard page for non-Java threads.
// (Remember: compiler thread is a Java thread, too!)
Expand Down Expand Up @@ -3740,6 +3769,21 @@ void os::large_page_init() {
// Query OS information first.
HugePages::initialize();

// If THPs are unconditionally enabled (THP mode "always"), khugepaged may attempt to
// coalesce small pages in thread stacks to huge pages. That costs a lot of memory and
// is usually unwanted for thread stacks. Therefore we attempt to prevent THP formation in
// thread stacks unless the user explicitly allowed THP formation by manually disabling
// -XX:+DisableTHPStackMitigation.
if (HugePages::thp_mode() == THPMode::always) {
if (DisableTHPStackMitigation) {
log_info(pagesize)("JVM will *not* prevent THPs in thread stacks. This may cause high RSS.");
} else {
log_info(pagesize)("JVM will attempt to prevent THPs in thread stacks.");
}
} else {
FLAG_SET_ERGO(DisableTHPStackMitigation, true); // Mitigation not needed
}

// 1) Handle the case where we do not want to use huge pages
if (!UseLargePages &&
!UseTransparentHugePages &&
Expand Down
241 changes: 241 additions & 0 deletions test/hotspot/jtreg/runtime/os/THPsInThreadStackPreventionTest.java
@@ -0,0 +1,241 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2023, Red Hat Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/

/*
* @test id=ENABLED
* @bug 8303215 8312182
* @summary On THP=always systems, we prevent THPs from forming within thread stacks
* @library /test/lib
* @requires os.family == "linux"
* @requires vm.debug
* @requires os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
* @modules java.base/jdk.internal.misc
* java.management
* @run driver THPsInThreadStackPreventionTest PATCH-ENABLED
*/

/*
* @test id=DISABLED
* @bug 8303215 8312182
* @summary On THP=always systems, we prevent THPs from forming within thread stacks (negative test)
* @library /test/lib
* @requires os.family == "linux"
* @requires vm.debug
* @requires os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
* @modules java.base/jdk.internal.misc
* java.management
* @run main/manual THPsInThreadStackPreventionTest PATCH-DISABLED
*/
import jdk.test.lib.process.OutputAnalyzer;
import jdk.test.lib.process.ProcessTools;
import jtreg.SkippedException;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Objects;
import java.util.concurrent.BrokenBarrierException;
import java.util.concurrent.CyclicBarrier;

public class THPsInThreadStackPreventionTest {

// We test the mitigation for "huge rss for THP=always" introduced with JDK-8312182 and JDK-8302015:
//
// We start a program that spawns a ton of threads with a stack size close to THP page size. The threads
// are idle and should not build up a lot of stack. The threads are started with an artificial delay
// between thread start and stack guardpage creation, which exacerbates the RSS bloat (for explanation
// please see 8312182).
//
// We then observe RSS of that program. We expect it to stay below a reasonable maximum. The unpatched
// version should show an RSS of ~2 GB (paying for the fully paged in thread stacks). The fixed variant should
// cost only ~200-400 MB.

static final int numThreads = 1000;
static final long threadStackSizeMB = 2; // must be 2M
static final long heapSizeMB = 64;
static final long basicRSSOverheadMB = heapSizeMB + 150;
// A successful completion of this test would show not more than X KB per thread stack.
static final long acceptableRSSPerThreadStack = 128 * 1024;
static final long acceptableRSSForAllThreadStacks = numThreads * acceptableRSSPerThreadStack;
static final long acceptableRSSLimitMB = (acceptableRSSForAllThreadStacks / (1024 * 1024)) + basicRSSOverheadMB;

private static class TestMain {

static class Sleeper extends Thread {
CyclicBarrier barrier;
public Sleeper(CyclicBarrier barrier) {
this.barrier = barrier;
}
@Override
public void run() {
try {
barrier.await(); // wait for all siblings
barrier.await(); // wait main thread to print status
} catch (InterruptedException | BrokenBarrierException e) {
e.printStackTrace();
}
}
}

public static void main(String[] args) throws BrokenBarrierException, InterruptedException {

// Fire up 1000 threads with 2M stack size each.
Sleeper[] threads = new Sleeper[numThreads];
CyclicBarrier barrier = new CyclicBarrier(numThreads + 1);

for (int i = 0; i < numThreads; i++) {
threads[i] = new Sleeper(barrier);
threads[i].start();
}

// Wait for all threads to come up
barrier.await();

// print status
String file = "/proc/self/status";
try (FileReader fr = new FileReader(file);
BufferedReader reader = new BufferedReader(fr)) {
String line;
while ((line = reader.readLine()) != null) {
System.out.println(line);
}
} catch (IOException | NumberFormatException e) { /* ignored */ }

// Signal threads to stop
barrier.await();

}
}

static class ProcSelfStatus {

public long rssMB;
public long swapMB;
public int numLifeThreads;

// Parse output from /proc/self/status
public static ProcSelfStatus parse(OutputAnalyzer o) {
ProcSelfStatus status = new ProcSelfStatus();
String s = o.firstMatch("Threads:\\s*(\\d+)", 1);
Objects.requireNonNull(s);
status.numLifeThreads = Integer.parseInt(s);
s = o.firstMatch("VmRSS:\\s*(\\d+) kB", 1);
Objects.requireNonNull(s);
status.rssMB = Long.parseLong(s) / 1024;
s = o.firstMatch("VmSwap:\\s*(\\d+) kB", 1);
Objects.requireNonNull(s);
status.swapMB = Long.parseLong(s) / 1024;
return status;
}
}

public static void main(String[] args) throws Exception {

HugePageConfiguration config = HugePageConfiguration.readFromOS();
// This issue is bound to THP=always
if (config.getThpMode() != HugePageConfiguration.THPMode.always) {
throw new SkippedException("Test only makes sense in THP \"always\" mode");
}

String[] defaultArgs = {
"-Xlog:pagesize",
"-Xmx" + heapSizeMB + "m", "-Xms" + heapSizeMB + "m", "-XX:+AlwaysPreTouch", // stabilize RSS
"-Xss" + threadStackSizeMB + "m",
"-XX:-CreateCoredumpOnCrash",
// This will delay the child threads before they create guard pages, thereby greatly increasing the
// chance of large VMA formation + hugepage coalescation; see JDK-8312182
"-XX:+DelayThreadStartALot"
};
ArrayList<String> finalargs = new ArrayList<>(Arrays.asList(defaultArgs));

switch (args[0]) {
case "PATCH-ENABLED": {
finalargs.add(TestMain.class.getName());
ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(finalargs);

OutputAnalyzer output = new OutputAnalyzer(pb.start());
output.shouldHaveExitValue(0);

// this line indicates the mitigation is active:
output.shouldContain("[pagesize] JVM will attempt to prevent THPs in thread stacks.");

ProcSelfStatus status = ProcSelfStatus.parse(output);
if (status.numLifeThreads < numThreads) {
throw new RuntimeException("Number of live threads lower than expected: " + status.numLifeThreads + ", expected " + numThreads);
} else {
System.out.println("Found " + status.numLifeThreads + " to be alive. Ok.");
}

long rssPlusSwapMB = status.swapMB + status.rssMB;

if (rssPlusSwapMB > acceptableRSSLimitMB) {
throw new RuntimeException("RSS+Swap larger than expected: " + rssPlusSwapMB + "m, expected at most " + acceptableRSSLimitMB + "m");
} else {
if (rssPlusSwapMB < heapSizeMB) { // we pretouch the java heap, so we expect to see at least that:
throw new RuntimeException("RSS+Swap suspiciously low: " + rssPlusSwapMB + "m, expected at least " + heapSizeMB + "m");
}
System.out.println("Okay: RSS+Swap=" + rssPlusSwapMB + ", within acceptable limit of " + acceptableRSSLimitMB);
}
}
break;

case "PATCH-DISABLED": {

// Only execute manually! this will allocate ~2gb of memory!

// explicitly disable the no-THP-workaround:
finalargs.add("-XX:+UnlockDiagnosticVMOptions");
finalargs.add("-XX:+DisableTHPStackMitigation");

finalargs.add(TestMain.class.getName());
ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(finalargs);
OutputAnalyzer output = new OutputAnalyzer(pb.start());

output.shouldHaveExitValue(0);

// We deliberately switched off mitigation, VM should tell us:
output.shouldContain("[pagesize] JVM will *not* prevent THPs in thread stacks. This may cause high RSS.");

// Parse output from self/status
ProcSelfStatus status = ProcSelfStatus.parse(output);
if (status.numLifeThreads < numThreads) {
throw new RuntimeException("Number of live threads lower than expected (" + status.numLifeThreads + ", expected " + numThreads +")");
} else {
System.out.println("Found " + status.numLifeThreads + " to be alive. Ok.");
}

long rssPlusSwapMB = status.swapMB + status.rssMB;

if (rssPlusSwapMB < acceptableRSSLimitMB) {
throw new RuntimeException("RSS+Swap lower than expected: " + rssPlusSwapMB + "m, expected more than " + acceptableRSSLimitMB + "m");
}
break;
}

default: throw new RuntimeException("Bad argument: " + args[0]);
}
}
}

11 comments on commit 84b325b

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tstuefe
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/backport jdk11u-dev

@openjdk
Copy link

@openjdk openjdk bot commented on 84b325b Aug 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tstuefe Could not automatically backport 84b325b8 to openjdk/jdk11u-dev due to conflicts in the following files:

  • src/hotspot/os/linux/globals_linux.hpp
  • src/hotspot/os/linux/os_linux.cpp

Please fetch the appropriate branch/commit and manually resolve these conflicts by using the following commands in your personal fork of openjdk/jdk11u-dev. Note: these commands are just some suggestions and you can use other equivalent commands you know.

# Fetch the up-to-date version of the target branch
$ git fetch --no-tags https://git.openjdk.org/jdk11u-dev.git master:master

# Check out the target branch and create your own branch to backport
$ git checkout master
$ git checkout -b tstuefe-backport-84b325b8

# Fetch the commit you want to backport
$ git fetch --no-tags https://git.openjdk.org/jdk.git 84b325b844c08809448a9c073a11443d9e3c3f8e

# Backport the commit
$ git cherry-pick --no-commit 84b325b844c08809448a9c073a11443d9e3c3f8e
# Resolve conflicts now

# Commit the files you have modified
$ git add files/with/resolved/conflicts
$ git commit -m 'Backport 84b325b844c08809448a9c073a11443d9e3c3f8e'

Once you have resolved the conflicts as explained above continue with creating a pull request towards the openjdk/jdk11u-dev with the title Backport 84b325b844c08809448a9c073a11443d9e3c3f8e.

@tstuefe
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/backport jdk17u-dev

@openjdk
Copy link

@openjdk openjdk bot commented on 84b325b Aug 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tstuefe Could not automatically backport 84b325b8 to openjdk/jdk17u-dev due to conflicts in the following files:

  • src/hotspot/os/linux/os_linux.cpp

Please fetch the appropriate branch/commit and manually resolve these conflicts by using the following commands in your personal fork of openjdk/jdk17u-dev. Note: these commands are just some suggestions and you can use other equivalent commands you know.

# Fetch the up-to-date version of the target branch
$ git fetch --no-tags https://git.openjdk.org/jdk17u-dev.git master:master

# Check out the target branch and create your own branch to backport
$ git checkout master
$ git checkout -b tstuefe-backport-84b325b8

# Fetch the commit you want to backport
$ git fetch --no-tags https://git.openjdk.org/jdk.git 84b325b844c08809448a9c073a11443d9e3c3f8e

# Backport the commit
$ git cherry-pick --no-commit 84b325b844c08809448a9c073a11443d9e3c3f8e
# Resolve conflicts now

# Commit the files you have modified
$ git add files/with/resolved/conflicts
$ git commit -m 'Backport 84b325b844c08809448a9c073a11443d9e3c3f8e'

Once you have resolved the conflicts as explained above continue with creating a pull request towards the openjdk/jdk17u-dev with the title Backport 84b325b844c08809448a9c073a11443d9e3c3f8e.

@tstuefe
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/backport jdk17u-dev

@openjdk
Copy link

@openjdk openjdk bot commented on 84b325b Aug 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tstuefe Could not automatically backport 84b325b8 to openjdk/jdk17u-dev due to conflicts in the following files:

  • src/hotspot/os/linux/os_linux.cpp

Please fetch the appropriate branch/commit and manually resolve these conflicts by using the following commands in your personal fork of openjdk/jdk17u-dev. Note: these commands are just some suggestions and you can use other equivalent commands you know.

# Fetch the up-to-date version of the target branch
$ git fetch --no-tags https://git.openjdk.org/jdk17u-dev.git master:master

# Check out the target branch and create your own branch to backport
$ git checkout master
$ git checkout -b tstuefe-backport-84b325b8

# Fetch the commit you want to backport
$ git fetch --no-tags https://git.openjdk.org/jdk.git 84b325b844c08809448a9c073a11443d9e3c3f8e

# Backport the commit
$ git cherry-pick --no-commit 84b325b844c08809448a9c073a11443d9e3c3f8e
# Resolve conflicts now

# Commit the files you have modified
$ git add files/with/resolved/conflicts
$ git commit -m 'Backport 84b325b844c08809448a9c073a11443d9e3c3f8e'

Once you have resolved the conflicts as explained above continue with creating a pull request towards the openjdk/jdk17u-dev with the title Backport 84b325b844c08809448a9c073a11443d9e3c3f8e.

@tstuefe
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/backport jdk21u

@openjdk
Copy link

@openjdk openjdk bot commented on 84b325b Aug 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tstuefe the backport was successfully created on the branch tstuefe-backport-84b325b8 in my personal fork of openjdk/jdk21u. To create a pull request with this backport targeting openjdk/jdk21u:master, just click the following link:

➡️ Create pull request

The title of the pull request is automatically filled in correctly and below you find a suggestion for the pull request body:

Hi all,

This pull request contains a backport of commit 84b325b8 from the openjdk/jdk repository.

The commit being backported was authored by Thomas Stuefe on 21 Jul 2023 and was reviewed by David Holmes and Poonam Bajaj.

Thanks!

If you need to update the source branch of the pull then run the following commands in a local clone of your personal fork of openjdk/jdk21u:

$ git fetch https://github.com/openjdk-bots/jdk21u.git tstuefe-backport-84b325b8:tstuefe-backport-84b325b8
$ git checkout tstuefe-backport-84b325b8
# make changes
$ git add paths/to/changed/files
$ git commit --message 'Describe additional changes made'
$ git push https://github.com/openjdk-bots/jdk21u.git tstuefe-backport-84b325b8

@tstuefe
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/backport jdk11u-dev

@openjdk
Copy link

@openjdk openjdk bot commented on 84b325b Oct 23, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tstuefe Could not automatically backport 84b325b8 to openjdk/jdk11u-dev due to conflicts in the following files:

  • src/hotspot/os/linux/globals_linux.hpp
  • src/hotspot/os/linux/os_linux.cpp

Please fetch the appropriate branch/commit and manually resolve these conflicts by using the following commands in your personal fork of openjdk/jdk11u-dev. Note: these commands are just some suggestions and you can use other equivalent commands you know.

# Fetch the up-to-date version of the target branch
$ git fetch --no-tags https://git.openjdk.org/jdk11u-dev.git master:master

# Check out the target branch and create your own branch to backport
$ git checkout master
$ git checkout -b tstuefe-backport-84b325b8

# Fetch the commit you want to backport
$ git fetch --no-tags https://git.openjdk.org/jdk.git 84b325b844c08809448a9c073a11443d9e3c3f8e

# Backport the commit
$ git cherry-pick --no-commit 84b325b844c08809448a9c073a11443d9e3c3f8e
# Resolve conflicts now

# Commit the files you have modified
$ git add files/with/resolved/conflicts
$ git commit -m 'Backport 84b325b844c08809448a9c073a11443d9e3c3f8e'

Once you have resolved the conflicts as explained above continue with creating a pull request towards the openjdk/jdk11u-dev with the title Backport 84b325b844c08809448a9c073a11443d9e3c3f8e.

Please sign in to comment.