Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
8292083: Detected container memory limit may exceed physical machine …
…memory

Reviewed-by: sgehwolf, stuefe
  • Loading branch information
Jonathan Dowland authored and jerboaa committed Aug 26, 2022
1 parent f91943c commit f694f8a
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 28 deletions.
23 changes: 23 additions & 0 deletions src/hotspot/os/linux/cgroupSubsystem_linux.cpp
Expand Up @@ -528,7 +528,30 @@ jlong CgroupSubsystem::memory_limit_in_bytes() {
if (!memory_limit->should_check_metric()) {
return memory_limit->value();
}
jlong phys_mem = os::Linux::physical_memory();
log_trace(os, container)("total physical memory: " JLONG_FORMAT, phys_mem);
jlong mem_limit = read_memory_limit_in_bytes();

if (mem_limit <= 0 || mem_limit >= phys_mem) {
jlong read_mem_limit = mem_limit;
const char *reason;
if (mem_limit >= phys_mem) {
// Exceeding physical memory is treated as unlimited. Cg v1's implementation
// of read_memory_limit_in_bytes() caps this at phys_mem since Cg v1 has no
// value to represent 'max'. Cg v2 may return a value >= phys_mem if e.g. the
// container engine was started with a memory flag exceeding it.
reason = "ignored";
mem_limit = -1;
} else if (OSCONTAINER_ERROR == mem_limit) {
reason = "failed";
} else {
assert(mem_limit == -1, "Expected unlimited");
reason = "unlimited";
}
log_debug(os, container)("container memory limit %s: " JLONG_FORMAT ", using host value " JLONG_FORMAT,
reason, read_mem_limit, phys_mem);
}

// Update cached metric to avoid re-reading container settings too often
memory_limit->set_value(mem_limit, OSCONTAINER_CACHE_TIMEOUT);
return mem_limit;
Expand Down
15 changes: 9 additions & 6 deletions src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp
Expand Up @@ -31,6 +31,7 @@
#include "runtime/globals.hpp"
#include "runtime/os.hpp"
#include "utilities/globalDefinitions.hpp"
#include "os_linux.hpp"

/*
* Set directory to subsystem specific files based
Expand Down Expand Up @@ -91,15 +92,15 @@ jlong CgroupV1Subsystem::read_memory_limit_in_bytes() {
GET_CONTAINER_INFO(julong, _memory->controller(), "/memory.limit_in_bytes",
"Memory Limit is: " JULONG_FORMAT, JULONG_FORMAT, memlimit);

if (memlimit >= _unlimited_memory) {
if (memlimit >= os::Linux::physical_memory()) {
log_trace(os, container)("Non-Hierarchical Memory Limit is: Unlimited");
CgroupV1MemoryController* mem_controller = reinterpret_cast<CgroupV1MemoryController*>(_memory->controller());
if (mem_controller->is_hierarchical()) {
const char* matchline = "hierarchical_memory_limit";
const char* format = "%s " JULONG_FORMAT;
GET_CONTAINER_INFO_LINE(julong, _memory->controller(), "/memory.stat", matchline,
"Hierarchical Memory Limit is: " JULONG_FORMAT, format, hier_memlimit)
if (hier_memlimit >= _unlimited_memory) {
if (hier_memlimit >= os::Linux::physical_memory()) {
log_trace(os, container)("Hierarchical Memory Limit is: Unlimited");
} else {
return (jlong)hier_memlimit;
Expand All @@ -113,17 +114,19 @@ jlong CgroupV1Subsystem::read_memory_limit_in_bytes() {
}

jlong CgroupV1Subsystem::memory_and_swap_limit_in_bytes() {
julong host_total_memsw;
GET_CONTAINER_INFO(julong, _memory->controller(), "/memory.memsw.limit_in_bytes",
"Memory and Swap Limit is: " JULONG_FORMAT, JULONG_FORMAT, memswlimit);
if (memswlimit >= _unlimited_memory) {
host_total_memsw = os::Linux::host_swap() + os::Linux::physical_memory();
if (memswlimit >= host_total_memsw) {
log_trace(os, container)("Non-Hierarchical Memory and Swap Limit is: Unlimited");
CgroupV1MemoryController* mem_controller = reinterpret_cast<CgroupV1MemoryController*>(_memory->controller());
if (mem_controller->is_hierarchical()) {
const char* matchline = "hierarchical_memsw_limit";
const char* format = "%s " JULONG_FORMAT;
GET_CONTAINER_INFO_LINE(julong, _memory->controller(), "/memory.stat", matchline,
"Hierarchical Memory and Swap Limit is : " JULONG_FORMAT, format, hier_memswlimit)
if (hier_memswlimit >= _unlimited_memory) {
if (hier_memswlimit >= host_total_memsw) {
log_trace(os, container)("Hierarchical Memory and Swap Limit is: Unlimited");
} else {
jlong swappiness = read_mem_swappiness();
Expand Down Expand Up @@ -158,7 +161,7 @@ jlong CgroupV1Subsystem::read_mem_swappiness() {
jlong CgroupV1Subsystem::memory_soft_limit_in_bytes() {
GET_CONTAINER_INFO(julong, _memory->controller(), "/memory.soft_limit_in_bytes",
"Memory Soft Limit is: " JULONG_FORMAT, JULONG_FORMAT, memsoftlimit);
if (memsoftlimit >= _unlimited_memory) {
if (memsoftlimit >= os::Linux::physical_memory()) {
log_trace(os, container)("Memory Soft Limit is: Unlimited");
return (jlong)-1;
} else {
Expand Down Expand Up @@ -205,7 +208,7 @@ jlong CgroupV1Subsystem::kernel_memory_usage_in_bytes() {
jlong CgroupV1Subsystem::kernel_memory_limit_in_bytes() {
GET_CONTAINER_INFO(julong, _memory->controller(), "/memory.kmem.limit_in_bytes",
"Kernel Memory Limit is: " JULONG_FORMAT, JULONG_FORMAT, kmem_limit);
if (kmem_limit >= _unlimited_memory) {
if (kmem_limit >= os::Linux::physical_memory()) {
return (jlong)-1;
}
return (jlong)kmem_limit;
Expand Down
3 changes: 0 additions & 3 deletions src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp
Expand Up @@ -104,8 +104,6 @@ class CgroupV1Subsystem: public CgroupSubsystem {
CachingCgroupController * cpu_controller() { return _cpu; }

private:
julong _unlimited_memory;

/* controllers */
CachingCgroupController* _memory = NULL;
CgroupV1Controller* _cpuset = NULL;
Expand All @@ -128,7 +126,6 @@ class CgroupV1Subsystem: public CgroupSubsystem {
_cpuacct = cpuacct;
_pids = pids;
_memory = new CachingCgroupController(memory);
_unlimited_memory = (LONG_MAX / os::vm_page_size()) * os::vm_page_size();
}
};

Expand Down
9 changes: 0 additions & 9 deletions src/hotspot/os/linux/osContainer_linux.cpp
Expand Up @@ -43,8 +43,6 @@ CgroupSubsystem* cgroup_subsystem;
* we are running under cgroup control.
*/
void OSContainer::init() {
jlong mem_limit;

assert(!_is_initialized, "Initializing OSContainer more than once");

_is_initialized = true;
Expand All @@ -60,15 +58,8 @@ void OSContainer::init() {
if (cgroup_subsystem == NULL) {
return; // Required subsystem files not found or other error
}
// We need to update the amount of physical memory now that
// cgroup subsystem files have been processed.
if ((mem_limit = cgroup_subsystem->memory_limit_in_bytes()) > 0) {
os::Linux::set_physical_memory(mem_limit);
log_info(os, container)("Memory Limit is: " JLONG_FORMAT, mem_limit);
}

_is_containerized = true;

}

const char * OSContainer::container_type() {
Expand Down
19 changes: 11 additions & 8 deletions src/hotspot/os/linux/os_linux.cpp
Expand Up @@ -194,15 +194,12 @@ julong os::Linux::available_memory() {
julong avail_mem;

if (OSContainer::is_containerized()) {
jlong mem_limit, mem_usage;
if ((mem_limit = OSContainer::memory_limit_in_bytes()) < 1) {
log_debug(os, container)("container memory limit %s: " JLONG_FORMAT ", using host value",
mem_limit == OSCONTAINER_ERROR ? "failed" : "unlimited", mem_limit);
}
jlong mem_limit = OSContainer::memory_limit_in_bytes();
jlong mem_usage;
if (mem_limit > 0 && (mem_usage = OSContainer::memory_usage_in_bytes()) < 1) {
log_debug(os, container)("container memory usage failed: " JLONG_FORMAT ", using host value", mem_usage);
}
if (mem_limit > 0 && mem_usage > 0 ) {
if (mem_limit > 0 && mem_usage > 0) {
avail_mem = mem_limit > mem_usage ? (julong)mem_limit - (julong)mem_usage : 0;
log_trace(os)("available container memory: " JULONG_FORMAT, avail_mem);
return avail_mem;
Expand All @@ -223,8 +220,6 @@ julong os::physical_memory() {
log_trace(os)("total container memory: " JLONG_FORMAT, mem_limit);
return mem_limit;
}
log_debug(os, container)("container memory limit %s: " JLONG_FORMAT ", using host value",
mem_limit == OSCONTAINER_ERROR ? "failed" : "unlimited", mem_limit);
}

phys_mem = Linux::physical_memory();
Expand Down Expand Up @@ -340,6 +335,14 @@ pid_t os::Linux::gettid() {
return (pid_t)rslt;
}

// Returns the amount of swap currently configured, in bytes.
// This can change at any time.
julong os::Linux::host_swap() {
struct sysinfo si;
sysinfo(&si);
return (julong)si.totalswap;
}

// Most versions of linux have a bug where the number of processors are
// determined by looking at the /proc file system. In a chroot environment,
// the system call returns 1.
Expand Down
5 changes: 3 additions & 2 deletions src/hotspot/os/linux/os_linux.hpp
Expand Up @@ -57,8 +57,6 @@ class os::Linux {
static pthread_t _main_thread;

static julong available_memory();
static julong physical_memory() { return _physical_memory; }
static void set_physical_memory(julong phys_mem) { _physical_memory = phys_mem; }
static int active_processor_count();

static void initialize_system_info();
Expand Down Expand Up @@ -131,6 +129,9 @@ class os::Linux {
static address initial_thread_stack_bottom(void) { return _initial_thread_stack_bottom; }
static uintptr_t initial_thread_stack_size(void) { return _initial_thread_stack_size; }

static julong physical_memory() { return _physical_memory; }
static julong host_swap();

static intptr_t* ucontext_get_sp(const ucontext_t* uc);
static intptr_t* ucontext_get_fp(const ucontext_t* uc);

Expand Down
26 changes: 26 additions & 0 deletions test/hotspot/jtreg/containers/docker/TestMemoryAwareness.java
Expand Up @@ -24,6 +24,7 @@

/*
* @test
* @bug 8146115 8292083
* @key cgroups
* @summary Test JVM's memory resource awareness when running inside docker container
* @requires docker.support
Expand All @@ -41,6 +42,8 @@
import jdk.test.lib.containers.docker.DockerTestUtils;
import jdk.test.lib.process.OutputAnalyzer;

import static jdk.test.lib.Asserts.assertNotNull;

public class TestMemoryAwareness {
private static final String imageName = Common.imageName("memory");

Expand Down Expand Up @@ -76,6 +79,7 @@ public static void main(String[] args) throws Exception {
"1G", Integer.toString(((int) Math.pow(2, 20)) * 1024),
"1500M", Integer.toString(((int) Math.pow(2, 20)) * (1500 - 1024))
);
testContainerMemExceedsPhysical();
} finally {
if (!DockerTestUtils.RETAIN_IMAGE_AFTER_TEST) {
DockerTestUtils.removeDockerImage(imageName);
Expand All @@ -96,6 +100,28 @@ private static void testMemoryLimit(String valueToSet, String expectedTraceValue
.shouldMatch("Memory Limit is:.*" + expectedTraceValue);
}

// JDK-8292083
// Ensure that Java ignores container memory limit values above the host's physical memory.
private static void testContainerMemExceedsPhysical()
throws Exception {

Common.logNewTestCase("container memory limit exceeds physical memory");

DockerRunOptions opts = Common.newOpts(imageName);

// first run: establish physical memory in test environment and derive
// a bad value one power of ten larger
String goodMem = Common.run(opts).firstMatch("total physical memory: (\\d+)", 1);
assertNotNull(goodMem, "no match for 'total physical memory' in trace output");
String badMem = goodMem + "0";

// second run: set a container memory limit to the bad value
opts = Common.newOpts(imageName)
.addDockerOpts("--memory", badMem);
Common.run(opts)
.shouldMatch("container memory limit (ignored: " + badMem + "|unlimited: -1), using host value " + goodMem);
}


private static void testMemorySoftLimit(String valueToSet, String expectedTraceValue)
throws Exception {
Expand Down

3 comments on commit f694f8a

@jmtd
Copy link

@jmtd jmtd commented on f694f8a Aug 26, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/backport jdk17u-dev

@openjdk
Copy link

@openjdk openjdk bot commented on f694f8a Aug 26, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jmtd Could not automatically backport f694f8a7 to openjdk/jdk17u-dev due to conflicts in the following files:

  • src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp
  • src/hotspot/os/linux/os_linux.hpp

Please fetch the appropriate branch/commit and manually resolve these conflicts by using the following commands in your personal fork of openjdk/jdk17u-dev. Note: these commands are just some suggestions and you can use other equivalent commands you know.

# Fetch the up-to-date version of the target branch
$ git fetch --no-tags https://git.openjdk.org/jdk17u-dev master:master

# Check out the target branch and create your own branch to backport
$ git checkout master
$ git checkout -b jmtd-backport-f694f8a7

# Fetch the commit you want to backport
$ git fetch --no-tags https://git.openjdk.org/jdk f694f8a7671002559e7d23fdb65d5e9c768f9c03

# Backport the commit
$ git cherry-pick --no-commit f694f8a7671002559e7d23fdb65d5e9c768f9c03
# Resolve conflicts now

# Commit the files you have modified
$ git add files/with/resolved/conflicts
$ git commit -m 'Backport f694f8a7671002559e7d23fdb65d5e9c768f9c03'

Once you have resolved the conflicts as explained above continue with creating a pull request towards the openjdk/jdk17u-dev with the title Backport f694f8a7671002559e7d23fdb65d5e9c768f9c03.

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.