Skip to content

Commit

Permalink
8266490: Extend the OSContainer API to support the pids controller of…
Browse files Browse the repository at this point in the history
… cgroups

Reviewed-by: sgehwolf, lucy
  • Loading branch information
MBaesken committed Aug 10, 2021
1 parent 2384e12 commit 089e83b
Show file tree
Hide file tree
Showing 22 changed files with 527 additions and 79 deletions.
118 changes: 87 additions & 31 deletions src/hotspot/os/linux/cgroupSubsystem_linux.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -34,11 +34,15 @@
#include "runtime/os.hpp"
#include "utilities/globalDefinitions.hpp"

// controller names have to match the *_IDX indices
static const char* cg_controller_name[] = { "cpu", "cpuset", "cpuacct", "memory", "pids" };

CgroupSubsystem* CgroupSubsystemFactory::create() {
CgroupV1MemoryController* memory = NULL;
CgroupV1Controller* cpuset = NULL;
CgroupV1Controller* cpu = NULL;
CgroupV1Controller* cpuacct = NULL;
CgroupV1Controller* pids = NULL;
CgroupInfo cg_infos[CG_INFO_LENGTH];
u1 cg_type_flags = INVALID_CGROUPS_GENERIC;
const char* proc_cgroups = "/proc/cgroups";
Expand Down Expand Up @@ -93,22 +97,29 @@ CgroupSubsystem* CgroupSubsystemFactory::create() {
assert(is_cgroup_v1(&cg_type_flags), "Cgroup v1 expected");
for (int i = 0; i < CG_INFO_LENGTH; i++) {
CgroupInfo info = cg_infos[i];
if (strcmp(info._name, "memory") == 0) {
memory = new CgroupV1MemoryController(info._root_mount_path, info._mount_path);
memory->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpuset") == 0) {
cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpuset->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpu") == 0) {
cpu = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpu->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpuacct") == 0) {
cpuacct = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpuacct->set_subsystem_path(info._cgroup_path);
if (info._data_complete) { // pids controller might have incomplete data
if (strcmp(info._name, "memory") == 0) {
memory = new CgroupV1MemoryController(info._root_mount_path, info._mount_path);
memory->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpuset") == 0) {
cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpuset->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpu") == 0) {
cpu = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpu->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpuacct") == 0) {
cpuacct = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpuacct->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "pids") == 0) {
pids = new CgroupV1Controller(info._root_mount_path, info._mount_path);
pids->set_subsystem_path(info._cgroup_path);
}
} else {
log_debug(os, container)("CgroupInfo for %s not complete", cg_controller_name[i]);
}
}
cleanup(cg_infos);
return new CgroupV1Subsystem(cpuset, cpu, cpuacct, memory);
return new CgroupV1Subsystem(cpuset, cpu, cpuacct, pids, memory);
}

bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
Expand All @@ -122,9 +133,10 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
char buf[MAXPATHLEN+1];
char *p;
bool is_cgroupsV2;
// true iff all controllers, memory, cpu, cpuset, cpuacct are enabled
// true iff all required controllers, memory, cpu, cpuset, cpuacct are enabled
// at the kernel level.
bool all_controllers_enabled;
// pids might not be enabled on older Linux distros (SLES 12.1, RHEL 7.1)
bool all_required_controllers_enabled;

/*
* Read /proc/cgroups so as to be able to distinguish cgroups v2 vs cgroups v1.
Expand All @@ -136,10 +148,9 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
*/
cgroups = fopen(proc_cgroups, "r");
if (cgroups == NULL) {
log_debug(os, container)("Can't open %s, %s",
proc_cgroups, os::strerror(errno));
*flags = INVALID_CGROUPS_GENERIC;
return false;
log_debug(os, container)("Can't open %s, %s", proc_cgroups, os::strerror(errno));
*flags = INVALID_CGROUPS_GENERIC;
return false;
}

while ((p = fgets(buf, MAXPATHLEN, cgroups)) != NULL) {
Expand Down Expand Up @@ -167,19 +178,30 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
cg_infos[CPUACCT_IDX]._name = os::strdup(name);
cg_infos[CPUACCT_IDX]._hierarchy_id = hierarchy_id;
cg_infos[CPUACCT_IDX]._enabled = (enabled == 1);
} else if (strcmp(name, "pids") == 0) {
log_debug(os, container)("Detected optional pids controller entry in %s", proc_cgroups);
cg_infos[PIDS_IDX]._name = os::strdup(name);
cg_infos[PIDS_IDX]._hierarchy_id = hierarchy_id;
cg_infos[PIDS_IDX]._enabled = (enabled == 1);
}
}
fclose(cgroups);

is_cgroupsV2 = true;
all_controllers_enabled = true;
all_required_controllers_enabled = true;
for (int i = 0; i < CG_INFO_LENGTH; i++) {
is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0;
all_controllers_enabled = all_controllers_enabled && cg_infos[i]._enabled;
// pids controller is optional. All other controllers are required
if (i != PIDS_IDX) {
is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0;
all_required_controllers_enabled = all_required_controllers_enabled && cg_infos[i]._enabled;
}
if (log_is_enabled(Debug, os, container) && !cg_infos[i]._enabled) {
log_debug(os, container)("controller %s is not enabled\n", cg_controller_name[i]);
}
}

if (!all_controllers_enabled) {
// one or more controllers disabled, disable container support
if (!all_required_controllers_enabled) {
// one or more required controllers disabled, disable container support
log_debug(os, container)("One or more required controllers disabled at kernel level.");
cleanup(cg_infos);
*flags = INVALID_CGROUPS_GENERIC;
Expand Down Expand Up @@ -220,17 +242,21 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,

while (!is_cgroupsV2 && (token = strsep(&controllers, ",")) != NULL) {
if (strcmp(token, "memory") == 0) {
assert(hierarchy_id == cg_infos[MEMORY_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
assert(hierarchy_id == cg_infos[MEMORY_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for memory");
cg_infos[MEMORY_IDX]._cgroup_path = os::strdup(cgroup_path);
} else if (strcmp(token, "cpuset") == 0) {
assert(hierarchy_id == cg_infos[CPUSET_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
assert(hierarchy_id == cg_infos[CPUSET_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpuset");
cg_infos[CPUSET_IDX]._cgroup_path = os::strdup(cgroup_path);
} else if (strcmp(token, "cpu") == 0) {
assert(hierarchy_id == cg_infos[CPU_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
assert(hierarchy_id == cg_infos[CPU_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpu");
cg_infos[CPU_IDX]._cgroup_path = os::strdup(cgroup_path);
} else if (strcmp(token, "cpuacct") == 0) {
assert(hierarchy_id == cg_infos[CPUACCT_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
assert(hierarchy_id == cg_infos[CPUACCT_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpuacc");
cg_infos[CPUACCT_IDX]._cgroup_path = os::strdup(cgroup_path);
} else if (strcmp(token, "pids") == 0) {
assert(hierarchy_id == cg_infos[PIDS_IDX]._hierarchy_id, "/proc/cgroups (%d) and /proc/self/cgroup (%d) hierarchy mismatch for pids",
cg_infos[PIDS_IDX]._hierarchy_id, hierarchy_id);
cg_infos[PIDS_IDX]._cgroup_path = os::strdup(cgroup_path);
}
}
if (is_cgroupsV2) {
Expand Down Expand Up @@ -281,13 +307,15 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,

/* Cgroup v1 relevant info
*
* Find the cgroup mount point for memory, cpuset, cpu, cpuacct
* Find the cgroup mount point for memory, cpuset, cpu, cpuacct, pids
*
* Example for docker:
* 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
*
* Example for host:
* 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory
*
* 44 31 0:39 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:23 - cgroup cgroup rw,pids
*/
if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- %s %*s %s", tmproot, tmpmount, tmp_fs_type, tmpcgroups) == 4) {
if (strcmp("cgroup", tmp_fs_type) != 0) {
Expand Down Expand Up @@ -333,6 +361,12 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
cg_infos[CPUACCT_IDX]._mount_path = os::strdup(tmpmount);
cg_infos[CPUACCT_IDX]._root_mount_path = os::strdup(tmproot);
cg_infos[CPUACCT_IDX]._data_complete = true;
} else if (strcmp(token, "pids") == 0) {
any_cgroup_mounts_found = true;
assert(cg_infos[PIDS_IDX]._mount_path == NULL, "stomping of _mount_path");
cg_infos[PIDS_IDX]._mount_path = os::strdup(tmpmount);
cg_infos[PIDS_IDX]._root_mount_path = os::strdup(tmproot);
cg_infos[PIDS_IDX]._data_complete = true;
}
}
}
Expand Down Expand Up @@ -387,10 +421,13 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
*flags = INVALID_CGROUPS_V1;
return false;
}
if (log_is_enabled(Debug, os, container) && !cg_infos[PIDS_IDX]._data_complete) {
log_debug(os, container)("Optional cgroup v1 pids subsystem not found");
// keep the other controller info, pids is optional
}
// Cgroups v1 case, we have all the info we need.
*flags = CGROUPS_V1;
return true;

};

void CgroupSubsystemFactory::cleanup(CgroupInfo* cg_infos) {
Expand Down Expand Up @@ -514,3 +551,22 @@ jlong CgroupSubsystem::memory_limit_in_bytes() {
memory_limit->set_value(mem_limit, OSCONTAINER_CACHE_TIMEOUT);
return mem_limit;
}

jlong CgroupSubsystem::limit_from_str(char* limit_str) {
if (limit_str == NULL) {
return OSCONTAINER_ERROR;
}
// Unlimited memory in cgroups is the literal string 'max' for
// some controllers, for example the pids controller.
if (strcmp("max", limit_str) == 0) {
os::free(limit_str);
return (jlong)-1;
}
julong limit;
if (sscanf(limit_str, JULONG_FORMAT, &limit) != 1) {
os::free(limit_str);
return OSCONTAINER_ERROR;
}
os::free(limit_str);
return (jlong)limit;
}
9 changes: 6 additions & 3 deletions src/hotspot/os/linux/cgroupSubsystem_linux.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -61,12 +61,13 @@
#define INVALID_CGROUPS_NO_MOUNT 5
#define INVALID_CGROUPS_GENERIC 6

// Four controllers: cpu, cpuset, cpuacct, memory
#define CG_INFO_LENGTH 4
// Five controllers: cpu, cpuset, cpuacct, memory, pids
#define CG_INFO_LENGTH 5
#define CPUSET_IDX 0
#define CPU_IDX 1
#define CPUACCT_IDX 2
#define MEMORY_IDX 3
#define PIDS_IDX 4

typedef char * cptr;

Expand Down Expand Up @@ -238,10 +239,12 @@ class CgroupSubsystem: public CHeapObj<mtInternal> {
public:
jlong memory_limit_in_bytes();
int active_processor_count();
jlong limit_from_str(char* limit_str);

virtual int cpu_quota() = 0;
virtual int cpu_period() = 0;
virtual int cpu_shares() = 0;
virtual jlong pids_max() = 0;
virtual jlong memory_usage_in_bytes() = 0;
virtual jlong memory_and_swap_limit_in_bytes() = 0;
virtual jlong memory_soft_limit_in_bytes() = 0;
Expand Down
27 changes: 26 additions & 1 deletion src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -241,3 +241,28 @@ int CgroupV1Subsystem::cpu_shares() {

return shares;
}


char* CgroupV1Subsystem::pids_max_val() {
GET_CONTAINER_INFO_CPTR(cptr, _pids, "/pids.max",
"Maximum number of tasks is: %s", "%s %*d", pidsmax, 1024);
if (pidsmax == NULL) {
return NULL;
}
return os::strdup(pidsmax);
}

/* pids_max
*
* Return the maximum number of tasks available to the process
*
* return:
* maximum number of tasks
* -1 for unlimited
* OSCONTAINER_ERROR for not supported
*/
jlong CgroupV1Subsystem::pids_max() {
if (_pids == NULL) return OSCONTAINER_ERROR;
char * pidsmax_str = pids_max_val();
return limit_from_str(pidsmax_str);
}
9 changes: 8 additions & 1 deletion src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -87,6 +87,8 @@ class CgroupV1Subsystem: public CgroupSubsystem {

int cpu_shares();

jlong pids_max();

const char * container_type() {
return "cgroupv1";
}
Expand All @@ -101,15 +103,20 @@ class CgroupV1Subsystem: public CgroupSubsystem {
CgroupV1Controller* _cpuset = NULL;
CachingCgroupController* _cpu = NULL;
CgroupV1Controller* _cpuacct = NULL;
CgroupV1Controller* _pids = NULL;

char * pids_max_val();

public:
CgroupV1Subsystem(CgroupV1Controller* cpuset,
CgroupV1Controller* cpu,
CgroupV1Controller* cpuacct,
CgroupV1Controller* pids,
CgroupV1MemoryController* memory) {
_cpuset = cpuset;
_cpu = new CachingCgroupController(cpu);
_cpuacct = cpuacct;
_pids = pids;
_memory = new CachingCgroupController(memory);
_unlimited_memory = (LONG_MAX / os::vm_page_size()) * os::vm_page_size();
}
Expand Down
43 changes: 24 additions & 19 deletions src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, Red Hat Inc.
* Copyright (c) 2020, 2021, Red Hat Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -203,24 +203,6 @@ jlong CgroupV2Subsystem::read_memory_limit_in_bytes() {
return limit;
}

jlong CgroupV2Subsystem::limit_from_str(char* limit_str) {
if (limit_str == NULL) {
return OSCONTAINER_ERROR;
}
// Unlimited memory in Cgroups V2 is the literal string 'max'
if (strcmp("max", limit_str) == 0) {
os::free(limit_str);
return (jlong)-1;
}
julong limit;
if (sscanf(limit_str, JULONG_FORMAT, &limit) != 1) {
os::free(limit_str);
return OSCONTAINER_ERROR;
}
os::free(limit_str);
return (jlong)limit;
}

char* CgroupV2Subsystem::mem_limit_val() {
GET_CONTAINER_INFO_CPTR(cptr, _unified, "/memory.max",
"Raw value for memory limit is: %s", "%s", mem_limit_str, 1024);
Expand All @@ -244,3 +226,26 @@ char* CgroupV2Controller::construct_path(char* mount_path, char *cgroup_path) {
return os::strdup(buf);
}

char* CgroupV2Subsystem::pids_max_val() {
GET_CONTAINER_INFO_CPTR(cptr, _unified, "/pids.max",
"Maximum number of tasks is: %s", "%s %*d", pidsmax, 1024);
if (pidsmax == NULL) {
return NULL;
}
return os::strdup(pidsmax);
}

/* pids_max
*
* Return the maximum number of tasks available to the process
*
* return:
* maximum number of tasks
* -1 for unlimited
* OSCONTAINER_ERROR for not supported
*/
jlong CgroupV2Subsystem::pids_max() {
char * pidsmax_str = pids_max_val();
return limit_from_str(pidsmax_str);
}

Loading

5 comments on commit 089e83b

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MBaesken
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/backport jdk17u-dev

@openjdk
Copy link

@openjdk openjdk bot commented on 089e83b Jan 7, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MBaesken the backport was successfully created on the branch MBaesken-backport-089e83bf in my personal fork of openjdk/jdk17u-dev. To create a pull request with this backport targeting openjdk/jdk17u-dev:master, just click the following link:

➡️ Create pull request

The title of the pull request is automatically filled in correctly and below you find a suggestion for the pull request body:

Hi all,

This pull request contains a backport of commit 089e83bf from the openjdk/jdk repository.

The commit being backported was authored by Matthias Baesken on 10 Aug 2021 and was reviewed by Severin Gehwolf and Lutz Schmidt.

Thanks!

If you need to update the source branch of the pull then run the following commands in a local clone of your personal fork of openjdk/jdk17u-dev:

$ git fetch https://github.com/openjdk-bots/jdk17u-dev MBaesken-backport-089e83bf:MBaesken-backport-089e83bf
$ git checkout MBaesken-backport-089e83bf
# make changes
$ git add paths/to/changed/files
$ git commit --message 'Describe additional changes made'
$ git push https://github.com/openjdk-bots/jdk17u-dev MBaesken-backport-089e83bf

@MBaesken
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/backport jdk11u-dev

@openjdk
Copy link

@openjdk openjdk bot commented on 089e83b May 16, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MBaesken Could not automatically backport 089e83bf to openjdk/jdk11u-dev due to conflicts in the following files:

  • src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp
  • src/hotspot/os/linux/os_linux.cpp
  • test/lib/jdk/test/lib/containers/docker/Common.java

To manually resolve these conflicts run the following commands in your personal fork of openjdk/jdk11u-dev:

$ git checkout -b MBaesken-backport-089e83bf
$ git fetch --no-tags https://git.openjdk.java.net/jdk 089e83bf1bf6f28cec8dd30288720b6d066301f0
$ git cherry-pick --no-commit 089e83bf1bf6f28cec8dd30288720b6d066301f0
$ # Resolve conflicts
$ git add files/with/resolved/conflicts
$ git commit -m 'Backport 089e83bf1bf6f28cec8dd30288720b6d066301f0'

Once you have resolved the conflicts as explained above continue with creating a pull request towards the openjdk/jdk11u-dev with the title Backport 089e83bf1bf6f28cec8dd30288720b6d066301f0.

Please sign in to comment.