Skip to content

Commit

Permalink
8266490: Extend the OSContainer API to support the pids controller of…
Browse files Browse the repository at this point in the history
… cgroups

Backport-of: 089e83bf1bf6f28cec8dd30288720b6d066301f0
  • Loading branch information
MBaesken committed Jan 10, 2022
1 parent 3d3533a commit 3de6b2c
Show file tree
Hide file tree
Showing 22 changed files with 527 additions and 79 deletions.
118 changes: 87 additions & 31 deletions src/hotspot/os/linux/cgroupSubsystem_linux.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -34,11 +34,15 @@
#include "runtime/os.hpp"
#include "utilities/globalDefinitions.hpp"

// controller names have to match the *_IDX indices
static const char* cg_controller_name[] = { "cpu", "cpuset", "cpuacct", "memory", "pids" };

CgroupSubsystem* CgroupSubsystemFactory::create() {
CgroupV1MemoryController* memory = NULL;
CgroupV1Controller* cpuset = NULL;
CgroupV1Controller* cpu = NULL;
CgroupV1Controller* cpuacct = NULL;
CgroupV1Controller* pids = NULL;
CgroupInfo cg_infos[CG_INFO_LENGTH];
u1 cg_type_flags = INVALID_CGROUPS_GENERIC;
const char* proc_cgroups = "/proc/cgroups";
Expand Down Expand Up @@ -93,22 +97,29 @@ CgroupSubsystem* CgroupSubsystemFactory::create() {
assert(is_cgroup_v1(&cg_type_flags), "Cgroup v1 expected");
for (int i = 0; i < CG_INFO_LENGTH; i++) {
CgroupInfo info = cg_infos[i];
if (strcmp(info._name, "memory") == 0) {
memory = new CgroupV1MemoryController(info._root_mount_path, info._mount_path);
memory->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpuset") == 0) {
cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpuset->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpu") == 0) {
cpu = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpu->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpuacct") == 0) {
cpuacct = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpuacct->set_subsystem_path(info._cgroup_path);
if (info._data_complete) { // pids controller might have incomplete data
if (strcmp(info._name, "memory") == 0) {
memory = new CgroupV1MemoryController(info._root_mount_path, info._mount_path);
memory->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpuset") == 0) {
cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpuset->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpu") == 0) {
cpu = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpu->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpuacct") == 0) {
cpuacct = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpuacct->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "pids") == 0) {
pids = new CgroupV1Controller(info._root_mount_path, info._mount_path);
pids->set_subsystem_path(info._cgroup_path);
}
} else {
log_debug(os, container)("CgroupInfo for %s not complete", cg_controller_name[i]);
}
}
cleanup(cg_infos);
return new CgroupV1Subsystem(cpuset, cpu, cpuacct, memory);
return new CgroupV1Subsystem(cpuset, cpu, cpuacct, pids, memory);
}

bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
Expand All @@ -122,9 +133,10 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
char buf[MAXPATHLEN+1];
char *p;
bool is_cgroupsV2;
// true iff all controllers, memory, cpu, cpuset, cpuacct are enabled
// true iff all required controllers, memory, cpu, cpuset, cpuacct are enabled
// at the kernel level.
bool all_controllers_enabled;
// pids might not be enabled on older Linux distros (SLES 12.1, RHEL 7.1)
bool all_required_controllers_enabled;

/*
* Read /proc/cgroups so as to be able to distinguish cgroups v2 vs cgroups v1.
Expand All @@ -136,10 +148,9 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
*/
cgroups = fopen(proc_cgroups, "r");
if (cgroups == NULL) {
log_debug(os, container)("Can't open %s, %s",
proc_cgroups, os::strerror(errno));
*flags = INVALID_CGROUPS_GENERIC;
return false;
log_debug(os, container)("Can't open %s, %s", proc_cgroups, os::strerror(errno));
*flags = INVALID_CGROUPS_GENERIC;
return false;
}

while ((p = fgets(buf, MAXPATHLEN, cgroups)) != NULL) {
Expand Down Expand Up @@ -167,19 +178,30 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
cg_infos[CPUACCT_IDX]._name = os::strdup(name);
cg_infos[CPUACCT_IDX]._hierarchy_id = hierarchy_id;
cg_infos[CPUACCT_IDX]._enabled = (enabled == 1);
} else if (strcmp(name, "pids") == 0) {
log_debug(os, container)("Detected optional pids controller entry in %s", proc_cgroups);
cg_infos[PIDS_IDX]._name = os::strdup(name);
cg_infos[PIDS_IDX]._hierarchy_id = hierarchy_id;
cg_infos[PIDS_IDX]._enabled = (enabled == 1);
}
}
fclose(cgroups);

is_cgroupsV2 = true;
all_controllers_enabled = true;
all_required_controllers_enabled = true;
for (int i = 0; i < CG_INFO_LENGTH; i++) {
is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0;
all_controllers_enabled = all_controllers_enabled && cg_infos[i]._enabled;
// pids controller is optional. All other controllers are required
if (i != PIDS_IDX) {
is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0;
all_required_controllers_enabled = all_required_controllers_enabled && cg_infos[i]._enabled;
}
if (log_is_enabled(Debug, os, container) && !cg_infos[i]._enabled) {
log_debug(os, container)("controller %s is not enabled\n", cg_controller_name[i]);
}
}

if (!all_controllers_enabled) {
// one or more controllers disabled, disable container support
if (!all_required_controllers_enabled) {
// one or more required controllers disabled, disable container support
log_debug(os, container)("One or more required controllers disabled at kernel level.");
cleanup(cg_infos);
*flags = INVALID_CGROUPS_GENERIC;
Expand Down Expand Up @@ -220,17 +242,21 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,

while (!is_cgroupsV2 && (token = strsep(&controllers, ",")) != NULL) {
if (strcmp(token, "memory") == 0) {
assert(hierarchy_id == cg_infos[MEMORY_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
assert(hierarchy_id == cg_infos[MEMORY_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for memory");
cg_infos[MEMORY_IDX]._cgroup_path = os::strdup(cgroup_path);
} else if (strcmp(token, "cpuset") == 0) {
assert(hierarchy_id == cg_infos[CPUSET_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
assert(hierarchy_id == cg_infos[CPUSET_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpuset");
cg_infos[CPUSET_IDX]._cgroup_path = os::strdup(cgroup_path);
} else if (strcmp(token, "cpu") == 0) {
assert(hierarchy_id == cg_infos[CPU_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
assert(hierarchy_id == cg_infos[CPU_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpu");
cg_infos[CPU_IDX]._cgroup_path = os::strdup(cgroup_path);
} else if (strcmp(token, "cpuacct") == 0) {
assert(hierarchy_id == cg_infos[CPUACCT_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
assert(hierarchy_id == cg_infos[CPUACCT_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpuacc");
cg_infos[CPUACCT_IDX]._cgroup_path = os::strdup(cgroup_path);
} else if (strcmp(token, "pids") == 0) {
assert(hierarchy_id == cg_infos[PIDS_IDX]._hierarchy_id, "/proc/cgroups (%d) and /proc/self/cgroup (%d) hierarchy mismatch for pids",
cg_infos[PIDS_IDX]._hierarchy_id, hierarchy_id);
cg_infos[PIDS_IDX]._cgroup_path = os::strdup(cgroup_path);
}
}
if (is_cgroupsV2) {
Expand Down Expand Up @@ -281,13 +307,15 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,

/* Cgroup v1 relevant info
*
* Find the cgroup mount point for memory, cpuset, cpu, cpuacct
* Find the cgroup mount point for memory, cpuset, cpu, cpuacct, pids
*
* Example for docker:
* 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
*
* Example for host:
* 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory
*
* 44 31 0:39 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:23 - cgroup cgroup rw,pids
*/
if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- %s %*s %s", tmproot, tmpmount, tmp_fs_type, tmpcgroups) == 4) {
if (strcmp("cgroup", tmp_fs_type) != 0) {
Expand Down Expand Up @@ -333,6 +361,12 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
cg_infos[CPUACCT_IDX]._mount_path = os::strdup(tmpmount);
cg_infos[CPUACCT_IDX]._root_mount_path = os::strdup(tmproot);
cg_infos[CPUACCT_IDX]._data_complete = true;
} else if (strcmp(token, "pids") == 0) {
any_cgroup_mounts_found = true;
assert(cg_infos[PIDS_IDX]._mount_path == NULL, "stomping of _mount_path");
cg_infos[PIDS_IDX]._mount_path = os::strdup(tmpmount);
cg_infos[PIDS_IDX]._root_mount_path = os::strdup(tmproot);
cg_infos[PIDS_IDX]._data_complete = true;
}
}
}
Expand Down Expand Up @@ -387,10 +421,13 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
*flags = INVALID_CGROUPS_V1;
return false;
}
if (log_is_enabled(Debug, os, container) && !cg_infos[PIDS_IDX]._data_complete) {
log_debug(os, container)("Optional cgroup v1 pids subsystem not found");
// keep the other controller info, pids is optional
}
// Cgroups v1 case, we have all the info we need.
*flags = CGROUPS_V1;
return true;

};

void CgroupSubsystemFactory::cleanup(CgroupInfo* cg_infos) {
Expand Down Expand Up @@ -514,3 +551,22 @@ jlong CgroupSubsystem::memory_limit_in_bytes() {
memory_limit->set_value(mem_limit, OSCONTAINER_CACHE_TIMEOUT);
return mem_limit;
}

jlong CgroupSubsystem::limit_from_str(char* limit_str) {
if (limit_str == NULL) {
return OSCONTAINER_ERROR;
}
// Unlimited memory in cgroups is the literal string 'max' for
// some controllers, for example the pids controller.
if (strcmp("max", limit_str) == 0) {
os::free(limit_str);
return (jlong)-1;
}
julong limit;
if (sscanf(limit_str, JULONG_FORMAT, &limit) != 1) {
os::free(limit_str);
return OSCONTAINER_ERROR;
}
os::free(limit_str);
return (jlong)limit;
}
9 changes: 6 additions & 3 deletions src/hotspot/os/linux/cgroupSubsystem_linux.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -61,12 +61,13 @@
#define INVALID_CGROUPS_NO_MOUNT 5
#define INVALID_CGROUPS_GENERIC 6

// Four controllers: cpu, cpuset, cpuacct, memory
#define CG_INFO_LENGTH 4
// Five controllers: cpu, cpuset, cpuacct, memory, pids
#define CG_INFO_LENGTH 5
#define CPUSET_IDX 0
#define CPU_IDX 1
#define CPUACCT_IDX 2
#define MEMORY_IDX 3
#define PIDS_IDX 4

typedef char * cptr;

Expand Down Expand Up @@ -238,10 +239,12 @@ class CgroupSubsystem: public CHeapObj<mtInternal> {
public:
jlong memory_limit_in_bytes();
int active_processor_count();
jlong limit_from_str(char* limit_str);

virtual int cpu_quota() = 0;
virtual int cpu_period() = 0;
virtual int cpu_shares() = 0;
virtual jlong pids_max() = 0;
virtual jlong memory_usage_in_bytes() = 0;
virtual jlong memory_and_swap_limit_in_bytes() = 0;
virtual jlong memory_soft_limit_in_bytes() = 0;
Expand Down
27 changes: 26 additions & 1 deletion src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -241,3 +241,28 @@ int CgroupV1Subsystem::cpu_shares() {

return shares;
}


char* CgroupV1Subsystem::pids_max_val() {
GET_CONTAINER_INFO_CPTR(cptr, _pids, "/pids.max",
"Maximum number of tasks is: %s", "%s %*d", pidsmax, 1024);
if (pidsmax == NULL) {
return NULL;
}
return os::strdup(pidsmax);
}

/* pids_max
*
* Return the maximum number of tasks available to the process
*
* return:
* maximum number of tasks
* -1 for unlimited
* OSCONTAINER_ERROR for not supported
*/
jlong CgroupV1Subsystem::pids_max() {
if (_pids == NULL) return OSCONTAINER_ERROR;
char * pidsmax_str = pids_max_val();
return limit_from_str(pidsmax_str);
}
9 changes: 8 additions & 1 deletion src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -87,6 +87,8 @@ class CgroupV1Subsystem: public CgroupSubsystem {

int cpu_shares();

jlong pids_max();

const char * container_type() {
return "cgroupv1";
}
Expand All @@ -101,15 +103,20 @@ class CgroupV1Subsystem: public CgroupSubsystem {
CgroupV1Controller* _cpuset = NULL;
CachingCgroupController* _cpu = NULL;
CgroupV1Controller* _cpuacct = NULL;
CgroupV1Controller* _pids = NULL;

char * pids_max_val();

public:
CgroupV1Subsystem(CgroupV1Controller* cpuset,
CgroupV1Controller* cpu,
CgroupV1Controller* cpuacct,
CgroupV1Controller* pids,
CgroupV1MemoryController* memory) {
_cpuset = cpuset;
_cpu = new CachingCgroupController(cpu);
_cpuacct = cpuacct;
_pids = pids;
_memory = new CachingCgroupController(memory);
_unlimited_memory = (LONG_MAX / os::vm_page_size()) * os::vm_page_size();
}
Expand Down
43 changes: 24 additions & 19 deletions src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, Red Hat Inc.
* Copyright (c) 2020, 2021, Red Hat Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -203,24 +203,6 @@ jlong CgroupV2Subsystem::read_memory_limit_in_bytes() {
return limit;
}

jlong CgroupV2Subsystem::limit_from_str(char* limit_str) {
if (limit_str == NULL) {
return OSCONTAINER_ERROR;
}
// Unlimited memory in Cgroups V2 is the literal string 'max'
if (strcmp("max", limit_str) == 0) {
os::free(limit_str);
return (jlong)-1;
}
julong limit;
if (sscanf(limit_str, JULONG_FORMAT, &limit) != 1) {
os::free(limit_str);
return OSCONTAINER_ERROR;
}
os::free(limit_str);
return (jlong)limit;
}

char* CgroupV2Subsystem::mem_limit_val() {
GET_CONTAINER_INFO_CPTR(cptr, _unified, "/memory.max",
"Raw value for memory limit is: %s", "%s", mem_limit_str, 1024);
Expand All @@ -244,3 +226,26 @@ char* CgroupV2Controller::construct_path(char* mount_path, char *cgroup_path) {
return os::strdup(buf);
}

char* CgroupV2Subsystem::pids_max_val() {
GET_CONTAINER_INFO_CPTR(cptr, _unified, "/pids.max",
"Maximum number of tasks is: %s", "%s %*d", pidsmax, 1024);
if (pidsmax == NULL) {
return NULL;
}
return os::strdup(pidsmax);
}

/* pids_max
*
* Return the maximum number of tasks available to the process
*
* return:
* maximum number of tasks
* -1 for unlimited
* OSCONTAINER_ERROR for not supported
*/
jlong CgroupV2Subsystem::pids_max() {
char * pidsmax_str = pids_max_val();
return limit_from_str(pidsmax_str);
}

Loading

1 comment on commit 3de6b2c

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.