Skip to content

Commit 372af6a

Browse files
committed
8266490: Extend the OSContainer API to support the pids controller of cgroups
Reviewed-by: mdoerr
Backport-of: 089e83bf1bf6f28cec8dd30288720b6d066301f0
1 parent 78aeb51 commit 372af6a

File tree

22 files changed

+526
-78
lines changed

22 files changed

+526
-78
lines changed

src/hotspot/os/linux/cgroupSubsystem_linux.cpp

Lines changed: 87 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -34,11 +34,15 @@
3434
#include "runtime/os.hpp"
3535
#include "utilities/globalDefinitions.hpp"
3636

37+
// controller names have to match the *_IDX indices
38+
static const char* cg_controller_name[] = { "cpuset", "cpu", "cpuacct", "memory", "pids" }; // order must follow CPUSET_IDX(0), CPU_IDX(1), CPUACCT_IDX(2), MEMORY_IDX(3), PIDS_IDX(4); used only to label log messages
39+
3740
CgroupSubsystem* CgroupSubsystemFactory::create() {
3841
CgroupV1MemoryController* memory = NULL;
3942
CgroupV1Controller* cpuset = NULL;
4043
CgroupV1Controller* cpu = NULL;
4144
CgroupV1Controller* cpuacct = NULL;
45+
CgroupV1Controller* pids = NULL;
4246
CgroupInfo cg_infos[CG_INFO_LENGTH];
4347
u1 cg_type_flags = INVALID_CGROUPS_GENERIC;
4448
const char* proc_cgroups = "/proc/cgroups";
@@ -93,22 +97,29 @@ CgroupSubsystem* CgroupSubsystemFactory::create() {
9397
assert(is_cgroup_v1(&cg_type_flags), "Cgroup v1 expected");
9498
for (int i = 0; i < CG_INFO_LENGTH; i++) {
9599
CgroupInfo info = cg_infos[i];
96-
if (strcmp(info._name, "memory") == 0) {
97-
memory = new CgroupV1MemoryController(info._root_mount_path, info._mount_path);
98-
memory->set_subsystem_path(info._cgroup_path);
99-
} else if (strcmp(info._name, "cpuset") == 0) {
100-
cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path);
101-
cpuset->set_subsystem_path(info._cgroup_path);
102-
} else if (strcmp(info._name, "cpu") == 0) {
103-
cpu = new CgroupV1Controller(info._root_mount_path, info._mount_path);
104-
cpu->set_subsystem_path(info._cgroup_path);
105-
} else if (strcmp(info._name, "cpuacct") == 0) {
106-
cpuacct = new CgroupV1Controller(info._root_mount_path, info._mount_path);
107-
cpuacct->set_subsystem_path(info._cgroup_path);
100+
if (info._data_complete) { // pids controller might have incomplete data
101+
if (strcmp(info._name, "memory") == 0) {
102+
memory = new CgroupV1MemoryController(info._root_mount_path, info._mount_path);
103+
memory->set_subsystem_path(info._cgroup_path);
104+
} else if (strcmp(info._name, "cpuset") == 0) {
105+
cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path);
106+
cpuset->set_subsystem_path(info._cgroup_path);
107+
} else if (strcmp(info._name, "cpu") == 0) {
108+
cpu = new CgroupV1Controller(info._root_mount_path, info._mount_path);
109+
cpu->set_subsystem_path(info._cgroup_path);
110+
} else if (strcmp(info._name, "cpuacct") == 0) {
111+
cpuacct = new CgroupV1Controller(info._root_mount_path, info._mount_path);
112+
cpuacct->set_subsystem_path(info._cgroup_path);
113+
} else if (strcmp(info._name, "pids") == 0) {
114+
pids = new CgroupV1Controller(info._root_mount_path, info._mount_path);
115+
pids->set_subsystem_path(info._cgroup_path);
116+
}
117+
} else {
118+
log_debug(os, container)("CgroupInfo for %s not complete", cg_controller_name[i]);
108119
}
109120
}
110121
cleanup(cg_infos);
111-
return new CgroupV1Subsystem(cpuset, cpu, cpuacct, memory);
122+
return new CgroupV1Subsystem(cpuset, cpu, cpuacct, pids, memory);
112123
}
113124

114125
bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
@@ -122,9 +133,10 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
122133
char buf[MAXPATHLEN+1];
123134
char *p;
124135
bool is_cgroupsV2;
125-
// true iff all controllers, memory, cpu, cpuset, cpuacct are enabled
136+
// true iff all required controllers, memory, cpu, cpuset, cpuacct are enabled
126137
// at the kernel level.
127-
bool all_controllers_enabled;
138+
// pids might not be enabled on older Linux distros (SLES 12.1, RHEL 7.1)
139+
bool all_required_controllers_enabled;
128140

129141
/*
130142
* Read /proc/cgroups so as to be able to distinguish cgroups v2 vs cgroups v1.
@@ -136,10 +148,9 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
136148
*/
137149
cgroups = fopen(proc_cgroups, "r");
138150
if (cgroups == NULL) {
139-
log_debug(os, container)("Can't open %s, %s",
140-
proc_cgroups, os::strerror(errno));
141-
*flags = INVALID_CGROUPS_GENERIC;
142-
return false;
151+
log_debug(os, container)("Can't open %s, %s", proc_cgroups, os::strerror(errno));
152+
*flags = INVALID_CGROUPS_GENERIC;
153+
return false;
143154
}
144155

145156
while ((p = fgets(buf, MAXPATHLEN, cgroups)) != NULL) {
@@ -167,19 +178,30 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
167178
cg_infos[CPUACCT_IDX]._name = os::strdup(name);
168179
cg_infos[CPUACCT_IDX]._hierarchy_id = hierarchy_id;
169180
cg_infos[CPUACCT_IDX]._enabled = (enabled == 1);
181+
} else if (strcmp(name, "pids") == 0) {
182+
log_debug(os, container)("Detected optional pids controller entry in %s", proc_cgroups);
183+
cg_infos[PIDS_IDX]._name = os::strdup(name);
184+
cg_infos[PIDS_IDX]._hierarchy_id = hierarchy_id;
185+
cg_infos[PIDS_IDX]._enabled = (enabled == 1);
170186
}
171187
}
172188
fclose(cgroups);
173189

174190
is_cgroupsV2 = true;
175-
all_controllers_enabled = true;
191+
all_required_controllers_enabled = true;
176192
for (int i = 0; i < CG_INFO_LENGTH; i++) {
177-
is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0;
178-
all_controllers_enabled = all_controllers_enabled && cg_infos[i]._enabled;
193+
// pids controller is optional. All other controllers are required
194+
if (i != PIDS_IDX) {
195+
is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0;
196+
all_required_controllers_enabled = all_required_controllers_enabled && cg_infos[i]._enabled;
197+
}
198+
if (log_is_enabled(Debug, os, container) && !cg_infos[i]._enabled) {
199+
log_debug(os, container)("controller %s is not enabled\n", cg_controller_name[i]);
200+
}
179201
}
180202

181-
if (!all_controllers_enabled) {
182-
// one or more controllers disabled, disable container support
203+
if (!all_required_controllers_enabled) {
204+
// one or more required controllers disabled, disable container support
183205
log_debug(os, container)("One or more required controllers disabled at kernel level.");
184206
cleanup(cg_infos);
185207
*flags = INVALID_CGROUPS_GENERIC;
@@ -220,17 +242,21 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
220242

221243
while (!is_cgroupsV2 && (token = strsep(&controllers, ",")) != NULL) {
222244
if (strcmp(token, "memory") == 0) {
223-
assert(hierarchy_id == cg_infos[MEMORY_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
245+
assert(hierarchy_id == cg_infos[MEMORY_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for memory");
224246
cg_infos[MEMORY_IDX]._cgroup_path = os::strdup(cgroup_path);
225247
} else if (strcmp(token, "cpuset") == 0) {
226-
assert(hierarchy_id == cg_infos[CPUSET_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
248+
assert(hierarchy_id == cg_infos[CPUSET_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpuset");
227249
cg_infos[CPUSET_IDX]._cgroup_path = os::strdup(cgroup_path);
228250
} else if (strcmp(token, "cpu") == 0) {
229-
assert(hierarchy_id == cg_infos[CPU_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
251+
assert(hierarchy_id == cg_infos[CPU_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpu");
230252
cg_infos[CPU_IDX]._cgroup_path = os::strdup(cgroup_path);
231253
} else if (strcmp(token, "cpuacct") == 0) {
232-
assert(hierarchy_id == cg_infos[CPUACCT_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
254+
assert(hierarchy_id == cg_infos[CPUACCT_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpuacc");
233255
cg_infos[CPUACCT_IDX]._cgroup_path = os::strdup(cgroup_path);
256+
} else if (strcmp(token, "pids") == 0) {
257+
assert(hierarchy_id == cg_infos[PIDS_IDX]._hierarchy_id, "/proc/cgroups (%d) and /proc/self/cgroup (%d) hierarchy mismatch for pids",
258+
cg_infos[PIDS_IDX]._hierarchy_id, hierarchy_id);
259+
cg_infos[PIDS_IDX]._cgroup_path = os::strdup(cgroup_path);
234260
}
235261
}
236262
if (is_cgroupsV2) {
@@ -281,13 +307,15 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
281307

282308
/* Cgroup v1 relevant info
283309
*
284-
* Find the cgroup mount point for memory, cpuset, cpu, cpuacct
310+
* Find the cgroup mount point for memory, cpuset, cpu, cpuacct, pids
285311
*
286312
* Example for docker:
287313
* 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
288314
*
289315
* Example for host:
290316
* 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory
317+
*
318+
* 44 31 0:39 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:23 - cgroup cgroup rw,pids
291319
*/
292320
if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- %s %*s %s", tmproot, tmpmount, tmp_fs_type, tmpcgroups) == 4) {
293321
if (strcmp("cgroup", tmp_fs_type) != 0) {
@@ -333,6 +361,12 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
333361
cg_infos[CPUACCT_IDX]._mount_path = os::strdup(tmpmount);
334362
cg_infos[CPUACCT_IDX]._root_mount_path = os::strdup(tmproot);
335363
cg_infos[CPUACCT_IDX]._data_complete = true;
364+
} else if (strcmp(token, "pids") == 0) {
365+
any_cgroup_mounts_found = true;
366+
assert(cg_infos[PIDS_IDX]._mount_path == NULL, "stomping of _mount_path");
367+
cg_infos[PIDS_IDX]._mount_path = os::strdup(tmpmount);
368+
cg_infos[PIDS_IDX]._root_mount_path = os::strdup(tmproot);
369+
cg_infos[PIDS_IDX]._data_complete = true;
336370
}
337371
}
338372
}
@@ -387,10 +421,13 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
387421
*flags = INVALID_CGROUPS_V1;
388422
return false;
389423
}
424+
if (log_is_enabled(Debug, os, container) && !cg_infos[PIDS_IDX]._data_complete) {
425+
log_debug(os, container)("Optional cgroup v1 pids subsystem not found");
426+
// keep the other controller info, pids is optional
427+
}
390428
// Cgroups v1 case, we have all the info we need.
391429
*flags = CGROUPS_V1;
392430
return true;
393-
394431
};
395432

396433
void CgroupSubsystemFactory::cleanup(CgroupInfo* cg_infos) {
@@ -514,3 +551,22 @@ jlong CgroupSubsystem::memory_limit_in_bytes() {
514551
memory_limit->set_value(mem_limit, OSCONTAINER_CACHE_TIMEOUT);
515552
return mem_limit;
516553
}
554+
555+
jlong CgroupSubsystem::limit_from_str(char* limit_str) {
556+
if (limit_str == NULL) {
557+
return OSCONTAINER_ERROR;
558+
}
559+
// "No limit" in cgroups is expressed as the literal string 'max' for
560+
// some controllers, for example the pids controller.
561+
if (strcmp("max", limit_str) == 0) {
562+
os::free(limit_str);
563+
return (jlong)-1;
564+
}
565+
julong limit;
566+
if (sscanf(limit_str, JULONG_FORMAT, &limit) != 1) {
567+
os::free(limit_str);
568+
return OSCONTAINER_ERROR;
569+
}
570+
os::free(limit_str);
571+
return (jlong)limit;
572+
}

src/hotspot/os/linux/cgroupSubsystem_linux.hpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -61,12 +61,13 @@
6161
#define INVALID_CGROUPS_NO_MOUNT 5
6262
#define INVALID_CGROUPS_GENERIC 6
6363

64-
// Four controllers: cpu, cpuset, cpuacct, memory
65-
#define CG_INFO_LENGTH 4
64+
// Five controllers: cpu, cpuset, cpuacct, memory, pids
65+
#define CG_INFO_LENGTH 5
6666
#define CPUSET_IDX 0
6767
#define CPU_IDX 1
6868
#define CPUACCT_IDX 2
6969
#define MEMORY_IDX 3
70+
#define PIDS_IDX 4
7071

7172
typedef char * cptr;
7273

@@ -240,10 +241,12 @@ class CgroupSubsystem: public CHeapObj<mtInternal> {
240241
public:
241242
jlong memory_limit_in_bytes();
242243
int active_processor_count();
244+
jlong limit_from_str(char* limit_str);
243245

244246
virtual int cpu_quota() = 0;
245247
virtual int cpu_period() = 0;
246248
virtual int cpu_shares() = 0;
249+
virtual jlong pids_max() = 0;
247250
virtual jlong memory_usage_in_bytes() = 0;
248251
virtual jlong memory_and_swap_limit_in_bytes() = 0;
249252
virtual jlong memory_soft_limit_in_bytes() = 0;

src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -241,3 +241,28 @@ int CgroupV1Subsystem::cpu_shares() {
241241

242242
return shares;
243243
}
244+
245+
246+
char* CgroupV1Subsystem::pids_max_val() {
247+
GET_CONTAINER_INFO_CPTR(cptr, _pids, "/pids.max",
248+
"Maximum number of tasks is: %s", "%s %*d", pidsmax, 1024);
249+
if (pidsmax == NULL) {
250+
return NULL;
251+
}
252+
return os::strdup(pidsmax);
253+
}
254+
255+
/* pids_max
256+
*
257+
* Return the maximum number of tasks available to the process
258+
*
259+
* return:
260+
* maximum number of tasks
261+
* -1 for unlimited
262+
* OSCONTAINER_ERROR for not supported
263+
*/
264+
jlong CgroupV1Subsystem::pids_max() {
265+
if (_pids == NULL) return OSCONTAINER_ERROR;
266+
char * pidsmax_str = pids_max_val();
267+
return limit_from_str(pidsmax_str);
268+
}

src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -87,6 +87,8 @@ class CgroupV1Subsystem: public CgroupSubsystem {
8787

8888
int cpu_shares();
8989

90+
jlong pids_max();
91+
9092
const char * container_type() {
9193
return "cgroupv1";
9294
}
@@ -101,15 +103,20 @@ class CgroupV1Subsystem: public CgroupSubsystem {
101103
CgroupV1Controller* _cpuset = NULL;
102104
CachingCgroupController* _cpu = NULL;
103105
CgroupV1Controller* _cpuacct = NULL;
106+
CgroupV1Controller* _pids = NULL;
107+
108+
char * pids_max_val();
104109

105110
public:
106111
CgroupV1Subsystem(CgroupV1Controller* cpuset,
107112
CgroupV1Controller* cpu,
108113
CgroupV1Controller* cpuacct,
114+
CgroupV1Controller* pids,
109115
CgroupV1MemoryController* memory) {
110116
_cpuset = cpuset;
111117
_cpu = new CachingCgroupController(cpu);
112118
_cpuacct = cpuacct;
119+
_pids = pids;
113120
_memory = new CachingCgroupController(memory);
114121
_unlimited_memory = (LONG_MAX / os::vm_page_size()) * os::vm_page_size();
115122
}

src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -203,24 +203,6 @@ jlong CgroupV2Subsystem::read_memory_limit_in_bytes() {
203203
return limit;
204204
}
205205

206-
jlong CgroupV2Subsystem::limit_from_str(char* limit_str) {
207-
if (limit_str == NULL) {
208-
return OSCONTAINER_ERROR;
209-
}
210-
// Unlimited memory in Cgroups V2 is the literal string 'max'
211-
if (strcmp("max", limit_str) == 0) {
212-
os::free(limit_str);
213-
return (jlong)-1;
214-
}
215-
julong limit;
216-
if (sscanf(limit_str, JULONG_FORMAT, &limit) != 1) {
217-
os::free(limit_str);
218-
return OSCONTAINER_ERROR;
219-
}
220-
os::free(limit_str);
221-
return (jlong)limit;
222-
}
223-
224206
char* CgroupV2Subsystem::mem_limit_val() {
225207
GET_CONTAINER_INFO_CPTR(cptr, _unified, "/memory.max",
226208
"Raw value for memory limit is: %s", "%s", mem_limit_str, 1024);
@@ -244,3 +226,25 @@ char* CgroupV2Controller::construct_path(char* mount_path, char *cgroup_path) {
244226
return os::strdup(buf);
245227
}
246228

229+
char* CgroupV2Subsystem::pids_max_val() {
230+
GET_CONTAINER_INFO_CPTR(cptr, _unified, "/pids.max",
231+
"Maximum number of tasks is: %s", "%s %*d", pidsmax, 1024);
232+
if (pidsmax == NULL) {
233+
return NULL;
234+
}
235+
return os::strdup(pidsmax);
236+
}
237+
238+
/* pids_max
239+
*
240+
* Return the maximum number of tasks available to the process
241+
*
242+
* return:
243+
* maximum number of tasks
244+
* -1 for unlimited
245+
* OSCONTAINER_ERROR for not supported
246+
*/
247+
jlong CgroupV2Subsystem::pids_max() {
248+
char * pidsmax_str = pids_max_val();
249+
return limit_from_str(pidsmax_str);
250+
}

0 commit comments

Comments
 (0)