Skip to content

Commit 8715f37

Browse files
committed
8230305: Cgroups v2: Container awareness
8229202: Docker reporting causes secondary crashes in error handling 8216366: Add rationale to PER_CPU_SHARES define Implement Cgroups v2 container awareness in hotspot Reviewed-by: sgehwolf Backport-of: d462a6b
1 parent b4b4e3e commit 8715f37

10 files changed

+1427
-614
lines changed

src/hotspot/os/linux/cgroupSubsystem_linux.cpp

Lines changed: 421 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
/*
2+
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*
23+
*/
24+
25+
#ifndef CGROUP_SUBSYSTEM_LINUX_HPP
26+
#define CGROUP_SUBSYSTEM_LINUX_HPP
27+
28+
#include "memory/allocation.hpp"
29+
#include "runtime/os.hpp"
30+
#include "logging/log.hpp"
31+
#include "utilities/globalDefinitions.hpp"
32+
#include "utilities/macros.hpp"
33+
#include "osContainer_linux.hpp"
34+
35+
// Shared cgroups code (used by cgroup version 1 and version 2)
36+
37+
/*
38+
* PER_CPU_SHARES has been set to 1024 because CPU shares' quota
39+
* is commonly used in cloud frameworks like Kubernetes[1],
40+
* AWS[2] and Mesos[3] in a similar way. They spawn containers with
41+
* --cpu-shares option values scaled by PER_CPU_SHARES. Thus, we do
42+
* the inverse for determining the number of possible available
43+
* CPUs to the JVM inside a container. See JDK-8216366.
44+
*
45+
* [1] https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu
46+
* In particular:
47+
* When using Docker:
48+
* The spec.containers[].resources.requests.cpu is converted to its core value, which is potentially
49+
* fractional, and multiplied by 1024. The greater of this number or 2 is used as the value of the
50+
* --cpu-shares flag in the docker run command.
51+
* [2] https://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_ContainerDefinition.html
52+
* [3] https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/docker/docker.cpp#L648
53+
* https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/slave/containerizer/mesos/isolators/cgroups/constants.hpp#L30
54+
*/
55+
#define PER_CPU_SHARES 1024
56+
57+
typedef char * cptr;
58+
59+
class CgroupController: public CHeapObj<mtInternal> {
60+
public:
61+
virtual char *subsystem_path();
62+
};
63+
64+
PRAGMA_DIAG_PUSH
65+
PRAGMA_FORMAT_NONLITERAL_IGNORED
66+
template <typename T> int subsystem_file_line_contents(CgroupController* c,
67+
const char *filename,
68+
const char *matchline,
69+
const char *scan_fmt,
70+
T returnval) {
71+
FILE *fp = NULL;
72+
char *p;
73+
char file[MAXPATHLEN+1];
74+
char buf[MAXPATHLEN+1];
75+
char discard[MAXPATHLEN+1];
76+
bool found_match = false;
77+
78+
if (c == NULL) {
79+
log_debug(os, container)("subsystem_file_line_contents: CgroupController* is NULL");
80+
return OSCONTAINER_ERROR;
81+
}
82+
if (c->subsystem_path() == NULL) {
83+
log_debug(os, container)("subsystem_file_line_contents: subsystem path is NULL");
84+
return OSCONTAINER_ERROR;
85+
}
86+
87+
strncpy(file, c->subsystem_path(), MAXPATHLEN);
88+
file[MAXPATHLEN-1] = '\0';
89+
int filelen = strlen(file);
90+
if ((filelen + strlen(filename)) > (MAXPATHLEN-1)) {
91+
log_debug(os, container)("File path too long %s, %s", file, filename);
92+
return OSCONTAINER_ERROR;
93+
}
94+
strncat(file, filename, MAXPATHLEN-filelen);
95+
log_trace(os, container)("Path to %s is %s", filename, file);
96+
fp = fopen(file, "r");
97+
if (fp != NULL) {
98+
int err = 0;
99+
while ((p = fgets(buf, MAXPATHLEN, fp)) != NULL) {
100+
found_match = false;
101+
if (matchline == NULL) {
102+
// single-line file case
103+
int matched = sscanf(p, scan_fmt, returnval);
104+
found_match = (matched == 1);
105+
} else {
106+
// multi-line file case
107+
if (strstr(p, matchline) != NULL) {
108+
// discard matchline string prefix
109+
int matched = sscanf(p, scan_fmt, discard, returnval);
110+
found_match = (matched == 2);
111+
} else {
112+
continue; // substring not found
113+
}
114+
}
115+
if (found_match) {
116+
fclose(fp);
117+
return 0;
118+
} else {
119+
err = 1;
120+
log_debug(os, container)("Type %s not found in file %s", scan_fmt, file);
121+
}
122+
}
123+
if (err == 0) {
124+
log_debug(os, container)("Empty file %s", file);
125+
}
126+
} else {
127+
log_debug(os, container)("Open of file %s failed, %s", file, os::strerror(errno));
128+
}
129+
if (fp != NULL)
130+
fclose(fp);
131+
return OSCONTAINER_ERROR;
132+
}
133+
PRAGMA_DIAG_POP
134+
135+
#define GET_CONTAINER_INFO(return_type, subsystem, filename, \
136+
logstring, scan_fmt, variable) \
137+
return_type variable; \
138+
{ \
139+
int err; \
140+
err = subsystem_file_line_contents(subsystem, \
141+
filename, \
142+
NULL, \
143+
scan_fmt, \
144+
&variable); \
145+
if (err != 0) \
146+
return (return_type) OSCONTAINER_ERROR; \
147+
\
148+
log_trace(os, container)(logstring, variable); \
149+
}
150+
151+
#define GET_CONTAINER_INFO_CPTR(return_type, subsystem, filename, \
152+
logstring, scan_fmt, variable, bufsize) \
153+
char variable[bufsize]; \
154+
{ \
155+
int err; \
156+
err = subsystem_file_line_contents(subsystem, \
157+
filename, \
158+
NULL, \
159+
scan_fmt, \
160+
variable); \
161+
if (err != 0) \
162+
return (return_type) NULL; \
163+
\
164+
log_trace(os, container)(logstring, variable); \
165+
}
166+
167+
#define GET_CONTAINER_INFO_LINE(return_type, controller, filename, \
168+
matchline, logstring, scan_fmt, variable) \
169+
return_type variable; \
170+
{ \
171+
int err; \
172+
err = subsystem_file_line_contents(controller, \
173+
filename, \
174+
matchline, \
175+
scan_fmt, \
176+
&variable); \
177+
if (err != 0) \
178+
return (return_type) OSCONTAINER_ERROR; \
179+
\
180+
log_trace(os, container)(logstring, variable); \
181+
}
182+
183+
// Four controllers: cpu, cpuset, cpuacct, memory
184+
#define CG_INFO_LENGTH 4
185+
186+
class CachedMetric : public CHeapObj<mtInternal>{
187+
private:
188+
volatile jlong _metric;
189+
volatile jlong _next_check_counter;
190+
public:
191+
CachedMetric() {
192+
_metric = -1;
193+
_next_check_counter = min_jlong;
194+
}
195+
bool should_check_metric() {
196+
return os::elapsed_counter() > _next_check_counter;
197+
}
198+
jlong value() { return _metric; }
199+
void set_value(jlong value, jlong timeout) {
200+
_metric = value;
201+
// Metric is unlikely to change, but we want to remain
202+
// responsive to configuration changes. A very short grace time
203+
// between re-read avoids excessive overhead during startup without
204+
// significantly reducing the VMs ability to promptly react to changed
205+
// metric config
206+
_next_check_counter = os::elapsed_counter() + timeout;
207+
}
208+
};
209+
210+
class CachingCgroupController : public CHeapObj<mtInternal> {
211+
private:
212+
CgroupController* _controller;
213+
CachedMetric* _metrics_cache;
214+
215+
public:
216+
CachingCgroupController(CgroupController* cont) {
217+
_controller = cont;
218+
_metrics_cache = new CachedMetric();
219+
}
220+
221+
CachedMetric* metrics_cache() { return _metrics_cache; }
222+
CgroupController* controller() { return _controller; }
223+
};
224+
225+
class CgroupSubsystem: public CHeapObj<mtInternal> {
226+
public:
227+
jlong memory_limit_in_bytes();
228+
int active_processor_count();
229+
230+
virtual int cpu_quota();
231+
virtual int cpu_period();
232+
virtual int cpu_shares();
233+
virtual jlong memory_usage_in_bytes();
234+
virtual jlong memory_and_swap_limit_in_bytes();
235+
virtual jlong memory_soft_limit_in_bytes();
236+
virtual jlong memory_max_usage_in_bytes();
237+
virtual char * cpu_cpuset_cpus();
238+
virtual char * cpu_cpuset_memory_nodes();
239+
virtual jlong read_memory_limit_in_bytes();
240+
virtual const char * container_type();
241+
virtual CachingCgroupController* memory_controller();
242+
virtual CachingCgroupController* cpu_controller();
243+
};
244+
245+
class CgroupSubsystemFactory: AllStatic {
246+
public:
247+
static CgroupSubsystem* create();
248+
};
249+
250+
// Class representing info in /proc/self/cgroup.
251+
// See man 7 cgroups
252+
class CgroupInfo : public StackObj {
253+
friend class CgroupSubsystemFactory;
254+
255+
private:
256+
char* _name;
257+
int _hierarchy_id;
258+
bool _enabled;
259+
char* _cgroup_path;
260+
261+
};
262+
263+
264+
#endif // CGROUP_SUBSYSTEM_LINUX_HPP

0 commit comments

Comments
 (0)