Skip to content

Commit 7eb2803

Browse files
author
Jonathan Dowland
committed
8230305: Cgroups v2: Container awareness
8216366: Add rationale to PER_CPU_SHARES define
8229202: Docker reporting causes secondary crashes in error handling
8232207: Linux os::available_memory re-reads cgroup configuration on every invocation
8254997: Remove unimplemented OSContainer::read_memory_limit_in_bytes

Implement Cgroups v2 container awareness in hotspot.

Reviewed-by: sgehwolf, andrew
Backport-of: d462a6b5c9bd3dae5257cca42ea38c19cb742e3c
1 parent 3b1bbef commit 7eb2803

10 files changed

+1498
-633
lines changed

hotspot/src/os/linux/vm/cgroupSubsystem_linux.cpp

Lines changed: 452 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
1+
/*
2+
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*
23+
*/
24+
25+
#ifndef CGROUP_SUBSYSTEM_LINUX_HPP
26+
#define CGROUP_SUBSYSTEM_LINUX_HPP
27+
28+
#include "memory/allocation.hpp"
29+
#include "runtime/os.hpp"
30+
#include "utilities/globalDefinitions.hpp"
31+
#include "utilities/macros.hpp"
32+
#include "osContainer_linux.hpp"
33+
34+
// Shared cgroups code (used by cgroup version 1 and version 2)
35+
36+
/*
37+
* PER_CPU_SHARES has been set to 1024 because CPU shares' quota
38+
* is commonly used in cloud frameworks like Kubernetes[1],
39+
* AWS[2] and Mesos[3] in a similar way. They spawn containers with
40+
* --cpu-shares option values scaled by PER_CPU_SHARES. Thus, we do
41+
* the inverse for determining the number of possible available
42+
* CPUs to the JVM inside a container. See JDK-8216366.
43+
*
44+
* [1] https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu
45+
* In particular:
46+
* When using Docker:
47+
* The spec.containers[].resources.requests.cpu is converted to its core value, which is potentially
48+
* fractional, and multiplied by 1024. The greater of this number or 2 is used as the value of the
49+
* --cpu-shares flag in the docker run command.
50+
* [2] https://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_ContainerDefinition.html
51+
* [3] https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/docker/docker.cpp#L648
52+
* https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/slave/containerizer/mesos/isolators/cgroups/constants.hpp#L30
53+
*/
54+
#define PER_CPU_SHARES 1024
55+
56+
// Shorthand for a pointer to mutable char (C string). Not referenced anywhere
// else in this header; presumably used by the cgroup v1/v2 implementation
// files -- TODO confirm.
typedef char * cptr;
57+
58+
// Base type for a single cgroup controller. The only thing this header relies
// on is subsystem_path(), which subsystem_file_line_contents() uses as the
// directory prefix of the controller file it opens. A NULL return is treated
// by that function as "no path available".
class CgroupController: public CHeapObj<mtInternal> {
  public:
    // Path prefix under which this controller's files are found; may be NULL.
    virtual char* subsystem_path();
};
62+
63+
PRAGMA_DIAG_PUSH
64+
PRAGMA_FORMAT_NONLITERAL_IGNORED
65+
/*
 * Reads one value out of a controller file.
 *
 * The file opened is c->subsystem_path() with 'filename' appended. Lines are
 * read one at a time:
 *   - matchline == NULL: every line is scanned with
 *       sscanf(line, scan_fmt, returnval)
 *     and the first line yielding exactly one conversion wins.
 *   - matchline != NULL: only lines containing 'matchline' as a substring are
 *     scanned, with
 *       sscanf(line, scan_fmt, discard, returnval)
 *     where the first conversion (expected to be a string) is thrown away;
 *     the first line yielding exactly two conversions wins.
 *
 * c         - controller supplying the path prefix; NULL is an error.
 * filename  - name appended verbatim to the subsystem path.
 * matchline - substring selecting the line of interest, or NULL.
 * scan_fmt  - sscanf format; used as a non-literal format string, which is why
 *             the surrounding pragmas silence the format-nonliteral warning.
 * returnval - destination handed straight to sscanf (a pointer or char buffer).
 *
 * Returns 0 when a value was scanned, OSCONTAINER_ERROR otherwise (NULL
 * controller or path, over-long path, open failure, or no line scanned).
 * Diagnostics are printed to tty only when PrintContainerInfo is set.
 */
template <typename T> int subsystem_file_line_contents(CgroupController* c,
                                              const char *filename,
                                              const char *matchline,
                                              const char *scan_fmt,
                                              T returnval) {
  if (c == NULL) {
    if (PrintContainerInfo) {
      tty->print_cr("subsystem_file_line_contents: CgroupController* is NULL");
    }
    return OSCONTAINER_ERROR;
  }
  if (c->subsystem_path() == NULL) {
    if (PrintContainerInfo) {
      tty->print_cr("subsystem_file_line_contents: subsystem path is NULL");
    }
    return OSCONTAINER_ERROR;
  }

  // Assemble "<subsystem path><filename>" in a fixed-size buffer.
  char path[MAXPATHLEN+1];
  strncpy(path, c->subsystem_path(), MAXPATHLEN);
  path[MAXPATHLEN-1] = '\0';  // strncpy does not terminate on truncation
  const size_t prefix_len = strlen(path);
  if ((prefix_len + strlen(filename)) > (MAXPATHLEN-1)) {
    if (PrintContainerInfo) {
      tty->print_cr("File path too long %s, %s", path, filename);
    }
    return OSCONTAINER_ERROR;
  }
  strncat(path, filename, MAXPATHLEN-prefix_len);
  if (PrintContainerInfo) {
    tty->print_cr("Path to %s is %s", filename, path);
  }

  FILE* stream = fopen(path, "r");
  if (stream == NULL) {
    if (PrintContainerInfo) {
      tty->print_cr("Open of file %s failed, %s", path, strerror(errno));
    }
    return OSCONTAINER_ERROR;
  }

  char line[MAXPATHLEN+1];
  char discard[MAXPATHLEN+1];   // receives the skipped leading token (multi-line case)
  bool scan_attempt_failed = false;

  while (fgets(line, MAXPATHLEN, stream) != NULL) {
    bool scanned;
    if (matchline == NULL) {
      // single-line file case
      scanned = (sscanf(line, scan_fmt, returnval) == 1);
    } else if (strstr(line, matchline) == NULL) {
      // multi-line file case, but this is not the line we are after
      continue;
    } else {
      // multi-line file case: drop the leading token, keep the value
      scanned = (sscanf(line, scan_fmt, discard, returnval) == 2);
    }

    if (scanned) {
      fclose(stream);
      return 0;
    }

    scan_attempt_failed = true;
    if (PrintContainerInfo) {
      tty->print_cr("Type %s not found in file %s", scan_fmt, path);
    }
  }

  // NOTE(review): this message is also produced when the file had lines but
  // none of them contained 'matchline' (no scan was ever attempted).
  if (!scan_attempt_failed) {
    if (PrintContainerInfo) {
      tty->print_cr("Empty file %s", path);
    }
  }

  fclose(stream);
  return OSCONTAINER_ERROR;
}
146+
PRAGMA_DIAG_POP
147+
148+
/*
 * Declares 'variable' (of type 'return_type') in the enclosing scope and fills
 * it by calling subsystem_file_line_contents() on 'filename' with no match
 * line (single-line file case), passing &variable as the scan destination.
 *
 * If the read fails, the ENCLOSING FUNCTION returns
 * (return_type) OSCONTAINER_ERROR. On success the value is logged via
 * 'logstring' when PrintContainerInfo is set.
 */
#define GET_CONTAINER_INFO(return_type, subsystem, filename,              \
                           logstring, scan_fmt, variable)                 \
  return_type variable;                                                   \
  {                                                                       \
    int err = subsystem_file_line_contents(subsystem, filename, NULL,     \
                                           scan_fmt, &variable);          \
    if (err != 0) {                                                       \
      return (return_type) OSCONTAINER_ERROR;                             \
    }                                                                     \
    if (PrintContainerInfo) {                                             \
      tty->print_cr(logstring, variable);                                 \
    }                                                                     \
  }
165+
166+
/*
 * String flavour of GET_CONTAINER_INFO: declares 'char variable[bufsize]' in
 * the enclosing scope and fills it by calling subsystem_file_line_contents()
 * on 'filename' with no match line, passing the array itself as the scan
 * destination.
 *
 * If the read fails, the ENCLOSING FUNCTION returns (return_type) NULL. On
 * success the string is logged via 'logstring' when PrintContainerInfo is set.
 *
 * NOTE: 'bufsize' only sizes the array; it is not forwarded to the scanner, so
 * 'scan_fmt' is the only thing limiting how much is written into 'variable'.
 */
#define GET_CONTAINER_INFO_CPTR(return_type, subsystem, filename,         \
                                logstring, scan_fmt, variable, bufsize)   \
  char variable[bufsize];                                                 \
  {                                                                       \
    int err = subsystem_file_line_contents(subsystem, filename, NULL,     \
                                           scan_fmt, variable);           \
    if (err != 0) {                                                       \
      return (return_type) NULL;                                          \
    }                                                                     \
    if (PrintContainerInfo) {                                             \
      tty->print_cr(logstring, variable);                                 \
    }                                                                     \
  }
183+
184+
/*
 * Multi-line flavour of GET_CONTAINER_INFO: declares 'variable' (of type
 * 'return_type') in the enclosing scope and fills it from the line of
 * 'filename' that contains 'matchline', by calling
 * subsystem_file_line_contents() with &variable as the scan destination.
 *
 * If the read fails, the ENCLOSING FUNCTION returns
 * (return_type) OSCONTAINER_ERROR. On success the value is logged via
 * 'logstring' when PrintContainerInfo is set.
 */
#define GET_CONTAINER_INFO_LINE(return_type, controller, filename,        \
                                matchline, logstring, scan_fmt, variable) \
  return_type variable;                                                   \
  {                                                                       \
    int err = subsystem_file_line_contents(controller, filename,          \
                                           matchline, scan_fmt,           \
                                           &variable);                    \
    if (err != 0) {                                                       \
      return (return_type) OSCONTAINER_ERROR;                             \
    }                                                                     \
    if (PrintContainerInfo) {                                             \
      tty->print_cr(logstring, variable);                                 \
    }                                                                     \
  }
201+
202+
// Four controllers: cpu, cpuset, cpuacct, memory
203+
#define CG_INFO_LENGTH 4
204+
205+
// Holds one cached jlong metric together with the os::elapsed_counter() value
// after which the metric should be read again.
class CachedMetric : public CHeapObj<mtInternal> {
  private:
    volatile jlong _metric;              // last stored value; -1 until set_value() is called
    volatile jlong _next_check_counter;  // re-read once elapsed_counter() exceeds this

  public:
    // Starts with value -1 and the threshold at min_jlong.
    CachedMetric() : _metric(-1), _next_check_counter(min_jlong) {}

    // True once os::elapsed_counter() has moved past the stored threshold.
    bool should_check_metric() {
      const jlong now = os::elapsed_counter();
      return now > _next_check_counter;
    }

    // Returns the cached value without checking whether it is still fresh.
    jlong value() { return _metric; }

    // Stores 'value' and schedules the next re-read 'timeout' counter units
    // (same unit as os::elapsed_counter()) from now.
    void set_value(jlong value, jlong timeout) {
      _metric = value;
      // The metric rarely changes, yet configuration updates must still be
      // noticed. A very short grace period between re-reads keeps the
      // overhead low during startup without noticeably delaying the VM's
      // reaction to a changed metric configuration.
      _next_check_counter = os::elapsed_counter() + timeout;
    }
};
228+
229+
// Pairs a CgroupController with a CachedMetric that is allocated in the
// constructor. No destructor is declared here, so nothing in this class
// releases the CachedMetric.
class CachingCgroupController : public CHeapObj<mtInternal> {
  private:
    CgroupController* _controller;     // wrapped controller, stored as given
    CachedMetric*     _metrics_cache;  // created in the constructor

  public:
    CachingCgroupController(CgroupController* cont)
      : _controller(cont),
        _metrics_cache(new CachedMetric()) {}

    // Accessors for the two members.
    CachedMetric* metrics_cache() { return _metrics_cache; }
    CgroupController* controller() { return _controller; }
};
243+
244+
// Query interface for cgroup-derived CPU and memory settings. Only
// declarations live in this header; the definitions are elsewhere.
// Presumably the virtual members are supplied by the cgroup v1 and v2
// implementations -- TODO confirm against the implementation files.
class CgroupSubsystem: public CHeapObj<mtInternal> {
  public:
    // Non-virtual entry points.
    jlong memory_limit_in_bytes();
    int active_processor_count();

    // CPU-related virtual accessors.
    virtual int cpu_quota();
    virtual int cpu_period();
    virtual int cpu_shares();

    // Memory-related virtual accessors.
    virtual jlong memory_usage_in_bytes();
    virtual jlong memory_and_swap_limit_in_bytes();
    virtual jlong memory_soft_limit_in_bytes();
    virtual jlong memory_max_usage_in_bytes();

    // cpuset-related virtual accessors returning C strings.
    virtual char* cpu_cpuset_cpus();
    virtual char* cpu_cpuset_memory_nodes();

    virtual jlong read_memory_limit_in_bytes();
    virtual const char* container_type();

    // Controllers wrapped together with their metric cache.
    virtual CachingCgroupController* memory_controller();
    virtual CachingCgroupController* cpu_controller();
};
263+
264+
class CgroupSubsystemFactory: AllStatic {
265+
public:
266+
static CgroupSubsystem* create();
267+
};
268+
269+
// Class representing info in /proc/self/cgroup.
270+
// See man 7 cgroups
271+
// Stack-allocated record with four private fields. It has no public members;
// CgroupSubsystemFactory is a friend and is the only visible accessor.
class CgroupInfo : public StackObj {
  friend class CgroupSubsystemFactory;

  private:
    char* _name;          // presumably the controller name -- TODO confirm
    int   _hierarchy_id;  // presumably the hierarchy ID column -- TODO confirm
    bool  _enabled;       // presumably whether the controller is enabled -- TODO confirm
    char* _cgroup_path;   // presumably the cgroup path column -- TODO confirm

};
281+
282+
283+
#endif // CGROUP_SUBSYSTEM_LINUX_HPP

0 commit comments

Comments
 (0)