Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
8230305: Cgroups v2: Container awareness
Implement Cgroups v2 container awareness in HotSpot.
Reviewed-by: bobv, dholmes
- Loading branch information
Showing
with
1,425 additions
and 638 deletions.
- +421 −0 src/hotspot/os/linux/cgroupSubsystem_linux.cpp
- +264 −0 src/hotspot/os/linux/cgroupSubsystem_linux.hpp
- +243 −0 src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp
- +118 −0 src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp
- +235 −0 src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp
- +89 −0 src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp
- +35 −631 src/hotspot/os/linux/osContainer_linux.cpp
- +1 −2 src/hotspot/os/linux/osContainer_linux.hpp
- +2 −1 src/hotspot/os/linux/os_linux.hpp
- +17 −4 test/hotspot/jtreg/containers/docker/TestCPUAwareness.java
@@ -0,0 +1,264 @@ | ||
/* | ||
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. | ||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | ||
* | ||
* This code is free software; you can redistribute it and/or modify it | ||
* under the terms of the GNU General Public License version 2 only, as | ||
* published by the Free Software Foundation. | ||
* | ||
* This code is distributed in the hope that it will be useful, but WITHOUT | ||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | ||
* version 2 for more details (a copy is included in the LICENSE file that | ||
* accompanied this code). | ||
* | ||
* You should have received a copy of the GNU General Public License version | ||
* 2 along with this work; if not, write to the Free Software Foundation, | ||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
* | ||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | ||
* or visit www.oracle.com if you need additional information or have any | ||
* questions. | ||
* | ||
*/ | ||
|
||
#ifndef CGROUP_SUBSYSTEM_LINUX_HPP | ||
#define CGROUP_SUBSYSTEM_LINUX_HPP | ||
|
||
#include "memory/allocation.hpp" | ||
#include "runtime/os.hpp" | ||
#include "logging/log.hpp" | ||
#include "utilities/globalDefinitions.hpp" | ||
#include "utilities/macros.hpp" | ||
#include "osContainer_linux.hpp" | ||
|
||
// Shared cgroups code (used by cgroup version 1 and version 2) | ||
|
||
/*
 * PER_CPU_SHARES is set to 1024 because the CPU shares quota is commonly
 * used by cloud frameworks such as Kubernetes[1], AWS[2] and Mesos[3] in a
 * similar way: they spawn containers with --cpu-shares option values scaled
 * by PER_CPU_SHARES. Thus, we do the inverse when determining the number of
 * CPUs possibly available to the JVM inside a container. See JDK-8216366.
 *
 * [1] https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu
 *     In particular:
 *        When using Docker:
 *          The spec.containers[].resources.requests.cpu is converted to its core value, which is potentially
 *          fractional, and multiplied by 1024. The greater of this number or 2 is used as the value of the
 *          --cpu-shares flag in the docker run command.
 * [2] https://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_ContainerDefinition.html
 * [3] https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/docker/docker.cpp#L648
 *     https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/slave/containerizer/mesos/isolators/cgroups/constants.hpp#L30
 */
#define PER_CPU_SHARES 1024
|
||
// Alias for a pointer to (mutable) char, i.e. a C-style string pointer.
typedef char * cptr;
|
||
// Base type for a cgroup controller. The only operation declared here is
// subsystem_path(); subsystem_file_line_contents() uses its result as the
// prefix to which a controller file name is appended.
// NOTE(review): subsystem_path() is declared virtual but not pure in this
// header - confirm that a definition is provided elsewhere.
class CgroupController : public CHeapObj<mtInternal> {
 public:
  // Path prefix for this controller's files. May be NULL, which
  // subsystem_file_line_contents() treats as an error.
  virtual char* subsystem_path();
};
|
||
PRAGMA_DIAG_PUSH | ||
PRAGMA_FORMAT_NONLITERAL_IGNORED | ||
template <typename T> int subsystem_file_line_contents(CgroupController* c, | ||
const char *filename, | ||
const char *matchline, | ||
const char *scan_fmt, | ||
T returnval) { | ||
FILE *fp = NULL; | ||
char *p; | ||
char file[MAXPATHLEN+1]; | ||
char buf[MAXPATHLEN+1]; | ||
char discard[MAXPATHLEN+1]; | ||
bool found_match = false; | ||
|
||
if (c == NULL) { | ||
log_debug(os, container)("subsystem_file_line_contents: CgroupController* is NULL"); | ||
return OSCONTAINER_ERROR; | ||
} | ||
if (c->subsystem_path() == NULL) { | ||
log_debug(os, container)("subsystem_file_line_contents: subsystem path is NULL"); | ||
return OSCONTAINER_ERROR; | ||
} | ||
|
||
strncpy(file, c->subsystem_path(), MAXPATHLEN); | ||
file[MAXPATHLEN-1] = '\0'; | ||
int filelen = strlen(file); | ||
if ((filelen + strlen(filename)) > (MAXPATHLEN-1)) { | ||
log_debug(os, container)("File path too long %s, %s", file, filename); | ||
return OSCONTAINER_ERROR; | ||
} | ||
strncat(file, filename, MAXPATHLEN-filelen); | ||
log_trace(os, container)("Path to %s is %s", filename, file); | ||
fp = fopen(file, "r"); | ||
if (fp != NULL) { | ||
int err = 0; | ||
while ((p = fgets(buf, MAXPATHLEN, fp)) != NULL) { | ||
found_match = false; | ||
if (matchline == NULL) { | ||
// single-line file case | ||
int matched = sscanf(p, scan_fmt, returnval); | ||
found_match = (matched == 1); | ||
} else { | ||
// multi-line file case | ||
if (strstr(p, matchline) != NULL) { | ||
// discard matchline string prefix | ||
int matched = sscanf(p, scan_fmt, discard, returnval); | ||
found_match = (matched == 2); | ||
} else { | ||
continue; // substring not found | ||
} | ||
} | ||
if (found_match) { | ||
fclose(fp); | ||
return 0; | ||
} else { | ||
err = 1; | ||
log_debug(os, container)("Type %s not found in file %s", scan_fmt, file); | ||
} | ||
} | ||
if (err == 0) { | ||
log_debug(os, container)("Empty file %s", file); | ||
} | ||
} else { | ||
log_debug(os, container)("Open of file %s failed, %s", file, os::strerror(errno)); | ||
} | ||
if (fp != NULL) | ||
fclose(fp); | ||
return OSCONTAINER_ERROR; | ||
} | ||
PRAGMA_DIAG_POP | ||
|
||
// Declares 'variable' of type 'return_type' in the enclosing scope and fills
// it from the single-line file 'filename' of 'subsystem' using 'scan_fmt'.
// On failure the ENCLOSING FUNCTION returns (return_type) OSCONTAINER_ERROR;
// on success the value is trace-logged with 'logstring'.
#define GET_CONTAINER_INFO(return_type, subsystem, filename,              \
                           logstring, scan_fmt, variable)                 \
  return_type variable;                                                   \
  {                                                                       \
    const int err = subsystem_file_line_contents(subsystem,               \
                                                 filename,                \
                                                 NULL,                    \
                                                 scan_fmt,                \
                                                 &variable);              \
    if (err != 0) {                                                       \
      return (return_type) OSCONTAINER_ERROR;                             \
    }                                                                     \
                                                                          \
    log_trace(os, container)(logstring, variable);                        \
  }
|
||
// Declares the char array 'variable[bufsize]' in the enclosing scope and
// fills it from the single-line file 'filename' of 'subsystem' using
// 'scan_fmt'. On failure the ENCLOSING FUNCTION returns (return_type) NULL;
// on success the buffer is trace-logged with 'logstring'.
#define GET_CONTAINER_INFO_CPTR(return_type, subsystem, filename,         \
                                logstring, scan_fmt, variable, bufsize)   \
  char variable[bufsize];                                                 \
  {                                                                       \
    const int err = subsystem_file_line_contents(subsystem,               \
                                                 filename,                \
                                                 NULL,                    \
                                                 scan_fmt,                \
                                                 variable);               \
    if (err != 0) {                                                       \
      return (return_type) NULL;                                          \
    }                                                                     \
                                                                          \
    log_trace(os, container)(logstring, variable);                        \
  }
|
||
// Declares 'variable' of type 'return_type' in the enclosing scope and fills
// it from the first line of 'filename' (of 'controller') that contains
// 'matchline' and scans successfully with 'scan_fmt'. On failure the
// ENCLOSING FUNCTION returns (return_type) OSCONTAINER_ERROR; on success the
// value is trace-logged with 'logstring'.
#define GET_CONTAINER_INFO_LINE(return_type, controller, filename,        \
                                matchline, logstring, scan_fmt, variable) \
  return_type variable;                                                   \
  {                                                                       \
    const int err = subsystem_file_line_contents(controller,              \
                                                 filename,                \
                                                 matchline,               \
                                                 scan_fmt,                \
                                                 &variable);              \
    if (err != 0) {                                                       \
      return (return_type) OSCONTAINER_ERROR;                             \
    }                                                                     \
                                                                          \
    log_trace(os, container)(logstring, variable);                        \
  }
|
||
// Four controllers: cpu, cpuset, cpuacct, memory | ||
#define CG_INFO_LENGTH 4 | ||
|
||
// Holds one cached jlong metric plus the os::elapsed_counter() value after
// which the metric should be read again.
class CachedMetric : public CHeapObj<mtInternal> {
 private:
  volatile jlong _metric;
  volatile jlong _next_check_counter;

 public:
  // Starts with a metric of -1 and the smallest possible re-check threshold.
  CachedMetric() : _metric(-1), _next_check_counter(min_jlong) {}

  // True once the elapsed counter has moved past the stored threshold.
  bool should_check_metric() {
    const jlong now = os::elapsed_counter();
    return now > _next_check_counter;
  }

  // Most recently stored metric (-1 until set_value() has been called).
  jlong value() {
    return _metric;
  }

  // Stores 'value' and schedules the next re-check 'timeout' counter ticks
  // from now.
  void set_value(jlong value, jlong timeout) {
    _metric = value;
    // The metric is unlikely to change, but we want to remain responsive to
    // configuration changes. A very short grace time between re-reads avoids
    // excessive overhead during startup without significantly reducing the
    // VM's ability to promptly react to a changed metric config.
    _next_check_counter = os::elapsed_counter() + timeout;
  }
};
|
||
// Pairs a CgroupController with a CachedMetric allocated at construction.
class CachingCgroupController : public CHeapObj<mtInternal> {
 private:
  CgroupController* _controller;
  CachedMetric*     _metrics_cache;

 public:
  // Wraps 'cont' (stored as-is, not copied) and creates a fresh cache.
  CachingCgroupController(CgroupController* cont)
    : _controller(cont),
      _metrics_cache(new CachedMetric()) {}

  // Cache object created by the constructor.
  CachedMetric* metrics_cache() {
    return _metrics_cache;
  }

  // Controller passed to the constructor.
  CgroupController* controller() {
    return _controller;
  }
};
|
||
// Interface of a cgroup subsystem. Only declarations are visible in this
// header; all definitions live elsewhere.
// NOTE(review): the virtual members are declared without a pure-specifier
// here - confirm that definitions (or overrides plus base definitions) exist.
class CgroupSubsystem : public CHeapObj<mtInternal> {
 public:
  // Non-virtual entry points.
  jlong memory_limit_in_bytes();
  int   active_processor_count();

  // CPU-related queries.
  virtual int   cpu_quota();
  virtual int   cpu_period();
  virtual int   cpu_shares();

  // Memory-related queries.
  virtual jlong memory_usage_in_bytes();
  virtual jlong memory_and_swap_limit_in_bytes();
  virtual jlong memory_soft_limit_in_bytes();
  virtual jlong memory_max_usage_in_bytes();

  // cpuset-related queries returning C strings.
  virtual char* cpu_cpuset_cpus();
  virtual char* cpu_cpuset_memory_nodes();

  virtual jlong read_memory_limit_in_bytes();
  virtual const char* container_type();

  // Accessors for the caching wrappers of the memory and cpu controllers.
  virtual CachingCgroupController* memory_controller();
  virtual CachingCgroupController* cpu_controller();
};
|
||
class CgroupSubsystemFactory: AllStatic { | ||
public: | ||
static CgroupSubsystem* create(); | ||
}; | ||
|
||
// Class representing info in /proc/self/cgroup.
// See man 7 cgroups
//
// All fields are private and are accessed by the friend class
// CgroupSubsystemFactory. The default constructor gives every field a
// well-defined value so that an entry which is never filled in reads as
// "no name, hierarchy 0, disabled, no path" instead of indeterminate data.
class CgroupInfo : public StackObj {
  friend class CgroupSubsystemFactory;

 private:
  char* _name;
  int   _hierarchy_id;
  bool  _enabled;
  char* _cgroup_path;

 public:
  CgroupInfo()
    : _name(NULL),
      _hierarchy_id(0),
      _enabled(false),
      _cgroup_path(NULL) {}
};
|
||
|
||
#endif // CGROUP_SUBSYSTEM_LINUX_HPP |
Oops, something went wrong.