Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 87 additions & 31 deletions src/hotspot/os/linux/cgroupSubsystem_linux.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -34,11 +34,15 @@
#include "runtime/os.hpp"
#include "utilities/globalDefinitions.hpp"

// Human-readable controller names used in log messages, indexed by the
// *_IDX constants from cgroupSubsystem_linux.hpp. The order must match those
// indices exactly: CPUSET_IDX (0), CPU_IDX (1), CPUACCT_IDX (2),
// MEMORY_IDX (3), PIDS_IDX (4). Otherwise log output names the wrong
// controller.
static const char* cg_controller_name[] = { "cpuset", "cpu", "cpuacct", "memory", "pids" };

CgroupSubsystem* CgroupSubsystemFactory::create() {
CgroupV1MemoryController* memory = NULL;
CgroupV1Controller* cpuset = NULL;
CgroupV1Controller* cpu = NULL;
CgroupV1Controller* cpuacct = NULL;
CgroupV1Controller* pids = NULL;
CgroupInfo cg_infos[CG_INFO_LENGTH];
u1 cg_type_flags = INVALID_CGROUPS_GENERIC;
const char* proc_cgroups = "/proc/cgroups";
Expand Down Expand Up @@ -93,22 +97,29 @@ CgroupSubsystem* CgroupSubsystemFactory::create() {
assert(is_cgroup_v1(&cg_type_flags), "Cgroup v1 expected");
for (int i = 0; i < CG_INFO_LENGTH; i++) {
CgroupInfo info = cg_infos[i];
if (strcmp(info._name, "memory") == 0) {
memory = new CgroupV1MemoryController(info._root_mount_path, info._mount_path);
memory->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpuset") == 0) {
cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpuset->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpu") == 0) {
cpu = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpu->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpuacct") == 0) {
cpuacct = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpuacct->set_subsystem_path(info._cgroup_path);
if (info._data_complete) { // pids controller might have incomplete data
if (strcmp(info._name, "memory") == 0) {
memory = new CgroupV1MemoryController(info._root_mount_path, info._mount_path);
memory->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpuset") == 0) {
cpuset = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpuset->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpu") == 0) {
cpu = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpu->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "cpuacct") == 0) {
cpuacct = new CgroupV1Controller(info._root_mount_path, info._mount_path);
cpuacct->set_subsystem_path(info._cgroup_path);
} else if (strcmp(info._name, "pids") == 0) {
pids = new CgroupV1Controller(info._root_mount_path, info._mount_path);
pids->set_subsystem_path(info._cgroup_path);
}
} else {
log_debug(os, container)("CgroupInfo for %s not complete", cg_controller_name[i]);
}
}
cleanup(cg_infos);
return new CgroupV1Subsystem(cpuset, cpu, cpuacct, memory);
return new CgroupV1Subsystem(cpuset, cpu, cpuacct, pids, memory);
}

bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
Expand All @@ -122,9 +133,10 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
char buf[MAXPATHLEN+1];
char *p;
bool is_cgroupsV2;
// true iff all controllers, memory, cpu, cpuset, cpuacct are enabled
// true iff all required controllers, memory, cpu, cpuset, cpuacct are enabled
// at the kernel level.
bool all_controllers_enabled;
// pids might not be enabled on older Linux distros (SLES 12.1, RHEL 7.1)
bool all_required_controllers_enabled;

/*
* Read /proc/cgroups so as to be able to distinguish cgroups v2 vs cgroups v1.
Expand All @@ -136,10 +148,9 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
*/
cgroups = fopen(proc_cgroups, "r");
if (cgroups == NULL) {
log_debug(os, container)("Can't open %s, %s",
proc_cgroups, os::strerror(errno));
*flags = INVALID_CGROUPS_GENERIC;
return false;
log_debug(os, container)("Can't open %s, %s", proc_cgroups, os::strerror(errno));
*flags = INVALID_CGROUPS_GENERIC;
return false;
}

while ((p = fgets(buf, MAXPATHLEN, cgroups)) != NULL) {
Expand Down Expand Up @@ -167,19 +178,30 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
cg_infos[CPUACCT_IDX]._name = os::strdup(name);
cg_infos[CPUACCT_IDX]._hierarchy_id = hierarchy_id;
cg_infos[CPUACCT_IDX]._enabled = (enabled == 1);
} else if (strcmp(name, "pids") == 0) {
log_debug(os, container)("Detected optional pids controller entry in %s", proc_cgroups);
cg_infos[PIDS_IDX]._name = os::strdup(name);
cg_infos[PIDS_IDX]._hierarchy_id = hierarchy_id;
cg_infos[PIDS_IDX]._enabled = (enabled == 1);
}
}
fclose(cgroups);

is_cgroupsV2 = true;
all_controllers_enabled = true;
all_required_controllers_enabled = true;
for (int i = 0; i < CG_INFO_LENGTH; i++) {
is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0;
all_controllers_enabled = all_controllers_enabled && cg_infos[i]._enabled;
// pids controller is optional. All other controllers are required
if (i != PIDS_IDX) {
is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0;
all_required_controllers_enabled = all_required_controllers_enabled && cg_infos[i]._enabled;
}
if (log_is_enabled(Debug, os, container) && !cg_infos[i]._enabled) {
log_debug(os, container)("controller %s is not enabled\n", cg_controller_name[i]);
}
}

if (!all_controllers_enabled) {
// one or more controllers disabled, disable container support
if (!all_required_controllers_enabled) {
// one or more required controllers disabled, disable container support
log_debug(os, container)("One or more required controllers disabled at kernel level.");
cleanup(cg_infos);
*flags = INVALID_CGROUPS_GENERIC;
Expand Down Expand Up @@ -220,17 +242,21 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,

while (!is_cgroupsV2 && (token = strsep(&controllers, ",")) != NULL) {
if (strcmp(token, "memory") == 0) {
assert(hierarchy_id == cg_infos[MEMORY_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
assert(hierarchy_id == cg_infos[MEMORY_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for memory");
cg_infos[MEMORY_IDX]._cgroup_path = os::strdup(cgroup_path);
} else if (strcmp(token, "cpuset") == 0) {
assert(hierarchy_id == cg_infos[CPUSET_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
assert(hierarchy_id == cg_infos[CPUSET_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpuset");
cg_infos[CPUSET_IDX]._cgroup_path = os::strdup(cgroup_path);
} else if (strcmp(token, "cpu") == 0) {
assert(hierarchy_id == cg_infos[CPU_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
assert(hierarchy_id == cg_infos[CPU_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpu");
cg_infos[CPU_IDX]._cgroup_path = os::strdup(cgroup_path);
} else if (strcmp(token, "cpuacct") == 0) {
assert(hierarchy_id == cg_infos[CPUACCT_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
assert(hierarchy_id == cg_infos[CPUACCT_IDX]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch for cpuacc");
cg_infos[CPUACCT_IDX]._cgroup_path = os::strdup(cgroup_path);
} else if (strcmp(token, "pids") == 0) {
assert(hierarchy_id == cg_infos[PIDS_IDX]._hierarchy_id, "/proc/cgroups (%d) and /proc/self/cgroup (%d) hierarchy mismatch for pids",
cg_infos[PIDS_IDX]._hierarchy_id, hierarchy_id);
cg_infos[PIDS_IDX]._cgroup_path = os::strdup(cgroup_path);
}
}
if (is_cgroupsV2) {
Expand Down Expand Up @@ -281,13 +307,15 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,

/* Cgroup v1 relevant info
*
* Find the cgroup mount point for memory, cpuset, cpu, cpuacct
* Find the cgroup mount point for memory, cpuset, cpu, cpuacct, pids
*
* Example for docker:
* 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
*
* Example for host:
* 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory
*
* 44 31 0:39 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:23 - cgroup cgroup rw,pids
*/
if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- %s %*s %s", tmproot, tmpmount, tmp_fs_type, tmpcgroups) == 4) {
if (strcmp("cgroup", tmp_fs_type) != 0) {
Expand Down Expand Up @@ -333,6 +361,12 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
cg_infos[CPUACCT_IDX]._mount_path = os::strdup(tmpmount);
cg_infos[CPUACCT_IDX]._root_mount_path = os::strdup(tmproot);
cg_infos[CPUACCT_IDX]._data_complete = true;
} else if (strcmp(token, "pids") == 0) {
any_cgroup_mounts_found = true;
assert(cg_infos[PIDS_IDX]._mount_path == NULL, "stomping of _mount_path");
cg_infos[PIDS_IDX]._mount_path = os::strdup(tmpmount);
cg_infos[PIDS_IDX]._root_mount_path = os::strdup(tmproot);
cg_infos[PIDS_IDX]._data_complete = true;
}
}
}
Expand Down Expand Up @@ -387,10 +421,13 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
*flags = INVALID_CGROUPS_V1;
return false;
}
if (log_is_enabled(Debug, os, container) && !cg_infos[PIDS_IDX]._data_complete) {
log_debug(os, container)("Optional cgroup v1 pids subsystem not found");
// keep the other controller info, pids is optional
}
// Cgroups v1 case, we have all the info we need.
*flags = CGROUPS_V1;
return true;

};

void CgroupSubsystemFactory::cleanup(CgroupInfo* cg_infos) {
Expand Down Expand Up @@ -514,3 +551,22 @@ jlong CgroupSubsystem::memory_limit_in_bytes() {
memory_limit->set_value(mem_limit, OSCONTAINER_CACHE_TIMEOUT);
return mem_limit;
}

// Convert the textual value of a cgroup limit file into a number.
//
// limit_str: string allocated by the os:: allocator, may be NULL. A non-NULL
//            string is always released here via os::free(); callers must not
//            use it afterwards.
//
// return:
//    the parsed limit
//    -1 if the value is the literal string 'max' (no limit configured; used
//       by some controllers, for example the pids controller)
//    OSCONTAINER_ERROR if limit_str is NULL or cannot be parsed as a number
jlong CgroupSubsystem::limit_from_str(char* limit_str) {
  if (limit_str == NULL) {
    return OSCONTAINER_ERROR;
  }
  jlong result;
  julong parsed;
  if (strcmp("max", limit_str) == 0) {
    result = (jlong)-1;
  } else if (sscanf(limit_str, JULONG_FORMAT, &parsed) == 1) {
    result = (jlong)parsed;
  } else {
    result = OSCONTAINER_ERROR;
  }
  // Single release point: the string is no longer needed once classified.
  os::free(limit_str);
  return result;
}
9 changes: 6 additions & 3 deletions src/hotspot/os/linux/cgroupSubsystem_linux.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -61,12 +61,13 @@
#define INVALID_CGROUPS_NO_MOUNT 5
#define INVALID_CGROUPS_GENERIC 6

// Four controllers: cpu, cpuset, cpuacct, memory
#define CG_INFO_LENGTH 4
// Five controllers: cpu, cpuset, cpuacct, memory, pids
#define CG_INFO_LENGTH 5
#define CPUSET_IDX 0
#define CPU_IDX 1
#define CPUACCT_IDX 2
#define MEMORY_IDX 3
#define PIDS_IDX 4

typedef char * cptr;

Expand Down Expand Up @@ -238,10 +239,12 @@ class CgroupSubsystem: public CHeapObj<mtInternal> {
public:
jlong memory_limit_in_bytes();
int active_processor_count();
jlong limit_from_str(char* limit_str);

virtual int cpu_quota() = 0;
virtual int cpu_period() = 0;
virtual int cpu_shares() = 0;
virtual jlong pids_max() = 0;
virtual jlong memory_usage_in_bytes() = 0;
virtual jlong memory_and_swap_limit_in_bytes() = 0;
virtual jlong memory_soft_limit_in_bytes() = 0;
Expand Down
27 changes: 26 additions & 1 deletion src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -241,3 +241,28 @@ int CgroupV1Subsystem::cpu_shares() {

return shares;
}


// Reads the "/pids.max" file of the cgroup v1 pids controller (_pids) and
// returns its first whitespace-delimited token as a freshly os::strdup'ed
// string. The caller takes ownership of the returned string; pids_max()
// hands it to limit_from_str(), which frees it.
//
// NOTE(review): GET_CONTAINER_INFO_CPTR is defined outside this view. It
// presumably declares the local 'pidsmax' buffer (1024 bytes), logs the value
// with the given message and returns (cptr)NULL from this function when the
// file cannot be read - confirm against the macro definition.
char* CgroupV1Subsystem::pids_max_val() {
GET_CONTAINER_INFO_CPTR(cptr, _pids, "/pids.max",
"Maximum number of tasks is: %s", "%s %*d", pidsmax, 1024);
// NOTE(review): if the macro declares 'pidsmax' as a local array, this check
// can never be true - confirm and consider removing it.
if (pidsmax == NULL) {
return NULL;
}
return os::strdup(pidsmax);
}

/* pids_max
 *
 * Report the upper bound on the number of tasks the process may create,
 * as configured in the cgroup v1 pids controller.
 *
 * return:
 *    the configured maximum number of tasks
 *    -1 if no limit is set
 *    OSCONTAINER_ERROR if the pids controller is not available or its
 *    value could not be read
 */
jlong CgroupV1Subsystem::pids_max() {
  // The pids controller is optional; without it there is nothing to query.
  if (_pids != NULL) {
    // limit_from_str() takes ownership of the string and frees it.
    return limit_from_str(pids_max_val());
  }
  return OSCONTAINER_ERROR;
}
9 changes: 8 additions & 1 deletion src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -87,6 +87,8 @@ class CgroupV1Subsystem: public CgroupSubsystem {

int cpu_shares();

jlong pids_max();

// Returns the constant string "cgroupv1" naming this subsystem implementation.
const char * container_type() {
return "cgroupv1";
}
Expand All @@ -101,15 +103,20 @@ class CgroupV1Subsystem: public CgroupSubsystem {
CgroupV1Controller* _cpuset = NULL;
CachingCgroupController* _cpu = NULL;
CgroupV1Controller* _cpuacct = NULL;
CgroupV1Controller* _pids = NULL;

char * pids_max_val();

public:
// Builds the cgroup v1 subsystem from its individual controllers.
// cpuset, cpuacct and pids are stored as passed in; cpu and memory are each
// wrapped in a newly allocated CachingCgroupController.
// pids may be NULL (the factory leaves it NULL when no complete pids
// controller info was found); pids_max() checks for that.
CgroupV1Subsystem(CgroupV1Controller* cpuset,
CgroupV1Controller* cpu,
CgroupV1Controller* cpuacct,
CgroupV1Controller* pids,
CgroupV1MemoryController* memory) {
_cpuset = cpuset;
_cpu = new CachingCgroupController(cpu);
_cpuacct = cpuacct;
_pids = pids;
_memory = new CachingCgroupController(memory);
// LONG_MAX rounded down to a whole multiple of the VM page size.
_unlimited_memory = (LONG_MAX / os::vm_page_size()) * os::vm_page_size();
}
Expand Down
43 changes: 24 additions & 19 deletions src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, Red Hat Inc.
* Copyright (c) 2020, 2021, Red Hat Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -203,24 +203,6 @@ jlong CgroupV2Subsystem::read_memory_limit_in_bytes() {
return limit;
}

// Convert the textual value of a cgroup v2 limit file into a number.
//
// limit_str: string allocated by the os:: allocator, may be NULL. A non-NULL
//            string is always released here via os::free().
//
// return:
//    the parsed limit
//    -1 if the value is the literal string 'max' (unlimited in cgroups v2)
//    OSCONTAINER_ERROR if limit_str is NULL or cannot be parsed as a number
jlong CgroupV2Subsystem::limit_from_str(char* limit_str) {
  if (limit_str == NULL) {
    return OSCONTAINER_ERROR;
  }
  // Assume failure until the string is recognized.
  jlong result = OSCONTAINER_ERROR;
  if (strcmp("max", limit_str) == 0) {
    result = (jlong)-1;
  } else {
    julong parsed;
    if (sscanf(limit_str, JULONG_FORMAT, &parsed) == 1) {
      result = (jlong)parsed;
    }
  }
  os::free(limit_str);
  return result;
}

char* CgroupV2Subsystem::mem_limit_val() {
GET_CONTAINER_INFO_CPTR(cptr, _unified, "/memory.max",
"Raw value for memory limit is: %s", "%s", mem_limit_str, 1024);
Expand All @@ -244,3 +226,26 @@ char* CgroupV2Controller::construct_path(char* mount_path, char *cgroup_path) {
return os::strdup(buf);
}

// Reads the "/pids.max" file of the unified cgroup v2 controller (_unified)
// and returns its first whitespace-delimited token as a freshly os::strdup'ed
// string. The caller takes ownership of the returned string; pids_max()
// hands it to limit_from_str(), which frees it.
//
// NOTE(review): GET_CONTAINER_INFO_CPTR is defined outside this view. It
// presumably declares the local 'pidsmax' buffer (1024 bytes), logs the value
// with the given message and returns (cptr)NULL from this function when the
// file cannot be read - confirm against the macro definition.
char* CgroupV2Subsystem::pids_max_val() {
GET_CONTAINER_INFO_CPTR(cptr, _unified, "/pids.max",
"Maximum number of tasks is: %s", "%s %*d", pidsmax, 1024);
// NOTE(review): if the macro declares 'pidsmax' as a local array, this check
// can never be true - confirm and consider removing it.
if (pidsmax == NULL) {
return NULL;
}
return os::strdup(pidsmax);
}

/* pids_max
 *
 * Report the upper bound on the number of tasks the process may create,
 * as configured in the unified cgroup v2 hierarchy.
 *
 * return:
 *    the configured maximum number of tasks
 *    -1 if no limit is set
 *    OSCONTAINER_ERROR if the value could not be read
 */
jlong CgroupV2Subsystem::pids_max() {
  // limit_from_str() takes ownership of the string and frees it.
  return limit_from_str(pids_max_val());
}

Loading