Skip to content

Commit

Permalink
cgroups: rework to handle nested containers with multiple and partial…
Browse files Browse the repository at this point in the history
… mounts

Currently, if you create a container and use the mountcgroup hook,
you get the /lxc/c1/c1.real cgroup mounted to /.  If you then try
to start containers inside that container, lxc can get confused.
This patch addresses that, by accepting that the cgroup as found
in /proc/self/cgroup can be partially hidden by bind mounts.

In this patch:

Add optional 'lxc.cgroup.use' to /etc/lxc/lxc.conf to specify which
mounted cgroup filesystems lxc should use.  So far only the cgroup
creation respects this.

Keep separate cgroup information for each cgroup mountpoint.  So if
the caller is in devices cgroup /a but cpuset cgroup /b that should
now be ok.

Change how we decide whether to ignore failure to set devices cgroup
settings.  Actually look to see if our current cgroup already has the
settings.  If not, add them.

Finally, the real reason for this patch: in a nested container,
/proc/self/cgroup says nothing about where under /sys/fs/cgroup you
might find yourself.  Handle this by searching for our pid in tasks
files, and keep that info in the cgroup handler.

Also remove all strdupa from cgroup.c (not android-friendly).

Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
  • Loading branch information
hallyn committed Aug 14, 2013
1 parent 070a4b8 commit b98f7d6
Show file tree
Hide file tree
Showing 12 changed files with 778 additions and 605 deletions.
1,087 changes: 691 additions & 396 deletions src/lxc/cgroup.c

Large diffs are not rendered by default.

31 changes: 25 additions & 6 deletions src/lxc/cgroup.h
Expand Up @@ -24,15 +24,34 @@
#define _cgroup_h
#include <stdbool.h>

/*
* cgroup_desc: describe a container's cgroup membership
*
* Each descriptor records one mountpoint, the subsystems associated with
* it, and two views of the task's cgroup path. Descriptors are chained
* through 'next'.
* NOTE(review): presumably one descriptor per mounted cgroup hierarchy,
* with a NULL-terminated chain - confirm against cgroup.c.
*/
struct cgroup_desc {
char *mntpt; /* where this is mounted */
char *subsystems; /* comma-separated list of subsystems, or NULL */
char *curcgroup; /* task's current cgroup, full pathname */
char *realcgroup; /* the cgroup as known in /proc/self/cgroup */
struct cgroup_desc *next; /* next descriptor in the chain */
};

struct lxc_handler;
extern int lxc_cgroup_destroy(const char *cgpath);
extern void lxc_cgroup_destroy_desc(struct cgroup_desc *cgroups);
extern char *lxc_cgroup_path_get(const char *subsystem, const char *name,
const char *lxcpath);
extern int lxc_cgroup_nrtasks(const char *cgpath);
extern char *lxc_cgroup_path_create(const char *lxcgroup, const char *name);
extern int lxc_cgroup_enter(const char *cgpath, pid_t pid);
extern int lxc_cgroup_nrtasks(struct lxc_handler *handler);
struct cgroup_desc *lxc_cgroup_path_create(const char *name);
extern int lxc_cgroup_enter(struct cgroup_desc *cgroups, pid_t pid);
extern int lxc_cgroup_attach(pid_t pid, const char *name, const char *lxcpath);
extern char *cgroup_path_get(const char *subsystem, const char *cgpath);
extern bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath);
extern int lxc_curcgroup(char *cgroup, int inlen);
extern bool get_subsys_mount(char *dest, const char *subsystem);
extern bool is_in_subcgroup(int pid, const char *subsystem, struct cgroup_desc *d);
/*
* Called from commands.c by a container's monitor to find out the
* container's cgroup path in a specific subsystem
*/
extern char *cgroup_get_subsys_path(struct lxc_handler *handler, const char *subsys);
struct lxc_list;
extern int setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroups);
extern int setup_cgroup_devices(struct lxc_handler *h, struct lxc_list *cgroups);
#endif
34 changes: 27 additions & 7 deletions src/lxc/commands.c
Expand Up @@ -341,22 +341,29 @@ static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req,
return lxc_cmd_rsp_send(fd, &rsp);
}

extern char *cgroup_get_subsys_path(struct lxc_handler *handler, const char *subsys);
/*
* lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a
* particular subsystem. This is the cgroup path relative to the root
* of the cgroup filesystem.
*
* @name : name of container to connect to
* @lxcpath : the lxcpath in which the container is running
* @subsystem : the subsystem being asked about
*
* Returns the path on success, NULL on failure. The caller must free() the
* returned path.
*/
char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath)
char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
const char *subsystem)
{
int ret, stopped = 0;
struct lxc_cmd_rr cmd = {
.req = { .cmd = LXC_CMD_GET_CGROUP },
.req = {
.cmd = LXC_CMD_GET_CGROUP,
.datalen = strlen(subsystem)+1,
.data = subsystem,
},
};

ret = lxc_cmd(name, &cmd, &stopped, lxcpath);
Expand All @@ -381,10 +388,17 @@ char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath)
/*
* lxc_cmd_get_cgroup_callback: reply to a request for the container's
* cgroup path (presumably the handler for LXC_CMD_GET_CGROUP - confirm
* against the command dispatch table).
*
* @fd      : descriptor the response is sent on via lxc_cmd_rsp_send()
* @req     : request; req->data is used as the subsystem name
* @handler : handler passed on to cgroup_get_subsys_path()
*
* Returns -1 when the request carries no data or no path is found for the
* subsystem; otherwise returns the result of lxc_cmd_rsp_send().
*
* NOTE(review): this listing is a rendered diff without +/- markers. The
* initialized 'rsp = { ... }' declaration appears to be the pre-change code,
* and the bare 'rsp;' declaration with the lines after it the post-change
* code - confirm against the repository.
*/
static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
struct lxc_handler *handler)
{
struct lxc_cmd_rsp rsp = {
.datalen = strlen(handler->cgroup) + 1,
.data = handler->cgroup,
};
/*
* NOTE(review): with this bare declaration only datalen and data are
* assigned below. If struct lxc_cmd_rsp has other members (another
* callback in this file sets an rsp.ret), they are sent uninitialized -
* verify.
*/
struct lxc_cmd_rsp rsp;
char *path;

/* reject a request that carries no subsystem name */
if (req->datalen < 1)
return -1;

/*
* NOTE(review): req->data is used as a C string here; nothing in this
* function checks that it is NUL-terminated within req->datalen.
*/
path = cgroup_get_subsys_path(handler, req->data);
if (!path)
return -1;
/*
* Length includes the terminating NUL.
* NOTE(review): the next statement ends in ',' (comma operator) rather
* than ';'. Both members are still assigned, but ';' was probably
* intended.
*/
rsp.datalen = strlen(path) + 1,
rsp.data = path;

return lxc_cmd_rsp_send(fd, &rsp);
}
Expand Down Expand Up @@ -535,7 +549,13 @@ static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req,
memset(&rsp, 0, sizeof(rsp));
rsp.ret = kill(handler->pid, stopsignal);
if (!rsp.ret) {
ret = lxc_unfreeze_bypath(handler->cgroup);
char *path = cgroup_get_subsys_path(handler, "freezer");
if (!path) {
ERROR("container %s:%s is not in a freezer cgroup",
handler->lxcpath, handler->name);
return 0;
}
ret = lxc_unfreeze_bypath(path);
if (!ret)
return 0;

Expand Down
7 changes: 6 additions & 1 deletion src/lxc/commands.h
Expand Up @@ -69,7 +69,12 @@ struct lxc_cmd_console_rsp_data {
extern int lxc_cmd_console_winch(const char *name, const char *lxcpath);
extern int lxc_cmd_console(const char *name, int *ttynum, int *fd,
const char *lxcpath);
extern char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath);
/*
* Get the 'real' cgroup path (as seen in /proc/self/cgroup) for a container
* for a particular subsystem
*/
extern char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath,
const char *subsystem);
extern int lxc_cmd_get_clone_flags(const char *name, const char *lxcpath);
extern char *lxc_cmd_get_config_item(const char *name, const char *item, const char *lxcpath);
extern pid_t lxc_cmd_get_init_pid(const char *name, const char *lxcpath);
Expand Down
41 changes: 0 additions & 41 deletions src/lxc/conf.c
Expand Up @@ -1487,47 +1487,6 @@ static int setup_kmsg(const struct lxc_rootfs *rootfs,
return 0;
}

/*
* _setup_cgroup: apply a list of cgroup settings to the cgroup at cgpath.
*
* @cgpath  : cgroup path handed to lxc_cgroup_set_bypath()
* @cgroups : list whose elements are struct lxc_cgroup (subsystem, value)
* @devices : 1 to apply only entries whose subsystem starts with "devices",
*            0 to apply only the remaining entries
*
* Returns 0 when the list is empty or every selected setting was applied,
* -1 as soon as one lxc_cgroup_set_bypath() call fails.
*/
static int _setup_cgroup(const char *cgpath, struct lxc_list *cgroups,
int devices)
{
struct lxc_list *iterator;
struct lxc_cgroup *cg;
int ret = -1;

/* nothing configured: succeed without logging */
if (lxc_list_empty(cgroups))
return 0;

lxc_list_for_each(iterator, cgroups) {
cg = iterator->elem;

/*
* !strncmp() yields 1 for a "devices*" entry and 0 otherwise, so
* the comparison selects the class requested by 'devices'. This
* only works when 'devices' is exactly 0 or 1.
*/
if (devices == !strncmp("devices", cg->subsystem, 7)) {
if (lxc_cgroup_set_bypath(cgpath, cg->subsystem,
cg->value)) {
ERROR("Error setting %s to %s for %s\n",
cg->subsystem, cg->value, cgpath);
goto out;
}
}

/* logged for every entry, including those skipped by the filter above */
DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
}

ret = 0;
INFO("cgroup has been setup");
out:
return ret;
}

/*
* setup_cgroup_devices: apply only the "devices*" entries of cgroups to
* the cgroup at cgpath (calls _setup_cgroup() with devices = 1).
* Returns the result of _setup_cgroup().
*/
int setup_cgroup_devices(const char *cgpath, struct lxc_list *cgroups)
{
return _setup_cgroup(cgpath, cgroups, 1);
}

/*
* setup_cgroup: apply every entry of cgroups except the "devices*" ones to
* the cgroup at cgpath (calls _setup_cgroup() with devices = 0).
* Returns the result of _setup_cgroup().
*/
int setup_cgroup(const char *cgpath, struct lxc_list *cgroups)
{
return _setup_cgroup(cgpath, cgroups, 0);
}

static void parse_mntopt(char *opt, unsigned long *flags, char **data)
{
struct mount_opt *mo;
Expand Down
2 changes: 0 additions & 2 deletions src/lxc/conf.h
Expand Up @@ -301,8 +301,6 @@ struct lxc_conf {
int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
const char *lxcpath, char *argv[]);

extern int setup_cgroup(const char *cgpath, struct lxc_list *cgroups);
extern int setup_cgroup_devices(const char *cgpath, struct lxc_list *cgroups);
extern int detect_shared_rootfs(void);

/*
Expand Down
17 changes: 10 additions & 7 deletions src/lxc/freezer.c
Expand Up @@ -145,14 +145,17 @@ int lxc_unfreeze(const char *name, const char *lxcpath)

/*
* lxc_unfreeze_bypath: build an absolute freezer cgroup path from
* cgrelpath and call do_unfreeze() on it.
*
* @cgrelpath : cgroup path appended, after a "/", to the string that
*              get_subsys_mount() stores for "freezer" (presumably the
*              freezer mountpoint - confirm in cgroup.c)
*
* Returns -1 when get_subsys_mount() fails or the combined path does not
* fit in MAXPATHLEN; otherwise returns the result of do_unfreeze().
*
* NOTE(review): this listing is a rendered diff without +/- markers. The
* 'char *cgabspath' / cgroup_path_get() / free() lines appear to be the
* pre-change code and the fixed-size buffer lines the post-change code -
* confirm against the repository.
*/
int lxc_unfreeze_bypath(const char *cgrelpath)
{
char *cgabspath;
int ret;
char cgabspath[MAXPATHLEN];
int len, ret;

cgabspath = cgroup_path_get("freezer", cgrelpath);
if (!cgabspath)
if (!get_subsys_mount(cgabspath, "freezer"))
return -1;
/* append "/<cgrelpath>" after what get_subsys_mount() wrote */
len = strlen(cgabspath);
ret = snprintf(cgabspath+len, MAXPATHLEN-len, "/%s", cgrelpath);
/* a return >= the remaining space means the output was truncated */
if (ret < 0 || ret >= MAXPATHLEN-len) {
ERROR("freezer path name too long");
return -1;
}

ret = do_unfreeze(cgabspath, 0, NULL, NULL);
free(cgabspath);
return ret;
return do_unfreeze(cgabspath, 0, NULL, NULL);
}
6 changes: 4 additions & 2 deletions src/lxc/lxc.h
Expand Up @@ -136,15 +136,17 @@ extern int lxc_unfreeze_bypath(const char *cgpath);
*/
extern lxc_state_t lxc_state(const char *name, const char *lxcpath);

struct lxc_handler;
/*
* Set a specified value for a specified subsystem. The specified
* subsystem must be fully specified, eg. "cpu.shares"
* @cgpath : the cgroup path of the container
* @hander : the lxc_handler for the container
* @filename : the cgroup attribute filename
* @value : the value to be set
* Returns 0 on success, < 0 otherwise
*/
extern int lxc_cgroup_set_bypath(const char *cgpath, const char *filename, const char *value);
extern int lxc_cgroup_set_value(struct lxc_handler *hander, const char *filename,
const char *value);

/*
* Set a specified value for a specified subsystem. The specified
Expand Down
2 changes: 1 addition & 1 deletion src/lxc/lxcutmp.c
Expand Up @@ -283,7 +283,7 @@ static int utmp_get_ntasks(struct lxc_handler *handler)
{
int ntasks;

ntasks = lxc_cgroup_nrtasks(handler->cgroup);
ntasks = lxc_cgroup_nrtasks(handler);

if (ntasks < 0) {
ERROR("failed to get the number of tasks");
Expand Down
32 changes: 8 additions & 24 deletions src/lxc/start.c
Expand Up @@ -374,8 +374,7 @@ static void lxc_fini(const char *name, struct lxc_handler *handler)
handler->conf->maincmd_fd = -1;
free(handler->name);
if (handler->cgroup) {
lxc_cgroup_destroy(handler->cgroup);
free(handler->cgroup);
lxc_cgroup_destroy_desc(handler->cgroup);
handler->cgroup = NULL;
}
free(handler);
Expand Down Expand Up @@ -594,12 +593,11 @@ int save_phys_nics(struct lxc_conf *conf)
return 0;
}

extern bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath);
extern bool is_in_subcgroup(int pid, const char *subsystem, struct cgroup_desc *d);
int lxc_spawn(struct lxc_handler *handler)
{
int failed_before_rename = 0, len;
int failed_before_rename = 0;
const char *name = handler->name;
char *curcgroup = NULL;

if (lxc_sync_init(handler))
return -1;
Expand Down Expand Up @@ -661,18 +659,10 @@ int lxc_spawn(struct lxc_handler *handler)
if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE))
failed_before_rename = 1;

if ((len = lxc_curcgroup(NULL, 0)) > 1) {
curcgroup = alloca(len);
if (lxc_curcgroup(curcgroup, len) <= 1)
curcgroup = NULL;
FILE *f = fopen("/tmp/a", "a");
fprintf(f, "curcgroup is %s\n", curcgroup);
fclose(f);
}
if ((handler->cgroup = lxc_cgroup_path_create(curcgroup, name)) == NULL)
if ((handler->cgroup = lxc_cgroup_path_create(name)) == NULL)
goto out_delete_net;

if (setup_cgroup(handler->cgroup, &handler->conf->cgroup)) {
if (setup_cgroup(handler, &handler->conf->cgroup)) {
ERROR("failed to setup the cgroups for '%s'", name);
goto out_delete_net;
}
Expand Down Expand Up @@ -707,15 +697,9 @@ int lxc_spawn(struct lxc_handler *handler)
if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CONFIGURE))
goto out_delete_net;

if (setup_cgroup_devices(handler->cgroup, &handler->conf->cgroup)) {
/* an unfortunate special case: startup hooks may have already
* setup the cgroup. If a setting fails, and this is the devices
* subsystem, *and* we are already in a subset of the cgroup,
* then ignore the failure */
if (!is_in_subcgroup(handler->pid, "devices", handler->cgroup)) {
ERROR("failed to setup the devices cgroup for '%s'", name);
goto out_delete_net;
}
if (setup_cgroup_devices(handler, &handler->conf->cgroup)) {
ERROR("failed to setup the devices cgroup for '%s'", name);
goto out_delete_net;
}

/* Tell the child to complete its initialization and wait for
Expand Down
4 changes: 3 additions & 1 deletion src/lxc/start.h
Expand Up @@ -37,6 +37,8 @@ struct lxc_operations {
int (*post_start)(struct lxc_handler *, void *);
};

struct cgroup_desc;

struct lxc_handler {
pid_t pid;
char *name;
Expand All @@ -53,7 +55,7 @@ struct lxc_handler {
#endif
int pinfd;
const char *lxcpath;
char *cgroup;
struct cgroup_desc *cgroup;
};

extern struct lxc_handler *lxc_init(const char *name, struct lxc_conf *, const char *);
Expand Down

0 comments on commit b98f7d6

Please sign in to comment.