Skip to content

Commit

Permalink
nsenter: cloned_binary: add memfd_create(2) opt-out
Browse files Browse the repository at this point in the history
Using memfd_create(2) added ~10MB of memory usage to the cgroup associated
with the container, which can result in some setups getting OOM-killed. The
nicest way of solving this was to add _LIBCONTAINER_DISABLE_MEMFD_CLONE
as an environment setting that disables memfd_create(2) usage and instead
uses temporary files that are placed within the runc state directory.

Signed-off-by: Aleksa Sarai <asarai@suse.de>
  • Loading branch information
cyphar committed Feb 17, 2019
1 parent d9edfa7 commit 2dbb8e1
Showing 1 changed file with 45 additions and 35 deletions.
80 changes: 45 additions & 35 deletions libcontainer/nsenter/cloned_binary.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,9 @@ static int fetchve(char ***argv)
#define EFD_MEMFD 0x01
#define EFD_TMPFILE 0x02
#define EFD_FILE 0x04
#define EFD_ALL (EFD_MEMFD | EFD_TMPFILE | EFD_FILE)

static int make_execfd(int *fdtype)
static int make_execfd(int *fdtype, int allowed)
{
int fd;
char template[] = "/tmp/runc-cloned-binary.XXXXXX";
Expand All @@ -205,52 +206,58 @@ static int make_execfd(int *fdtype)
* sealing and also doesn't require assumptions like /tmp.
*/
*fdtype = EFD_MEMFD;
fd = memfd_create(RUNC_MEMFD_COMMENT, MFD_CLOEXEC | MFD_ALLOW_SEALING);
if (fd >= 0)
return fd;
if (errno != ENOSYS)
goto err;
if (*fdtype & allowed) {
fd = memfd_create(RUNC_MEMFD_COMMENT, MFD_CLOEXEC | MFD_ALLOW_SEALING);
if (fd >= 0)
return fd;
if (errno != ENOSYS)
goto err;
}

/*
* Try O_TMPFILE to avoid races where someone might snatch our file. Note
* that O_EXCL isn't actually a security measure here (since you can just
* fd re-open it and clear O_EXCL).
*/
*fdtype = EFD_TMPFILE;
fd = open("/tmp", O_TMPFILE | O_EXCL | O_RDWR | O_CLOEXEC, 0700);
if (fd >= 0) {
struct stat statbuf = {};
bool working_otmpfile = false;

/*
* open(2) ignores unknown O_* flags -- yeah, I was surprised when I
* found this out too. As a result we can't check for EINVAL. However,
* if we get nlink != 0 (or EISDIR) then we know that this kernel
* doesn't support O_TMPFILE.
*/
if (fstat(fd, &statbuf) >= 0)
working_otmpfile = (statbuf.st_nlink == 0);

if (working_otmpfile)
return fd;

/* Pretend that we got EISDIR since O_TMPFILE failed. */
close(fd);
errno = EISDIR;
if (*fdtype & allowed) {
fd = open("/tmp", O_TMPFILE | O_EXCL | O_RDWR | O_CLOEXEC, 0700);
if (fd >= 0) {
struct stat statbuf = {};
bool working_otmpfile = false;

/*
* open(2) ignores unknown O_* flags -- yeah, I was surprised when I
* found this out too. As a result we can't check for EINVAL. However,
* if we get nlink != 0 (or EISDIR) then we know that this kernel
* doesn't support O_TMPFILE.
*/
if (fstat(fd, &statbuf) >= 0)
working_otmpfile = (statbuf.st_nlink == 0);

if (working_otmpfile)
return fd;

/* Pretend that we got EISDIR since O_TMPFILE failed. */
close(fd);
errno = EISDIR;
}
if (errno != EISDIR)
goto err;
}
if (errno != EISDIR)
goto err;

/*
* Our final option is to create a temporary file the old-school way, and
* then unlink it so that nothing else sees it by accident.
*/
*fdtype = EFD_FILE;
fd = mkostemp(template, O_CLOEXEC);
if (fd >= 0) {
if (!unlink(template))
return fd;
close(fd);
if (*fdtype & allowed) {
fd = mkostemp(template, O_CLOEXEC);
if (fd >= 0) {
if (!unlink(template))
return fd;
close(fd);
}
}

err:
Expand Down Expand Up @@ -294,9 +301,12 @@ static int clone_binary(void)
int binfd, memfd;
struct stat statbuf = {};
size_t sent = 0;
int fdtype = EFD_NONE;
int fdtype = EFD_NONE, allowed = EFD_ALL;

if (secure_getenv("_LIBCONTAINER_DISABLE_MEMFD_CLONE"))
allowed &= ~EFD_MEMFD;

memfd = make_execfd(&fdtype);
memfd = make_execfd(&fdtype, allowed);
if (memfd < 0)
return -ENOTRECOVERABLE;

Expand Down

0 comments on commit 2dbb8e1

Please sign in to comment.