Skip to content

Commit

Permalink
Merge pull request #2664 from brauner/2018-09-30/syscall_wrappers
Browse files Browse the repository at this point in the history
syscalls: add wrappers and explicit raw syscalls
  • Loading branch information
stgraber committed Sep 30, 2018
2 parents 6810d90 + 1f797c3 commit 74d9689
Show file tree
Hide file tree
Showing 22 changed files with 241 additions and 165 deletions.
20 changes: 16 additions & 4 deletions src/lxc/Makefile.am
Expand Up @@ -23,6 +23,7 @@ noinst_HEADERS = api_extensions.h \
macro.h \
monitor.h \
namespace.h \
raw_syscalls.h \
start.h \
state.h \
storage/btrfs.h \
Expand Down Expand Up @@ -116,6 +117,7 @@ liblxc_la_SOURCES = af_unix.c af_unix.h \
network.c network.h \
monitor.c monitor.h \
parse.c parse.h \
raw_syscalls.c raw_syscalls.h \
ringbuf.c ringbuf.h \
rtnl.c rtnl.h \
state.c state.h \
Expand Down Expand Up @@ -339,21 +341,31 @@ endif
if ENABLE_COMMANDS
# Binaries shipping with liblxc
init_lxc_SOURCES = cmd/lxc_init.c \
compiler.h \
error.h \
initutils.c initutils.h \
log.c log.h \
parse.c parse.h \
raw_syscalls.c raw_syscalls.h \
string_utils.c string_utils.h
lxc_monitord_SOURCES = cmd/lxc_monitord.c
lxc_monitord_SOURCES = cmd/lxc_monitord.c \
af_unix.c af_unix.h \
log.c log.h \
mainloop.c mainloop.h \
monitor.c monitor.h \
raw_syscalls.c raw_syscalls.h \
utils.c utils.h
lxc_user_nic_SOURCES = cmd/lxc_user_nic.c \
../include/netns_ifaddrs.c ../include/netns_ifaddrs.h \
log.c log.h \
namespace.c namespace.h \
network.c network.h \
parse.c parse.h
parse.c parse.h \
raw_syscalls.c raw_syscalls.h
lxc_usernsexec_SOURCES = cmd/lxc_usernsexec.c \
conf.c conf.h \
list.h \
log.c log.h \
macro.h \
namespace.c namespace.h \
file_utils.c file_utils.h \
string_utils.c string_utils.h \
utils.c utils.h
Expand Down
1 change: 1 addition & 0 deletions src/lxc/af_unix.c
Expand Up @@ -37,6 +37,7 @@

#include "config.h"
#include "log.h"
#include "raw_syscalls.h"
#include "utils.h"

#ifndef HAVE_STRLCPY
Expand Down
1 change: 1 addition & 0 deletions src/lxc/attach.c
Expand Up @@ -59,6 +59,7 @@
#include "macro.h"
#include "mainloop.h"
#include "namespace.h"
#include "raw_syscalls.h"
#include "terminal.h"
#include "utils.h"

Expand Down
2 changes: 1 addition & 1 deletion src/lxc/cmd/lxc_init.c
Expand Up @@ -46,8 +46,8 @@
#include "error.h"
#include "initutils.h"
#include "log.h"
#include "namespace.h"
#include "parse.h"
#include "raw_syscalls.h"
#include "string_utils.h"

/* option keys for long only options */
Expand Down
1 change: 1 addition & 0 deletions src/lxc/cmd/lxc_monitord.c
Expand Up @@ -49,6 +49,7 @@
#include "log.h"
#include "mainloop.h"
#include "monitor.h"
#include "raw_syscalls.h"
#include "utils.h"

#define CLIENTFDS_CHUNK 64
Expand Down
2 changes: 1 addition & 1 deletion src/lxc/cmd/lxc_user_nic.c
Expand Up @@ -49,9 +49,9 @@

#include "config.h"
#include "log.h"
#include "namespace.h"
#include "network.h"
#include "parse.h"
#include "raw_syscalls.h"
#include "utils.h"

#ifndef HAVE_STRLCPY
Expand Down
3 changes: 2 additions & 1 deletion src/lxc/cmd/lxc_usernsexec.c
Expand Up @@ -47,7 +47,8 @@
#include "list.h"
#include "log.h"
#include "macro.h"
#include "namespace.h"
#include "file_utils.h"
#include "string_utils.h"
#include "utils.h"

extern int lxc_log_fd;
Expand Down
29 changes: 3 additions & 26 deletions src/lxc/conf.c
Expand Up @@ -70,10 +70,12 @@
#include "namespace.h"
#include "network.h"
#include "parse.h"
#include "raw_syscalls.h"
#include "ringbuf.h"
#include "start.h"
#include "storage.h"
#include "storage/overlay.h"
#include "syscall_wrappers.h"
#include "terminal.h"
#include "utils.h"

Expand Down Expand Up @@ -124,21 +126,6 @@ thread_local struct lxc_conf *current_config;
struct lxc_conf *current_config;
#endif

/*
 * Fallback for C libraries that do not provide pivot_root(): invoke the
 * system call directly when its number is known at build time, otherwise
 * fail with ENOSYS. When the C library does provide it, only declare it.
 */
#ifndef HAVE_PIVOT_ROOT
static int pivot_root(const char *new_root, const char *put_old)
{
	int ret;

#ifdef __NR_pivot_root
	ret = syscall(__NR_pivot_root, new_root, put_old);
#else
	errno = ENOSYS;
	ret = -1;
#endif

	return ret;
}
#else
extern int pivot_root(const char *new_root, const char *put_old);
#endif

char *lxchook_names[NUM_LXC_HOOKS] = {
"pre-start",
"pre-mount",
Expand Down Expand Up @@ -3549,21 +3536,11 @@ static bool verify_start_hooks(struct lxc_conf *conf)

/*
 * Probe for execveat(): the call is issued with file descriptor -1 and an
 * empty path, then errno is inspected. Returns false when errno is ENOSYS
 * and true otherwise. When __NR_execveat is not defined at build time the
 * function returns false without probing.
 *
 * NOTE(review): both the raw syscall() line and the lxc_raw_execveat() call
 * are present below. This looks like the removed and the added line of a
 * diff shown together - confirm which of the two is meant to remain.
 */
static bool execveat_supported(void)
{
#ifdef __NR_execveat
/*
* We use the syscall here, because it was introduced in kernel 3.19,
* while glibc got support for using the syscall much later, in 2.27.
* We don't want to use glibc because it falls back to /proc, and the
* container may not have /proc mounted depending on its configuration.
*/
syscall(__NR_execveat, -1, "", NULL, NULL, AT_EMPTY_PATH);
lxc_raw_execveat(-1, "", NULL, NULL, AT_EMPTY_PATH);
/* The return value is ignored; only errno decides the result. Presumably
 * the probe is expected to fail either way - TODO confirm.
 */
if (errno == ENOSYS)
return false;

return true;
#else
return false;
#endif
}

int lxc_setup(struct lxc_handler *handler)
Expand Down
7 changes: 2 additions & 5 deletions src/lxc/execute.c
Expand Up @@ -35,6 +35,7 @@
#include "config.h"
#include "log.h"
#include "start.h"
#include "raw_syscalls.h"
#include "utils.h"

lxc_log_define(execute, start);
Expand Down Expand Up @@ -122,11 +123,7 @@ static int execute_start(struct lxc_handler *handler, void* data)
NOTICE("Exec'ing \"%s\"", my_args->argv[0]);

if (my_args->init_fd >= 0)
#ifdef __NR_execveat
syscall(__NR_execveat, my_args->init_fd, "", argv, environ, AT_EMPTY_PATH);
#else
ERROR("System seems to be missing execveat syscall number");
#endif
lxc_raw_execveat(my_args->init_fd, "", argv, environ, AT_EMPTY_PATH);
else
execvp(argv[0], argv);
SYSERROR("Failed to exec %s", argv[0]);
Expand Down
1 change: 1 addition & 0 deletions src/lxc/lsm/apparmor.c
Expand Up @@ -37,6 +37,7 @@
#include "log.h"
#include "lsm.h"
#include "parse.h"
#include "raw_syscalls.h"
#include "utils.h"

lxc_log_define(apparmor, lsm);
Expand Down
1 change: 1 addition & 0 deletions src/lxc/lxccontainer.c
Expand Up @@ -65,6 +65,7 @@
#include "namespace.h"
#include "network.h"
#include "parse.h"
#include "raw_syscalls.h"
#include "start.h"
#include "state.h"
#include "storage.h"
Expand Down
74 changes: 0 additions & 74 deletions src/lxc/namespace.c
Expand Up @@ -75,80 +75,6 @@ pid_t lxc_clone(int (*fn)(void *), void *arg, int flags)
return ret;
}

/**
 * lxc_raw_clone() - create a new process through the raw clone system call
 *
 * This is based on raw_clone in systemd but adapted to our needs. This uses
 * copy on write semantics and doesn't pass a stack. CLONE_VM is tricky and
 * doesn't really matter to us so disallow it.
 *
 * The nice thing about this is that we get fork() behavior. That is
 * lxc_raw_clone() returns 0 in the child and the child pid in the parent.
 *
 * @flags: clone flags; SIGCHLD is always OR'ed in before the system call.
 *         CLONE_VM, CLONE_PARENT_SETTID, CLONE_CHILD_SETTID,
 *         CLONE_CHILD_CLEARTID and CLONE_SETTLS are rejected.
 *
 * Return: 0 in the child and the child's pid in the parent. If a rejected
 *         flag is passed, -EINVAL is returned and errno is set to EINVAL.
 *         Otherwise the result of the system call is passed through.
 */
pid_t lxc_raw_clone(unsigned long flags)
{
	/* These flags are of no interest to us, so we don't jump through any
	 * hoops of retrieving them and passing them to the kernel.
	 *
	 * errno is only touched when the request is actually rejected, so a
	 * successful clone does not leave a stale EINVAL behind.
	 */
	if ((flags & (CLONE_VM | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
		      CLONE_CHILD_CLEARTID | CLONE_SETTLS))) {
		errno = EINVAL;
		return -EINVAL;
	}

#if defined(__s390x__) || defined(__s390__) || defined(__CRIS__)
	/* On s390/s390x and cris the order of the first and second arguments
	 * of the system call is reversed.
	 */
	return (int)syscall(__NR_clone, NULL, flags | SIGCHLD);
#elif defined(__sparc__) && defined(__arch64__)
	{
		/**
		 * sparc64 always returns the other process id in %o0, and
		 * a boolean flag whether this is the child or the parent in
		 * %o1. Inline assembly is needed to get the flag returned
		 * in %o1.
		 */
		int in_child;
		int child_pid;
		asm volatile("mov %2, %%g1\n\t"
			     "mov %3, %%o0\n\t"
			     "mov 0 , %%o1\n\t"
			     "t 0x6d\n\t"
			     "mov %%o1, %0\n\t"
			     "mov %%o0, %1"
			     : "=r"(in_child), "=r"(child_pid)
			     : "i"(__NR_clone), "r"(flags | SIGCHLD)
			     : "%o1", "%o0", "%g1");

		if (in_child)
			return 0;
		else
			return child_pid;
	}
#elif defined(__ia64__)
	/* On ia64 the stack and stack size are passed as separate arguments. */
	return (int)syscall(__NR_clone, flags | SIGCHLD, NULL, 0);
#else
	return (int)syscall(__NR_clone, flags | SIGCHLD, NULL);
#endif
}

/*
 * Create a new process with lxc_raw_clone() and run fn(args) in the child.
 * The child terminates with fn's return value as its exit status; the parent
 * gets the child's pid back, or -1 if lxc_raw_clone() reported an error.
 */
pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, unsigned long flags)
{
	pid_t child = lxc_raw_clone(flags);

	if (child == 0) {
		/* Use _exit() rather than exit(): exit() is not thread-safe
		 * and might mess with the parent's signal handlers and other
		 * stuff when exec() fails.
		 */
		_exit(fn(args));
	}

	/* Any negative result is normalised to -1 for the caller. */
	return child < 0 ? -1 : child;
}

/* Leave the user namespace at the first position in the array of structs so
* that we always attach to it first when iterating over the struct and using
* setns() to switch namespaces. This especially affects lxc_attach(): Suppose
Expand Down
53 changes: 1 addition & 52 deletions src/lxc/namespace.h
Expand Up @@ -128,67 +128,16 @@ int clone(int (*fn)(void *), void *child_stack,
* corresponding libc wrapper. glibc currently does not run pthread_atfork()
* handlers but does not guarantee that it never will. Other libcs might or
* might not run pthread_atfork() handlers. If you require guarantees please
* refer to the lxc_raw_clone*() functions below.
* refer to the lxc_raw_clone*() functions in raw_syscalls.{c,h}.
*
* - should call lxc_raw_getpid():
* The child should use lxc_raw_getpid() to retrieve its pid.
*/
extern pid_t lxc_clone(int (*fn)(void *), void *arg, int flags);

/**
* lxc_raw_clone() - create a new process
*
* - fork() behavior:
* This function returns 0 in the child and > 0 in the parent.
*
* - copy-on-write:
* This function does not allocate a new stack and relies on copy-on-write
* semantics.
*
* - supports subset of CLONE_* flags:
* lxc_raw_clone() intentionally only supports a subset of the flags available
* to the actual system call. Please refer to the implementation for which
* flags cannot be used. Also, please don't assume that a flag is supported
* just because it isn't explicitly checked for as being unsupported. If in
* doubt, or not sufficiently familiar with process creation in the kernel and
* interactions with libcs, this function should not be used.
* NOTE(review): the previous wording said "should be used"; "should not be
* used" appears to be the intended meaning - please confirm.
*
* - no pthread_atfork() handlers:
* This function circumvents - as much as this is possible - any libc
* wrappers and thus does not run any pthread_atfork() handlers. Make sure
* that this is safe to do in the context you are trying to call this
* function.
*
* - must call lxc_raw_getpid():
* The child must use lxc_raw_getpid() to retrieve its pid.
*/
extern pid_t lxc_raw_clone(unsigned long flags);
/**
* lxc_raw_clone_cb() - create a new process
*
* - non-fork() behavior:
* Returns the pid of the child, or -1 on error. Pass in a callback function
* via the "fn" argument; it gets executed in the child process. The "args"
* argument is passed to "fn".
*
* All other comments that apply to lxc_raw_clone() apply to lxc_raw_clone_cb()
* as well.
*/
extern pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args,
unsigned long flags);

extern int lxc_namespace_2_cloneflag(const char *namespace);
extern int lxc_namespace_2_ns_idx(const char *namespace);
extern int lxc_namespace_2_std_identifiers(char *namespaces);
extern int lxc_fill_namespace_flags(char *flaglist, int *flags);

/**
 * lxc_raw_getpid() - retrieve the caller's pid straight from the kernel
 *
 * Because of older glibc's pid cache (up to 2.25), whenever clone() is called
 * the child must retrieve its own pid via lxc_raw_getpid().
 */
static inline pid_t lxc_raw_getpid(void)
{
	long pid = syscall(SYS_getpid);

	return (pid_t)pid;
}

#endif
1 change: 1 addition & 0 deletions src/lxc/network.c
Expand Up @@ -56,6 +56,7 @@
#include "macro.h"
#include "network.h"
#include "nl.h"
#include "raw_syscalls.h"
#include "utils.h"

#ifndef HAVE_STRLCPY
Expand Down

0 comments on commit 74d9689

Please sign in to comment.