diff --git a/src/lxc/tools/lxc_monitor.c b/src/lxc/tools/lxc_monitor.c index 0b094af114..294e8c2f05 100644 --- a/src/lxc/tools/lxc_monitor.c +++ b/src/lxc/tools/lxc_monitor.c @@ -20,22 +20,31 @@ * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ + +#define _GNU_SOURCE +#define __STDC_FORMAT_MACROS +#include +#include +#include +#include +#include +#include +#include #include #include #include -#include #include -#include +#include +#include +#include #include +#include #include -#include -#include -#include "lxc.h" -#include "log.h" -#include "monitor.h" +#include + #include "arguments.h" -#include "lxccontainer.h" +#include "tool_utils.h" static bool quit_monitord; @@ -82,6 +91,408 @@ static void close_fds(struct pollfd *fds, nfds_t nfds) } } +typedef enum { + lxc_msg_state, + lxc_msg_priority, + lxc_msg_exit_code, +} lxc_msg_type_t; + +struct lxc_msg { + lxc_msg_type_t type; + char name[NAME_MAX+1]; + int value; +}; + +typedef enum { + STOPPED, + STARTING, + RUNNING, + STOPPING, + ABORTING, + FREEZING, + FROZEN, + THAWED, + MAX_STATE, +} lxc_state_t; + +static const char *const strstate[] = { + "STOPPED", "STARTING", "RUNNING", "STOPPING", + "ABORTING", "FREEZING", "FROZEN", "THAWED", +}; + +const char *lxc_state2str(lxc_state_t state) +{ + if (state < STOPPED || state > MAX_STATE - 1) + return NULL; + return strstate[state]; +} + +/* Note we don't use SHA-1 here as we don't want to depend on HAVE_GNUTLS. + * FNV has good anti collision properties and we're not worried + * about pre-image resistance or one-way-ness, we're just trying to make + * the name unique in the 108 bytes of space we have. + */ +#define FNV1A_64_INIT ((uint64_t)0xcbf29ce484222325ULL) +static uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval) +{ + unsigned char *bp; + + for(bp = buf; bp < (unsigned char *)buf + len; bp++) + { + /* xor the bottom with the current octet */ + hval ^= (uint64_t)*bp; + + /* gcc optimised: + * multiply by the 64 bit FNV magic prime mod 2^64 + */ + hval += (hval << 1) + (hval << 4) + (hval << 5) + + (hval << 7) + (hval << 8) + (hval << 40); + } + + return hval; +} + +static int open_devnull(void) +{ + int fd = open("/dev/null", O_RDWR); + + if (fd < 0) + fprintf(stderr, "%s - Failed to open \"/dev/null\"\n", + strerror(errno)); + + return fd; +} + +static int set_stdfds(int fd) +{ + int ret; + + if (fd < 0) + return -1; + + ret = dup2(fd, STDIN_FILENO); + if (ret < 0) + return -1; + + ret = dup2(fd, STDOUT_FILENO); + if (ret < 0) + return -1; + + ret = dup2(fd, STDERR_FILENO); + if (ret < 0) + return -1; + + return 0; +} + +static int null_stdfds(void) +{ + int ret = -1; + int fd = open_devnull(); + + if (fd >= 0) { + ret = set_stdfds(fd); + close(fd); + } + + return ret; +} + +static int lxc_check_inherited(bool closeall, int *fds_to_ignore, size_t len_fds) +{ + struct dirent *direntp; + int fd, fddir; + size_t i; + DIR *dir; + +restart: + dir = opendir("/proc/self/fd"); + if (!dir) { + fprintf(stderr, "%s - Failed to open directory\n", + strerror(errno)); + return -1; + } + + fddir = dirfd(dir); + + while ((direntp = readdir(dir))) { + if (!strcmp(direntp->d_name, ".")) + continue; + + if (!strcmp(direntp->d_name, "..")) + continue; + + if (lxc_safe_int(direntp->d_name, &fd) < 0) + continue; + + for (i = 0; i < len_fds; i++) + if (fds_to_ignore[i] == fd) + break; + + if (fd == fddir || (i < len_fds && fd == fds_to_ignore[i])) + continue; + + if (fd == 0 || fd == 1 || fd == 2) + continue; + + if (closeall) { + close(fd); + closedir(dir); + goto restart; + } + } + + closedir(dir); + return 0; +} + +/* Enforces \0-termination for the abstract unix socket. This is not required + * but allows us to print it out. + * + * Older version of liblxc only allowed for 105 bytes to be used for the + * abstract unix domain socket name because the code for our abstract unix + * socket handling performed invalid checks. Since we \0-terminate we could now + * have a maximum of 106 chars. But to not break backwards compatibility we keep + * the limit at 105. + */ +static int lxc_monitor_sock_name(const char *lxcpath, struct sockaddr_un *addr) +{ + size_t len; + int ret; + char *path; + uint64_t hash; + + /* addr.sun_path is only 108 bytes, so we hash the full name and + * then append as much of the name as we can fit. + */ + memset(addr, 0, sizeof(*addr)); + addr->sun_family = AF_UNIX; + + /* strlen("lxc/") + strlen("/monitor-sock") + 1 = 18 */ + len = strlen(lxcpath) + 18; + path = alloca(len); + ret = snprintf(path, len, "lxc/%s/monitor-sock", lxcpath); + if (ret < 0 || (size_t)ret >= len) { + fprintf(stderr, "failed to create name for monitor socket\n"); + return -1; + } + + /* Note: snprintf() will \0-terminate addr->sun_path on the 106th byte + * and so the abstract socket name has 105 "meaningful" characters. This + * is absolutely intentional. For further info read the comment for this + * function above! + */ + len = sizeof(addr->sun_path) - 1; + hash = fnv_64a_buf(path, ret, FNV1A_64_INIT); + ret = snprintf(addr->sun_path, len, "@lxc/%016" PRIx64 "/%s", hash, lxcpath); + if (ret < 0) { + fprintf(stderr, "failed to create hashed name for monitor socket\n"); + return -1; + } + + /* replace @ with \0 */ + addr->sun_path[0] = '\0'; + + return 0; +} + +static int lxc_abstract_unix_connect(const char *path) +{ + int fd, ret; + size_t len; + struct sockaddr_un addr; + + fd = socket(PF_UNIX, SOCK_STREAM, 0); + if (fd < 0) + return -1; + + memset(&addr, 0, sizeof(addr)); + + addr.sun_family = AF_UNIX; + + len = strlen(&path[1]); + /* do not enforce \0-termination */ + if (len >= sizeof(addr.sun_path)) { + close(fd); + errno = ENAMETOOLONG; + return -1; + } + /* addr.sun_path[0] has already been set to 0 by memset() */ + strncpy(&addr.sun_path[1], &path[1], strlen(&path[1])); + + ret = connect(fd, (struct sockaddr *)&addr, + offsetof(struct sockaddr_un, sun_path) + len + 1); + if (ret < 0) { + close(fd); + return -1; + } + + return fd; +} + +static int lxc_monitor_open(const char *lxcpath) +{ + struct sockaddr_un addr; + int fd; + size_t retry; + size_t len; + int ret = -1; + int backoff_ms[] = {10, 50, 100}; + + if (lxc_monitor_sock_name(lxcpath, &addr) < 0) + return -1; + + fd = socket(PF_UNIX, SOCK_STREAM, 0); + if (fd < 0) { + fprintf(stderr, "Failed to create socket: %s\n", strerror(errno)); + return -errno; + } + + len = strlen(&addr.sun_path[1]); + if (len >= sizeof(addr.sun_path) - 1) { + errno = ENAMETOOLONG; + ret = -errno; + fprintf(stderr, "name of monitor socket too long (%zu bytes): %s\n", len, strerror(errno)); + goto on_error; + } + + for (retry = 0; retry < sizeof(backoff_ms) / sizeof(backoff_ms[0]); retry++) { + fd = lxc_abstract_unix_connect(addr.sun_path); + if (fd != -1 || errno != ECONNREFUSED) + break; + fprintf(stderr, "Failed to connect to monitor socket. Retrying in %d ms: %s\n", backoff_ms[retry], strerror(errno)); + usleep(backoff_ms[retry] * 1000); + } + + if (fd < 0) { + ret = -errno; + fprintf(stderr, "Failed to connect to monitor socket: %s\n", strerror(errno)); + goto on_error; + } + + return fd; + +on_error: + close(fd); + return ret; +} + +static int lxc_monitor_read_fdset(struct pollfd *fds, nfds_t nfds, + struct lxc_msg *msg, int timeout) +{ + nfds_t i; + int ret; + + ret = poll(fds, nfds, timeout * 1000); + if (ret == -1) + return -1; + else if (ret == 0) + return -2; /* timed out */ + + /* Only read from the first ready fd, the others will remain ready for + * when this routine is called again. + */ + for (i = 0; i < nfds; i++) { + if (fds[i].revents != 0) { + fds[i].revents = 0; + ret = recv(fds[i].fd, msg, sizeof(*msg), 0); + if (ret <= 0) { + fprintf(stderr, "%s - Failed to receive message. Did monitord die?\n", strerror(errno)); + return -1; + } + return ret; + } + } + + return -1; +} + +#define LXC_MONITORD_PATH LIBEXECDIR "/lxc/lxc-monitord" + +/* Used to spawn a monitord either on startup of a daemon container, or when + * lxc-monitor starts. + */ +static int lxc_monitord_spawn(const char *lxcpath) +{ + int ret; + int pipefd[2]; + char pipefd_str[TOOL_NUMSTRLEN64]; + pid_t pid1, pid2; + + char *const args[] = { + LXC_MONITORD_PATH, + (char *)lxcpath, + pipefd_str, + NULL, + }; + + /* double fork to avoid zombies when monitord exits */ + pid1 = fork(); + if (pid1 < 0) { + fprintf(stderr, "Failed to fork()\n"); + return -1; + } + + if (pid1) { + if (waitpid(pid1, NULL, 0) != pid1) + return -1; + return 0; + } + + if (pipe(pipefd) < 0) { + fprintf(stderr, "Failed to create pipe\n"); + exit(EXIT_FAILURE); + } + + pid2 = fork(); + if (pid2 < 0) { + fprintf(stderr, "Failed to fork()\n"); + exit(EXIT_FAILURE); + } + + if (pid2) { + char c; + /* Wait for daemon to create socket. */ + close(pipefd[1]); + + /* Sync with child, we're ignoring the return from read + * because regardless if it works or not, either way we've + * synced with the child process. the if-empty-statement + * construct is to quiet the warn-unused-result warning. + */ + if (read(pipefd[0], &c, 1)) + ; + + close(pipefd[0]); + + exit(EXIT_SUCCESS); + } + + if (setsid() < 0) { + fprintf(stderr, "Failed to setsid()\n"); + exit(EXIT_FAILURE); + } + + lxc_check_inherited(true, &pipefd[1], 1); + if (null_stdfds() < 0) { + fprintf(stderr, "Failed to dup2() standard file descriptors to /dev/null\n"); + exit(EXIT_FAILURE); + } + + close(pipefd[0]); + + ret = snprintf(pipefd_str, TOOL_NUMSTRLEN64, "%d", pipefd[1]); + if (ret < 0 || ret >= TOOL_NUMSTRLEN64) { + fprintf(stderr, "Failed to create pid argument to pass to monitord\n"); + exit(EXIT_FAILURE); + } + + execvp(args[0], args); + fprintf(stderr, "Failed to exec lxc-monitord\n"); + + exit(EXIT_FAILURE); +} + int main(int argc, char *argv[]) { char *regexp; @@ -109,7 +520,6 @@ int main(int argc, char *argv[]) if (lxc_log_init(&log)) exit(rc_main); - lxc_log_options_no_override(); /* REMOVE IN LXC 3.0 */ setenv("LXC_UPDATE_CONFIG_FORMAT", "1", 0); @@ -160,7 +570,7 @@ int main(int argc, char *argv[]) } nfds = my_args.lxcpath_cnt; - for (i = 0; i < nfds; i++) { + for (i = 0; (unsigned long)i < nfds; i++) { int fd; lxc_monitord_spawn(my_args.lxcpath[i]); diff --git a/src/lxc/tools/tool_utils.h b/src/lxc/tools/tool_utils.h index b937137140..851fa97c44 100644 --- a/src/lxc/tools/tool_utils.h +++ b/src/lxc/tools/tool_utils.h @@ -34,6 +34,7 @@ #include "tool_list.h" #define TOOL_MAXPATHLEN 4096 +#define TOOL_NUMSTRLEN64 21 #ifndef CLONE_PARENT_SETTID #define CLONE_PARENT_SETTID 0x00100000