Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions src/syscall/abi.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,19 +161,19 @@
#define SYS_copy_file_range 285
#define SYS_statx 291
#define SYS_rseq 293
/* xattr syscalls */
#define SYS_lgetxattr 9
/* xattr syscalls (numbers match aarch64 asm-generic/unistd.h) */
#define SYS_setxattr 5
#define SYS_lsetxattr 6
#define SYS_fsetxattr 7
#define SYS_getxattr 8
#define SYS_setxattr 5
#define SYS_lgetxattr 9
#define SYS_fgetxattr 10
#define SYS_listxattr 11
#define SYS_llistxattr 12
#define SYS_flistxattr 13
#define SYS_removexattr 14
#define SYS_lremovexattr 15
#define SYS_fgetxattr 16
#define SYS_fsetxattr 7
#define SYS_flistxattr 13
#define SYS_fremovexattr 18
#define SYS_fremovexattr 16
/* chroot */
#define SYS_chroot 51
/* network batch I/O */
Expand Down
18 changes: 13 additions & 5 deletions src/syscall/fs-xattr.c
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,12 @@ int64_t sys_fsetxattr(guest_t *g,
int flags)
{
host_fd_ref_t host_ref;
if (host_fd_ref_open(fd, &host_ref) < 0)
return -LINUX_EBADF;
/* Linux: fsetxattr on an O_PATH fd returns EBADF (the descriptor lacks the
* write reference required by mnt_want_write_file).
*/
int64_t err = host_fd_ref_open_io(fd, &host_ref);
if (err < 0)
return err;

char name[LINUX_XATTR_NAME_MAX + 1];
if (guest_read_str(g, name_gva, name, sizeof(name)) < 0) {
Expand All @@ -229,7 +233,7 @@ int64_t sys_fsetxattr(guest_t *g,
}

void *buf;
int64_t err = xattr_alloc_buf(size, &buf);
err = xattr_alloc_buf(size, &buf);
if (err < 0) {
host_fd_ref_close(&host_ref);
return err;
Expand Down Expand Up @@ -284,8 +288,12 @@ int64_t sys_flistxattr(guest_t *g, int fd, uint64_t list_gva, uint64_t size)
int64_t sys_fremovexattr(guest_t *g, int fd, uint64_t name_gva)
{
host_fd_ref_t host_ref;
if (host_fd_ref_open(fd, &host_ref) < 0)
return -LINUX_EBADF;
/* Linux: fremovexattr on an O_PATH fd returns EBADF, same reason as
* fsetxattr above.
*/
int64_t err = host_fd_ref_open_io(fd, &host_ref);
if (err < 0)
return err;

char name[LINUX_XATTR_NAME_MAX + 1];
if (guest_read_str(g, name_gva, name, sizeof(name)) < 0) {
Expand Down
31 changes: 21 additions & 10 deletions src/syscall/fs.c
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,12 @@ int64_t sys_getdents64(guest_t *g, int fd, uint64_t buf_gva, uint64_t count)
return -LINUX_EBADF;
if (fd_table[fd].type == FD_CLOSED)
return -LINUX_EBADF;
/* Linux: getdents on an O_PATH fd returns EBADF, even when the underlying
* inode is a directory. The early gate keeps the next NOTDIR fallback
* specific to non-directory regular fds.
*/
if (fd_table[fd].type == FD_PATH)
return -LINUX_EBADF;

DIR *dir = (DIR *) fd_table[fd].dir;
if (!dir)
Expand Down Expand Up @@ -910,8 +916,9 @@ int64_t sys_pipe2(guest_t *g, uint64_t fds_gva, int linux_flags)
int64_t sys_lseek(int fd, int64_t offset, int whence)
{
host_fd_ref_t host_ref;
if (host_fd_ref_open(fd, &host_ref) < 0)
return -LINUX_EBADF;
int64_t err = host_fd_ref_open_io(fd, &host_ref);
if (err < 0)
return err;

off_t ret = lseek(host_ref.fd, offset, whence);
host_fd_ref_close(&host_ref);
Expand Down Expand Up @@ -1433,18 +1440,22 @@ int64_t sys_faccessat(guest_t *g,

int64_t sys_ftruncate(int fd, int64_t length)
{
fd_entry_t snap;
if (!fd_snapshot(fd, &snap))
return -LINUX_EBADF;
/* Linux: ftruncate on an O_PATH fd returns EBADF. */
if (snap.type == FD_PATH)
return -LINUX_EBADF;

/* Enforce memfd seals on truncate. */
int seals = snap.seals;
if (seals & LINUX_F_SEAL_WRITE)
return -LINUX_EPERM;

host_fd_ref_t host_ref;
if (host_fd_ref_open(fd, &host_ref) < 0)
return -LINUX_EBADF;

/* Enforce memfd seals on truncate.
* fd_to_host above already validated fd is in range.
*/
int seals = fd_table[fd].seals;
if (seals & LINUX_F_SEAL_WRITE) {
host_fd_ref_close(&host_ref);
return -LINUX_EPERM;
}
if (seals & (LINUX_F_SEAL_SHRINK | LINUX_F_SEAL_GROW)) {
struct stat st;
if (fstat(host_ref.fd, &st) == 0) {
Expand Down
24 changes: 24 additions & 0 deletions src/syscall/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,30 @@ static inline int host_dirfd_ref_open(guest_fd_t dirfd, host_fd_ref_t *ref)
return host_fd_ref_open(dirfd, ref);
}

/* Acquire a host fd reference for a syscall that performs I/O on the file
 * itself, refusing guest descriptors that were opened with O_PATH.
 *
 * Intended callers are operations that act on the underlying file: read/write,
 * lseek, ftruncate, fsync/fdatasync, flock, fsetxattr/fremovexattr, ioctl, and
 * similar. Linux fails those with EBADF on an O_PATH descriptor. The host-side
 * fd backing such an entry is an ordinary O_RDONLY descriptor, so if the
 * FD_PATH entry were not filtered out here the host call would quietly succeed
 * and the guest would observe non-Linux behavior.
 *
 * Operations Linux does permit on O_PATH descriptors (fstat, fstatfs, fchdir,
 * close, dup, fcntl get/set CLOEXEC, and use as an *at() dirfd) should continue
 * to go through host_fd_ref_open / host_dirfd_ref_open instead.
 *
 * Returns 0 on success; the caller then uses *ref and releases it with
 * host_fd_ref_close. Returns -LINUX_EBADF when the guest fd cannot be
 * snapshotted, when its entry type is FD_PATH, or when host_fd_ref_open fails.
 */
static inline int64_t host_fd_ref_open_io(guest_fd_t guest_fd,
                                          host_fd_ref_t *ref)
{
    fd_entry_t entry;

    /* Short-circuit order matters: snapshot first, then the O_PATH gate, and
     * only then take the host reference, so no reference is held on failure.
     */
    if (!fd_snapshot(guest_fd, &entry) || entry.type == FD_PATH ||
        host_fd_ref_open(guest_fd, ref) < 0)
        return -LINUX_EBADF;

    return 0;
}

/* Read a guest path string with small-buffer optimization.
*
* Tries the stack-allocated short_buf first; falls back to long_buf for
Expand Down
21 changes: 12 additions & 9 deletions src/syscall/io.c
Original file line number Diff line number Diff line change
Expand Up @@ -461,19 +461,22 @@ static int64_t host_fd_ref_open_checked(int guest_fd,
host_fd_ref_t *ref,
bool check_write_seal)
{
fd_entry_t snap;
if (!fd_snapshot(guest_fd, &snap))
return -LINUX_EBADF;
if (snap.type == FD_PATH)
return -LINUX_EBADF;
if (check_write_seal && (snap.seals & LINUX_F_SEAL_WRITE))
return -LINUX_EPERM;
return host_fd_ref_open(guest_fd, ref) < 0 ? -LINUX_EBADF : 0;
if (check_write_seal) {
fd_entry_t snap;
if (!fd_snapshot(guest_fd, &snap))
return -LINUX_EBADF;
if (snap.type == FD_PATH)
return -LINUX_EBADF;
if (snap.seals & LINUX_F_SEAL_WRITE)
return -LINUX_EPERM;
return host_fd_ref_open(guest_fd, ref) < 0 ? -LINUX_EBADF : 0;
}
return host_fd_ref_open_io(guest_fd, ref);
}

static int64_t host_fd_ref_open_regular_io(int guest_fd, host_fd_ref_t *ref)
{
return host_fd_ref_open_checked(guest_fd, ref, false);
return host_fd_ref_open_io(guest_fd, ref);
}

static int64_t proc_try_writev_intercept(int fd,
Expand Down
10 changes: 6 additions & 4 deletions src/syscall/syscall.c
Original file line number Diff line number Diff line change
Expand Up @@ -1013,8 +1013,9 @@ static int64_t sc_flock(guest_t *g,
(void) x5;
(void) verbose;
host_fd_ref_t host_ref;
if (host_fd_ref_open((int) x0, &host_ref) < 0)
return -LINUX_EBADF;
int64_t err = host_fd_ref_open_io((int) x0, &host_ref);
if (err < 0)
return err;
int64_t ret = flock(host_ref.fd, (int) x1) < 0 ? linux_errno() : 0;
host_fd_ref_close(&host_ref);
return ret;
Expand All @@ -1038,8 +1039,9 @@ static int64_t sc_fsync_common(guest_t *g,
(void) x5;
(void) verbose;
host_fd_ref_t host_ref;
if (host_fd_ref_open((int) x0, &host_ref) < 0)
return -LINUX_EBADF;
int64_t err = host_fd_ref_open_io((int) x0, &host_ref);
if (err < 0)
return err;
int64_t ret = (fsync(host_ref.fd) < 0) ? linux_errno() : 0;
host_fd_ref_close(&host_ref);
return ret;
Expand Down
3 changes: 3 additions & 0 deletions tests/manifest.txt
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ test-cow-fork
[section] O_CLOEXEC tests
test-cloexec

[section] O_PATH semantics tests
test-opath

[section] Guard page / mmap edge cases
test-guard-page

Expand Down
Loading
Loading