From d135f781405f7c78153aa65e0327b05a4aa72e50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Fri, 23 Sep 2022 12:04:13 +0100 Subject: [PATCH 01/38] linux-user: use 'max' instead of 'qemu32' / 'qemu64' by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'qemu64' CPU model implements the least featureful x86_64 CPU that's possible. Historically this hasn't been an issue since it was rare for OS distros to build with a higher mandatory CPU baseline. With RHEL-9, however, the entire distro is built for the x86_64-v2 ABI baseline: https://developers.redhat.com/blog/2021/01/05/building-red-hat-enterprise-linux-9-for-the-x86-64-v2-microarchitecture-level It is likely that other distros may take similar steps in the not too distant future. For example, it has been suggested for Fedora on a number of occasions. This new baseline is not compatible with the qemu64 CPU model though. While it is possible to pass a '-cpu xxx' flag to qemu-x86_64, the usage of QEMU doesn't always allow for this. For example, the args are typically controlled via binfmt rules that the user has no ability to change. This impacts users who are trying to use podman on aarch64 platforms, to run containers with x86_64 content. There's no arg to podman that can be used to change the qemu-x86_64 args, and a non-root user of podman can not change binfmt rules without elevating privileges: https://github.com/containers/podman/issues/15456#issuecomment-1228210973 Changing to the 'max' CPU model gives 'qemu-x86_64' maximum compatibility with binaries it is likely to encounter in the wild, and not likely to have a significant downside for existing usage. Most other architectures already use an 'any' CPU model, which is often mapped to 'max' (or similar) already, rather than the oldest possible CPU model. For the sake of consistency the 'i386' architecture is also changed from using 'qemu32' to 'max'. Reviewed-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Daniel P. Berrangé Message-Id: <20220923110413.70593-1-berrange@redhat.com> Signed-off-by: Laurent Vivier --- linux-user/i386/target_elf.h | 2 +- linux-user/x86_64/target_elf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/linux-user/i386/target_elf.h b/linux-user/i386/target_elf.h index 1c6142e7da0d7..238a9aba738a0 100644 --- a/linux-user/i386/target_elf.h +++ b/linux-user/i386/target_elf.h @@ -9,6 +9,6 @@ #define I386_TARGET_ELF_H static inline const char *cpu_get_model(uint32_t eflags) { - return "qemu32"; + return "max"; } #endif diff --git a/linux-user/x86_64/target_elf.h b/linux-user/x86_64/target_elf.h index 7b76a90de8805..3f628f8d66197 100644 --- a/linux-user/x86_64/target_elf.h +++ b/linux-user/x86_64/target_elf.h @@ -9,6 +9,6 @@ #define X86_64_TARGET_ELF_H static inline const char *cpu_get_model(uint32_t eflags) { - return "qemu64"; + return "max"; } #endif From 65d4830dac7d88059ba77f1d31ec0c2f8f65ae28 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 8 Aug 2022 15:07:27 -0400 Subject: [PATCH 02/38] linux-user: fix readlinkat handling with magic exe symlink Exactly the same as f17f4989fa193fa8279474c5462289a3cfe69aea before was for readlink. I suppose this was simply missed at the time. Signed-off-by: Jameson Nash Reviewed-by: Laurent Vivier Message-Id: <20220808190727.875155-1-vtjnash@gmail.com> Signed-off-by: Laurent Vivier --- linux-user/syscall.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index f4091212027c2..abf82bab2a188 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -9912,11 +9912,22 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1, p2 = lock_user(VERIFY_WRITE, arg3, arg4, 0); if (!p || !p2) { ret = -TARGET_EFAULT; + } else if (!arg4) { + /* Short circuit this for the magic exe check. */ + ret = -TARGET_EINVAL; } else if (is_proc_myself((const char *)p, "exe")) { char real[PATH_MAX], *temp; temp = realpath(exec_path, real); - ret = temp == NULL ? get_errno(-1) : strlen(real) ; - snprintf((char *)p2, arg4, "%s", real); + /* Return value is # of bytes that we wrote to the buffer. */ + if (temp == NULL) { + ret = get_errno(-1); + } else { + /* Don't worry about sign mismatch as earlier mapping + * logic would have thrown a bad address error. */ + ret = MIN(strlen(real), arg4); + /* We cannot NUL terminate the string. */ + memcpy(p2, real, ret); + } } else { ret = get_errno(readlinkat(arg1, path(p), p2, arg4)); } From 7b72aa1d81d263c9d84bc02b89ea10892a08b451 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 18 Sep 2022 21:45:44 +0200 Subject: [PATCH 03/38] linux-user: Add missing signals in strace output Some of the guest signal numbers are currently not converted to their representative names in the strace output, e.g. SIGVTALRM. This patch introduces a smart way to generate and keep in sync the host-to-guest and guest-to-host signal conversion tables for usage in the qemu signal and strace code. This ensures that any signals will now show up in both tables. There is no functional change in this patch - with the exception that yet missing signal names now show up in the strace code too. Signed-off-by: Helge Deller Reviewed-by: Laurent Vivier Message-Id: <20220918194555.83535-2-deller@gmx.de> Signed-off-by: Laurent Vivier --- linux-user/signal-common.h | 46 ++++++++++++++++++++++++++++++++++++++ linux-user/signal.c | 37 +++--------------------------- linux-user/strace.c | 30 +++++++++---------------- 3 files changed, 60 insertions(+), 53 deletions(-) diff --git a/linux-user/signal-common.h b/linux-user/signal-common.h index 6a7e4a93fc139..3e2dc604c2fb0 100644 --- a/linux-user/signal-common.h +++ b/linux-user/signal-common.h @@ -118,4 +118,50 @@ static inline void finish_sigsuspend_mask(int ret) } } +#if defined(SIGSTKFLT) && defined(TARGET_SIGSTKFLT) +#define MAKE_SIG_ENTRY_SIGSTKFLT MAKE_SIG_ENTRY(SIGSTKFLT) +#else +#define MAKE_SIG_ENTRY_SIGSTKFLT +#endif + +#if defined(SIGIOT) && defined(TARGET_SIGIOT) +#define MAKE_SIG_ENTRY_SIGIOT MAKE_SIG_ENTRY(SIGIOT) +#else +#define MAKE_SIG_ENTRY_SIGIOT +#endif + +#define MAKE_SIGNAL_LIST \ + MAKE_SIG_ENTRY(SIGHUP) \ + MAKE_SIG_ENTRY(SIGINT) \ + MAKE_SIG_ENTRY(SIGQUIT) \ + MAKE_SIG_ENTRY(SIGILL) \ + MAKE_SIG_ENTRY(SIGTRAP) \ + MAKE_SIG_ENTRY(SIGABRT) \ + MAKE_SIG_ENTRY(SIGBUS) \ + MAKE_SIG_ENTRY(SIGFPE) \ + MAKE_SIG_ENTRY(SIGKILL) \ + MAKE_SIG_ENTRY(SIGUSR1) \ + MAKE_SIG_ENTRY(SIGSEGV) \ + MAKE_SIG_ENTRY(SIGUSR2) \ + MAKE_SIG_ENTRY(SIGPIPE) \ + MAKE_SIG_ENTRY(SIGALRM) \ + MAKE_SIG_ENTRY(SIGTERM) \ + MAKE_SIG_ENTRY(SIGCHLD) \ + MAKE_SIG_ENTRY(SIGCONT) \ + MAKE_SIG_ENTRY(SIGSTOP) \ + MAKE_SIG_ENTRY(SIGTSTP) \ + MAKE_SIG_ENTRY(SIGTTIN) \ + MAKE_SIG_ENTRY(SIGTTOU) \ + MAKE_SIG_ENTRY(SIGURG) \ + MAKE_SIG_ENTRY(SIGXCPU) \ + MAKE_SIG_ENTRY(SIGXFSZ) \ + MAKE_SIG_ENTRY(SIGVTALRM) \ + MAKE_SIG_ENTRY(SIGPROF) \ + MAKE_SIG_ENTRY(SIGWINCH) \ + MAKE_SIG_ENTRY(SIGIO) \ + MAKE_SIG_ENTRY(SIGPWR) \ + MAKE_SIG_ENTRY(SIGSYS) \ + MAKE_SIG_ENTRY_SIGSTKFLT \ + MAKE_SIG_ENTRY_SIGIOT + #endif diff --git a/linux-user/signal.c b/linux-user/signal.c index 8d29bfaa6b5c5..61c6fa3fcf122 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -53,40 +53,9 @@ abi_ulong default_rt_sigreturn; QEMU_BUILD_BUG_ON(__SIGRTMAX + 1 != _NSIG); #endif static uint8_t host_to_target_signal_table[_NSIG] = { - [SIGHUP] = TARGET_SIGHUP, - [SIGINT] = TARGET_SIGINT, - [SIGQUIT] = TARGET_SIGQUIT, - [SIGILL] = TARGET_SIGILL, - [SIGTRAP] = TARGET_SIGTRAP, - [SIGABRT] = TARGET_SIGABRT, -/* [SIGIOT] = TARGET_SIGIOT,*/ - [SIGBUS] = TARGET_SIGBUS, - [SIGFPE] = TARGET_SIGFPE, - [SIGKILL] = TARGET_SIGKILL, - [SIGUSR1] = TARGET_SIGUSR1, - [SIGSEGV] = TARGET_SIGSEGV, - [SIGUSR2] = TARGET_SIGUSR2, - [SIGPIPE] = TARGET_SIGPIPE, - [SIGALRM] = TARGET_SIGALRM, - [SIGTERM] = TARGET_SIGTERM, -#ifdef SIGSTKFLT - [SIGSTKFLT] = TARGET_SIGSTKFLT, -#endif - [SIGCHLD] = TARGET_SIGCHLD, - [SIGCONT] = TARGET_SIGCONT, - [SIGSTOP] = TARGET_SIGSTOP, - [SIGTSTP] = TARGET_SIGTSTP, - [SIGTTIN] = TARGET_SIGTTIN, - [SIGTTOU] = TARGET_SIGTTOU, - [SIGURG] = TARGET_SIGURG, - [SIGXCPU] = TARGET_SIGXCPU, - [SIGXFSZ] = TARGET_SIGXFSZ, - [SIGVTALRM] = TARGET_SIGVTALRM, - [SIGPROF] = TARGET_SIGPROF, - [SIGWINCH] = TARGET_SIGWINCH, - [SIGIO] = TARGET_SIGIO, - [SIGPWR] = TARGET_SIGPWR, - [SIGSYS] = TARGET_SIGSYS, +#define MAKE_SIG_ENTRY(sig) [sig] = TARGET_##sig, + MAKE_SIGNAL_LIST +#undef MAKE_SIG_ENTRY /* next signals stay the same */ }; diff --git a/linux-user/strace.c b/linux-user/strace.c index 7d882526da035..a4eeef7ae1ca3 100644 --- a/linux-user/strace.c +++ b/linux-user/strace.c @@ -17,6 +17,7 @@ #include "qemu.h" #include "user-internals.h" #include "strace.h" +#include "signal-common.h" struct syscallname { int nr; @@ -141,30 +142,21 @@ if( cmd == val ) { \ qemu_log("%d", cmd); } +static const char * const target_signal_name[] = { +#define MAKE_SIG_ENTRY(sig) [TARGET_##sig] = #sig, + MAKE_SIGNAL_LIST +#undef MAKE_SIG_ENTRY +}; + static void print_signal(abi_ulong arg, int last) { const char *signal_name = NULL; - switch(arg) { - case TARGET_SIGHUP: signal_name = "SIGHUP"; break; - case TARGET_SIGINT: signal_name = "SIGINT"; break; - case TARGET_SIGQUIT: signal_name = "SIGQUIT"; break; - case TARGET_SIGILL: signal_name = "SIGILL"; break; - case TARGET_SIGABRT: signal_name = "SIGABRT"; break; - case TARGET_SIGFPE: signal_name = "SIGFPE"; break; - case TARGET_SIGKILL: signal_name = "SIGKILL"; break; - case TARGET_SIGSEGV: signal_name = "SIGSEGV"; break; - case TARGET_SIGPIPE: signal_name = "SIGPIPE"; break; - case TARGET_SIGALRM: signal_name = "SIGALRM"; break; - case TARGET_SIGTERM: signal_name = "SIGTERM"; break; - case TARGET_SIGUSR1: signal_name = "SIGUSR1"; break; - case TARGET_SIGUSR2: signal_name = "SIGUSR2"; break; - case TARGET_SIGCHLD: signal_name = "SIGCHLD"; break; - case TARGET_SIGCONT: signal_name = "SIGCONT"; break; - case TARGET_SIGSTOP: signal_name = "SIGSTOP"; break; - case TARGET_SIGTTIN: signal_name = "SIGTTIN"; break; - case TARGET_SIGTTOU: signal_name = "SIGTTOU"; break; + + if (arg < ARRAY_SIZE(target_signal_name)) { + signal_name = target_signal_name[arg]; } + if (signal_name == NULL) { print_raw_param("%ld", arg, last); return; From aad43d15422c358ece148e39887fbd0821bca657 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 18 Sep 2022 21:45:45 +0200 Subject: [PATCH 04/38] linux-user: Add missing clock_gettime64() syscall strace Allow linux-user to strace the clock_gettime64() syscall. This syscall is used a lot on 32-bit guest architectures which use newer glibc versions. Signed-off-by: Helge Deller Reviewed-by: Laurent Vivier Message-Id: <20220918194555.83535-3-deller@gmx.de> Signed-off-by: Laurent Vivier --- linux-user/strace.c | 53 ++++++++++++++++++++++++++++++++++++++++++ linux-user/strace.list | 4 ++++ 2 files changed, 57 insertions(+) diff --git a/linux-user/strace.c b/linux-user/strace.c index a4eeef7ae1ca3..816e679995397 100644 --- a/linux-user/strace.c +++ b/linux-user/strace.c @@ -82,6 +82,7 @@ UNUSED static void print_buf(abi_long addr, abi_long len, int last); UNUSED static void print_raw_param(const char *, abi_long, int); UNUSED static void print_timeval(abi_ulong, int); UNUSED static void print_timespec(abi_ulong, int); +UNUSED static void print_timespec64(abi_ulong, int); UNUSED static void print_timezone(abi_ulong, int); UNUSED static void print_itimerval(abi_ulong, int); UNUSED static void print_number(abi_long, int); @@ -795,6 +796,24 @@ print_syscall_ret_clock_gettime(CPUArchState *cpu_env, const struct syscallname #define print_syscall_ret_clock_getres print_syscall_ret_clock_gettime #endif +#if defined(TARGET_NR_clock_gettime64) +static void +print_syscall_ret_clock_gettime64(CPUArchState *cpu_env, const struct syscallname *name, + abi_long ret, abi_long arg0, abi_long arg1, + abi_long arg2, abi_long arg3, abi_long arg4, + abi_long arg5) +{ + if (!print_syscall_err(ret)) { + qemu_log(TARGET_ABI_FMT_ld, ret); + qemu_log(" ("); + print_timespec64(arg1, 1); + qemu_log(")"); + } + + qemu_log("\n"); +} +#endif + #ifdef TARGET_NR_gettimeofday static void print_syscall_ret_gettimeofday(CPUArchState *cpu_env, const struct syscallname *name, @@ -1652,6 +1671,27 @@ print_timespec(abi_ulong ts_addr, int last) } } +static void +print_timespec64(abi_ulong ts_addr, int last) +{ + if (ts_addr) { + struct target__kernel_timespec *ts; + + ts = lock_user(VERIFY_READ, ts_addr, sizeof(*ts), 1); + if (!ts) { + print_pointer(ts_addr, last); + return; + } + qemu_log("{tv_sec = %lld" + ",tv_nsec = %lld}%s", + (long long)tswap64(ts->tv_sec), (long long)tswap64(ts->tv_nsec), + get_comma(last)); + unlock_user(ts, ts_addr, 0); + } else { + qemu_log("NULL%s", get_comma(last)); + } +} + static void print_timezone(abi_ulong tz_addr, int last) { @@ -2267,6 +2307,19 @@ print_clock_gettime(CPUArchState *cpu_env, const struct syscallname *name, #define print_clock_getres print_clock_gettime #endif +#if defined(TARGET_NR_clock_gettime64) +static void +print_clock_gettime64(CPUArchState *cpu_env, const struct syscallname *name, + abi_long arg0, abi_long arg1, abi_long arg2, + abi_long arg3, abi_long arg4, abi_long arg5) +{ + print_syscall_prologue(name); + print_enums(clockids, arg0, 0); + print_pointer(arg1, 1); + print_syscall_epilogue(name); +} +#endif + #ifdef TARGET_NR_clock_settime static void print_clock_settime(CPUArchState *cpu_env, const struct syscallname *name, diff --git a/linux-user/strace.list b/linux-user/strace.list index 72e17b1acf00a..a78cdf3cdfef7 100644 --- a/linux-user/strace.list +++ b/linux-user/strace.list @@ -1676,3 +1676,7 @@ #ifdef TARGET_NR_copy_file_range { TARGET_NR_copy_file_range, "copy_file_range", "%s(%d,%p,%d,%p,"TARGET_ABI_FMT_lu",%u)", NULL, NULL }, #endif +#ifdef TARGET_NR_clock_gettime64 +{ TARGET_NR_clock_gettime64, "clock_gettime64" , NULL, print_clock_gettime64, + print_syscall_ret_clock_gettime64 }, +#endif From cc054c6f139cf54ce8fbefd6fd536f50b4cba694 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 18 Sep 2022 21:45:46 +0200 Subject: [PATCH 05/38] linux-user: Add pidfd_open(), pidfd_send_signal() and pidfd_getfd() syscalls I noticed those were missing when running the glib2.0 testsuite. Add the syscalls including the strace output. Signed-off-by: Helge Deller Reviewed-by: Laurent Vivier Message-Id: <20220918194555.83535-4-deller@gmx.de> Signed-off-by: Laurent Vivier --- linux-user/strace.c | 28 ++++++++++++++++++++++++++++ linux-user/strace.list | 9 +++++++++ linux-user/syscall.c | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+) diff --git a/linux-user/strace.c b/linux-user/strace.c index 816e679995397..5ac64df02b854 100644 --- a/linux-user/strace.c +++ b/linux-user/strace.c @@ -3317,6 +3317,34 @@ print_openat(CPUArchState *cpu_env, const struct syscallname *name, } #endif +#ifdef TARGET_NR_pidfd_send_signal +static void +print_pidfd_send_signal(CPUArchState *cpu_env, const struct syscallname *name, + abi_long arg0, abi_long arg1, abi_long arg2, + abi_long arg3, abi_long arg4, abi_long arg5) +{ + void *p; + target_siginfo_t uinfo; + + print_syscall_prologue(name); + print_raw_param("%d", arg0, 0); + print_signal(arg1, 0); + + p = lock_user(VERIFY_READ, arg2, sizeof(target_siginfo_t), 1); + if (p) { + get_target_siginfo(&uinfo, p); + print_siginfo(&uinfo); + + unlock_user(p, arg2, 0); + } else { + print_pointer(arg2, 1); + } + + print_raw_param("%u", arg3, 0); + print_syscall_epilogue(name); +} +#endif + #ifdef TARGET_NR_mq_unlink static void print_mq_unlink(CPUArchState *cpu_env, const struct syscallname *name, diff --git a/linux-user/strace.list b/linux-user/strace.list index a78cdf3cdfef7..4d8b7f6a5e0cd 100644 --- a/linux-user/strace.list +++ b/linux-user/strace.list @@ -1664,6 +1664,15 @@ #ifdef TARGET_NR_pipe2 { TARGET_NR_pipe2, "pipe2", NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_pidfd_open +{ TARGET_NR_pidfd_open, "pidfd_open", "%s(%d,%u)", NULL, NULL }, +#endif +#ifdef TARGET_NR_pidfd_send_signal +{ TARGET_NR_pidfd_send_signal, "pidfd_send_signal", NULL, print_pidfd_send_signal, NULL }, +#endif +#ifdef TARGET_NR_pidfd_getfd +{ TARGET_NR_pidfd_getfd, "pidfd_getfd", "%s(%d,%d,%u)", NULL, NULL }, +#endif #ifdef TARGET_NR_atomic_cmpxchg_32 { TARGET_NR_atomic_cmpxchg_32, "atomic_cmpxchg_32", NULL, NULL, NULL }, #endif diff --git a/linux-user/syscall.c b/linux-user/syscall.c index abf82bab2a188..850410c9b5537 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -346,6 +346,16 @@ _syscall6(int,sys_futex,int *,uaddr,int,op,int,val, _syscall6(int,sys_futex_time64,int *,uaddr,int,op,int,val, const struct timespec *,timeout,int *,uaddr2,int,val3) #endif +#if defined(__NR_pidfd_open) && defined(TARGET_NR_pidfd_open) +_syscall2(int, pidfd_open, pid_t, pid, unsigned int, flags); +#endif +#if defined(__NR_pidfd_send_signal) && defined(TARGET_NR_pidfd_send_signal) +_syscall4(int, pidfd_send_signal, int, pidfd, int, sig, siginfo_t *, info, + unsigned int, flags); +#endif +#if defined(__NR_pidfd_getfd) && defined(TARGET_NR_pidfd_getfd) +_syscall3(int, pidfd_getfd, int, pidfd, int, targetfd, unsigned int, flags); +#endif #define __NR_sys_sched_getaffinity __NR_sched_getaffinity _syscall3(int, sys_sched_getaffinity, pid_t, pid, unsigned int, len, unsigned long *, user_mask_ptr); @@ -8683,6 +8693,30 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1, ret = do_open_by_handle_at(arg1, arg2, arg3); fd_trans_unregister(ret); return ret; +#endif +#if defined(__NR_pidfd_open) && defined(TARGET_NR_pidfd_open) + case TARGET_NR_pidfd_open: + return get_errno(pidfd_open(arg1, arg2)); +#endif +#if defined(__NR_pidfd_send_signal) && defined(TARGET_NR_pidfd_send_signal) + case TARGET_NR_pidfd_send_signal: + { + siginfo_t uinfo; + + p = lock_user(VERIFY_READ, arg3, sizeof(target_siginfo_t), 1); + if (!p) { + return -TARGET_EFAULT; + } + target_to_host_siginfo(&uinfo, p); + unlock_user(p, arg3, 0); + ret = get_errno(pidfd_send_signal(arg1, target_to_host_signal(arg2), + &uinfo, arg4)); + } + return ret; +#endif +#if defined(__NR_pidfd_getfd) && defined(TARGET_NR_pidfd_getfd) + case TARGET_NR_pidfd_getfd: + return get_errno(pidfd_getfd(arg1, arg2, arg3)); #endif case TARGET_NR_close: fd_trans_unregister(arg1); From 35dffc568164f08ebdb42bc8eb6307fed9af0ffe Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 18 Sep 2022 21:45:47 +0200 Subject: [PATCH 06/38] linux-user: Log failing executable in EXCP_DUMP() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enhance the EXCP_DUMP() macro to print out the failing program too. During debugging it's sometimes hard to track down the actual failing program if you are e.g. building a whole debian package. Signed-off-by: Helge Deller Reviewed-by: Laurent Vivier Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20220918194555.83535-5-deller@gmx.de> Signed-off-by: Laurent Vivier --- linux-user/cpu_loop-common.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/linux-user/cpu_loop-common.h b/linux-user/cpu_loop-common.h index dc0042e4de352..36ff5b14f2a7a 100644 --- a/linux-user/cpu_loop-common.h +++ b/linux-user/cpu_loop-common.h @@ -27,9 +27,11 @@ do { \ CPUState *cs = env_cpu(env); \ fprintf(stderr, fmt , ## __VA_ARGS__); \ + fprintf(stderr, "Failing executable: %s\n", exec_path); \ cpu_dump_state(cs, stderr, 0); \ if (qemu_log_separate()) { \ qemu_log(fmt, ## __VA_ARGS__); \ + qemu_log("Failing executable: %s\n", exec_path); \ log_cpu_state(cs, 0); \ } \ } while (0) From 12640b4fe96fe23dc344b03378dda1eea5b7a191 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 18 Sep 2022 21:45:48 +0200 Subject: [PATCH 07/38] linux-user/hppa: Use EXCP_DUMP() to show enhanced debug info Enhance the hppa linux-user cpu_loop() to show more debugging info on hard errors. Signed-off-by: Helge Deller Reviewed-by: Laurent Vivier Message-Id: <20220918194555.83535-6-deller@gmx.de> Signed-off-by: Laurent Vivier --- linux-user/hppa/cpu_loop.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/linux-user/hppa/cpu_loop.c b/linux-user/hppa/cpu_loop.c index 64263c3dc4068..1ef3b461911cc 100644 --- a/linux-user/hppa/cpu_loop.c +++ b/linux-user/hppa/cpu_loop.c @@ -147,12 +147,15 @@ void cpu_loop(CPUHPPAState *env) force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, env->iaoq_f); break; case EXCP_ILL: + EXCP_DUMP(env, "qemu: got CPU exception 0x%x - aborting\n", trapnr); force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLOPN, env->iaoq_f); break; case EXCP_PRIV_OPR: + EXCP_DUMP(env, "qemu: got CPU exception 0x%x - aborting\n", trapnr); force_sig_fault(TARGET_SIGILL, TARGET_ILL_PRVOPC, env->iaoq_f); break; case EXCP_PRIV_REG: + EXCP_DUMP(env, "qemu: got CPU exception 0x%x - aborting\n", trapnr); force_sig_fault(TARGET_SIGILL, TARGET_ILL_PRVREG, env->iaoq_f); break; case EXCP_OVERFLOW: @@ -171,7 +174,8 @@ void cpu_loop(CPUHPPAState *env) /* just indicate that signals should be handled asap */ break; default: - g_assert_not_reached(); + EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", trapnr); + abort(); } process_pending_signals(env); } From 770525f8eac9a2d048896fd54e4c596af747f238 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 18 Sep 2022 21:45:49 +0200 Subject: [PATCH 08/38] linux-user/hppa: Dump IIR on register dump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include the IIR register (which holds the opcode of the failing instruction) when dumping the hppa registers. Signed-off-by: Helge Deller Reviewed-by: Laurent Vivier Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20220918194555.83535-7-deller@gmx.de> Signed-off-by: Laurent Vivier --- target/hppa/helper.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/target/hppa/helper.c b/target/hppa/helper.c index e2758d8df3807..74b8747083f96 100644 --- a/target/hppa/helper.c +++ b/target/hppa/helper.c @@ -85,9 +85,11 @@ void hppa_cpu_dump_state(CPUState *cs, FILE *f, int flags) char psw_c[20]; int i; - qemu_fprintf(f, "IA_F " TARGET_FMT_lx " IA_B " TARGET_FMT_lx "\n", + qemu_fprintf(f, "IA_F " TARGET_FMT_lx " IA_B " TARGET_FMT_lx + " IIR " TREG_FMT_lx "\n", hppa_form_gva_psw(psw, env->iasq_f, env->iaoq_f), - hppa_form_gva_psw(psw, env->iasq_b, env->iaoq_b)); + hppa_form_gva_psw(psw, env->iasq_b, env->iaoq_b), + env->cr[CR_IIR]); psw_c[0] = (psw & PSW_W ? 'W' : '-'); psw_c[1] = (psw & PSW_E ? 'E' : '-'); From 105d599a33462bd64529bc3bacc68e2d0fbb876b Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 18 Sep 2022 21:45:50 +0200 Subject: [PATCH 09/38] linux-user: Fix strace of chmod() if mode == 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the mode parameter of chmod() is zero, this value isn't shown when stracing a program: chmod("filename",) This patch fixes it up to show the zero-value as well: chmod("filename",000) Signed-off-by: Helge Deller Reviewed-by: Laurent Vivier Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20220918194555.83535-8-deller@gmx.de> Signed-off-by: Laurent Vivier --- linux-user/strace.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/linux-user/strace.c b/linux-user/strace.c index 5ac64df02b854..2f539845bb908 100644 --- a/linux-user/strace.c +++ b/linux-user/strace.c @@ -1505,6 +1505,11 @@ print_file_mode(abi_long mode, int last) const char *sep = ""; const struct flags *m; + if (mode == 0) { + qemu_log("000%s", get_comma(last)); + return; + } + for (m = &mode_flags[0]; m->f_string != NULL; m++) { if ((m->f_value & mode) == m->f_value) { qemu_log("%s%s", m->f_string, sep); From 9c9b5d7b9220d2f52a2df24373c8f35604f444fc Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 18 Sep 2022 21:45:51 +0200 Subject: [PATCH 10/38] linux-user/hppa: Set TASK_UNMAPPED_BASE to 0xfa000000 for hppa arch On the parisc architecture the stack grows upwards. Move the TASK_UNMAPPED_BASE to high memory area as it's done by the kernel on physical machines. Signed-off-by: Helge Deller Message-Id: <20220918194555.83535-9-deller@gmx.de> Signed-off-by: Laurent Vivier --- linux-user/mmap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index 6a828e8418a75..83fdae7034ea1 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -253,8 +253,12 @@ static int mmap_frag(abi_ulong real_start, # define TASK_UNMAPPED_BASE (1ul << 38) #endif #else +#ifdef TARGET_HPPA +# define TASK_UNMAPPED_BASE 0xfa000000 +#else # define TASK_UNMAPPED_BASE 0x40000000 #endif +#endif abi_ulong mmap_next_start = TASK_UNMAPPED_BASE; unsigned long last_brk; From 05f3adc982caf4d10128f087775385decc7fb3b3 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 18 Sep 2022 21:45:52 +0200 Subject: [PATCH 11/38] linux-user: Add strace for clock_nanosleep() Signed-off-by: Helge Deller Reviewed-by: Laurent Vivier Message-Id: <20220918194555.83535-10-deller@gmx.de> Signed-off-by: Laurent Vivier --- linux-user/strace.c | 15 +++++++++++++++ linux-user/strace.list | 3 ++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/linux-user/strace.c b/linux-user/strace.c index 2f539845bb908..6f818212d5292 100644 --- a/linux-user/strace.c +++ b/linux-user/strace.c @@ -3567,6 +3567,21 @@ print_unshare(CPUArchState *cpu_env, const struct syscallname *name, } #endif +#ifdef TARGET_NR_clock_nanosleep +static void +print_clock_nanosleep(CPUArchState *cpu_env, const struct syscallname *name, + abi_long arg0, abi_long arg1, abi_long arg2, + abi_long arg3, abi_long arg4, abi_long arg5) +{ + print_syscall_prologue(name); + print_enums(clockids, arg0, 0); + print_raw_param("%d", arg1, 0); + print_timespec(arg2, 0); + print_timespec(arg3, 1); + print_syscall_epilogue(name); +} +#endif + #ifdef TARGET_NR_utime static void print_utime(CPUArchState *cpu_env, const struct syscallname *name, diff --git a/linux-user/strace.list b/linux-user/strace.list index 4d8b7f6a5e0cd..215d971b2aea6 100644 --- a/linux-user/strace.list +++ b/linux-user/strace.list @@ -91,7 +91,8 @@ print_syscall_ret_clock_gettime }, #endif #ifdef TARGET_NR_clock_nanosleep -{ TARGET_NR_clock_nanosleep, "clock_nanosleep" , NULL, NULL, NULL }, +{ TARGET_NR_clock_nanosleep, "clock_nanosleep" , NULL, print_clock_nanosleep, + NULL }, #endif #ifdef TARGET_NR_clock_settime { TARGET_NR_clock_settime, "clock_settime" , NULL, print_clock_settime, NULL }, From 811ee5cfc5da2e0b94eb11192ce2700fcd7063d1 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 18 Sep 2022 21:45:53 +0200 Subject: [PATCH 12/38] linux-user: Show timespec on strace for futex() Signed-off-by: Helge Deller Reviewed-by: Laurent Vivier Message-Id: <20220918194555.83535-11-deller@gmx.de> Signed-off-by: Laurent Vivier --- linux-user/strace.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/linux-user/strace.c b/linux-user/strace.c index 6f818212d5292..b6b9abaea4666 100644 --- a/linux-user/strace.c +++ b/linux-user/strace.c @@ -3714,11 +3714,20 @@ print_futex(CPUArchState *cpu_env, const struct syscallname *name, abi_long arg0, abi_long arg1, abi_long arg2, abi_long arg3, abi_long arg4, abi_long arg5) { + abi_long op = arg1 & FUTEX_CMD_MASK; print_syscall_prologue(name); print_pointer(arg0, 0); print_futex_op(arg1, 0); print_raw_param(",%d", arg2, 0); - print_pointer(arg3, 0); /* struct timespec */ + switch (op) { + case FUTEX_WAIT: + case FUTEX_WAIT_BITSET: + print_timespec(arg3, 0); + break; + default: + print_pointer(arg3, 0); + break; + } print_pointer(arg4, 0); print_raw_param("%d", arg4, 1); print_syscall_epilogue(name); From 9f22020b91ae235be2b27c47d11e842872ec5e85 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 6 Sep 2022 02:08:35 +0200 Subject: [PATCH 13/38] linux-user: Provide MADV_* definitions Provide MADV_* definitions using target_mman.h header, similar to what kernel does. Most architectures use the same values, with the exception of alpha and hppa. Signed-off-by: Ilya Leoshkevich Reviewed-by: Richard Henderson Message-Id: <20220906000839.1672934-2-iii@linux.ibm.com> Signed-off-by: Laurent Vivier --- linux-user/aarch64/target_mman.h | 1 + linux-user/alpha/target_mman.h | 8 +++ linux-user/arm/target_mman.h | 1 + linux-user/cris/target_mman.h | 1 + linux-user/generic/target_mman.h | 92 ++++++++++++++++++++++++++++ linux-user/hexagon/target_mman.h | 1 + linux-user/hppa/target_mman.h | 15 +++++ linux-user/i386/target_mman.h | 1 + linux-user/loongarch64/target_mman.h | 1 + linux-user/m68k/target_mman.h | 1 + linux-user/microblaze/target_mman.h | 1 + linux-user/mips/target_mman.h | 1 + linux-user/mips64/target_mman.h | 1 + linux-user/nios2/target_mman.h | 1 + linux-user/openrisc/target_mman.h | 1 + linux-user/ppc/target_mman.h | 1 + linux-user/riscv/target_mman.h | 1 + linux-user/s390x/target_mman.h | 1 + linux-user/sh4/target_mman.h | 1 + linux-user/sparc/target_mman.h | 1 + linux-user/x86_64/target_mman.h | 1 + linux-user/xtensa/target_mman.h | 1 + 22 files changed, 134 insertions(+) create mode 100644 linux-user/aarch64/target_mman.h create mode 100644 linux-user/alpha/target_mman.h create mode 100644 linux-user/arm/target_mman.h create mode 100644 linux-user/cris/target_mman.h create mode 100644 linux-user/generic/target_mman.h create mode 100644 linux-user/hexagon/target_mman.h create mode 100644 linux-user/hppa/target_mman.h create mode 100644 linux-user/i386/target_mman.h create mode 100644 linux-user/loongarch64/target_mman.h create mode 100644 linux-user/m68k/target_mman.h create mode 100644 linux-user/microblaze/target_mman.h create mode 100644 linux-user/mips/target_mman.h create mode 100644 linux-user/mips64/target_mman.h create mode 100644 linux-user/nios2/target_mman.h create mode 100644 linux-user/openrisc/target_mman.h create mode 100644 linux-user/ppc/target_mman.h create mode 100644 linux-user/riscv/target_mman.h create mode 100644 linux-user/s390x/target_mman.h create mode 100644 linux-user/sh4/target_mman.h create mode 100644 linux-user/sparc/target_mman.h create mode 100644 linux-user/x86_64/target_mman.h create mode 100644 linux-user/xtensa/target_mman.h diff --git a/linux-user/aarch64/target_mman.h b/linux-user/aarch64/target_mman.h new file mode 100644 index 0000000000000..e7ba6070fe7cc --- /dev/null +++ b/linux-user/aarch64/target_mman.h @@ -0,0 +1 @@ +#include "../generic/target_mman.h" diff --git a/linux-user/alpha/target_mman.h b/linux-user/alpha/target_mman.h new file mode 100644 index 0000000000000..cd6e3d70a6047 --- /dev/null +++ b/linux-user/alpha/target_mman.h @@ -0,0 +1,8 @@ +#ifndef ALPHA_TARGET_MMAN_H +#define ALPHA_TARGET_MMAN_H + +#define TARGET_MADV_DONTNEED 6 + +#include "../generic/target_mman.h" + +#endif diff --git a/linux-user/arm/target_mman.h b/linux-user/arm/target_mman.h new file mode 100644 index 0000000000000..e7ba6070fe7cc --- /dev/null +++ b/linux-user/arm/target_mman.h @@ -0,0 +1 @@ +#include "../generic/target_mman.h" diff --git a/linux-user/cris/target_mman.h b/linux-user/cris/target_mman.h new file mode 100644 index 0000000000000..e7ba6070fe7cc --- /dev/null +++ b/linux-user/cris/target_mman.h @@ -0,0 +1 @@ +#include "../generic/target_mman.h" diff --git a/linux-user/generic/target_mman.h b/linux-user/generic/target_mman.h new file mode 100644 index 0000000000000..1436a3c54308c --- /dev/null +++ b/linux-user/generic/target_mman.h @@ -0,0 +1,92 @@ +#ifndef LINUX_USER_TARGET_MMAN_H +#define LINUX_USER_TARGET_MMAN_H + +#ifndef TARGET_MADV_NORMAL +#define TARGET_MADV_NORMAL 0 +#endif + +#ifndef TARGET_MADV_RANDOM +#define TARGET_MADV_RANDOM 1 +#endif + +#ifndef TARGET_MADV_SEQUENTIAL +#define TARGET_MADV_SEQUENTIAL 2 +#endif + +#ifndef TARGET_MADV_WILLNEED +#define TARGET_MADV_WILLNEED 3 +#endif + +#ifndef TARGET_MADV_DONTNEED +#define TARGET_MADV_DONTNEED 4 +#endif + +#ifndef TARGET_MADV_FREE +#define TARGET_MADV_FREE 8 +#endif + +#ifndef TARGET_MADV_REMOVE +#define TARGET_MADV_REMOVE 9 +#endif + +#ifndef TARGET_MADV_DONTFORK +#define TARGET_MADV_DONTFORK 10 +#endif + +#ifndef TARGET_MADV_DOFORK +#define TARGET_MADV_DOFORK 11 +#endif + +#ifndef TARGET_MADV_MERGEABLE +#define TARGET_MADV_MERGEABLE 12 +#endif + +#ifndef TARGET_MADV_UNMERGEABLE +#define TARGET_MADV_UNMERGEABLE 13 +#endif + +#ifndef TARGET_MADV_HUGEPAGE +#define TARGET_MADV_HUGEPAGE 14 +#endif + +#ifndef TARGET_MADV_NOHUGEPAGE +#define TARGET_MADV_NOHUGEPAGE 15 +#endif + +#ifndef TARGET_MADV_DONTDUMP +#define TARGET_MADV_DONTDUMP 16 +#endif + +#ifndef TARGET_MADV_DODUMP +#define TARGET_MADV_DODUMP 17 +#endif + +#ifndef TARGET_MADV_WIPEONFORK +#define TARGET_MADV_WIPEONFORK 18 +#endif + +#ifndef TARGET_MADV_KEEPONFORK +#define TARGET_MADV_KEEPONFORK 19 +#endif + +#ifndef TARGET_MADV_COLD +#define TARGET_MADV_COLD 20 +#endif + +#ifndef TARGET_MADV_PAGEOUT +#define TARGET_MADV_PAGEOUT 21 +#endif + +#ifndef TARGET_MADV_POPULATE_READ +#define TARGET_MADV_POPULATE_READ 22 +#endif + +#ifndef TARGET_MADV_POPULATE_WRITE +#define TARGET_MADV_POPULATE_WRITE 23 +#endif + +#ifndef TARGET_MADV_DONTNEED_LOCKED +#define TARGET_MADV_DONTNEED_LOCKED 24 +#endif + +#endif diff --git a/linux-user/hexagon/target_mman.h b/linux-user/hexagon/target_mman.h new file mode 100644 index 0000000000000..e7ba6070fe7cc --- /dev/null +++ b/linux-user/hexagon/target_mman.h @@ -0,0 +1 @@ +#include "../generic/target_mman.h" diff --git a/linux-user/hppa/target_mman.h b/linux-user/hppa/target_mman.h new file mode 100644 index 0000000000000..66dd9f79414ed --- /dev/null +++ b/linux-user/hppa/target_mman.h @@ -0,0 +1,15 @@ +#ifndef HPPA_TARGET_MMAN_H +#define HPPA_TARGET_MMAN_H + +#define TARGET_MADV_MERGEABLE 65 +#define TARGET_MADV_UNMERGEABLE 66 +#define TARGET_MADV_HUGEPAGE 67 +#define TARGET_MADV_NOHUGEPAGE 68 +#define TARGET_MADV_DONTDUMP 69 +#define TARGET_MADV_DODUMP 70 +#define TARGET_MADV_WIPEONFORK 71 +#define TARGET_MADV_KEEPONFORK 72 + +#include "../generic/target_mman.h" + +#endif diff --git a/linux-user/i386/target_mman.h b/linux-user/i386/target_mman.h new file mode 100644 index 0000000000000..e7ba6070fe7cc --- /dev/null +++ b/linux-user/i386/target_mman.h @@ -0,0 +1 @@ +#include "../generic/target_mman.h" diff --git a/linux-user/loongarch64/target_mman.h b/linux-user/loongarch64/target_mman.h new file mode 100644 index 0000000000000..e7ba6070fe7cc --- /dev/null +++ b/linux-user/loongarch64/target_mman.h @@ -0,0 +1 @@ +#include "../generic/target_mman.h" diff --git a/linux-user/m68k/target_mman.h b/linux-user/m68k/target_mman.h new file mode 100644 index 0000000000000..e7ba6070fe7cc --- /dev/null +++ b/linux-user/m68k/target_mman.h @@ -0,0 +1 @@ +#include "../generic/target_mman.h" diff --git a/linux-user/microblaze/target_mman.h b/linux-user/microblaze/target_mman.h new file mode 100644 index 0000000000000..e7ba6070fe7cc --- /dev/null +++ b/linux-user/microblaze/target_mman.h @@ -0,0 +1 @@ +#include "../generic/target_mman.h" diff --git a/linux-user/mips/target_mman.h b/linux-user/mips/target_mman.h new file mode 100644 index 0000000000000..e7ba6070fe7cc --- /dev/null +++ b/linux-user/mips/target_mman.h @@ -0,0 +1 @@ +#include "../generic/target_mman.h" diff --git a/linux-user/mips64/target_mman.h b/linux-user/mips64/target_mman.h new file mode 100644 index 0000000000000..e7ba6070fe7cc --- /dev/null +++ b/linux-user/mips64/target_mman.h @@ -0,0 +1 @@ +#include "../generic/target_mman.h" diff --git a/linux-user/nios2/target_mman.h b/linux-user/nios2/target_mman.h new file mode 100644 index 0000000000000..e7ba6070fe7cc --- /dev/null +++ b/linux-user/nios2/target_mman.h @@ -0,0 +1 @@ +#include "../generic/target_mman.h" diff --git a/linux-user/openrisc/target_mman.h b/linux-user/openrisc/target_mman.h new file mode 100644 index 0000000000000..e7ba6070fe7cc --- /dev/null +++ b/linux-user/openrisc/target_mman.h @@ -0,0 +1 @@ +#include "../generic/target_mman.h" diff --git a/linux-user/ppc/target_mman.h b/linux-user/ppc/target_mman.h new file mode 100644 index 0000000000000..e7ba6070fe7cc --- /dev/null +++ b/linux-user/ppc/target_mman.h @@ -0,0 +1 @@ +#include "../generic/target_mman.h" diff --git a/linux-user/riscv/target_mman.h b/linux-user/riscv/target_mman.h new file mode 100644 index 0000000000000..e7ba6070fe7cc --- /dev/null +++ b/linux-user/riscv/target_mman.h @@ -0,0 +1 @@ +#include "../generic/target_mman.h" diff --git a/linux-user/s390x/target_mman.h b/linux-user/s390x/target_mman.h new file mode 100644 index 0000000000000..e7ba6070fe7cc --- /dev/null +++ b/linux-user/s390x/target_mman.h @@ -0,0 +1 @@ +#include "../generic/target_mman.h" diff --git a/linux-user/sh4/target_mman.h b/linux-user/sh4/target_mman.h new file mode 100644 index 0000000000000..e7ba6070fe7cc --- /dev/null +++ b/linux-user/sh4/target_mman.h @@ -0,0 +1 @@ +#include "../generic/target_mman.h" diff --git a/linux-user/sparc/target_mman.h b/linux-user/sparc/target_mman.h new file mode 100644 index 0000000000000..e7ba6070fe7cc --- /dev/null +++ b/linux-user/sparc/target_mman.h @@ -0,0 +1 @@ +#include "../generic/target_mman.h" diff --git a/linux-user/x86_64/target_mman.h b/linux-user/x86_64/target_mman.h new file mode 100644 index 0000000000000..e7ba6070fe7cc --- /dev/null +++ b/linux-user/x86_64/target_mman.h @@ -0,0 +1 @@ +#include "../generic/target_mman.h" diff --git a/linux-user/xtensa/target_mman.h b/linux-user/xtensa/target_mman.h new file mode 100644 index 0000000000000..e7ba6070fe7cc --- /dev/null +++ b/linux-user/xtensa/target_mman.h @@ -0,0 +1 @@ +#include "../generic/target_mman.h" From 8655b4c7099dd198836610bdfb22f09d332b28d3 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 6 Sep 2022 02:08:36 +0200 Subject: [PATCH 14/38] linux-user: Fix madvise(MADV_DONTNEED) on alpha MADV_DONTNEED has a different value on alpha, compared to all the other architectures. Fix by using TARGET_MADV_DONTNEED instead of MADV_DONTNEED. Fixes: 892a4f6a750a ("linux-user: Add partial support for MADV_DONTNEED") Signed-off-by: Ilya Leoshkevich Reviewed-by: Richard Henderson Message-Id: <20220906000839.1672934-3-iii@linux.ibm.com> Signed-off-by: Laurent Vivier --- linux-user/mmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index 83fdae7034ea1..942bec7e8528e 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -22,6 +22,7 @@ #include "qemu.h" #include "user-internals.h" #include "user-mmap.h" +#include "target_mman.h" static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER; static __thread int mmap_lock_count; @@ -897,7 +898,7 @@ abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice) * anonymous mappings. In this case passthrough is safe, so do it. */ mmap_lock(); - if (advice == MADV_DONTNEED && + if (advice == TARGET_MADV_DONTNEED && can_passthrough_madv_dontneed(start, end)) { ret = get_errno(madvise(g2h_untagged(start), len, MADV_DONTNEED)); if (ret == 0) { From 375ce49be285c5ff73674674350cc99807dfac83 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 6 Sep 2022 02:08:37 +0200 Subject: [PATCH 15/38] linux-user: Implement stracing madvise() The default implementation has several problems: the first argument is not displayed as a pointer, making it harder to grep; the third argument is not symbolized; and there are several extra unused arguments. Signed-off-by: Ilya Leoshkevich Reviewed-by: Richard Henderson Message-Id: <20220906000839.1672934-4-iii@linux.ibm.com> Signed-off-by: Laurent Vivier --- linux-user/strace.c | 41 +++++++++++++++++++++++++++++++++++++++++ linux-user/strace.list | 2 +- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/linux-user/strace.c b/linux-user/strace.c index b6b9abaea4666..2deb84a2c1061 100644 --- a/linux-user/strace.c +++ b/linux-user/strace.c @@ -18,6 +18,7 @@ #include "user-internals.h" #include "strace.h" #include "signal-common.h" +#include "target_mman.h" struct syscallname { int nr; @@ -3019,6 +3020,46 @@ print_stat(CPUArchState *cpu_env, const struct syscallname *name, #define print_lstat64 print_stat #endif +#if defined(TARGET_NR_madvise) +static struct enums madvise_advice[] = { + ENUM_TARGET(MADV_NORMAL), + ENUM_TARGET(MADV_RANDOM), + ENUM_TARGET(MADV_SEQUENTIAL), + ENUM_TARGET(MADV_WILLNEED), + ENUM_TARGET(MADV_DONTNEED), + ENUM_TARGET(MADV_FREE), + ENUM_TARGET(MADV_REMOVE), + ENUM_TARGET(MADV_DONTFORK), + ENUM_TARGET(MADV_DOFORK), + ENUM_TARGET(MADV_MERGEABLE), + ENUM_TARGET(MADV_UNMERGEABLE), + ENUM_TARGET(MADV_HUGEPAGE), + ENUM_TARGET(MADV_NOHUGEPAGE), + ENUM_TARGET(MADV_DONTDUMP), + ENUM_TARGET(MADV_DODUMP), + ENUM_TARGET(MADV_WIPEONFORK), + ENUM_TARGET(MADV_KEEPONFORK), + ENUM_TARGET(MADV_COLD), + ENUM_TARGET(MADV_PAGEOUT), + ENUM_TARGET(MADV_POPULATE_READ), + ENUM_TARGET(MADV_POPULATE_WRITE), + ENUM_TARGET(MADV_DONTNEED_LOCKED), + ENUM_END, +}; + +static void +print_madvise(CPUArchState *cpu_env, const struct syscallname *name, + abi_long arg0, abi_long arg1, abi_long arg2, + abi_long arg3, abi_long arg4, abi_long arg5) +{ + print_syscall_prologue(name); + print_pointer(arg0, 0); + print_raw_param("%d", arg1, 0); + print_enums(madvise_advice, arg2, 1); + print_syscall_epilogue(name); +} +#endif + #if defined(TARGET_NR_fstat) || defined(TARGET_NR_fstat64) static void print_fstat(CPUArchState *cpu_env, const struct syscallname *name, diff --git a/linux-user/strace.list b/linux-user/strace.list index 215d971b2aea6..c6f748505a9ec 100644 --- a/linux-user/strace.list +++ b/linux-user/strace.list @@ -542,7 +542,7 @@ { TARGET_NR_lstat64, "lstat64" , NULL, print_lstat64, NULL }, #endif #ifdef TARGET_NR_madvise -{ TARGET_NR_madvise, "madvise" , NULL, NULL, NULL }, +{ TARGET_NR_madvise, "madvise" , NULL, print_madvise, NULL }, #endif #ifdef TARGET_NR_madvise1 { TARGET_NR_madvise1, "madvise1" , NULL, NULL, NULL }, From f93b76958a358e82bfdd1474598fe75184afeb4d Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 6 Sep 2022 02:08:38 +0200 Subject: [PATCH 16/38] linux-user: Passthrough MADV_DONTNEED for certain file mappings This is a follow-up for commit 892a4f6a750a ("linux-user: Add partial support for MADV_DONTNEED"), which added passthrough for anonymous mappings. File mappings can be handled in a similar manner. In order to do that, mark pages, for which mmap() was passed through, with PAGE_PASSTHROUGH, and then allow madvise() passthrough for these pages. Drop the explicit PAGE_ANON check, since anonymous mappings are expected to have PAGE_PASSTHROUGH anyway. Add PAGE_PASSTHROUGH to PAGE_STICKY in order to keep it on mprotect(). Signed-off-by: Ilya Leoshkevich Reviewed-by: Richard Henderson Message-Id: <20220725125043.43048-1-iii@linux.ibm.com> Message-Id: <20220906000839.1672934-5-iii@linux.ibm.com> Signed-off-by: Laurent Vivier --- accel/tcg/translate-all.c | 2 +- include/exec/cpu-all.h | 6 ++++++ linux-user/mmap.c | 27 ++++++++++++++++++++++----- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index f5e8592d4a472..d71d04d338db1 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -2258,7 +2258,7 @@ int page_get_flags(target_ulong address) #ifndef PAGE_TARGET_STICKY #define PAGE_TARGET_STICKY 0 #endif -#define PAGE_STICKY (PAGE_ANON | PAGE_TARGET_STICKY) +#define PAGE_STICKY (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY) /* Modify the flags of a page and invalidate the code if necessary. The flag PAGE_WRITE_ORG is positioned automatically depending diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index 491629b9ba7a3..16b7df41bf5ea 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -262,6 +262,12 @@ extern const TargetPageBits target_page; #define PAGE_TARGET_1 0x0200 #define PAGE_TARGET_2 0x0400 +/* + * For linux-user, indicates that the page is mapped with the same semantics + * in both guest and host. + */ +#define PAGE_PASSTHROUGH 0x0800 + #if defined(CONFIG_USER_ONLY) void page_dump(FILE *f); diff --git a/linux-user/mmap.c b/linux-user/mmap.c index 942bec7e8528e..e557f3626e5db 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -431,7 +431,8 @@ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align) abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, int flags, int fd, abi_ulong offset) { - abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len; + abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len, + passthrough_start = -1, passthrough_end = -1; int page_flags, host_prot; mmap_lock(); @@ -544,6 +545,8 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, host_start += offset - host_offset; } start = h2g(host_start); + passthrough_start = start; + passthrough_end = start + len; } else { if (start & ~TARGET_PAGE_MASK) { errno = EINVAL; @@ -626,6 +629,8 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, host_prot, flags, fd, offset1); if (p == MAP_FAILED) goto fail; + passthrough_start = real_start; + passthrough_end = real_end; } } the_end1: @@ -633,7 +638,18 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot, page_flags |= PAGE_ANON; } page_flags |= PAGE_RESET; - page_set_flags(start, start + len, page_flags); + if (passthrough_start == passthrough_end) { + page_set_flags(start, start + len, page_flags); + } else { + if (start < passthrough_start) { + page_set_flags(start, passthrough_start, page_flags); + } + page_set_flags(passthrough_start, passthrough_end, + page_flags | PAGE_PASSTHROUGH); + if (passthrough_end < start + len) { + page_set_flags(passthrough_end, start + len, page_flags); + } + } the_end: trace_target_mmap_complete(start); if (qemu_loglevel_mask(CPU_LOG_PAGE)) { @@ -852,7 +868,7 @@ static bool can_passthrough_madv_dontneed(abi_ulong start, abi_ulong end) } for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) { - if (!(page_get_flags(addr) & PAGE_ANON)) { + if (!(page_get_flags(addr) & PAGE_PASSTHROUGH)) { return false; } } @@ -894,8 +910,9 @@ abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice) * This is a hint, so ignoring and returning success is ok. * * This breaks MADV_DONTNEED, completely implementing which is quite - * complicated. However, there is one low-hanging fruit: host-page-aligned - * anonymous mappings. In this case passthrough is safe, so do it. + * complicated. However, there is one low-hanging fruit: mappings that are + * known to have the same semantics in the host and the guest. In this case + * passthrough is safe, so do it. */ mmap_lock(); if (advice == TARGET_MADV_DONTNEED && From 38b870cc8c5e9d079649d8769c8f271f28953d24 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 6 Sep 2022 02:08:39 +0200 Subject: [PATCH 17/38] tests/tcg/linux-test: Add linux-madvise test Add a test that checks madvise(MADV_DONTNEED) behavior with anonymous and file mappings in order to prevent regressions. Signed-off-by: Ilya Leoshkevich Reviewed-by: Richard Henderson Message-Id: <20220906000839.1672934-6-iii@linux.ibm.com> Signed-off-by: Laurent Vivier --- tests/tcg/multiarch/linux/linux-madvise.c | 70 +++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 tests/tcg/multiarch/linux/linux-madvise.c diff --git a/tests/tcg/multiarch/linux/linux-madvise.c b/tests/tcg/multiarch/linux/linux-madvise.c new file mode 100644 index 0000000000000..29d0997e687c1 --- /dev/null +++ b/tests/tcg/multiarch/linux/linux-madvise.c @@ -0,0 +1,70 @@ +#include +#include +#include +#include + +static void test_anonymous(void) +{ + int pagesize = getpagesize(); + char *page; + int ret; + + page = mmap(NULL, pagesize, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + assert(page != MAP_FAILED); + + /* Check that mprotect() does not interfere with MADV_DONTNEED. */ + ret = mprotect(page, pagesize, PROT_READ | PROT_WRITE); + assert(ret == 0); + + /* Check that MADV_DONTNEED clears the page. */ + *page = 42; + ret = madvise(page, pagesize, MADV_DONTNEED); + assert(ret == 0); + assert(*page == 0); + + ret = munmap(page, pagesize); + assert(ret == 0); +} + +static void test_file(void) +{ + char tempname[] = "/tmp/.cmadviseXXXXXX"; + int pagesize = getpagesize(); + ssize_t written; + char c = 42; + char *page; + int ret; + int fd; + + fd = mkstemp(tempname); + assert(fd != -1); + ret = unlink(tempname); + assert(ret == 0); + written = write(fd, &c, sizeof(c)); + assert(written == sizeof(c)); + page = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE, fd, 0); + assert(page != MAP_FAILED); + + /* Check that mprotect() does not interfere with MADV_DONTNEED. */ + ret = mprotect(page, pagesize, PROT_READ | PROT_WRITE); + assert(ret == 0); + + /* Check that MADV_DONTNEED resets the page. */ + *page = 0; + ret = madvise(page, pagesize, MADV_DONTNEED); + assert(ret == 0); + assert(*page == c); + + ret = munmap(page, pagesize); + assert(ret == 0); + ret = close(fd); + assert(ret == 0); +} + +int main(void) +{ + test_anonymous(); + test_file(); + + return EXIT_SUCCESS; +} From d124853bd73057cd6a60c810413f1a416bd04d34 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 24 Sep 2022 13:44:55 +0200 Subject: [PATCH 18/38] linux-user: Fix TARGET_PROT_SEM for XTENSA The xtensa platform has a value of 0x10 for PROT_SEM. Signed-off-by: Helge Deller Reviewed-by: Laurent Vivier Message-Id: <20220924114501.21767-2-deller@gmx.de> Signed-off-by: Laurent Vivier --- linux-user/syscall_defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h index 85b0f33e91d7a..1e3577bfa56ff 100644 --- a/linux-user/syscall_defs.h +++ b/linux-user/syscall_defs.h @@ -1246,7 +1246,7 @@ struct target_winsize { #include "termbits.h" -#if defined(TARGET_MIPS) +#if defined(TARGET_MIPS) || defined(TARGET_XTENSA) #define TARGET_PROT_SEM 0x10 #else #define TARGET_PROT_SEM 0x08 From 785783bab1ae879817a26f6757a997ebdc5e89c5 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 24 Sep 2022 13:44:56 +0200 Subject: [PATCH 19/38] linux-user: Add proper strace format strings for getdents()/getdents64() Signed-off-by: Helge Deller Message-Id: <20220924114501.21767-3-deller@gmx.de> Signed-off-by: Laurent Vivier --- linux-user/strace.list | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/linux-user/strace.list b/linux-user/strace.list index c6f748505a9ec..f8a771b4f2561 100644 --- a/linux-user/strace.list +++ b/linux-user/strace.list @@ -279,10 +279,10 @@ { TARGET_NR_getcwd, "getcwd" , "%s(%p,%d)", NULL, NULL }, #endif #ifdef TARGET_NR_getdents -{ TARGET_NR_getdents, "getdents" , NULL, NULL, NULL }, +{ TARGET_NR_getdents, "getdents" , "%s(%d,%p,%u)", NULL, NULL }, #endif #ifdef TARGET_NR_getdents64 -{ TARGET_NR_getdents64, "getdents64" , NULL, NULL, NULL }, +{ TARGET_NR_getdents64, "getdents64" , "%s(%d,%p,%u)", NULL, NULL }, #endif #ifdef TARGET_NR_getdomainname { TARGET_NR_getdomainname, "getdomainname" , NULL, NULL, NULL }, From 47393189ce9e23b560865e361f4ab8fb93a904d0 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 24 Sep 2022 13:44:57 +0200 Subject: [PATCH 20/38] linux-user/hppa: Add signal trampoline for hppa target In Linux kernel v5.18 the vDSO for signal trampoline was added. This code mimiks the bare minimum of this vDSO and thus avoids that the parisc emulation needs executable stacks. Signed-off-by: Helge Deller Message-Id: <20220924114501.21767-4-deller@gmx.de> Signed-off-by: Laurent Vivier --- linux-user/hppa/signal.c | 41 +++++++++++++++++++++++++-------- linux-user/hppa/target_signal.h | 14 +---------- 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/linux-user/hppa/signal.c b/linux-user/hppa/signal.c index 962f551c04295..396e310dc92df 100644 --- a/linux-user/hppa/signal.c +++ b/linux-user/hppa/signal.c @@ -41,7 +41,7 @@ struct target_ucontext { }; struct target_rt_sigframe { - abi_uint tramp[9]; + abi_uint tramp[2]; /* syscall restart return address */ target_siginfo_t info; struct target_ucontext uc; /* hidden location of upper halves of pa2.0 64-bit gregs */ @@ -101,9 +101,15 @@ static void restore_sigcontext(CPUArchState *env, struct target_sigcontext *sc) __get_user(env->cr[CR_SAR], &sc->sc_sar); } -/* No, this doesn't look right, but it's copied straight from the kernel. */ +#if TARGET_ABI_BITS == 32 +#define SIGFRAME 64 +#define FUNCTIONCALLFRAME 48 +#else +#define SIGFRAME 128 +#define FUNCTIONCALLFRAME 96 +#endif #define PARISC_RT_SIGFRAME_SIZE32 \ - ((sizeof(struct target_rt_sigframe) + 48 + 64) & -64) + ((sizeof(struct target_rt_sigframe) + FUNCTIONCALLFRAME + SIGFRAME) & -SIGFRAME) void setup_rt_frame(int sig, struct target_sigaction *ka, target_siginfo_t *info, @@ -118,7 +124,7 @@ void setup_rt_frame(int sig, struct target_sigaction *ka, if ((ka->sa_flags & TARGET_SA_ONSTACK) && !sas_ss_flags(sp)) { sp = (ts->sigaltstack_used.ss_sp + 0x7f) & ~0x3f; } - frame_addr = QEMU_ALIGN_UP(sp, 64); + frame_addr = QEMU_ALIGN_UP(sp, SIGFRAME); sp = frame_addr + PARISC_RT_SIGFRAME_SIZE32; trace_user_setup_rt_frame(env, frame_addr); @@ -139,14 +145,9 @@ void setup_rt_frame(int sig, struct target_sigaction *ka, setup_sigcontext(&frame->uc.tuc_mcontext, env); - __put_user(0x34190000, frame->tramp + 0); /* ldi 0,%r25 */ - __put_user(0x3414015a, frame->tramp + 1); /* ldi __NR_rt_sigreturn,%r20 */ - __put_user(0xe4008200, frame->tramp + 2); /* be,l 0x100(%sr2,%r0) */ - __put_user(0x08000240, frame->tramp + 3); /* nop */ - unlock_user_struct(frame, frame_addr, 1); - env->gr[2] = h2g(frame->tramp); + env->gr[2] = default_rt_sigreturn; env->gr[30] = sp; env->gr[26] = sig; env->gr[25] = h2g(&frame->info); @@ -197,3 +198,23 @@ long do_rt_sigreturn(CPUArchState *env) force_sig(TARGET_SIGSEGV); return -QEMU_ESIGRETURN; } + +void setup_sigtramp(abi_ulong sigtramp_page) +{ + uint32_t *tramp = lock_user(VERIFY_WRITE, sigtramp_page, 6*4, 0); + abi_ulong SIGFRAME_CONTEXT_REGS32; + assert(tramp != NULL); + + SIGFRAME_CONTEXT_REGS32 = offsetof(struct target_rt_sigframe, uc.tuc_mcontext); + SIGFRAME_CONTEXT_REGS32 -= PARISC_RT_SIGFRAME_SIZE32; + + __put_user(SIGFRAME_CONTEXT_REGS32, tramp + 0); + __put_user(0x08000240, tramp + 1); /* nop - b/c dwarf2 unwind routines */ + __put_user(0x34190000, tramp + 2); /* ldi 0, %r25 (in_syscall=0) */ + __put_user(0x3414015a, tramp + 3); /* ldi __NR_rt_sigreturn, %r20 */ + __put_user(0xe4008200, tramp + 4); /* ble 0x100(%sr2, %r0) */ + __put_user(0x08000240, tramp + 5); /* nop */ + + default_rt_sigreturn = (sigtramp_page + 8) | 3; + unlock_user(tramp, sigtramp_page, 6*4); +} diff --git a/linux-user/hppa/target_signal.h b/linux-user/hppa/target_signal.h index af6c2fce589d3..190bb3d653b62 100644 --- a/linux-user/hppa/target_signal.h +++ b/linux-user/hppa/target_signal.h @@ -70,18 +70,6 @@ typedef struct target_sigaltstack { /* mask for all SS_xxx flags */ #define TARGET_SS_FLAG_BITS TARGET_SS_AUTODISARM -/* - * We cannot use a bare sigtramp page for hppa-linux. - * - * Unlike other guests where we use the instructions at PC to validate - * an offset from SP, the hppa libgcc signal frame fallback unwinding uses - * the PC address itself to find the frame. This is due to the fact that - * the hppa grows the stack upward, and the frame is of unknown size. - * - * TODO: We should be able to use a VDSO to address this, by providing - * proper unwind info for the sigtramp code, at which point the fallback - * unwinder will not be used. - */ -#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 0 +#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1 #endif /* HPPA_TARGET_SIGNAL_H */ From f43882052f330ce5f5a1a2553466df5aa0808fa0 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 24 Sep 2022 13:44:58 +0200 Subject: [PATCH 21/38] linux-user/hppa: Drop stack guard page on hppa target The stack-overflow check when building the "grep" debian package fails on the debian hppa target. Reason is, that the guard page at the top of the stack (which is added by qemu) prevents the fault handler in the grep program to properly detect the stack overflow. The Linux kernel on a physical machine doesn't install a guard page either, so drop it and as such fix the build of "grep". Signed-off-by: Helge Deller Message-Id: <20220924114501.21767-5-deller@gmx.de> Signed-off-by: Laurent Vivier --- linux-user/elfload.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index ba5c4c02e5911..c7e3f1d47c168 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -2096,9 +2096,15 @@ static abi_ulong setup_arg_pages(struct linux_binprm *bprm, if (size < STACK_LOWER_LIMIT) { size = STACK_LOWER_LIMIT; } - guard = TARGET_PAGE_SIZE; - if (guard < qemu_real_host_page_size()) { - guard = qemu_real_host_page_size(); + + if (STACK_GROWS_DOWN) { + guard = TARGET_PAGE_SIZE; + if (guard < qemu_real_host_page_size()) { + guard = qemu_real_host_page_size(); + } + } else { + /* no guard page for hppa target where stack grows upwards. */ + guard = 0; } prot = PROT_READ | PROT_WRITE; @@ -2118,7 +2124,6 @@ static abi_ulong setup_arg_pages(struct linux_binprm *bprm, info->stack_limit = error + guard; return info->stack_limit + size - sizeof(void *); } else { - target_mprotect(error + size, guard, PROT_NONE); info->stack_limit = error + size; return error; } From 0a3346b5938530178e20abea5219e80130cf0204 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 24 Sep 2022 13:44:59 +0200 Subject: [PATCH 22/38] linux-user/hppa: Increase guest stack size to 80MB for hppa target The hppa target requires a much bigger stack than many other targets, and the Linux kernel allocates 80 MB by default for it. This patch increases the guest stack for hppa to 80MB, and prevents that this default stack size gets reduced by a lower stack limit on the host. Since the stack grows upwards on hppa, the stack_limit value marks the upper boundary of the stack. Fix the output of /proc/self/maps (in the guest) to show the [stack] marker on the correct memory area. Signed-off-by: Helge Deller Message-Id: <20220924114501.21767-6-deller@gmx.de> Signed-off-by: Laurent Vivier --- linux-user/hppa/target_syscall.h | 2 ++ linux-user/main.c | 9 +++++++-- linux-user/syscall.c | 4 ++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/linux-user/hppa/target_syscall.h b/linux-user/hppa/target_syscall.h index 4b382c1fcf4ab..9a8f8ca628175 100644 --- a/linux-user/hppa/target_syscall.h +++ b/linux-user/hppa/target_syscall.h @@ -26,4 +26,6 @@ struct target_pt_regs { #define TARGET_MCL_FUTURE 2 #define TARGET_MCL_ONFAULT 4 +#define TARGET_DEFAULT_STACK_SIZE 80 * 1024 * 1024UL + #endif /* HPPA_TARGET_SYSCALL_H */ diff --git a/linux-user/main.c b/linux-user/main.c index e44bdb17b853b..88fccfe261499 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -124,10 +124,14 @@ static void usage(int exitcode); static const char *interp_prefix = CONFIG_QEMU_INTERP_PREFIX; const char *qemu_uname_release; +#if !defined(TARGET_DEFAULT_STACK_SIZE) /* XXX: on x86 MAP_GROWSDOWN only works if ESP <= address + 32, so we allocate a bigger stack. Need a better solution, for example by remapping the process stack directly at the right place */ -unsigned long guest_stack_size = 8 * 1024 * 1024UL; +#define TARGET_DEFAULT_STACK_SIZE 8 * 1024 * 1024UL +#endif + +unsigned long guest_stack_size = TARGET_DEFAULT_STACK_SIZE; /***********************************************************/ /* Helper routines for implementing atomic operations. */ @@ -668,7 +672,8 @@ int main(int argc, char **argv, char **envp) struct rlimit lim; if (getrlimit(RLIMIT_STACK, &lim) == 0 && lim.rlim_cur != RLIM_INFINITY - && lim.rlim_cur == (target_long)lim.rlim_cur) { + && lim.rlim_cur == (target_long)lim.rlim_cur + && lim.rlim_cur > guest_stack_size) { guest_stack_size = lim.rlim_cur; } } diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 850410c9b5537..f87b36b2491f3 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -8036,7 +8036,11 @@ static int open_self_maps(CPUArchState *cpu_env, int fd) continue; } +#ifdef TARGET_HPPA + if (h2g(max) == ts->info->stack_limit) { +#else if (h2g(min) == ts->info->stack_limit) { +#endif path = "[stack]"; } else { path = e->path; From 4c184e70ad01289f89a9a780d154b00d6a86cccd Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 24 Sep 2022 13:45:00 +0200 Subject: [PATCH 23/38] linux-user/hppa: Allow PROT_GROWSUP and PROT_GROWSDOWN in mprotect() The hppa platform uses an upwards-growing stack and required in Linux kernels < 5.18 an executable stack for signal processing. For that some executables and libraries are marked to have an executable stack, for which glibc uses the mprotect() syscall to mark the stack like this: mprotect(xfa000000,4096,PROT_EXEC|PROT_READ|PROT_WRITE|PROT_GROWSUP). Currently qemu will return -TARGET_EINVAL for this syscall because of the checks in validate_prot_to_pageflags(), which doesn't allow the PROT_GROWSUP or PROT_GROWSDOWN flags and thus triggers this error in the guest: error while loading shared libraries: libc.so.6: cannot enable executable stack as shared object requires: Invalid argument Allow mprotect() to handle both flags and thus fix the guest. The glibc tst-execstack testcase can be used to reproduce the issue. Signed-off-by: Helge Deller Message-Id: <20220924114501.21767-7-deller@gmx.de> [lvivier: s/elif TARGET_HPPA/elif defined(TARGET_HPPA)/] Signed-off-by: Laurent Vivier --- linux-user/mmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index e557f3626e5db..28f3bc85ed45b 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -106,6 +106,8 @@ static int validate_prot_to_pageflags(int *host_prot, int prot) page_flags |= PAGE_MTE; } } +#elif defined(TARGET_HPPA) + valid |= PROT_GROWSDOWN | PROT_GROWSUP; #endif return prot & ~valid ? 0 : page_flags; From 2319a53758efd0a2a1b45d94d3a3b721730cbc00 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 24 Sep 2022 13:45:01 +0200 Subject: [PATCH 24/38] linux-user/hppa: Fix setup_sigcontext() We don't emulate a preemptive kernel on this level, and the hppa architecture doesn't allow context switches on the gateway page. So we always have to return to sc_iaoq[] and not to gr[31]. This fixes the remaining random segfaults which still occured. Signed-off-by: Helge Deller Message-Id: <20220924114501.21767-8-deller@gmx.de> Signed-off-by: Laurent Vivier --- linux-user/hppa/signal.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/linux-user/hppa/signal.c b/linux-user/hppa/signal.c index 396e310dc92df..f253a15864619 100644 --- a/linux-user/hppa/signal.c +++ b/linux-user/hppa/signal.c @@ -49,23 +49,13 @@ struct target_rt_sigframe { static void setup_sigcontext(struct target_sigcontext *sc, CPUArchState *env) { - int flags = 0; int i; - /* ??? if on_sig_stack, flags |= 1 (PARISC_SC_FLAG_ONSTACK). */ - - if (env->iaoq_f < TARGET_PAGE_SIZE) { - /* In the gateway page, executing a syscall. */ - flags |= 2; /* PARISC_SC_FLAG_IN_SYSCALL */ - __put_user(env->gr[31], &sc->sc_iaoq[0]); - __put_user(env->gr[31] + 4, &sc->sc_iaoq[1]); - } else { - __put_user(env->iaoq_f, &sc->sc_iaoq[0]); - __put_user(env->iaoq_b, &sc->sc_iaoq[1]); - } + __put_user(env->iaoq_f, &sc->sc_iaoq[0]); + __put_user(env->iaoq_b, &sc->sc_iaoq[1]); __put_user(0, &sc->sc_iasq[0]); __put_user(0, &sc->sc_iasq[1]); - __put_user(flags, &sc->sc_flags); + __put_user(0, &sc->sc_flags); __put_user(cpu_hppa_get_psw(env), &sc->sc_gr[0]); for (i = 1; i < 32; ++i) { From 9b9145f04d303354d8da1112577ece26aaf478dd Mon Sep 17 00:00:00 2001 From: fanwenjie Date: Wed, 31 Aug 2022 11:55:25 +0800 Subject: [PATCH 25/38] linux-user: fix bug about missing signum convert of sigqueue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: 66fb9763af ("basic signal handling") Fixes: cf8b8bfc50 ("linux-user: add support for rt_tgsigqueueinfo() system call") Signed-off-by: fanwenjie Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Laurent Vivier --- linux-user/syscall.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index f87b36b2491f3..54b29f3b406a5 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -9728,7 +9728,7 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1, } target_to_host_siginfo(&uinfo, p); unlock_user(p, arg3, 0); - ret = get_errno(sys_rt_sigqueueinfo(arg1, arg2, &uinfo)); + ret = get_errno(sys_rt_sigqueueinfo(arg1, target_to_host_signal(arg2), &uinfo)); } return ret; case TARGET_NR_rt_tgsigqueueinfo: @@ -9741,7 +9741,7 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1, } target_to_host_siginfo(&uinfo, p); unlock_user(p, arg4, 0); - ret = get_errno(sys_rt_tgsigqueueinfo(arg1, arg2, arg3, &uinfo)); + ret = get_errno(sys_rt_tgsigqueueinfo(arg1, arg2, target_to_host_signal(arg3), &uinfo)); } return ret; #ifdef TARGET_NR_sigreturn From 9e59899f8c75e3a7b327eb42ef61818d7f06992e Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 25 Jul 2022 12:00:35 +0100 Subject: [PATCH 26/38] linux-user: Don't assume 0 is not a valid host timer_t value For handling guest POSIX timers, we currently use an array g_posix_timers[], whose entries are a host timer_t value, or 0 for "this slot is unused". When the guest calls the timer_create syscall we look through the array for a slot containing 0, and use that for the new timer. This scheme assumes that host timer_t values can never be zero. This is unfortunately not a valid assumption -- for some host libc versions, timer_t values are simply indexes starting at 0. When using this kind of host libc, the effect is that the first and second timers end up sharing a slot, and so when the guest tries to operate on the first timer it changes the second timer instead. Rework the timer allocation code, so that: * the 'slot in use' indication uses a separate array from the host timer_t array * we grab the free slot atomically, to avoid races when multiple threads call timer_create simultaneously * releasing an allocated slot is abstracted out into a new free_host_timer_slot() function called in the correct places This fixes: * problems on hosts where timer_t 0 is valid * the FIXME in next_free_host_timer() about locking * bugs in the error paths in timer_create where we forgot to release the slot we grabbed, or forgot to free the host timer Reported-by: Jon Alduan Signed-off-by: Peter Maydell Message-Id: <20220725110035.1273441-1-peter.maydell@linaro.org> Signed-off-by: Laurent Vivier --- linux-user/syscall.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 54b29f3b406a5..e0e0f058121f1 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -525,20 +525,25 @@ _syscall4(int, sys_prlimit64, pid_t, pid, int, resource, #if defined(TARGET_NR_timer_create) /* Maximum of 32 active POSIX timers allowed at any one time. */ -static timer_t g_posix_timers[32] = { 0, } ; +#define GUEST_TIMER_MAX 32 +static timer_t g_posix_timers[GUEST_TIMER_MAX]; +static int g_posix_timer_allocated[GUEST_TIMER_MAX]; static inline int next_free_host_timer(void) { - int k ; - /* FIXME: Does finding the next free slot require a lock? */ - for (k = 0; k < ARRAY_SIZE(g_posix_timers); k++) { - if (g_posix_timers[k] == 0) { - g_posix_timers[k] = (timer_t) 1; + int k; + for (k = 0; k < ARRAY_SIZE(g_posix_timer_allocated); k++) { + if (qatomic_xchg(g_posix_timer_allocated + k, 1) == 0) { return k; } } return -1; } + +static inline void free_host_timer_slot(int id) +{ + qatomic_store_release(g_posix_timer_allocated + id, 0); +} #endif static inline int host_to_target_errno(int host_errno) @@ -12896,15 +12901,18 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1, phost_sevp = &host_sevp; ret = target_to_host_sigevent(phost_sevp, arg2); if (ret != 0) { + free_host_timer_slot(timer_index); return ret; } } ret = get_errno(timer_create(clkid, phost_sevp, phtimer)); if (ret) { - phtimer = NULL; + free_host_timer_slot(timer_index); } else { if (put_user(TIMER_MAGIC | timer_index, arg3, target_timer_t)) { + timer_delete(*phtimer); + free_host_timer_slot(timer_index); return -TARGET_EFAULT; } } @@ -13040,7 +13048,7 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1, } else { timer_t htimer = g_posix_timers[timerid]; ret = get_errno(timer_delete(htimer)); - g_posix_timers[timerid] = 0; + free_host_timer_slot(timerid); } return ret; } From 2941e0fa050314cd200f24e3b99da61440b106ab Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 17 Aug 2022 14:39:02 +0200 Subject: [PATCH 27/38] linux-user/s390x: Save/restore fpc when handling a signal Linux kernel does this in fpregs_store() and fpregs_load(), so qemu-user should do this as well. Found by running valgrind's none/tests/s390x/test_sig. Signed-off-by: Ilya Leoshkevich Reviewed-by: Richard Henderson Message-Id: <20220817123902.585623-1-iii@linux.ibm.com> Signed-off-by: Laurent Vivier --- linux-user/s390x/signal.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/linux-user/s390x/signal.c b/linux-user/s390x/signal.c index 4979c4b01791b..f72165576f3bf 100644 --- a/linux-user/s390x/signal.c +++ b/linux-user/s390x/signal.c @@ -146,6 +146,7 @@ static void save_sigregs(CPUS390XState *env, target_sigregs *sregs) * We have to store the fp registers to current->thread.fp_regs * to merge them with the emulated registers. */ + __put_user(env->fpc, &sregs->fpregs.fpc); for (i = 0; i < 16; i++) { __put_user(*get_freg(env, i), &sregs->fpregs.fprs[i]); } @@ -331,6 +332,7 @@ static void restore_sigregs(CPUS390XState *env, target_sigregs *sc) for (i = 0; i < 16; i++) { __get_user(env->aregs[i], &sc->regs.acrs[i]); } + __get_user(env->fpc, &sc->fpregs.fpc); for (i = 0; i < 16; i++) { __get_user(*get_freg(env, i), &sc->fpregs.fprs[i]); } From fcdc0ab4b4d09bac65c192726af1c1bcb28de8e4 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Wed, 3 Aug 2022 11:30:08 +0100 Subject: [PATCH 28/38] linux-user: Introduce stubs for ELF AT_BASE_PLATFORM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AT_BASE_PLATFORM is a elf auxiliary vector pointing to a string to pass some architecture information. See getauxval(3) man-page. Signed-off-by: Jiaxun Yang Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20220803103009.95972-2-jiaxun.yang@flygoat.com> Signed-off-by: Laurent Vivier --- linux-user/elfload.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index c7e3f1d47c168..581fbc858b48e 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -1776,6 +1776,10 @@ static inline void init_thread(struct target_pt_regs *regs, #endif /* TARGET_HEXAGON */ +#ifndef ELF_BASE_PLATFORM +#define ELF_BASE_PLATFORM (NULL) +#endif + #ifndef ELF_PLATFORM #define ELF_PLATFORM (NULL) #endif @@ -2220,8 +2224,8 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, int i; abi_ulong u_rand_bytes; uint8_t k_rand_bytes[16]; - abi_ulong u_platform; - const char *k_platform; + abi_ulong u_platform, u_base_platform; + const char *k_platform, *k_base_platform; const int n = sizeof(elf_addr_t); sp = p; @@ -2243,6 +2247,22 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, } } + u_base_platform = 0; + k_base_platform = ELF_BASE_PLATFORM; + if (k_base_platform) { + size_t len = strlen(k_base_platform) + 1; + if (STACK_GROWS_DOWN) { + sp -= (len + n - 1) & ~(n - 1); + u_base_platform = sp; + /* FIXME - check return value of memcpy_to_target() for failure */ + memcpy_to_target(sp, k_base_platform, len); + } else { + memcpy_to_target(sp, k_base_platform, len); + u_base_platform = sp; + sp += len + 1; + } + } + u_platform = 0; k_platform = ELF_PLATFORM; if (k_platform) { @@ -2284,6 +2304,8 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, } size = (DLINFO_ITEMS + 1) * 2; + if (k_base_platform) + size += 2; if (k_platform) size += 2; #ifdef DLINFO_ARCH_ITEMS @@ -2361,6 +2383,9 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, NEW_AUX_ENT(AT_HWCAP2, (abi_ulong) ELF_HWCAP2); #endif + if (u_base_platform) { + NEW_AUX_ENT(AT_BASE_PLATFORM, u_base_platform); + } if (u_platform) { NEW_AUX_ENT(AT_PLATFORM, u_platform); } From fbf47c18aa8687b1c6a929fbcd8bb36dfc386454 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Wed, 3 Aug 2022 11:30:09 +0100 Subject: [PATCH 29/38] linux-user: Set ELF_BASE_PLATFORM for MIPS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Match most appropriate base platform string based on insn_flags. Logic is aligned with aligned with set_isa() from arch/mips/kernel/cpu-probe.c in Linux kernel. Signed-off-by: Jiaxun Yang Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20220803103009.95972-3-jiaxun.yang@flygoat.com> Signed-off-by: Laurent Vivier --- linux-user/elfload.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 581fbc858b48e..20894b633f58d 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -1080,6 +1080,37 @@ static uint32_t get_elf_hwcap(void) #define elf_check_abi(x) (!((x) & EF_MIPS_ABI2)) #endif +#define ELF_BASE_PLATFORM get_elf_base_platform() + +#define MATCH_PLATFORM_INSN(_flags, _base_platform) \ + do { if ((cpu->env.insn_flags & (_flags)) == _flags) \ + { return _base_platform; } } while (0) + +static const char *get_elf_base_platform(void) +{ + MIPSCPU *cpu = MIPS_CPU(thread_cpu); + + /* 64 bit ISAs goes first */ + MATCH_PLATFORM_INSN(CPU_MIPS64R6, "mips64r6"); + MATCH_PLATFORM_INSN(CPU_MIPS64R5, "mips64r5"); + MATCH_PLATFORM_INSN(CPU_MIPS64R2, "mips64r2"); + MATCH_PLATFORM_INSN(CPU_MIPS64R1, "mips64"); + MATCH_PLATFORM_INSN(CPU_MIPS5, "mips5"); + MATCH_PLATFORM_INSN(CPU_MIPS4, "mips4"); + MATCH_PLATFORM_INSN(CPU_MIPS3, "mips3"); + + /* 32 bit ISAs */ + MATCH_PLATFORM_INSN(CPU_MIPS32R6, "mips32r6"); + MATCH_PLATFORM_INSN(CPU_MIPS32R5, "mips32r5"); + MATCH_PLATFORM_INSN(CPU_MIPS32R2, "mips32r2"); + MATCH_PLATFORM_INSN(CPU_MIPS32R1, "mips32"); + MATCH_PLATFORM_INSN(CPU_MIPS2, "mips2"); + + /* Fallback */ + return "mips"; +} +#undef MATCH_PLATFORM_INSN + static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) { From 0fbc0f8da1ec69991472a42ba601376e3f7c6b2d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 28 Aug 2022 19:09:59 -0700 Subject: [PATCH 30/38] linux-user: Combine do_futex and do_futex_time64 Pass a boolean to select between time32 and time64. Signed-off-by: Richard Henderson Reviewed-by: Laurent Vivier Message-Id: <20220829021006.67305-2-richard.henderson@linaro.org> Signed-off-by: Laurent Vivier --- linux-user/syscall.c | 67 ++++++++------------------------------------ 1 file changed, 11 insertions(+), 56 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index e0e0f058121f1..af3a605fc458c 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -7768,9 +7768,10 @@ static int do_safe_futex(int *uaddr, int op, int val, futexes locally would make futexes shared between multiple processes tricky. However they're probably useless because guest atomic operations won't work either. */ -#if defined(TARGET_NR_futex) -static int do_futex(CPUState *cpu, target_ulong uaddr, int op, int val, - target_ulong timeout, target_ulong uaddr2, int val3) +#if defined(TARGET_NR_futex) || defined(TARGET_NR_futex_time64) +static int do_futex(CPUState *cpu, bool time64, target_ulong uaddr, + int op, int val, target_ulong timeout, + target_ulong uaddr2, int val3) { struct timespec ts, *pts; int base_op; @@ -7787,7 +7788,11 @@ static int do_futex(CPUState *cpu, target_ulong uaddr, int op, int val, case FUTEX_WAIT_BITSET: if (timeout) { pts = &ts; - target_to_host_timespec(pts, timeout); + if (time64 + ? target_to_host_timespec64(pts, timeout) + : target_to_host_timespec(pts, timeout)) { + return -TARGET_EFAULT; + } } else { pts = NULL; } @@ -7817,56 +7822,6 @@ static int do_futex(CPUState *cpu, target_ulong uaddr, int op, int val, } #endif -#if defined(TARGET_NR_futex_time64) -static int do_futex_time64(CPUState *cpu, target_ulong uaddr, int op, - int val, target_ulong timeout, - target_ulong uaddr2, int val3) -{ - struct timespec ts, *pts; - int base_op; - - /* ??? We assume FUTEX_* constants are the same on both host - and target. */ -#ifdef FUTEX_CMD_MASK - base_op = op & FUTEX_CMD_MASK; -#else - base_op = op; -#endif - switch (base_op) { - case FUTEX_WAIT: - case FUTEX_WAIT_BITSET: - if (timeout) { - pts = &ts; - if (target_to_host_timespec64(pts, timeout)) { - return -TARGET_EFAULT; - } - } else { - pts = NULL; - } - return do_safe_futex(g2h(cpu, uaddr), op, - tswap32(val), pts, NULL, val3); - case FUTEX_WAKE: - return do_safe_futex(g2h(cpu, uaddr), op, val, NULL, NULL, 0); - case FUTEX_FD: - return do_safe_futex(g2h(cpu, uaddr), op, val, NULL, NULL, 0); - case FUTEX_REQUEUE: - case FUTEX_CMP_REQUEUE: - case FUTEX_WAKE_OP: - /* For FUTEX_REQUEUE, FUTEX_CMP_REQUEUE, and FUTEX_WAKE_OP, the - TIMEOUT parameter is interpreted as a uint32_t by the kernel. - But the prototype takes a `struct timespec *'; insert casts - to satisfy the compiler. We do not need to tswap TIMEOUT - since it's not compared to guest memory. */ - pts = (struct timespec *)(uintptr_t) timeout; - return do_safe_futex(g2h(cpu, uaddr), op, val, pts, g2h(cpu, uaddr2), - (base_op == FUTEX_CMP_REQUEUE - ? tswap32(val3) : val3)); - default: - return -TARGET_ENOSYS; - } -} -#endif - #if defined(TARGET_NR_name_to_handle_at) && defined(CONFIG_OPEN_BY_HANDLE) static abi_long do_name_to_handle_at(abi_long dirfd, abi_long pathname, abi_long handle, abi_long mount_id, @@ -12372,11 +12327,11 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1, #endif #ifdef TARGET_NR_futex case TARGET_NR_futex: - return do_futex(cpu, arg1, arg2, arg3, arg4, arg5, arg6); + return do_futex(cpu, false, arg1, arg2, arg3, arg4, arg5, arg6); #endif #ifdef TARGET_NR_futex_time64 case TARGET_NR_futex_time64: - return do_futex_time64(cpu, arg1, arg2, arg3, arg4, arg5, arg6); + return do_futex(cpu, true, arg1, arg2, arg3, arg4, arg5, arg6); #endif #ifdef CONFIG_INOTIFY #if defined(TARGET_NR_inotify_init) From 57b9ccd4c03dc5f175c4bd7d7406a15fb4d24aca Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 28 Aug 2022 19:10:00 -0700 Subject: [PATCH 31/38] linux-user: Sink call to do_safe_futex Leave only the argument adjustments within the shift, and sink the actual syscall to the end. Sink the timespec conversion as well, as there will be more users. Signed-off-by: Richard Henderson Reviewed-by: Laurent Vivier Message-Id: <20220829021006.67305-3-richard.henderson@linaro.org> Signed-off-by: Laurent Vivier --- linux-user/syscall.c | 60 +++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index af3a605fc458c..17e17f4804566 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -7773,11 +7773,11 @@ static int do_futex(CPUState *cpu, bool time64, target_ulong uaddr, int op, int val, target_ulong timeout, target_ulong uaddr2, int val3) { - struct timespec ts, *pts; + struct timespec ts, *pts = NULL; + void *haddr2 = NULL; int base_op; - /* ??? We assume FUTEX_* constants are the same on both host - and target. */ + /* We assume FUTEX_* constants are the same on both host and target. */ #ifdef FUTEX_CMD_MASK base_op = op & FUTEX_CMD_MASK; #else @@ -7786,39 +7786,41 @@ static int do_futex(CPUState *cpu, bool time64, target_ulong uaddr, switch (base_op) { case FUTEX_WAIT: case FUTEX_WAIT_BITSET: - if (timeout) { - pts = &ts; - if (time64 - ? target_to_host_timespec64(pts, timeout) - : target_to_host_timespec(pts, timeout)) { - return -TARGET_EFAULT; - } - } else { - pts = NULL; - } - return do_safe_futex(g2h(cpu, uaddr), - op, tswap32(val), pts, NULL, val3); + val = tswap32(val); + break; case FUTEX_WAKE: - return do_safe_futex(g2h(cpu, uaddr), - op, val, NULL, NULL, 0); + timeout = 0; + break; case FUTEX_FD: - return do_safe_futex(g2h(cpu, uaddr), - op, val, NULL, NULL, 0); - case FUTEX_REQUEUE: + timeout = 0; + break; case FUTEX_CMP_REQUEUE: + val3 = tswap32(val3); + /* fall through */ + case FUTEX_REQUEUE: case FUTEX_WAKE_OP: - /* For FUTEX_REQUEUE, FUTEX_CMP_REQUEUE, and FUTEX_WAKE_OP, the - TIMEOUT parameter is interpreted as a uint32_t by the kernel. - But the prototype takes a `struct timespec *'; insert casts - to satisfy the compiler. We do not need to tswap TIMEOUT - since it's not compared to guest memory. */ - pts = (struct timespec *)(uintptr_t) timeout; - return do_safe_futex(g2h(cpu, uaddr), op, val, pts, g2h(cpu, uaddr2), - (base_op == FUTEX_CMP_REQUEUE - ? tswap32(val3) : val3)); + /* + * For these, the 4th argument is not TIMEOUT, but VAL2. + * But the prototype of do_safe_futex takes a pointer, so + * insert casts to satisfy the compiler. We do not need + * to tswap VAL2 since it's not compared to guest memory. + */ + pts = (struct timespec *)(uintptr_t)timeout; + timeout = 0; + haddr2 = g2h(cpu, uaddr2); + break; default: return -TARGET_ENOSYS; } + if (timeout) { + pts = &ts; + if (time64 + ? target_to_host_timespec64(pts, timeout) + : target_to_host_timespec(pts, timeout)) { + return -TARGET_EFAULT; + } + } + return do_safe_futex(g2h(cpu, uaddr), op, val, pts, haddr2, val3); } #endif From a6180f8aede726c1648b1ae3a602058c93f859c8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 28 Aug 2022 19:10:01 -0700 Subject: [PATCH 32/38] linux-user: Implement FUTEX_WAKE_BITSET Signed-off-by: Richard Henderson Reviewed-by: Laurent Vivier Message-Id: <20220829021006.67305-4-richard.henderson@linaro.org> Signed-off-by: Laurent Vivier --- linux-user/syscall.c | 1 + 1 file changed, 1 insertion(+) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 17e17f4804566..e2b6b564e6cbb 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -7789,6 +7789,7 @@ static int do_futex(CPUState *cpu, bool time64, target_ulong uaddr, val = tswap32(val); break; case FUTEX_WAKE: + case FUTEX_WAKE_BITSET: timeout = 0; break; case FUTEX_FD: From 0f9467311260c23eed758f97c75d83f1815acb29 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 28 Aug 2022 19:10:02 -0700 Subject: [PATCH 33/38] linux-user: Convert signal number for FUTEX_FD The val argument to FUTEX_FD is a signal number. Convert to match the host, as it will be converted back when the signal is delivered. Signed-off-by: Richard Henderson Reviewed-by: Laurent Vivier Message-Id: <20220829021006.67305-5-richard.henderson@linaro.org> Signed-off-by: Laurent Vivier --- linux-user/syscall.c | 1 + 1 file changed, 1 insertion(+) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index e2b6b564e6cbb..3f144e3c1f5df 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -7793,6 +7793,7 @@ static int do_futex(CPUState *cpu, bool time64, target_ulong uaddr, timeout = 0; break; case FUTEX_FD: + val = target_to_host_signal(val); timeout = 0; break; case FUTEX_CMP_REQUEUE: From c72a90df47b990572ac4fdb7e918005466446db9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 28 Aug 2022 19:10:03 -0700 Subject: [PATCH 34/38] linux-user: Implement PI futexes Define the missing FUTEX_* constants in syscall_defs.h Signed-off-by: Richard Henderson Reviewed-by: Laurent Vivier Message-Id: <20220829021006.67305-6-richard.henderson@linaro.org> Signed-off-by: Laurent Vivier --- linux-user/syscall.c | 10 ++++++++++ linux-user/syscall_defs.h | 3 +++ 2 files changed, 13 insertions(+) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 3f144e3c1f5df..2e954d8dbd9ed 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -7788,8 +7788,17 @@ static int do_futex(CPUState *cpu, bool time64, target_ulong uaddr, case FUTEX_WAIT_BITSET: val = tswap32(val); break; + case FUTEX_WAIT_REQUEUE_PI: + val = tswap32(val); + haddr2 = g2h(cpu, uaddr2); + break; + case FUTEX_LOCK_PI: + case FUTEX_LOCK_PI2: + break; case FUTEX_WAKE: case FUTEX_WAKE_BITSET: + case FUTEX_TRYLOCK_PI: + case FUTEX_UNLOCK_PI: timeout = 0; break; case FUTEX_FD: @@ -7797,6 +7806,7 @@ static int do_futex(CPUState *cpu, bool time64, target_ulong uaddr, timeout = 0; break; case FUTEX_CMP_REQUEUE: + case FUTEX_CMP_REQUEUE_PI: val3 = tswap32(val3); /* fall through */ case FUTEX_REQUEUE: diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h index 1e3577bfa56ff..01ee10a88fe02 100644 --- a/linux-user/syscall_defs.h +++ b/linux-user/syscall_defs.h @@ -2699,6 +2699,9 @@ struct target_drm_i915_getparam { #define FUTEX_TRYLOCK_PI 8 #define FUTEX_WAIT_BITSET 9 #define FUTEX_WAKE_BITSET 10 +#define FUTEX_WAIT_REQUEUE_PI 11 +#define FUTEX_CMP_REQUEUE_PI 12 +#define FUTEX_LOCK_PI2 13 #define FUTEX_PRIVATE_FLAG 128 #define FUTEX_CLOCK_REALTIME 256 From 53b578f31fda28b2cf83ca28d0c7a5159416d32b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 28 Aug 2022 19:10:04 -0700 Subject: [PATCH 35/38] linux-user: Update print_futex_op Use a table for the names; print unknown values in hex, since the value contains flags. Signed-off-by: Richard Henderson Message-Id: <20220829021006.67305-7-richard.henderson@linaro.org> [lv: update print_futex() according to "linux-user: Show timespec on strace for futex()"] Signed-off-by: Laurent Vivier --- linux-user/strace.c | 70 +++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 37 deletions(-) diff --git a/linux-user/strace.c b/linux-user/strace.c index 2deb84a2c1061..faa733125624f 100644 --- a/linux-user/strace.c +++ b/linux-user/strace.c @@ -3710,44 +3710,37 @@ print_munmap(CPUArchState *cpu_env, const struct syscallname *name, #endif #ifdef TARGET_NR_futex -static void print_futex_op(abi_long tflag, int last) -{ -#define print_op(val) \ -if( cmd == val ) { \ - qemu_log(#val); \ - return; \ -} - - int cmd = (int)tflag; -#ifdef FUTEX_PRIVATE_FLAG - if (cmd & FUTEX_PRIVATE_FLAG) { - qemu_log("FUTEX_PRIVATE_FLAG|"); - cmd &= ~FUTEX_PRIVATE_FLAG; - } -#endif -#ifdef FUTEX_CLOCK_REALTIME - if (cmd & FUTEX_CLOCK_REALTIME) { - qemu_log("FUTEX_CLOCK_REALTIME|"); - cmd &= ~FUTEX_CLOCK_REALTIME; +static void print_futex_op(int cmd, int last) +{ + static const char * const futex_names[] = { +#define NAME(X) [X] = #X + NAME(FUTEX_WAIT), + NAME(FUTEX_WAKE), + NAME(FUTEX_FD), + NAME(FUTEX_REQUEUE), + NAME(FUTEX_CMP_REQUEUE), + NAME(FUTEX_WAKE_OP), + NAME(FUTEX_LOCK_PI), + NAME(FUTEX_UNLOCK_PI), + NAME(FUTEX_TRYLOCK_PI), + NAME(FUTEX_WAIT_BITSET), + NAME(FUTEX_WAKE_BITSET), + NAME(FUTEX_WAIT_REQUEUE_PI), + NAME(FUTEX_CMP_REQUEUE_PI), + NAME(FUTEX_LOCK_PI2), +#undef NAME + }; + + unsigned base_cmd = cmd & FUTEX_CMD_MASK; + + if (base_cmd < ARRAY_SIZE(futex_names)) { + qemu_log("%s%s%s", + (cmd & FUTEX_PRIVATE_FLAG ? "FUTEX_PRIVATE_FLAG|" : ""), + (cmd & FUTEX_CLOCK_REALTIME ? "FUTEX_CLOCK_REALTIME|" : ""), + futex_names[base_cmd]); + } else { + qemu_log("0x%x", cmd); } -#endif - print_op(FUTEX_WAIT) - print_op(FUTEX_WAKE) - print_op(FUTEX_FD) - print_op(FUTEX_REQUEUE) - print_op(FUTEX_CMP_REQUEUE) - print_op(FUTEX_WAKE_OP) - print_op(FUTEX_LOCK_PI) - print_op(FUTEX_UNLOCK_PI) - print_op(FUTEX_TRYLOCK_PI) -#ifdef FUTEX_WAIT_BITSET - print_op(FUTEX_WAIT_BITSET) -#endif -#ifdef FUTEX_WAKE_BITSET - print_op(FUTEX_WAKE_BITSET) -#endif - /* unknown values */ - qemu_log("%d", cmd); } static void @@ -3763,6 +3756,9 @@ print_futex(CPUArchState *cpu_env, const struct syscallname *name, switch (op) { case FUTEX_WAIT: case FUTEX_WAIT_BITSET: + case FUTEX_LOCK_PI: + case FUTEX_LOCK_PI2: + case FUTEX_WAIT_REQUEUE_PI: print_timespec(arg3, 0); break; default: From c5a1c6b88cad2e8c4d81b03dda48d49ea0be9cca Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 28 Aug 2022 19:10:05 -0700 Subject: [PATCH 36/38] linux-user: Lock log around strace Do not allow syscall arguments to be interleaved between threads. Signed-off-by: Richard Henderson Reviewed-by: Laurent Vivier Message-Id: <20220829021006.67305-8-richard.henderson@linaro.org> Signed-off-by: Laurent Vivier --- linux-user/strace.c | 65 ++++++++++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 19 deletions(-) diff --git a/linux-user/strace.c b/linux-user/strace.c index faa733125624f..37bc96df9bb6c 100644 --- a/linux-user/strace.c +++ b/linux-user/strace.c @@ -3919,26 +3919,37 @@ print_syscall(CPUArchState *cpu_env, int num, abi_long arg4, abi_long arg5, abi_long arg6) { int i; - const char *format="%s(" TARGET_ABI_FMT_ld "," TARGET_ABI_FMT_ld "," TARGET_ABI_FMT_ld "," TARGET_ABI_FMT_ld "," TARGET_ABI_FMT_ld "," TARGET_ABI_FMT_ld ")"; + FILE *f; + const char *format = "%s(" TARGET_ABI_FMT_ld "," TARGET_ABI_FMT_ld "," + TARGET_ABI_FMT_ld "," TARGET_ABI_FMT_ld "," + TARGET_ABI_FMT_ld "," TARGET_ABI_FMT_ld ")"; - qemu_log("%d ", getpid()); + f = qemu_log_trylock(); + if (!f) { + return; + } + fprintf(f, "%d ", getpid()); - for(i=0;i Date: Sun, 28 Aug 2022 19:10:06 -0700 Subject: [PATCH 37/38] linux-user: Log tid for strace Printing the same pid for all threads isn't helpful. Signed-off-by: Richard Henderson Reviewed-by: Laurent Vivier Message-Id: <20220829021006.67305-9-richard.henderson@linaro.org> Signed-off-by: Laurent Vivier --- linux-user/strace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linux-user/strace.c b/linux-user/strace.c index 37bc96df9bb6c..e02dbc3a71085 100644 --- a/linux-user/strace.c +++ b/linux-user/strace.c @@ -3928,7 +3928,7 @@ print_syscall(CPUArchState *cpu_env, int num, if (!f) { return; } - fprintf(f, "%d ", getpid()); + fprintf(f, "%d ", gettid()); for (i = 0; i < nsyscalls; i++) { if (scnames[i].nr == num) { From b04997815dc0ead04133c043f1d0098e5db7a7ca Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 27 Sep 2022 11:35:38 +0200 Subject: [PATCH 38/38] linux-user: Add parameters of getrandom() syscall for strace Signed-off-by: Helge Deller Reviewed-by: Laurent Vivier Message-Id: <20220927093538.8954-2-deller@gmx.de> Signed-off-by: Laurent Vivier --- linux-user/strace.list | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linux-user/strace.list b/linux-user/strace.list index f8a771b4f2561..a87415bf3d506 100644 --- a/linux-user/strace.list +++ b/linux-user/strace.list @@ -352,7 +352,7 @@ { TARGET_NR_getpriority, "getpriority", "%s(%#x,%#x)", NULL, NULL }, #endif #ifdef TARGET_NR_getrandom -{ TARGET_NR_getrandom, "getrandom", NULL, NULL, NULL }, +{ TARGET_NR_getrandom, "getrandom", "%s(%p,%u,%u)", NULL, NULL }, #endif #ifdef TARGET_NR_getresgid { TARGET_NR_getresgid, "getresgid" , NULL, NULL, NULL },