| @@ -0,0 +1,57 @@ | ||
| /* SPDX-License-Identifier: GPL-2.0+ */ | ||
| #undef TRACE_SYSTEM | ||
| #define TRACE_SYSTEM rseq | ||
|
|
||
| #if !defined(_TRACE_RSEQ_H) || defined(TRACE_HEADER_MULTI_READ) | ||
| #define _TRACE_RSEQ_H | ||
|
|
||
| #include <linux/tracepoint.h> | ||
| #include <linux/types.h> | ||
|
|
||
| TRACE_EVENT(rseq_update, | ||
|
|
||
| TP_PROTO(struct task_struct *t), | ||
|
|
||
| TP_ARGS(t), | ||
|
|
||
| TP_STRUCT__entry( | ||
| __field(s32, cpu_id) | ||
| ), | ||
|
|
||
| TP_fast_assign( | ||
| __entry->cpu_id = raw_smp_processor_id(); | ||
| ), | ||
|
|
||
| TP_printk("cpu_id=%d", __entry->cpu_id) | ||
| ); | ||
|
|
||
| TRACE_EVENT(rseq_ip_fixup, | ||
|
|
||
| TP_PROTO(unsigned long regs_ip, unsigned long start_ip, | ||
| unsigned long post_commit_offset, unsigned long abort_ip), | ||
|
|
||
| TP_ARGS(regs_ip, start_ip, post_commit_offset, abort_ip), | ||
|
|
||
| TP_STRUCT__entry( | ||
| __field(unsigned long, regs_ip) | ||
| __field(unsigned long, start_ip) | ||
| __field(unsigned long, post_commit_offset) | ||
| __field(unsigned long, abort_ip) | ||
| ), | ||
|
|
||
| TP_fast_assign( | ||
| __entry->regs_ip = regs_ip; | ||
| __entry->start_ip = start_ip; | ||
| __entry->post_commit_offset = post_commit_offset; | ||
| __entry->abort_ip = abort_ip; | ||
| ), | ||
|
|
||
| TP_printk("regs_ip=0x%lx start_ip=0x%lx post_commit_offset=%lu abort_ip=0x%lx", | ||
| __entry->regs_ip, __entry->start_ip, | ||
| __entry->post_commit_offset, __entry->abort_ip) | ||
| ); | ||
|
|
||
| #endif /* _TRACE_RSEQ_H */ | ||
|
|
||
| /* This part must be outside protection */ | ||
| #include <trace/define_trace.h> |
| @@ -0,0 +1,133 @@ | ||
| /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ | ||
| #ifndef _UAPI_LINUX_RSEQ_H | ||
| #define _UAPI_LINUX_RSEQ_H | ||
|
|
||
| /* | ||
| * linux/rseq.h | ||
| * | ||
| * Restartable sequences system call API | ||
| * | ||
| * Copyright (c) 2015-2018 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> | ||
| */ | ||
|
|
||
| #ifdef __KERNEL__ | ||
| # include <linux/types.h> | ||
| #else | ||
| # include <stdint.h> | ||
| #endif | ||
|
|
||
| #include <linux/types_32_64.h> | ||
|
|
||
| enum rseq_cpu_id_state { | ||
| RSEQ_CPU_ID_UNINITIALIZED = -1, | ||
| RSEQ_CPU_ID_REGISTRATION_FAILED = -2, | ||
| }; | ||
|
|
||
| enum rseq_flags { | ||
| RSEQ_FLAG_UNREGISTER = (1 << 0), | ||
| }; | ||
|
|
||
| enum rseq_cs_flags_bit { | ||
| RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0, | ||
| RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1, | ||
| RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2, | ||
| }; | ||
|
|
||
| enum rseq_cs_flags { | ||
| RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT = | ||
| (1U << RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT), | ||
| RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL = | ||
| (1U << RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT), | ||
| RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE = | ||
| (1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT), | ||
| }; | ||
|
|
||
| /* | ||
| * struct rseq_cs is aligned on 4 * 8 bytes to ensure it is always | ||
| * contained within a single cache-line. It is usually declared as | ||
| * link-time constant data. | ||
| */ | ||
| struct rseq_cs { | ||
| /* Version of this structure. */ | ||
| __u32 version; | ||
| /* enum rseq_cs_flags */ | ||
| __u32 flags; | ||
| LINUX_FIELD_u32_u64(start_ip); | ||
| /* Offset from start_ip. */ | ||
| LINUX_FIELD_u32_u64(post_commit_offset); | ||
| LINUX_FIELD_u32_u64(abort_ip); | ||
| } __attribute__((aligned(4 * sizeof(__u64)))); | ||
|
|
||
| /* | ||
| * struct rseq is aligned on 4 * 8 bytes to ensure it is always | ||
| * contained within a single cache-line. | ||
| * | ||
| * A single struct rseq per thread is allowed. | ||
| */ | ||
| struct rseq { | ||
| /* | ||
| * Restartable sequences cpu_id_start field. Updated by the | ||
| * kernel, and read by user-space with single-copy atomicity | ||
| * semantics. Aligned on 32-bit. Always contains a value in the | ||
| * range of possible CPUs, although the value may not be the | ||
| * actual current CPU (e.g. if rseq is not initialized). This | ||
| * CPU number value should always be compared against the value | ||
| * of the cpu_id field before performing a rseq commit or | ||
| * returning a value read from a data structure indexed using | ||
| * the cpu_id_start value. | ||
| */ | ||
| __u32 cpu_id_start; | ||
| /* | ||
| * Restartable sequences cpu_id field. Updated by the kernel, | ||
| * and read by user-space with single-copy atomicity semantics. | ||
| * Aligned on 32-bit. Values RSEQ_CPU_ID_UNINITIALIZED and | ||
| * RSEQ_CPU_ID_REGISTRATION_FAILED have a special semantic: the | ||
| * former means "rseq uninitialized", and the latter means "rseq | ||
| * initialization failed". This value is meant to be read within | ||
| * rseq critical sections and compared with the cpu_id_start | ||
| * value previously read, before performing the commit instruction, | ||
| * or read and compared with the cpu_id_start value before returning | ||
| * a value loaded from a data structure indexed using the | ||
| * cpu_id_start value. | ||
| */ | ||
| __u32 cpu_id; | ||
| /* | ||
| * Restartable sequences rseq_cs field. | ||
| * | ||
| * Contains NULL when no critical section is active for the current | ||
| * thread, or holds a pointer to the currently active struct rseq_cs. | ||
| * | ||
| * Updated by user-space, which sets the address of the currently | ||
| * active rseq_cs at the beginning of the assembly instruction sequence | ||
| * block, and set to NULL by the kernel when it restarts an assembly | ||
| * instruction sequence block, as well as when the kernel detects that | ||
| * it is preempting or delivering a signal outside of the range | ||
| * targeted by the rseq_cs. Also needs to be set to NULL by user-space | ||
| * before reclaiming memory that contains the targeted struct rseq_cs. | ||
| * | ||
| * Read and set by the kernel with single-copy atomicity semantics. | ||
| * Set by user-space with single-copy atomicity semantics. Aligned | ||
| * on 64-bit. | ||
| */ | ||
| LINUX_FIELD_u32_u64(rseq_cs); | ||
| /* | ||
| * Restartable sequences flags field, holding enum rseq_cs_flags | ||
| * values. Set by user-space, read by the kernel. Mainly useful as a | ||
| * fallback when single-stepping prevents a critical section from | ||
| * making progress: | ||
| * | ||
| * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT | ||
| * Inhibit instruction sequence block restart on preemption | ||
| * for this thread. | ||
| * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | ||
| * Inhibit instruction sequence block restart on signal | ||
| * delivery for this thread. | ||
| * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE | ||
| * Inhibit instruction sequence block restart on migration for | ||
| * this thread. | ||
| */ | ||
| __u32 flags; | ||
| } __attribute__((aligned(4 * sizeof(__u64)))); | ||
|
|
||
| #endif /* _UAPI_LINUX_RSEQ_H */ |
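The field comments above spell out the contract that the rseq() system call (kernel/rseq.c further down) implements. Below is a rough, hedged sketch of how a thread could register this ABI directly via syscall(2); the my_rseq_* helper names and the 0x53053053 signature are illustrative (the selftests below define their own per-architecture RSEQ_SIG), __NR_rseq must be provided by the toolchain headers, and the kernel allows only one registered struct rseq per thread.

```c
#define _GNU_SOURCE
#include <linux/rseq.h>
#include <sys/syscall.h>
#include <unistd.h>

/* One registered struct rseq per thread (see comment above). */
static __thread volatile struct rseq rseq_area = {
	.cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
};

static int my_rseq_register(void)
{
	/* flags = 0 registers; the signature guards abort handlers. */
	return syscall(__NR_rseq, &rseq_area, sizeof(rseq_area), 0, 0x53053053);
}

static int my_rseq_unregister(void)
{
	return syscall(__NR_rseq, &rseq_area, sizeof(rseq_area),
		       RSEQ_FLAG_UNREGISTER, 0x53053053);
}
```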
| @@ -0,0 +1,50 @@ | ||
| /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ | ||
| #ifndef _UAPI_LINUX_TYPES_32_64_H | ||
| #define _UAPI_LINUX_TYPES_32_64_H | ||
|
|
||
| /* | ||
| * linux/types_32_64.h | ||
| * | ||
| * Integer type declaration for pointers across 32-bit and 64-bit systems. | ||
| * | ||
| * Copyright (c) 2015-2018 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> | ||
| */ | ||
|
|
||
| #ifdef __KERNEL__ | ||
| # include <linux/types.h> | ||
| #else | ||
| # include <stdint.h> | ||
| #endif | ||
|
|
||
| #include <asm/byteorder.h> | ||
|
|
||
| #ifdef __BYTE_ORDER | ||
| # if (__BYTE_ORDER == __BIG_ENDIAN) | ||
| # define LINUX_BYTE_ORDER_BIG_ENDIAN | ||
| # else | ||
| # define LINUX_BYTE_ORDER_LITTLE_ENDIAN | ||
| # endif | ||
| #else | ||
| # ifdef __BIG_ENDIAN | ||
| # define LINUX_BYTE_ORDER_BIG_ENDIAN | ||
| # else | ||
| # define LINUX_BYTE_ORDER_LITTLE_ENDIAN | ||
| # endif | ||
| #endif | ||
|
|
||
| #ifdef __LP64__ | ||
| # define LINUX_FIELD_u32_u64(field) __u64 field | ||
| # define LINUX_FIELD_u32_u64_INIT_ONSTACK(field, v) field = (intptr_t)v | ||
| #else | ||
| # ifdef LINUX_BYTE_ORDER_BIG_ENDIAN | ||
| # define LINUX_FIELD_u32_u64(field) __u32 field ## _padding, field | ||
| # define LINUX_FIELD_u32_u64_INIT_ONSTACK(field, v) \ | ||
| field ## _padding = 0, field = (intptr_t)v | ||
| # else | ||
| # define LINUX_FIELD_u32_u64(field) __u32 field, field ## _padding | ||
| # define LINUX_FIELD_u32_u64_INIT_ONSTACK(field, v) \ | ||
| field = (intptr_t)v, field ## _padding = 0 | ||
| # endif | ||
| #endif | ||
|
|
||
| #endif /* _UAPI_LINUX_TYPES_32_64_H */ |
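The macros above carve out a 64-bit slot for user-space pointers on both 32-bit and 64-bit ABIs, placing the padding word on the endianness-appropriate side so the kernel always reads a full 64-bit value. Below is a hedged sketch of how the *_INIT_ONSTACK helper is meant to be used when filling a struct rseq_cs field by field; the set_cs() helper and its parameters are illustrative only.

```c
#include <linux/types_32_64.h>
#include <linux/rseq.h>

/*
 * On __LP64__, LINUX_FIELD_u32_u64(start_ip) declares "__u64 start_ip".
 * On 32-bit, it declares start_ip plus a start_ip_padding word, and the
 * INIT_ONSTACK helper below zeroes the padding as well.
 */
static void set_cs(struct rseq_cs *cs, unsigned long start_ip,
		   unsigned long post_commit_offset, unsigned long abort_ip)
{
	cs->version = 0;
	cs->flags = 0;
	LINUX_FIELD_u32_u64_INIT_ONSTACK(cs->start_ip, start_ip);
	LINUX_FIELD_u32_u64_INIT_ONSTACK(cs->post_commit_offset, post_commit_offset);
	LINUX_FIELD_u32_u64_INIT_ONSTACK(cs->abort_ip, abort_ip);
}
```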
| @@ -0,0 +1,357 @@ | ||
| // SPDX-License-Identifier: GPL-2.0+ | ||
| /* | ||
| * Restartable sequences system call | ||
| * | ||
| * Copyright (C) 2015, Google, Inc., | ||
| * Paul Turner <pjt@google.com> and Andrew Hunter <ahh@google.com> | ||
| * Copyright (C) 2015-2018, EfficiOS Inc., | ||
| * Mathieu Desnoyers <mathieu.desnoyers@efficios.com> | ||
| */ | ||
|
|
||
| #include <linux/sched.h> | ||
| #include <linux/uaccess.h> | ||
| #include <linux/syscalls.h> | ||
| #include <linux/rseq.h> | ||
| #include <linux/types.h> | ||
| #include <asm/ptrace.h> | ||
|
|
||
| #define CREATE_TRACE_POINTS | ||
| #include <trace/events/rseq.h> | ||
|
|
||
| #define RSEQ_CS_PREEMPT_MIGRATE_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE | \ | ||
| RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT) | ||
|
|
||
| /* | ||
| * | ||
| * Restartable sequences are a lightweight interface that allows | ||
| * user-level code to be executed atomically relative to scheduler | ||
| * preemption and signal delivery. Typically used for implementing | ||
| * per-cpu operations. | ||
| * | ||
| * It allows user-space to perform update operations on per-cpu data | ||
| * without requiring heavy-weight atomic operations. | ||
| * | ||
| * Detailed algorithm of rseq user-space assembly sequences: | ||
| * | ||
| * init(rseq_cs) | ||
| * cpu = TLS->rseq::cpu_id_start | ||
| * [1] TLS->rseq::rseq_cs = rseq_cs | ||
| * [start_ip] ---------------------------- | ||
| * [2] if (cpu != TLS->rseq::cpu_id) | ||
| * goto abort_ip; | ||
| * [3] <last_instruction_in_cs> | ||
| * [post_commit_ip] ---------------------------- | ||
| * | ||
| * The address of jump target abort_ip must be outside the critical | ||
| * region, i.e.: | ||
| * | ||
| * [abort_ip] < [start_ip] || [abort_ip] >= [post_commit_ip] | ||
| * | ||
| * Steps [2]-[3] (inclusive) need to be a sequence of instructions in | ||
| * userspace that can handle being interrupted between any of those | ||
| * instructions, and then resumed to the abort_ip. | ||
| * | ||
| * 1. Userspace stores the address of the struct rseq_cs assembly | ||
| * block descriptor into the rseq_cs field of the registered | ||
| * struct rseq TLS area. This update is performed through a single | ||
| * store within the inline assembly instruction sequence. | ||
| * [start_ip] | ||
| * | ||
| * 2. Userspace tests to check whether the current cpu_id field matches | ||
| * the cpu number loaded before start_ip, branching to abort_ip | ||
| * in case of a mismatch. | ||
| * | ||
| * If the sequence is preempted or interrupted by a signal | ||
| * at or after start_ip and before post_commit_ip, then the kernel | ||
| * clears TLS->__rseq_abi::rseq_cs, and sets the user-space return | ||
| * ip to abort_ip before returning to user-space, so the preempted | ||
| * execution resumes at abort_ip. | ||
| * | ||
| * 3. Userspace critical section final instruction before | ||
| * post_commit_ip is the commit. The critical section is | ||
| * self-terminating. | ||
| * [post_commit_ip] | ||
| * | ||
| * 4. <success> | ||
| * | ||
| * On failure at [2], or if interrupted by preempt or signal delivery | ||
| * between [1] and [3]: | ||
| * | ||
| * [abort_ip] | ||
| * F1. <failure> | ||
| */ | ||
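For a concrete user-space view of the steps above: the selftest headers further down wrap steps [1]-[3] in per-operation helpers such as rseq_addv() (declared in rseq-skip.h below; the real per-architecture versions are inline assembly). A hedged sketch of a per-CPU counter increment built on that helper; the function name and the caller-supplied per-CPU array are assumptions.

```c
#include "rseq.h"	/* selftest header providing rseq_addv()/rseq_cpu_start() */

static void percpu_counter_inc(intptr_t *counters /* one slot per CPU */)
{
	for (;;) {
		int cpu = rseq_cpu_start();

		/* Steps [1]-[3]: descriptor store, cpu_id check, commit. */
		if (!rseq_addv(&counters[cpu], 1, cpu))
			return;
		/* Aborted (preemption, signal or migration): retry. */
	}
}
```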
|
|
||
| static int rseq_update_cpu_id(struct task_struct *t) | ||
| { | ||
| u32 cpu_id = raw_smp_processor_id(); | ||
|
|
||
| if (__put_user(cpu_id, &t->rseq->cpu_id_start)) | ||
| return -EFAULT; | ||
| if (__put_user(cpu_id, &t->rseq->cpu_id)) | ||
| return -EFAULT; | ||
| trace_rseq_update(t); | ||
| return 0; | ||
| } | ||
|
|
||
| static int rseq_reset_rseq_cpu_id(struct task_struct *t) | ||
| { | ||
| u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED; | ||
|
|
||
| /* | ||
| * Reset cpu_id_start to its initial state (0). | ||
| */ | ||
| if (__put_user(cpu_id_start, &t->rseq->cpu_id_start)) | ||
| return -EFAULT; | ||
| /* | ||
| * Reset cpu_id to RSEQ_CPU_ID_UNINITIALIZED, so any user coming | ||
| * in after unregistration can figure out that rseq needs to be | ||
| * registered again. | ||
| */ | ||
| if (__put_user(cpu_id, &t->rseq->cpu_id)) | ||
| return -EFAULT; | ||
| return 0; | ||
| } | ||
|
|
||
| static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs) | ||
| { | ||
| struct rseq_cs __user *urseq_cs; | ||
| unsigned long ptr; | ||
| u32 __user *usig; | ||
| u32 sig; | ||
| int ret; | ||
|
|
||
| ret = __get_user(ptr, &t->rseq->rseq_cs); | ||
| if (ret) | ||
| return ret; | ||
| if (!ptr) { | ||
| memset(rseq_cs, 0, sizeof(*rseq_cs)); | ||
| return 0; | ||
| } | ||
| urseq_cs = (struct rseq_cs __user *)ptr; | ||
| if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs))) | ||
| return -EFAULT; | ||
| if (rseq_cs->version > 0) | ||
| return -EINVAL; | ||
|
|
||
| /* Ensure that abort_ip is not in the critical section. */ | ||
| if (rseq_cs->abort_ip - rseq_cs->start_ip < rseq_cs->post_commit_offset) | ||
| return -EINVAL; | ||
|
|
||
| usig = (u32 __user *)(rseq_cs->abort_ip - sizeof(u32)); | ||
| ret = get_user(sig, usig); | ||
| if (ret) | ||
| return ret; | ||
|
|
||
| if (current->rseq_sig != sig) { | ||
| printk_ratelimited(KERN_WARNING | ||
| "Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n", | ||
| sig, current->rseq_sig, current->pid, usig); | ||
| return -EPERM; | ||
| } | ||
| return 0; | ||
| } | ||
|
|
||
| static int rseq_need_restart(struct task_struct *t, u32 cs_flags) | ||
| { | ||
| u32 flags, event_mask; | ||
| int ret; | ||
|
|
||
| /* Get thread flags. */ | ||
| ret = __get_user(flags, &t->rseq->flags); | ||
| if (ret) | ||
| return ret; | ||
|
|
||
| /* Take critical section flags into account. */ | ||
| flags |= cs_flags; | ||
|
|
||
| /* | ||
| * Restart on signal can only be inhibited when restart on | ||
| * preempt and restart on migrate are inhibited too. Otherwise, | ||
| * a preempted signal handler could fail to restart the prior | ||
| * execution context on sigreturn. | ||
| */ | ||
| if (unlikely((flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL) && | ||
| (flags & RSEQ_CS_PREEMPT_MIGRATE_FLAGS) != | ||
| RSEQ_CS_PREEMPT_MIGRATE_FLAGS)) | ||
| return -EINVAL; | ||
|
|
||
| /* | ||
| * Load and clear event mask atomically with respect to | ||
| * scheduler preemption. | ||
| */ | ||
| preempt_disable(); | ||
| event_mask = t->rseq_event_mask; | ||
| t->rseq_event_mask = 0; | ||
| preempt_enable(); | ||
|
|
||
| return !!(event_mask & ~flags); | ||
| } | ||
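In other words, the validation above only accepts the signal-inhibit flag when the preempt- and migrate-inhibit flags are set as well. A small illustration using the flag values from the UAPI header above (the variable names are illustrative):

```c
#include <linux/rseq.h>

/* Accepted: all three restart inhibitors set together. */
static const __u32 flags_ok = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL |
			      RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT |
			      RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE;

/* Rejected with -EINVAL: signal restart inhibited alone. */
static const __u32 flags_bad = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL;
```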
|
|
||
| static int clear_rseq_cs(struct task_struct *t) | ||
| { | ||
| /* | ||
| * The rseq_cs field is set to NULL on preemption or signal | ||
| * delivery on top of rseq assembly block, as well as on top | ||
| * of code outside of the rseq assembly block. This performs | ||
| * a lazy clear of the rseq_cs field. | ||
| * | ||
| * Set rseq_cs to NULL with single-copy atomicity. | ||
| */ | ||
| return __put_user(0UL, &t->rseq->rseq_cs); | ||
| } | ||
|
|
||
| /* | ||
| * Unsigned comparison will be true when ip >= start_ip, and when | ||
| * ip < start_ip + post_commit_offset. | ||
| */ | ||
| static bool in_rseq_cs(unsigned long ip, struct rseq_cs *rseq_cs) | ||
| { | ||
| return ip - rseq_cs->start_ip < rseq_cs->post_commit_offset; | ||
| } | ||
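A worked illustration of this single unsigned comparison (the same trick, inverted, backs the abort_ip check in rseq_get_rseq_cs() above): subtracting start_ip wraps any address below the range around to a huge unsigned value, so one compare covers both bounds. The addresses below are arbitrary.

```c
#include <assert.h>

static void range_check_example(void)
{
	unsigned long start_ip = 0x1000, post_commit_offset = 0x40;

	assert((0x1010UL - start_ip) < post_commit_offset);	/* inside the cs   */
	assert(!((0x0ff8UL - start_ip) < post_commit_offset));	/* below start_ip  */
	assert(!((0x1040UL - start_ip) < post_commit_offset));	/* at/after commit */
}
```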
|
|
||
| static int rseq_ip_fixup(struct pt_regs *regs) | ||
| { | ||
| unsigned long ip = instruction_pointer(regs); | ||
| struct task_struct *t = current; | ||
| struct rseq_cs rseq_cs; | ||
| int ret; | ||
|
|
||
| ret = rseq_get_rseq_cs(t, &rseq_cs); | ||
| if (ret) | ||
| return ret; | ||
|
|
||
| /* | ||
| * Handle potentially not being within a critical section. | ||
| * If not nested over a rseq critical section, restart is useless. | ||
| * Clear the rseq_cs pointer and return. | ||
| */ | ||
| if (!in_rseq_cs(ip, &rseq_cs)) | ||
| return clear_rseq_cs(t); | ||
| ret = rseq_need_restart(t, rseq_cs.flags); | ||
| if (ret <= 0) | ||
| return ret; | ||
| ret = clear_rseq_cs(t); | ||
| if (ret) | ||
| return ret; | ||
| trace_rseq_ip_fixup(ip, rseq_cs.start_ip, rseq_cs.post_commit_offset, | ||
| rseq_cs.abort_ip); | ||
| instruction_pointer_set(regs, (unsigned long)rseq_cs.abort_ip); | ||
| return 0; | ||
| } | ||
|
|
||
| /* | ||
| * This resume handler must always be executed between any of: | ||
| * - preemption, | ||
| * - signal delivery, | ||
| * and return to user-space. | ||
| * | ||
| * This is how we can ensure that the entire rseq critical section, | ||
| * consisting of both the C part and the assembly instruction sequence, | ||
| * will issue the commit instruction only if executed atomically with | ||
| * respect to other threads scheduled on the same CPU, and with respect | ||
| * to signal handlers. | ||
| */ | ||
| void __rseq_handle_notify_resume(struct pt_regs *regs) | ||
| { | ||
| struct task_struct *t = current; | ||
| int ret; | ||
|
|
||
| if (unlikely(t->flags & PF_EXITING)) | ||
| return; | ||
| if (unlikely(!access_ok(VERIFY_WRITE, t->rseq, sizeof(*t->rseq)))) | ||
| goto error; | ||
| ret = rseq_ip_fixup(regs); | ||
| if (unlikely(ret < 0)) | ||
| goto error; | ||
| if (unlikely(rseq_update_cpu_id(t))) | ||
| goto error; | ||
| return; | ||
|
|
||
| error: | ||
| force_sig(SIGSEGV, t); | ||
| } | ||
|
|
||
| #ifdef CONFIG_DEBUG_RSEQ | ||
|
|
||
| /* | ||
| * Terminate the process if a syscall is issued within a restartable | ||
| * sequence. | ||
| */ | ||
| void rseq_syscall(struct pt_regs *regs) | ||
| { | ||
| unsigned long ip = instruction_pointer(regs); | ||
| struct task_struct *t = current; | ||
| struct rseq_cs rseq_cs; | ||
|
|
||
| if (!t->rseq) | ||
| return; | ||
| if (!access_ok(VERIFY_READ, t->rseq, sizeof(*t->rseq)) || | ||
| rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs)) | ||
| force_sig(SIGSEGV, t); | ||
| } | ||
|
|
||
| #endif | ||
|
|
||
| /* | ||
| * sys_rseq - setup restartable sequences for caller thread. | ||
| */ | ||
| SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, | ||
| int, flags, u32, sig) | ||
| { | ||
| int ret; | ||
|
|
||
| if (flags & RSEQ_FLAG_UNREGISTER) { | ||
| /* Unregister rseq for current thread. */ | ||
| if (current->rseq != rseq || !current->rseq) | ||
| return -EINVAL; | ||
| if (current->rseq_len != rseq_len) | ||
| return -EINVAL; | ||
| if (current->rseq_sig != sig) | ||
| return -EPERM; | ||
| ret = rseq_reset_rseq_cpu_id(current); | ||
| if (ret) | ||
| return ret; | ||
| current->rseq = NULL; | ||
| current->rseq_len = 0; | ||
| current->rseq_sig = 0; | ||
| return 0; | ||
| } | ||
|
|
||
| if (unlikely(flags)) | ||
| return -EINVAL; | ||
|
|
||
| if (current->rseq) { | ||
| /* | ||
| * If rseq is already registered, check whether | ||
| * the provided address differs from the prior | ||
| * one. | ||
| */ | ||
| if (current->rseq != rseq || current->rseq_len != rseq_len) | ||
| return -EINVAL; | ||
| if (current->rseq_sig != sig) | ||
| return -EPERM; | ||
| /* Already registered. */ | ||
| return -EBUSY; | ||
| } | ||
|
|
||
| /* | ||
| * If there was no rseq previously registered, | ||
| * ensure the provided rseq is properly aligned and valid. | ||
| */ | ||
| if (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) || | ||
| rseq_len != sizeof(*rseq)) | ||
| return -EINVAL; | ||
| if (!access_ok(VERIFY_WRITE, rseq, rseq_len)) | ||
| return -EFAULT; | ||
| current->rseq = rseq; | ||
| current->rseq_len = rseq_len; | ||
| current->rseq_sig = sig; | ||
| /* | ||
| * If rseq was previously inactive, and has just been | ||
| * registered, ensure the cpu_id_start and cpu_id fields | ||
| * are updated before returning to user-space. | ||
| */ | ||
| rseq_set_notify_resume(current); | ||
|
|
||
| return 0; | ||
| } |
| @@ -0,0 +1,6 @@ | ||
| basic_percpu_ops_test | ||
| basic_test | ||
| basic_rseq_op_test | ||
| param_test | ||
| param_test_benchmark | ||
| param_test_compare_twice |
| @@ -0,0 +1,30 @@ | ||
| # SPDX-License-Identifier: GPL-2.0+ OR MIT | ||
| CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ | ||
| LDLIBS += -lpthread | ||
|
|
||
| # Use our own dependency rules: we only want to build against the 1st | ||
| # prerequisite, but still track changes to header files and depend on the | ||
| # shared object. | ||
| OVERRIDE_TARGETS = 1 | ||
|
|
||
| TEST_GEN_PROGS = basic_test basic_percpu_ops_test param_test \ | ||
| param_test_benchmark param_test_compare_twice | ||
|
|
||
| TEST_GEN_PROGS_EXTENDED = librseq.so | ||
|
|
||
| TEST_PROGS = run_param_test.sh | ||
|
|
||
| include ../lib.mk | ||
|
|
||
| $(OUTPUT)/librseq.so: rseq.c rseq.h rseq-*.h | ||
| $(CC) $(CFLAGS) -shared -fPIC $< $(LDLIBS) -o $@ | ||
|
|
||
| $(OUTPUT)/%: %.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h | ||
| $(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@ | ||
|
|
||
| $(OUTPUT)/param_test_benchmark: param_test.c $(TEST_GEN_PROGS_EXTENDED) \ | ||
| rseq.h rseq-*.h | ||
| $(CC) $(CFLAGS) -DBENCHMARK $< $(LDLIBS) -lrseq -o $@ | ||
|
|
||
| $(OUTPUT)/param_test_compare_twice: param_test.c $(TEST_GEN_PROGS_EXTENDED) \ | ||
| rseq.h rseq-*.h | ||
| $(CC) $(CFLAGS) -DRSEQ_COMPARE_TWICE $< $(LDLIBS) -lrseq -o $@ |
| @@ -0,0 +1,312 @@ | ||
| // SPDX-License-Identifier: LGPL-2.1 | ||
| #define _GNU_SOURCE | ||
| #include <assert.h> | ||
| #include <pthread.h> | ||
| #include <sched.h> | ||
| #include <stdint.h> | ||
| #include <stdio.h> | ||
| #include <stdlib.h> | ||
| #include <string.h> | ||
| #include <stddef.h> | ||
|
|
||
| #include "rseq.h" | ||
|
|
||
| #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) | ||
|
|
||
| struct percpu_lock_entry { | ||
| intptr_t v; | ||
| } __attribute__((aligned(128))); | ||
|
|
||
| struct percpu_lock { | ||
| struct percpu_lock_entry c[CPU_SETSIZE]; | ||
| }; | ||
|
|
||
| struct test_data_entry { | ||
| intptr_t count; | ||
| } __attribute__((aligned(128))); | ||
|
|
||
| struct spinlock_test_data { | ||
| struct percpu_lock lock; | ||
| struct test_data_entry c[CPU_SETSIZE]; | ||
| int reps; | ||
| }; | ||
|
|
||
| struct percpu_list_node { | ||
| intptr_t data; | ||
| struct percpu_list_node *next; | ||
| }; | ||
|
|
||
| struct percpu_list_entry { | ||
| struct percpu_list_node *head; | ||
| } __attribute__((aligned(128))); | ||
|
|
||
| struct percpu_list { | ||
| struct percpu_list_entry c[CPU_SETSIZE]; | ||
| }; | ||
|
|
||
| /* A simple percpu spinlock. Returns the cpu the lock was acquired on. */ | ||
| int rseq_this_cpu_lock(struct percpu_lock *lock) | ||
| { | ||
| int cpu; | ||
|
|
||
| for (;;) { | ||
| int ret; | ||
|
|
||
| cpu = rseq_cpu_start(); | ||
| ret = rseq_cmpeqv_storev(&lock->c[cpu].v, | ||
| 0, 1, cpu); | ||
| if (rseq_likely(!ret)) | ||
| break; | ||
| /* Retry if comparison fails or rseq aborts. */ | ||
| } | ||
| /* | ||
| * Acquire semantic when taking lock after control dependency. | ||
| * Matches rseq_smp_store_release(). | ||
| */ | ||
| rseq_smp_acquire__after_ctrl_dep(); | ||
| return cpu; | ||
| } | ||
|
|
||
| void rseq_percpu_unlock(struct percpu_lock *lock, int cpu) | ||
| { | ||
| assert(lock->c[cpu].v == 1); | ||
| /* | ||
| * Release lock, with release semantic. Matches | ||
| * rseq_smp_acquire__after_ctrl_dep(). | ||
| */ | ||
| rseq_smp_store_release(&lock->c[cpu].v, 0); | ||
| } | ||
|
|
||
| void *test_percpu_spinlock_thread(void *arg) | ||
| { | ||
| struct spinlock_test_data *data = arg; | ||
| int i, cpu; | ||
|
|
||
| if (rseq_register_current_thread()) { | ||
| fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", | ||
| errno, strerror(errno)); | ||
| abort(); | ||
| } | ||
| for (i = 0; i < data->reps; i++) { | ||
| cpu = rseq_this_cpu_lock(&data->lock); | ||
| data->c[cpu].count++; | ||
| rseq_percpu_unlock(&data->lock, cpu); | ||
| } | ||
| if (rseq_unregister_current_thread()) { | ||
| fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", | ||
| errno, strerror(errno)); | ||
| abort(); | ||
| } | ||
|
|
||
| return NULL; | ||
| } | ||
|
|
||
| /* | ||
| * A simple test which implements a sharded counter using a per-cpu | ||
| * lock. Obviously real applications might prefer to simply use a | ||
| * per-cpu increment; however, this is reasonable for a test and the | ||
| * lock can be extended to synchronize more complicated operations. | ||
| */ | ||
| void test_percpu_spinlock(void) | ||
| { | ||
| const int num_threads = 200; | ||
| int i; | ||
| uint64_t sum; | ||
| pthread_t test_threads[num_threads]; | ||
| struct spinlock_test_data data; | ||
|
|
||
| memset(&data, 0, sizeof(data)); | ||
| data.reps = 5000; | ||
|
|
||
| for (i = 0; i < num_threads; i++) | ||
| pthread_create(&test_threads[i], NULL, | ||
| test_percpu_spinlock_thread, &data); | ||
|
|
||
| for (i = 0; i < num_threads; i++) | ||
| pthread_join(test_threads[i], NULL); | ||
|
|
||
| sum = 0; | ||
| for (i = 0; i < CPU_SETSIZE; i++) | ||
| sum += data.c[i].count; | ||
|
|
||
| assert(sum == (uint64_t)data.reps * num_threads); | ||
| } | ||
|
|
||
| void this_cpu_list_push(struct percpu_list *list, | ||
| struct percpu_list_node *node, | ||
| int *_cpu) | ||
| { | ||
| int cpu; | ||
|
|
||
| for (;;) { | ||
| intptr_t *targetptr, newval, expect; | ||
| int ret; | ||
|
|
||
| cpu = rseq_cpu_start(); | ||
| /* Load list->c[cpu].head with single-copy atomicity. */ | ||
| expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head); | ||
| newval = (intptr_t)node; | ||
| targetptr = (intptr_t *)&list->c[cpu].head; | ||
| node->next = (struct percpu_list_node *)expect; | ||
| ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu); | ||
| if (rseq_likely(!ret)) | ||
| break; | ||
| /* Retry if comparison fails or rseq aborts. */ | ||
| } | ||
| if (_cpu) | ||
| *_cpu = cpu; | ||
| } | ||
|
|
||
| /* | ||
| * Unlike a traditional lock-less linked list, the availability of an | ||
| * rseq primitive allows us to implement pop without concerns over | ||
| * ABA-type races. | ||
| */ | ||
| struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, | ||
| int *_cpu) | ||
| { | ||
| for (;;) { | ||
| struct percpu_list_node *head; | ||
| intptr_t *targetptr, expectnot, *load; | ||
| off_t offset; | ||
| int ret, cpu; | ||
|
|
||
| cpu = rseq_cpu_start(); | ||
| targetptr = (intptr_t *)&list->c[cpu].head; | ||
| expectnot = (intptr_t)NULL; | ||
| offset = offsetof(struct percpu_list_node, next); | ||
| load = (intptr_t *)&head; | ||
| ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot, | ||
| offset, load, cpu); | ||
| if (rseq_likely(!ret)) { | ||
| if (_cpu) | ||
| *_cpu = cpu; | ||
| return head; | ||
| } | ||
| if (ret > 0) | ||
| return NULL; | ||
| /* Retry if rseq aborts. */ | ||
| } | ||
| } | ||
|
|
||
| /* | ||
| * __percpu_list_pop is not safe against concurrent accesses. Should | ||
| * only be used on lists that are not concurrently modified. | ||
| */ | ||
| struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu) | ||
| { | ||
| struct percpu_list_node *node; | ||
|
|
||
| node = list->c[cpu].head; | ||
| if (!node) | ||
| return NULL; | ||
| list->c[cpu].head = node->next; | ||
| return node; | ||
| } | ||
|
|
||
| void *test_percpu_list_thread(void *arg) | ||
| { | ||
| int i; | ||
| struct percpu_list *list = (struct percpu_list *)arg; | ||
|
|
||
| if (rseq_register_current_thread()) { | ||
| fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", | ||
| errno, strerror(errno)); | ||
| abort(); | ||
| } | ||
|
|
||
| for (i = 0; i < 100000; i++) { | ||
| struct percpu_list_node *node; | ||
|
|
||
| node = this_cpu_list_pop(list, NULL); | ||
| sched_yield(); /* encourage shuffling */ | ||
| if (node) | ||
| this_cpu_list_push(list, node, NULL); | ||
| } | ||
|
|
||
| if (rseq_unregister_current_thread()) { | ||
| fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", | ||
| errno, strerror(errno)); | ||
| abort(); | ||
| } | ||
|
|
||
| return NULL; | ||
| } | ||
|
|
||
| /* Simultaneous modification to a per-cpu linked list from many threads. */ | ||
| void test_percpu_list(void) | ||
| { | ||
| int i, j; | ||
| uint64_t sum = 0, expected_sum = 0; | ||
| struct percpu_list list; | ||
| pthread_t test_threads[200]; | ||
| cpu_set_t allowed_cpus; | ||
|
|
||
| memset(&list, 0, sizeof(list)); | ||
|
|
||
| /* Generate list entries for every usable cpu. */ | ||
| sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); | ||
| for (i = 0; i < CPU_SETSIZE; i++) { | ||
| if (!CPU_ISSET(i, &allowed_cpus)) | ||
| continue; | ||
| for (j = 1; j <= 100; j++) { | ||
| struct percpu_list_node *node; | ||
|
|
||
| expected_sum += j; | ||
|
|
||
| node = malloc(sizeof(*node)); | ||
| assert(node); | ||
| node->data = j; | ||
| node->next = list.c[i].head; | ||
| list.c[i].head = node; | ||
| } | ||
| } | ||
|
|
||
| for (i = 0; i < 200; i++) | ||
| pthread_create(&test_threads[i], NULL, | ||
| test_percpu_list_thread, &list); | ||
|
|
||
| for (i = 0; i < 200; i++) | ||
| pthread_join(test_threads[i], NULL); | ||
|
|
||
| for (i = 0; i < CPU_SETSIZE; i++) { | ||
| struct percpu_list_node *node; | ||
|
|
||
| if (!CPU_ISSET(i, &allowed_cpus)) | ||
| continue; | ||
|
|
||
| while ((node = __percpu_list_pop(&list, i))) { | ||
| sum += node->data; | ||
| free(node); | ||
| } | ||
| } | ||
|
|
||
| /* | ||
| * All entries should now be accounted for (unless some external | ||
| * actor is interfering with our allowed affinity while this | ||
| * test is running). | ||
| */ | ||
| assert(sum == expected_sum); | ||
| } | ||
|
|
||
| int main(int argc, char **argv) | ||
| { | ||
| if (rseq_register_current_thread()) { | ||
| fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", | ||
| errno, strerror(errno)); | ||
| goto error; | ||
| } | ||
| printf("spinlock\n"); | ||
| test_percpu_spinlock(); | ||
| printf("percpu_list\n"); | ||
| test_percpu_list(); | ||
| if (rseq_unregister_current_thread()) { | ||
| fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", | ||
| errno, strerror(errno)); | ||
| goto error; | ||
| } | ||
| return 0; | ||
|
|
||
| error: | ||
| return -1; | ||
| } |
| @@ -0,0 +1,56 @@ | ||
| // SPDX-License-Identifier: LGPL-2.1 | ||
| /* | ||
| * Basic test coverage for critical regions and rseq_current_cpu(). | ||
| */ | ||
|
|
||
| #define _GNU_SOURCE | ||
| #include <assert.h> | ||
| #include <sched.h> | ||
| #include <signal.h> | ||
| #include <stdio.h> | ||
| #include <string.h> | ||
| #include <sys/time.h> | ||
|
|
||
| #include "rseq.h" | ||
|
|
||
| void test_cpu_pointer(void) | ||
| { | ||
| cpu_set_t affinity, test_affinity; | ||
| int i; | ||
|
|
||
| sched_getaffinity(0, sizeof(affinity), &affinity); | ||
| CPU_ZERO(&test_affinity); | ||
| for (i = 0; i < CPU_SETSIZE; i++) { | ||
| if (CPU_ISSET(i, &affinity)) { | ||
| CPU_SET(i, &test_affinity); | ||
| sched_setaffinity(0, sizeof(test_affinity), | ||
| &test_affinity); | ||
| assert(sched_getcpu() == i); | ||
| assert(rseq_current_cpu() == i); | ||
| assert(rseq_current_cpu_raw() == i); | ||
| assert(rseq_cpu_start() == i); | ||
| CPU_CLR(i, &test_affinity); | ||
| } | ||
| } | ||
| sched_setaffinity(0, sizeof(affinity), &affinity); | ||
| } | ||
|
|
||
| int main(int argc, char **argv) | ||
| { | ||
| if (rseq_register_current_thread()) { | ||
| fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", | ||
| errno, strerror(errno)); | ||
| goto init_thread_error; | ||
| } | ||
| printf("testing current cpu\n"); | ||
| test_cpu_pointer(); | ||
| if (rseq_unregister_current_thread()) { | ||
| fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", | ||
| errno, strerror(errno)); | ||
| goto init_thread_error; | ||
| } | ||
| return 0; | ||
|
|
||
| init_thread_error: | ||
| return -1; | ||
| } |
| @@ -0,0 +1,65 @@ | ||
| /* SPDX-License-Identifier: LGPL-2.1 OR MIT */ | ||
| /* | ||
| * rseq-skip.h | ||
| * | ||
| * (C) Copyright 2017-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> | ||
| */ | ||
|
|
||
| static inline __attribute__((always_inline)) | ||
| int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) | ||
| { | ||
| return -1; | ||
| } | ||
|
|
||
| static inline __attribute__((always_inline)) | ||
| int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, | ||
| off_t voffp, intptr_t *load, int cpu) | ||
| { | ||
| return -1; | ||
| } | ||
|
|
||
| static inline __attribute__((always_inline)) | ||
| int rseq_addv(intptr_t *v, intptr_t count, int cpu) | ||
| { | ||
| return -1; | ||
| } | ||
|
|
||
| static inline __attribute__((always_inline)) | ||
| int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, | ||
| intptr_t *v2, intptr_t newv2, | ||
| intptr_t newv, int cpu) | ||
| { | ||
| return -1; | ||
| } | ||
|
|
||
| static inline __attribute__((always_inline)) | ||
| int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, | ||
| intptr_t *v2, intptr_t newv2, | ||
| intptr_t newv, int cpu) | ||
| { | ||
| return -1; | ||
| } | ||
|
|
||
| static inline __attribute__((always_inline)) | ||
| int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, | ||
| intptr_t *v2, intptr_t expect2, | ||
| intptr_t newv, int cpu) | ||
| { | ||
| return -1; | ||
| } | ||
|
|
||
| static inline __attribute__((always_inline)) | ||
| int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, | ||
| void *dst, void *src, size_t len, | ||
| intptr_t newv, int cpu) | ||
| { | ||
| return -1; | ||
| } | ||
|
|
||
| static inline __attribute__((always_inline)) | ||
| int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, | ||
| void *dst, void *src, size_t len, | ||
| intptr_t newv, int cpu) | ||
| { | ||
| return -1; | ||
| } |
| @@ -0,0 +1,117 @@ | ||
| // SPDX-License-Identifier: LGPL-2.1 | ||
| /* | ||
| * rseq.c | ||
| * | ||
| * Copyright (C) 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> | ||
| * | ||
| * This library is free software; you can redistribute it and/or | ||
| * modify it under the terms of the GNU Lesser General Public | ||
| * License as published by the Free Software Foundation; only | ||
| * version 2.1 of the License. | ||
| * | ||
| * This library is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| * Lesser General Public License for more details. | ||
| */ | ||
|
|
||
| #define _GNU_SOURCE | ||
| #include <errno.h> | ||
| #include <sched.h> | ||
| #include <stdio.h> | ||
| #include <stdlib.h> | ||
| #include <string.h> | ||
| #include <unistd.h> | ||
| #include <syscall.h> | ||
| #include <assert.h> | ||
| #include <signal.h> | ||
|
|
||
| #include "rseq.h" | ||
|
|
||
| #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) | ||
|
|
||
| __attribute__((tls_model("initial-exec"))) __thread | ||
| volatile struct rseq __rseq_abi = { | ||
| .cpu_id = RSEQ_CPU_ID_UNINITIALIZED, | ||
| }; | ||
|
|
||
| static __attribute__((tls_model("initial-exec"))) __thread | ||
| volatile int refcount; | ||
|
|
||
| static void signal_off_save(sigset_t *oldset) | ||
| { | ||
| sigset_t set; | ||
| int ret; | ||
|
|
||
| sigfillset(&set); | ||
| ret = pthread_sigmask(SIG_BLOCK, &set, oldset); | ||
| if (ret) | ||
| abort(); | ||
| } | ||
|
|
||
| static void signal_restore(sigset_t oldset) | ||
| { | ||
| int ret; | ||
|
|
||
| ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL); | ||
| if (ret) | ||
| abort(); | ||
| } | ||
|
|
||
| static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, | ||
| int flags, uint32_t sig) | ||
| { | ||
| return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); | ||
| } | ||
|
|
||
| int rseq_register_current_thread(void) | ||
| { | ||
| int rc, ret = 0; | ||
| sigset_t oldset; | ||
|
|
||
| signal_off_save(&oldset); | ||
| if (refcount++) | ||
| goto end; | ||
| rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG); | ||
| if (!rc) { | ||
| assert(rseq_current_cpu_raw() >= 0); | ||
| goto end; | ||
| } | ||
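| /* | ||
| * Registration failed. Unless the kernel reported EBUSY (rseq is | ||
| * already registered for this thread), flag cpu_id with | ||
| * RSEQ_CPU_ID_REGISTRATION_FAILED (-2) so rseq_current_cpu_raw() | ||
| * reports the failure. | ||
| */ | ||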
| if (errno != EBUSY) | ||
| __rseq_abi.cpu_id = -2; | ||
| ret = -1; | ||
| refcount--; | ||
| end: | ||
| signal_restore(oldset); | ||
| return ret; | ||
| } | ||
|
|
||
| int rseq_unregister_current_thread(void) | ||
| { | ||
| int rc, ret = 0; | ||
| sigset_t oldset; | ||
|
|
||
| signal_off_save(&oldset); | ||
| if (--refcount) | ||
| goto end; | ||
| rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), | ||
| RSEQ_FLAG_UNREGISTER, RSEQ_SIG); | ||
| if (!rc) | ||
| goto end; | ||
| ret = -1; | ||
| end: | ||
| signal_restore(oldset); | ||
| return ret; | ||
| } | ||
|
|
||
| int32_t rseq_fallback_current_cpu(void) | ||
| { | ||
| int32_t cpu; | ||
|
|
||
| cpu = sched_getcpu(); | ||
| if (cpu < 0) { | ||
| perror("sched_getcpu()"); | ||
| abort(); | ||
| } | ||
| return cpu; | ||
| } |
| @@ -0,0 +1,147 @@ | ||
| /* SPDX-License-Identifier: LGPL-2.1 OR MIT */ | ||
| /* | ||
| * rseq.h | ||
| * | ||
| * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> | ||
| */ | ||
|
|
||
| #ifndef RSEQ_H | ||
| #define RSEQ_H | ||
|
|
||
| #include <stdint.h> | ||
| #include <stdbool.h> | ||
| #include <pthread.h> | ||
| #include <signal.h> | ||
| #include <sched.h> | ||
| #include <errno.h> | ||
| #include <stdio.h> | ||
| #include <stdlib.h> | ||
| #include <sched.h> | ||
| #include <linux/rseq.h> | ||
|
|
||
| /* | ||
| * Empty code injection macros, override when testing. | ||
| * It is important to consider that the ASM injection macros need to be | ||
| * fully reentrant (e.g. do not modify the stack). | ||
| */ | ||
| #ifndef RSEQ_INJECT_ASM | ||
| #define RSEQ_INJECT_ASM(n) | ||
| #endif | ||
|
|
||
| #ifndef RSEQ_INJECT_C | ||
| #define RSEQ_INJECT_C(n) | ||
| #endif | ||
|
|
||
| #ifndef RSEQ_INJECT_INPUT | ||
| #define RSEQ_INJECT_INPUT | ||
| #endif | ||
|
|
||
| #ifndef RSEQ_INJECT_CLOBBER | ||
| #define RSEQ_INJECT_CLOBBER | ||
| #endif | ||
|
|
||
| #ifndef RSEQ_INJECT_FAILED | ||
| #define RSEQ_INJECT_FAILED | ||
| #endif | ||
|
|
||
| extern __thread volatile struct rseq __rseq_abi; | ||
|
|
||
| #define rseq_likely(x) __builtin_expect(!!(x), 1) | ||
| #define rseq_unlikely(x) __builtin_expect(!!(x), 0) | ||
| #define rseq_barrier() __asm__ __volatile__("" : : : "memory") | ||
|
|
||
| #define RSEQ_ACCESS_ONCE(x) (*(__volatile__ __typeof__(x) *)&(x)) | ||
| #define RSEQ_WRITE_ONCE(x, v) __extension__ ({ RSEQ_ACCESS_ONCE(x) = (v); }) | ||
| #define RSEQ_READ_ONCE(x) RSEQ_ACCESS_ONCE(x) | ||
|
|
||
| #define __rseq_str_1(x) #x | ||
| #define __rseq_str(x) __rseq_str_1(x) | ||
|
|
||
| #define rseq_log(fmt, args...) \ | ||
| fprintf(stderr, fmt "(in %s() at " __FILE__ ":" __rseq_str(__LINE__)")\n", \ | ||
| ## args, __func__) | ||
|
|
||
| #define rseq_bug(fmt, args...) \ | ||
| do { \ | ||
| rseq_log(fmt, ##args); \ | ||
| abort(); \ | ||
| } while (0) | ||
|
|
||
| #if defined(__x86_64__) || defined(__i386__) | ||
| #include <rseq-x86.h> | ||
| #elif defined(__ARMEL__) | ||
| #include <rseq-arm.h> | ||
| #elif defined(__PPC__) | ||
| #include <rseq-ppc.h> | ||
| #else | ||
| #error unsupported target | ||
| #endif | ||
|
|
||
| /* | ||
| * Register rseq for the current thread. This needs to be called once | ||
| * by any thread which uses restartable sequences, before it starts | ||
| * using them, to ensure restartable sequences succeed. A restartable | ||
| * sequence executed from a non-registered thread will always fail. | ||
| */ | ||
| int rseq_register_current_thread(void); | ||
|
|
||
| /* | ||
| * Unregister rseq for current thread. | ||
| */ | ||
| int rseq_unregister_current_thread(void); | ||
|
|
||
| /* | ||
| * Restartable sequence fallback for reading the current CPU number. | ||
| */ | ||
| int32_t rseq_fallback_current_cpu(void); | ||
|
|
||
| /* | ||
| * Values returned can be either the current CPU number, -1 (rseq is | ||
| * uninitialized), or -2 (rseq initialization has failed). | ||
| */ | ||
| static inline int32_t rseq_current_cpu_raw(void) | ||
| { | ||
| return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id); | ||
| } | ||
|
|
||
| /* | ||
| * Returns a possible CPU number, which is typically the current CPU. | ||
| * The returned CPU number can be used to prepare for an rseq critical | ||
| * section, which will confirm whether the cpu number is indeed the | ||
| * current one, and whether rseq is initialized. | ||
| * | ||
| * The CPU number returned by rseq_cpu_start should always be validated | ||
| * by passing it to a rseq asm sequence, or by comparing it to the | ||
| * return value of rseq_current_cpu_raw() if the rseq asm sequence | ||
| * does not need to be invoked. | ||
| */ | ||
| static inline uint32_t rseq_cpu_start(void) | ||
| { | ||
| return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start); | ||
| } | ||
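A hedged sketch of the fallback validation described above, for callers that do not go through an rseq asm sequence: take the hint from rseq_cpu_start() and confirm it against rseq_current_cpu_raw(). The helper name is illustrative.

```c
static inline int rseq_cpu_hint_validated(void)
{
	uint32_t cpu = rseq_cpu_start();

	/* Not current (migrated since), unregistered (-1) or failed (-2). */
	if (rseq_current_cpu_raw() != (int32_t)cpu)
		return -1;
	return (int)cpu;
}
```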
|
|
||
| static inline uint32_t rseq_current_cpu(void) | ||
| { | ||
| int32_t cpu; | ||
|
|
||
| cpu = rseq_current_cpu_raw(); | ||
| if (rseq_unlikely(cpu < 0)) | ||
| cpu = rseq_fallback_current_cpu(); | ||
| return cpu; | ||
| } | ||
|
|
||
| /* | ||
| * rseq_prepare_unload() should be invoked at least once by each thread | ||
| * executing rseq critical sections, between its last critical section and | ||
| * the unload of the library defining the rseq critical section descriptors | ||
| * (struct rseq_cs). This also applies to rseq critical sections in code | ||
| * generated by a JIT: each such thread should invoke rseq_prepare_unload() | ||
| * before the memory holding the struct rseq_cs is reclaimed. | ||
| */ | ||
| static inline void rseq_prepare_unload(void) | ||
| { | ||
| __rseq_abi.rseq_cs = 0; | ||
| } | ||
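A hedged sketch of the unload protocol described above, assuming a hypothetical shared object containing rseq critical sections that is torn down with dlclose(); every thread that executed critical sections from that object would need to do the same before the unload.

```c
#include <dlfcn.h>

static void unload_rseq_plugin(void *handle)
{
	/* No further rseq critical sections from this object after here. */
	rseq_prepare_unload();	/* clears __rseq_abi.rseq_cs */
	dlclose(handle);	/* struct rseq_cs descriptors may now be unmapped */
}
```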
|
|
||
| #endif /* RSEQ_H */ |
| @@ -0,0 +1,121 @@ | ||
| #!/bin/bash | ||
| # SPDX-License-Identifier: GPL-2.0+ OR MIT | ||
|
|
||
| EXTRA_ARGS=${@} | ||
|
|
||
| OLDIFS="$IFS" | ||
| IFS=$'\n' | ||
| TEST_LIST=( | ||
| "-T s" | ||
| "-T l" | ||
| "-T b" | ||
| "-T b -M" | ||
| "-T m" | ||
| "-T m -M" | ||
| "-T i" | ||
| ) | ||
|
|
||
| TEST_NAME=( | ||
| "spinlock" | ||
| "list" | ||
| "buffer" | ||
| "buffer with barrier" | ||
| "memcpy" | ||
| "memcpy with barrier" | ||
| "increment" | ||
| ) | ||
| IFS="$OLDIFS" | ||
|
|
||
| REPS=1000 | ||
| SLOW_REPS=100 | ||
|
|
||
| function do_tests() | ||
| { | ||
| local i=0 | ||
| while [ "$i" -lt "${#TEST_LIST[@]}" ]; do | ||
| echo "Running test ${TEST_NAME[$i]}" | ||
| ./param_test ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1 | ||
| echo "Running compare-twice test ${TEST_NAME[$i]}" | ||
| ./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1 | ||
| let "i++" | ||
| done | ||
| } | ||
|
|
||
| echo "Default parameters" | ||
| do_tests | ||
|
|
||
| echo "Loop injection: 10000 loops" | ||
|
|
||
| OLDIFS="$IFS" | ||
| IFS=$'\n' | ||
| INJECT_LIST=( | ||
| "1" | ||
| "2" | ||
| "3" | ||
| "4" | ||
| "5" | ||
| "6" | ||
| "7" | ||
| "8" | ||
| "9" | ||
| ) | ||
| IFS="$OLDIFS" | ||
|
|
||
| NR_LOOPS=10000 | ||
|
|
||
| i=0 | ||
| while [ "$i" -lt "${#INJECT_LIST[@]}" ]; do | ||
| echo "Injecting at <${INJECT_LIST[$i]}>" | ||
| do_tests -${INJECT_LIST[i]} ${NR_LOOPS} | ||
| let "i++" | ||
| done | ||
| NR_LOOPS= | ||
|
|
||
| function inject_blocking() | ||
| { | ||
| OLDIFS="$IFS" | ||
| IFS=$'\n' | ||
| INJECT_LIST=( | ||
| "7" | ||
| "8" | ||
| "9" | ||
| ) | ||
| IFS="$OLDIFS" | ||
|
|
||
| NR_LOOPS=-1 | ||
|
|
||
| i=0 | ||
| while [ "$i" -lt "${#INJECT_LIST[@]}" ]; do | ||
| echo "Injecting at <${INJECT_LIST[$i]}>" | ||
| do_tests -${INJECT_LIST[i]} -1 ${@} | ||
| let "i++" | ||
| done | ||
| NR_LOOPS= | ||
| } | ||
|
|
||
| echo "Yield injection (25%)" | ||
| inject_blocking -m 4 -y | ||
|
|
||
| echo "Yield injection (50%)" | ||
| inject_blocking -m 2 -y | ||
|
|
||
| echo "Yield injection (100%)" | ||
| inject_blocking -m 1 -y | ||
|
|
||
| echo "Kill injection (25%)" | ||
| inject_blocking -m 4 -k | ||
|
|
||
| echo "Kill injection (50%)" | ||
| inject_blocking -m 2 -k | ||
|
|
||
| echo "Kill injection (100%)" | ||
| inject_blocking -m 1 -k | ||
|
|
||
| echo "Sleep injection (1ms, 25%)" | ||
| inject_blocking -m 4 -s 1 | ||
|
|
||
| echo "Sleep injection (1ms, 50%)" | ||
| inject_blocking -m 2 -s 1 | ||
|
|
||
| echo "Sleep injection (1ms, 100%)" | ||
| inject_blocking -m 1 -s 1 |