@@ -27,6 +27,7 @@
#include <linux/signal_types.h>
#include <linux/mm_types_task.h>
#include <linux/task_io_accounting.h>
#include <linux/rseq.h>

/* task_struct member predeclarations (sorted alphabetically): */
struct audit_context;
@@ -1047,6 +1048,17 @@ struct task_struct {
unsigned long numa_pages_migrated;
#endif /* CONFIG_NUMA_BALANCING */

#ifdef CONFIG_RSEQ
struct rseq __user *rseq;
u32 rseq_len;
u32 rseq_sig;
/*
* RmW on rseq_event_mask must be performed atomically
* with respect to preemption.
*/
unsigned long rseq_event_mask;
#endif

struct tlbflush_unmap_batch tlb_ubc;

struct rcu_head rcu;
@@ -1757,4 +1769,126 @@ extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
#define TASK_SIZE_OF(tsk) TASK_SIZE
#endif

#ifdef CONFIG_RSEQ

/*
* Map the event mask on the user-space ABI enum rseq_cs_flags
* for direct mask checks.
*/
enum rseq_event_mask_bits {
RSEQ_EVENT_PREEMPT_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT,
RSEQ_EVENT_SIGNAL_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT,
RSEQ_EVENT_MIGRATE_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT,
};

enum rseq_event_mask {
RSEQ_EVENT_PREEMPT = (1U << RSEQ_EVENT_PREEMPT_BIT),
RSEQ_EVENT_SIGNAL = (1U << RSEQ_EVENT_SIGNAL_BIT),
RSEQ_EVENT_MIGRATE = (1U << RSEQ_EVENT_MIGRATE_BIT),
};

static inline void rseq_set_notify_resume(struct task_struct *t)
{
if (t->rseq)
set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
}

void __rseq_handle_notify_resume(struct pt_regs *regs);

static inline void rseq_handle_notify_resume(struct pt_regs *regs)
{
if (current->rseq)
__rseq_handle_notify_resume(regs);
}

static inline void rseq_signal_deliver(struct pt_regs *regs)
{
preempt_disable();
__set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
preempt_enable();
rseq_handle_notify_resume(regs);
}

/* rseq_preempt() requires preemption to be disabled. */
static inline void rseq_preempt(struct task_struct *t)
{
__set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask);
rseq_set_notify_resume(t);
}

/* rseq_migrate() requires preemption to be disabled. */
static inline void rseq_migrate(struct task_struct *t)
{
__set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask);
rseq_set_notify_resume(t);
}

/*
 * If the parent process has a registered restartable sequences area,
 * the child inherits it. This only applies when forking a process, not
 * a thread. If the parent forks in the middle of a restartable
 * sequence, set the resume notifier to force the child to retry.
*/
static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
{
if (clone_flags & CLONE_THREAD) {
t->rseq = NULL;
t->rseq_len = 0;
t->rseq_sig = 0;
t->rseq_event_mask = 0;
} else {
t->rseq = current->rseq;
t->rseq_len = current->rseq_len;
t->rseq_sig = current->rseq_sig;
t->rseq_event_mask = current->rseq_event_mask;
rseq_preempt(t);
}
}

static inline void rseq_execve(struct task_struct *t)
{
t->rseq = NULL;
t->rseq_len = 0;
t->rseq_sig = 0;
t->rseq_event_mask = 0;
}

#else

static inline void rseq_set_notify_resume(struct task_struct *t)
{
}
static inline void rseq_handle_notify_resume(struct pt_regs *regs)
{
}
static inline void rseq_signal_deliver(struct pt_regs *regs)
{
}
static inline void rseq_preempt(struct task_struct *t)
{
}
static inline void rseq_migrate(struct task_struct *t)
{
}
static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
{
}
static inline void rseq_execve(struct task_struct *t)
{
}

#endif

#ifdef CONFIG_DEBUG_RSEQ

void rseq_syscall(struct pt_regs *regs);

#else

static inline void rseq_syscall(struct pt_regs *regs)
{
}

#endif

#endif
@@ -66,6 +66,7 @@ struct old_linux_dirent;
struct perf_event_attr;
struct file_handle;
struct sigaltstack;
struct rseq;
union bpf_attr;

#include <linux/types.h>
@@ -897,7 +898,8 @@ asmlinkage long sys_pkey_alloc(unsigned long flags, unsigned long init_val);
asmlinkage long sys_pkey_free(int pkey);
asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags,
unsigned mask, struct statx __user *buffer);

asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len,
int flags, uint32_t sig);

/*
* Architecture-specific system calls
@@ -0,0 +1,57 @@
/* SPDX-License-Identifier: GPL-2.0+ */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM rseq

#if !defined(_TRACE_RSEQ_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_RSEQ_H

#include <linux/tracepoint.h>
#include <linux/types.h>

TRACE_EVENT(rseq_update,

TP_PROTO(struct task_struct *t),

TP_ARGS(t),

TP_STRUCT__entry(
__field(s32, cpu_id)
),

TP_fast_assign(
__entry->cpu_id = raw_smp_processor_id();
),

TP_printk("cpu_id=%d", __entry->cpu_id)
);

TRACE_EVENT(rseq_ip_fixup,

TP_PROTO(unsigned long regs_ip, unsigned long start_ip,
unsigned long post_commit_offset, unsigned long abort_ip),

TP_ARGS(regs_ip, start_ip, post_commit_offset, abort_ip),

TP_STRUCT__entry(
__field(unsigned long, regs_ip)
__field(unsigned long, start_ip)
__field(unsigned long, post_commit_offset)
__field(unsigned long, abort_ip)
),

TP_fast_assign(
__entry->regs_ip = regs_ip;
__entry->start_ip = start_ip;
__entry->post_commit_offset = post_commit_offset;
__entry->abort_ip = abort_ip;
),

TP_printk("regs_ip=0x%lx start_ip=0x%lx post_commit_offset=%lu abort_ip=0x%lx",
__entry->regs_ip, __entry->start_ip,
__entry->post_commit_offset, __entry->abort_ip)
);

#endif /* _TRACE_RSEQ_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
@@ -0,0 +1,133 @@
/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
#ifndef _UAPI_LINUX_RSEQ_H
#define _UAPI_LINUX_RSEQ_H

/*
* linux/rseq.h
*
* Restartable sequences system call API
*
* Copyright (c) 2015-2018 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
*/

#ifdef __KERNEL__
# include <linux/types.h>
#else
# include <stdint.h>
#endif

#include <linux/types_32_64.h>

enum rseq_cpu_id_state {
RSEQ_CPU_ID_UNINITIALIZED = -1,
RSEQ_CPU_ID_REGISTRATION_FAILED = -2,
};

enum rseq_flags {
RSEQ_FLAG_UNREGISTER = (1 << 0),
};

enum rseq_cs_flags_bit {
RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0,
RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1,
RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2,
};

enum rseq_cs_flags {
RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT =
(1U << RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT),
RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL =
(1U << RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT),
RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE =
(1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT),
};

/*
* struct rseq_cs is aligned on 4 * 8 bytes to ensure it is always
* contained within a single cache-line. It is usually declared as
* link-time constant data.
*/
struct rseq_cs {
/* Version of this structure. */
__u32 version;
/* enum rseq_cs_flags */
__u32 flags;
LINUX_FIELD_u32_u64(start_ip);
/* Offset from start_ip. */
LINUX_FIELD_u32_u64(post_commit_offset);
LINUX_FIELD_u32_u64(abort_ip);
} __attribute__((aligned(4 * sizeof(__u64))));

/*
* struct rseq is aligned on 4 * 8 bytes to ensure it is always
* contained within a single cache-line.
*
* A single struct rseq per thread is allowed.
*/
struct rseq {
/*
* Restartable sequences cpu_id_start field. Updated by the
* kernel, and read by user-space with single-copy atomicity
* semantics. Aligned on 32-bit. Always contains a value in the
* range of possible CPUs, although the value may not be the
* actual current CPU (e.g. if rseq is not initialized). This
* CPU number value should always be compared against the value
* of the cpu_id field before performing a rseq commit or
* returning a value read from a data structure indexed using
* the cpu_id_start value.
*/
__u32 cpu_id_start;
/*
* Restartable sequences cpu_id field. Updated by the kernel,
* and read by user-space with single-copy atomicity semantics.
* Aligned on 32-bit. Values RSEQ_CPU_ID_UNINITIALIZED and
* RSEQ_CPU_ID_REGISTRATION_FAILED have a special semantic: the
 * former means "rseq uninitialized", and the latter means "rseq
* initialization failed". This value is meant to be read within
* rseq critical sections and compared with the cpu_id_start
* value previously read, before performing the commit instruction,
* or read and compared with the cpu_id_start value before returning
* a value loaded from a data structure indexed using the
* cpu_id_start value.
*/
__u32 cpu_id;
/*
* Restartable sequences rseq_cs field.
*
* Contains NULL when no critical section is active for the current
* thread, or holds a pointer to the currently active struct rseq_cs.
*
* Updated by user-space, which sets the address of the currently
 * active rseq_cs at the beginning of the assembly instruction sequence
* block, and set to NULL by the kernel when it restarts an assembly
* instruction sequence block, as well as when the kernel detects that
* it is preempting or delivering a signal outside of the range
* targeted by the rseq_cs. Also needs to be set to NULL by user-space
* before reclaiming memory that contains the targeted struct rseq_cs.
*
* Read and set by the kernel with single-copy atomicity semantics.
* Set by user-space with single-copy atomicity semantics. Aligned
* on 64-bit.
*/
LINUX_FIELD_u32_u64(rseq_cs);
/*
	 * Restartable sequences flags field (enum rseq_cs_flags).
	 *
	 * Fallback fast-track flags for single-stepping.
	 * Set by user-space if lack of progress is detected.
	 * Cleared by user-space once the critical section has completed.
	 * Read by the kernel.
	 *
	 * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT
	 *     Inhibit instruction sequence block restart on preemption
	 *     for this thread.
	 * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL
	 *     Inhibit instruction sequence block restart on signal
	 *     delivery for this thread.
	 * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE
	 *     Inhibit instruction sequence block restart on migration
	 *     for this thread.
*/
__u32 flags;
} __attribute__((aligned(4 * sizeof(__u64))));

#endif /* _UAPI_LINUX_RSEQ_H */
@@ -0,0 +1,50 @@
/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
#ifndef _UAPI_LINUX_TYPES_32_64_H
#define _UAPI_LINUX_TYPES_32_64_H

/*
* linux/types_32_64.h
*
* Integer type declaration for pointers across 32-bit and 64-bit systems.
*
* Copyright (c) 2015-2018 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
*/

#ifdef __KERNEL__
# include <linux/types.h>
#else
# include <stdint.h>
#endif

#include <asm/byteorder.h>

#ifdef __BYTE_ORDER
# if (__BYTE_ORDER == __BIG_ENDIAN)
# define LINUX_BYTE_ORDER_BIG_ENDIAN
# else
# define LINUX_BYTE_ORDER_LITTLE_ENDIAN
# endif
#else
# ifdef __BIG_ENDIAN
# define LINUX_BYTE_ORDER_BIG_ENDIAN
# else
# define LINUX_BYTE_ORDER_LITTLE_ENDIAN
# endif
#endif

#ifdef __LP64__
# define LINUX_FIELD_u32_u64(field) __u64 field
# define LINUX_FIELD_u32_u64_INIT_ONSTACK(field, v) field = (intptr_t)v
#else
# ifdef LINUX_BYTE_ORDER_BIG_ENDIAN
# define LINUX_FIELD_u32_u64(field) __u32 field ## _padding, field
# define LINUX_FIELD_u32_u64_INIT_ONSTACK(field, v) \
field ## _padding = 0, field = (intptr_t)v
# else
# define LINUX_FIELD_u32_u64(field) __u32 field, field ## _padding
# define LINUX_FIELD_u32_u64_INIT_ONSTACK(field, v) \
field = (intptr_t)v, field ## _padding = 0
# endif
#endif
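
/*
 * Illustrative usage sketch (not part of this header): LINUX_FIELD_u32_u64()
 * declares a 64-bit field on 64-bit builds, and a 32-bit field plus explicit
 * padding on 32-bit builds, so the containing structure keeps the same layout
 * everywhere and the 32-bit value lines up with the low-order word of the
 * 64-bit field. LINUX_FIELD_u32_u64_INIT_ONSTACK() assigns such a field (and
 * zeroes its padding, when present). For a hypothetical on-stack struct
 * rseq_cs "cs" and application-provided addresses:
 *
 *	cs.version = 0;
 *	cs.flags = 0;
 *	LINUX_FIELD_u32_u64_INIT_ONSTACK(cs.start_ip, start_addr);
 *	LINUX_FIELD_u32_u64_INIT_ONSTACK(cs.post_commit_offset, offset);
 *	LINUX_FIELD_u32_u64_INIT_ONSTACK(cs.abort_ip, abort_addr);
 */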

#endif /* _UAPI_LINUX_TYPES_32_64_H */
@@ -1428,6 +1428,29 @@ config ARCH_HAS_MEMBARRIER_CALLBACKS
config ARCH_HAS_MEMBARRIER_SYNC_CORE
bool

config RSEQ
bool "Enable rseq() system call" if EXPERT
default y
depends on HAVE_RSEQ
select MEMBARRIER
help
Enable the restartable sequences system call. It provides a
user-space cache for the current CPU number value, which
speeds up getting the current CPU number from user-space,
as well as an ABI to speed up user-space operations on
per-CPU data.

If unsure, say Y.

config DEBUG_RSEQ
default n
	bool "Enable debugging of rseq() system call" if EXPERT
depends on RSEQ && DEBUG_KERNEL
help
Enable extra debugging checks for the rseq system call.

If unsure, say N.

config EMBEDDED
bool "Embedded system"
option allnoconfig_y
@@ -114,6 +114,7 @@ obj-$(CONFIG_TORTURE_TEST) += torture.o

obj-$(CONFIG_HAS_IOMEM) += iomem.o
obj-$(CONFIG_ZONE_DEVICE) += memremap.o
obj-$(CONFIG_RSEQ) += rseq.o

$(obj)/configs.o: $(obj)/config_data.h

@@ -1900,6 +1900,8 @@ static __latent_entropy struct task_struct *copy_process(
*/
copy_seccomp(p);

rseq_fork(p, clone_flags);

/*
* Process group and session signals need to be delivered to just the
* parent before the fork or both the parent and the child after the
@@ -0,0 +1,357 @@
// SPDX-License-Identifier: GPL-2.0+
/*
* Restartable sequences system call
*
* Copyright (C) 2015, Google, Inc.,
* Paul Turner <pjt@google.com> and Andrew Hunter <ahh@google.com>
* Copyright (C) 2015-2018, EfficiOS Inc.,
* Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
*/

#include <linux/sched.h>
#include <linux/uaccess.h>
#include <linux/syscalls.h>
#include <linux/rseq.h>
#include <linux/types.h>
#include <asm/ptrace.h>

#define CREATE_TRACE_POINTS
#include <trace/events/rseq.h>

#define RSEQ_CS_PREEMPT_MIGRATE_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE | \
RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT)

/*
*
* Restartable sequences are a lightweight interface that allows
* user-level code to be executed atomically relative to scheduler
* preemption and signal delivery. Typically used for implementing
* per-cpu operations.
*
* It allows user-space to perform update operations on per-cpu data
* without requiring heavy-weight atomic operations.
*
* Detailed algorithm of rseq user-space assembly sequences:
*
* init(rseq_cs)
* cpu = TLS->rseq::cpu_id_start
* [1] TLS->rseq::rseq_cs = rseq_cs
* [start_ip] ----------------------------
* [2] if (cpu != TLS->rseq::cpu_id)
* goto abort_ip;
* [3] <last_instruction_in_cs>
* [post_commit_ip] ----------------------------
*
* The address of jump target abort_ip must be outside the critical
* region, i.e.:
*
* [abort_ip] < [start_ip] || [abort_ip] >= [post_commit_ip]
*
* Steps [2]-[3] (inclusive) need to be a sequence of instructions in
* userspace that can handle being interrupted between any of those
* instructions, and then resumed to the abort_ip.
*
* 1. Userspace stores the address of the struct rseq_cs assembly
* block descriptor into the rseq_cs field of the registered
* struct rseq TLS area. This update is performed through a single
* store within the inline assembly instruction sequence.
* [start_ip]
*
 * 2. Userspace tests to check whether the current cpu_id field matches
* the cpu number loaded before start_ip, branching to abort_ip
* in case of a mismatch.
*
* If the sequence is preempted or interrupted by a signal
* at or after start_ip and before post_commit_ip, then the kernel
* clears TLS->__rseq_abi::rseq_cs, and sets the user-space return
* ip to abort_ip before returning to user-space, so the preempted
* execution resumes at abort_ip.
*
* 3. Userspace critical section final instruction before
* post_commit_ip is the commit. The critical section is
* self-terminating.
* [post_commit_ip]
*
* 4. <success>
*
 * On failure at [2], or if interrupted by preemption or signal delivery
* between [1] and [3]:
*
* [abort_ip]
* F1. <failure>
*/
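
/*
 * Illustrative user-space sketch (mirroring the C helpers used by
 * tools/testing/selftests/rseq, e.g. rseq_cpu_start() and
 * rseq_cmpeqv_storev(), which wrap the assembly sequence above): a
 * typical per-cpu list push retries the whole sequence until the
 * commit succeeds:
 *
 *	do {
 *		cpu = rseq_cpu_start();
 *		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
 *		newval = (intptr_t)node;
 *		node->next = (struct percpu_list_node *)expect;
 *		ret = rseq_cmpeqv_storev((intptr_t *)&list->c[cpu].head,
 *					 expect, newval, cpu);
 *	} while (ret);	(retry on cpu mismatch or rseq abort)
 */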

static int rseq_update_cpu_id(struct task_struct *t)
{
u32 cpu_id = raw_smp_processor_id();

if (__put_user(cpu_id, &t->rseq->cpu_id_start))
return -EFAULT;
if (__put_user(cpu_id, &t->rseq->cpu_id))
return -EFAULT;
trace_rseq_update(t);
return 0;
}

static int rseq_reset_rseq_cpu_id(struct task_struct *t)
{
u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED;

/*
* Reset cpu_id_start to its initial state (0).
*/
if (__put_user(cpu_id_start, &t->rseq->cpu_id_start))
return -EFAULT;
/*
* Reset cpu_id to RSEQ_CPU_ID_UNINITIALIZED, so any user coming
* in after unregistration can figure out that rseq needs to be
* registered again.
*/
if (__put_user(cpu_id, &t->rseq->cpu_id))
return -EFAULT;
return 0;
}

static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
{
struct rseq_cs __user *urseq_cs;
unsigned long ptr;
u32 __user *usig;
u32 sig;
int ret;

ret = __get_user(ptr, &t->rseq->rseq_cs);
if (ret)
return ret;
if (!ptr) {
memset(rseq_cs, 0, sizeof(*rseq_cs));
return 0;
}
urseq_cs = (struct rseq_cs __user *)ptr;
if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs)))
return -EFAULT;
if (rseq_cs->version > 0)
return -EINVAL;

/* Ensure that abort_ip is not in the critical section. */
if (rseq_cs->abort_ip - rseq_cs->start_ip < rseq_cs->post_commit_offset)
return -EINVAL;

usig = (u32 __user *)(rseq_cs->abort_ip - sizeof(u32));
ret = get_user(sig, usig);
if (ret)
return ret;

if (current->rseq_sig != sig) {
printk_ratelimited(KERN_WARNING
"Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n",
sig, current->rseq_sig, current->pid, usig);
return -EPERM;
}
return 0;
}

static int rseq_need_restart(struct task_struct *t, u32 cs_flags)
{
u32 flags, event_mask;
int ret;

/* Get thread flags. */
ret = __get_user(flags, &t->rseq->flags);
if (ret)
return ret;

/* Take critical section flags into account. */
flags |= cs_flags;

/*
* Restart on signal can only be inhibited when restart on
* preempt and restart on migrate are inhibited too. Otherwise,
* a preempted signal handler could fail to restart the prior
* execution context on sigreturn.
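	 *
	 * For example, an effective flags value of
	 * RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL |
	 * RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT, without
	 * RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE, is rejected with -EINVAL.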
*/
if (unlikely((flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL) &&
(flags & RSEQ_CS_PREEMPT_MIGRATE_FLAGS) !=
RSEQ_CS_PREEMPT_MIGRATE_FLAGS))
return -EINVAL;

/*
* Load and clear event mask atomically with respect to
* scheduler preemption.
*/
preempt_disable();
event_mask = t->rseq_event_mask;
t->rseq_event_mask = 0;
preempt_enable();

return !!(event_mask & ~flags);
}

static int clear_rseq_cs(struct task_struct *t)
{
/*
	 * The rseq_cs field is set to NULL when preemption or signal
	 * delivery occurs on top of the rseq assembly block, as well as
	 * on top of code outside of the rseq assembly block. This
	 * performs a lazy clear of the rseq_cs field.
*
* Set rseq_cs to NULL with single-copy atomicity.
*/
return __put_user(0UL, &t->rseq->rseq_cs);
}

/*
* Unsigned comparison will be true when ip >= start_ip, and when
* ip < start_ip + post_commit_offset.
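 *
 * For example (assuming 64-bit unsigned long), with start_ip = 0x1000
 * and post_commit_offset = 0x20: ip = 0x0fff gives 0x0fff - 0x1000 =
 * ULONG_MAX, which is >= 0x20 (outside the critical section), whereas
 * ip = 0x1010 gives 0x10 < 0x20 (inside the critical section).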
*/
static bool in_rseq_cs(unsigned long ip, struct rseq_cs *rseq_cs)
{
return ip - rseq_cs->start_ip < rseq_cs->post_commit_offset;
}

static int rseq_ip_fixup(struct pt_regs *regs)
{
unsigned long ip = instruction_pointer(regs);
struct task_struct *t = current;
struct rseq_cs rseq_cs;
int ret;

ret = rseq_get_rseq_cs(t, &rseq_cs);
if (ret)
return ret;

/*
* Handle potentially not being within a critical section.
* If not nested over a rseq critical section, restart is useless.
* Clear the rseq_cs pointer and return.
*/
if (!in_rseq_cs(ip, &rseq_cs))
return clear_rseq_cs(t);
ret = rseq_need_restart(t, rseq_cs.flags);
if (ret <= 0)
return ret;
ret = clear_rseq_cs(t);
if (ret)
return ret;
trace_rseq_ip_fixup(ip, rseq_cs.start_ip, rseq_cs.post_commit_offset,
rseq_cs.abort_ip);
instruction_pointer_set(regs, (unsigned long)rseq_cs.abort_ip);
return 0;
}

/*
* This resume handler must always be executed between any of:
* - preemption,
* - signal delivery,
* and return to user-space.
*
* This is how we can ensure that the entire rseq critical section,
* consisting of both the C part and the assembly instruction sequence,
* will issue the commit instruction only if executed atomically with
* respect to other threads scheduled on the same CPU, and with respect
* to signal handlers.
*/
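
/*
 * Illustrative sketch (an assumption about architecture wiring, not part
 * of this file): the rseq_handle_notify_resume() wrapper is expected to
 * be invoked from the architecture's TIF_NOTIFY_RESUME handling on the
 * way back to user-space, roughly as follows:
 *
 *	if (thread_flags & _TIF_NOTIFY_RESUME) {
 *		clear_thread_flag(TIF_NOTIFY_RESUME);
 *		tracehook_notify_resume(regs);
 *		rseq_handle_notify_resume(regs);
 *	}
 */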
void __rseq_handle_notify_resume(struct pt_regs *regs)
{
struct task_struct *t = current;
int ret;

if (unlikely(t->flags & PF_EXITING))
return;
if (unlikely(!access_ok(VERIFY_WRITE, t->rseq, sizeof(*t->rseq))))
goto error;
ret = rseq_ip_fixup(regs);
if (unlikely(ret < 0))
goto error;
if (unlikely(rseq_update_cpu_id(t)))
goto error;
return;

error:
force_sig(SIGSEGV, t);
}

#ifdef CONFIG_DEBUG_RSEQ

/*
* Terminate the process if a syscall is issued within a restartable
* sequence.
*/
void rseq_syscall(struct pt_regs *regs)
{
unsigned long ip = instruction_pointer(regs);
struct task_struct *t = current;
struct rseq_cs rseq_cs;

if (!t->rseq)
return;
if (!access_ok(VERIFY_READ, t->rseq, sizeof(*t->rseq)) ||
rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
force_sig(SIGSEGV, t);
}

#endif

/*
* sys_rseq - setup restartable sequences for caller thread.
*/
SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
int, flags, u32, sig)
{
int ret;

if (flags & RSEQ_FLAG_UNREGISTER) {
/* Unregister rseq for current thread. */
if (current->rseq != rseq || !current->rseq)
return -EINVAL;
if (current->rseq_len != rseq_len)
return -EINVAL;
if (current->rseq_sig != sig)
return -EPERM;
ret = rseq_reset_rseq_cpu_id(current);
if (ret)
return ret;
current->rseq = NULL;
current->rseq_len = 0;
current->rseq_sig = 0;
return 0;
}

if (unlikely(flags))
return -EINVAL;

if (current->rseq) {
/*
* If rseq is already registered, check whether
* the provided address differs from the prior
* one.
*/
if (current->rseq != rseq || current->rseq_len != rseq_len)
return -EINVAL;
if (current->rseq_sig != sig)
return -EPERM;
/* Already registered. */
return -EBUSY;
}

/*
* If there was no rseq previously registered,
* ensure the provided rseq is properly aligned and valid.
*/
if (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) ||
rseq_len != sizeof(*rseq))
return -EINVAL;
if (!access_ok(VERIFY_WRITE, rseq, rseq_len))
return -EFAULT;
current->rseq = rseq;
current->rseq_len = rseq_len;
current->rseq_sig = sig;
/*
* If rseq was previously inactive, and has just been
* registered, ensure the cpu_id_start and cpu_id fields
* are updated before returning to user-space.
*/
rseq_set_notify_resume(current);

return 0;
}
@@ -1191,6 +1191,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
if (p->sched_class->migrate_task_rq)
p->sched_class->migrate_task_rq(p);
p->se.nr_migrations++;
rseq_migrate(p);
perf_event_task_migrate(p);
}

@@ -2634,6 +2635,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
{
sched_info_switch(rq, prev, next);
perf_event_task_sched_out(prev, next);
rseq_preempt(prev);
fire_sched_out_preempt_notifiers(prev, next);
prepare_task(next);
prepare_arch_switch(next);
@@ -432,3 +432,6 @@ COND_SYSCALL(setresgid16);
COND_SYSCALL(setresuid16);
COND_SYSCALL(setreuid16);
COND_SYSCALL(setuid16);

/* restartable sequence */
COND_SYSCALL(rseq);
@@ -29,6 +29,7 @@ TARGETS += powerpc
TARGETS += proc
TARGETS += pstore
TARGETS += ptrace
TARGETS += rseq
TARGETS += rtc
TARGETS += seccomp
TARGETS += sigaltstack
@@ -133,6 +133,9 @@ COMPILE.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c
LINK.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH)
endif

# Selftest makefiles can override those targets by setting
# OVERRIDE_TARGETS = 1.
ifeq ($(OVERRIDE_TARGETS),)
$(OUTPUT)/%:%.c
$(LINK.c) $^ $(LDLIBS) -o $@

@@ -141,5 +144,6 @@ $(OUTPUT)/%.o:%.S

$(OUTPUT)/%:%.S
$(LINK.S) $^ $(LDLIBS) -o $@
endif

.PHONY: run_tests all clean install emit_tests
@@ -0,0 +1,6 @@
basic_percpu_ops_test
basic_test
basic_rseq_op_test
param_test
param_test_benchmark
param_test_compare_twice
@@ -0,0 +1,30 @@
# SPDX-License-Identifier: GPL-2.0+ OR MIT
CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./
LDLIBS += -lpthread

# Own dependencies because we only want to build against 1st prerequisite, but
# still track changes to header files and depend on shared object.
OVERRIDE_TARGETS = 1

TEST_GEN_PROGS = basic_test basic_percpu_ops_test param_test \
param_test_benchmark param_test_compare_twice

TEST_GEN_PROGS_EXTENDED = librseq.so

TEST_PROGS = run_param_test.sh

include ../lib.mk

$(OUTPUT)/librseq.so: rseq.c rseq.h rseq-*.h
$(CC) $(CFLAGS) -shared -fPIC $< $(LDLIBS) -o $@

$(OUTPUT)/%: %.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h
$(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@

$(OUTPUT)/param_test_benchmark: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
rseq.h rseq-*.h
$(CC) $(CFLAGS) -DBENCHMARK $< $(LDLIBS) -lrseq -o $@

$(OUTPUT)/param_test_compare_twice: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
rseq.h rseq-*.h
$(CC) $(CFLAGS) -DRSEQ_COMPARE_TWICE $< $(LDLIBS) -lrseq -o $@
@@ -0,0 +1,312 @@
// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>

#include "rseq.h"

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

struct percpu_lock_entry {
intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
struct percpu_lock lock;
struct test_data_entry c[CPU_SETSIZE];
int reps;
};

struct percpu_list_node {
intptr_t data;
struct percpu_list_node *next;
};

struct percpu_list_entry {
struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
struct percpu_list_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Returns the cpu lock was acquired on. */
int rseq_this_cpu_lock(struct percpu_lock *lock)
{
int cpu;

for (;;) {
int ret;

cpu = rseq_cpu_start();
ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
0, 1, cpu);
if (rseq_likely(!ret))
break;
/* Retry if comparison fails or rseq aborts. */
}
/*
* Acquire semantic when taking lock after control dependency.
* Matches rseq_smp_store_release().
*/
rseq_smp_acquire__after_ctrl_dep();
return cpu;
}

void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
assert(lock->c[cpu].v == 1);
/*
* Release lock, with release semantic. Matches
* rseq_smp_acquire__after_ctrl_dep().
*/
rseq_smp_store_release(&lock->c[cpu].v, 0);
}

void *test_percpu_spinlock_thread(void *arg)
{
struct spinlock_test_data *data = arg;
int i, cpu;

if (rseq_register_current_thread()) {
fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
errno, strerror(errno));
abort();
}
for (i = 0; i < data->reps; i++) {
cpu = rseq_this_cpu_lock(&data->lock);
data->c[cpu].count++;
rseq_percpu_unlock(&data->lock, cpu);
}
if (rseq_unregister_current_thread()) {
fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
errno, strerror(errno));
abort();
}

return NULL;
}

/*
* A simple test which implements a sharded counter using a per-cpu
* lock. Obviously real applications might prefer to simply use a
* per-cpu increment; however, this is reasonable for a test and the
* lock can be extended to synchronize more complicated operations.
*/
void test_percpu_spinlock(void)
{
const int num_threads = 200;
int i;
uint64_t sum;
pthread_t test_threads[num_threads];
struct spinlock_test_data data;

memset(&data, 0, sizeof(data));
data.reps = 5000;

for (i = 0; i < num_threads; i++)
pthread_create(&test_threads[i], NULL,
test_percpu_spinlock_thread, &data);

for (i = 0; i < num_threads; i++)
pthread_join(test_threads[i], NULL);

sum = 0;
for (i = 0; i < CPU_SETSIZE; i++)
sum += data.c[i].count;

assert(sum == (uint64_t)data.reps * num_threads);
}

void this_cpu_list_push(struct percpu_list *list,
struct percpu_list_node *node,
int *_cpu)
{
int cpu;

for (;;) {
intptr_t *targetptr, newval, expect;
int ret;

cpu = rseq_cpu_start();
/* Load list->c[cpu].head with single-copy atomicity. */
expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
newval = (intptr_t)node;
targetptr = (intptr_t *)&list->c[cpu].head;
node->next = (struct percpu_list_node *)expect;
ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
if (rseq_likely(!ret))
break;
/* Retry if comparison fails or rseq aborts. */
}
if (_cpu)
*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concerns over
* ABA-type races.
*/
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
int *_cpu)
{
for (;;) {
struct percpu_list_node *head;
intptr_t *targetptr, expectnot, *load;
off_t offset;
int ret, cpu;

cpu = rseq_cpu_start();
targetptr = (intptr_t *)&list->c[cpu].head;
expectnot = (intptr_t)NULL;
offset = offsetof(struct percpu_list_node, next);
load = (intptr_t *)&head;
ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
offset, load, cpu);
if (rseq_likely(!ret)) {
if (_cpu)
*_cpu = cpu;
return head;
}
if (ret > 0)
return NULL;
/* Retry if rseq aborts. */
}
}

/*
* __percpu_list_pop is not safe against concurrent accesses. Should
* only be used on lists that are not concurrently modified.
*/
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
struct percpu_list_node *node;

node = list->c[cpu].head;
if (!node)
return NULL;
list->c[cpu].head = node->next;
return node;
}

void *test_percpu_list_thread(void *arg)
{
int i;
struct percpu_list *list = (struct percpu_list *)arg;

if (rseq_register_current_thread()) {
fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
errno, strerror(errno));
abort();
}

for (i = 0; i < 100000; i++) {
struct percpu_list_node *node;

node = this_cpu_list_pop(list, NULL);
sched_yield(); /* encourage shuffling */
if (node)
this_cpu_list_push(list, node, NULL);
}

if (rseq_unregister_current_thread()) {
fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
errno, strerror(errno));
abort();
}

return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
int i, j;
uint64_t sum = 0, expected_sum = 0;
struct percpu_list list;
pthread_t test_threads[200];
cpu_set_t allowed_cpus;

memset(&list, 0, sizeof(list));

/* Generate list entries for every usable cpu. */
sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
for (i = 0; i < CPU_SETSIZE; i++) {
if (!CPU_ISSET(i, &allowed_cpus))
continue;
for (j = 1; j <= 100; j++) {
struct percpu_list_node *node;

expected_sum += j;

node = malloc(sizeof(*node));
assert(node);
node->data = j;
node->next = list.c[i].head;
list.c[i].head = node;
}
}

for (i = 0; i < 200; i++)
pthread_create(&test_threads[i], NULL,
test_percpu_list_thread, &list);

for (i = 0; i < 200; i++)
pthread_join(test_threads[i], NULL);

for (i = 0; i < CPU_SETSIZE; i++) {
struct percpu_list_node *node;

if (!CPU_ISSET(i, &allowed_cpus))
continue;

while ((node = __percpu_list_pop(&list, i))) {
sum += node->data;
free(node);
}
}

/*
* All entries should now be accounted for (unless some external
* actor is interfering with our allowed affinity while this
* test is running).
*/
assert(sum == expected_sum);
}

int main(int argc, char **argv)
{
if (rseq_register_current_thread()) {
fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
errno, strerror(errno));
goto error;
}
printf("spinlock\n");
test_percpu_spinlock();
printf("percpu_list\n");
test_percpu_list();
if (rseq_unregister_current_thread()) {
fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
errno, strerror(errno));
goto error;
}
return 0;

error:
return -1;
}
@@ -0,0 +1,56 @@
// SPDX-License-Identifier: LGPL-2.1
/*
* Basic test coverage for critical regions and rseq_current_cpu().
*/

#define _GNU_SOURCE
#include <assert.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/time.h>

#include "rseq.h"

void test_cpu_pointer(void)
{
cpu_set_t affinity, test_affinity;
int i;

sched_getaffinity(0, sizeof(affinity), &affinity);
CPU_ZERO(&test_affinity);
for (i = 0; i < CPU_SETSIZE; i++) {
if (CPU_ISSET(i, &affinity)) {
CPU_SET(i, &test_affinity);
sched_setaffinity(0, sizeof(test_affinity),
&test_affinity);
assert(sched_getcpu() == i);
assert(rseq_current_cpu() == i);
assert(rseq_current_cpu_raw() == i);
assert(rseq_cpu_start() == i);
CPU_CLR(i, &test_affinity);
}
}
sched_setaffinity(0, sizeof(affinity), &affinity);
}

int main(int argc, char **argv)
{
if (rseq_register_current_thread()) {
fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
errno, strerror(errno));
goto init_thread_error;
}
printf("testing current cpu\n");
test_cpu_pointer();
if (rseq_unregister_current_thread()) {
fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
errno, strerror(errno));
goto init_thread_error;
}
return 0;

init_thread_error:
return -1;
}

@@ -0,0 +1,65 @@
/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
/*
* rseq-skip.h
*
* (C) Copyright 2017-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
*/

static inline __attribute__((always_inline))
int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
{
return -1;
}

static inline __attribute__((always_inline))
int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
off_t voffp, intptr_t *load, int cpu)
{
return -1;
}

static inline __attribute__((always_inline))
int rseq_addv(intptr_t *v, intptr_t count, int cpu)
{
return -1;
}

static inline __attribute__((always_inline))
int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
intptr_t *v2, intptr_t newv2,
intptr_t newv, int cpu)
{
return -1;
}

static inline __attribute__((always_inline))
int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
intptr_t *v2, intptr_t newv2,
intptr_t newv, int cpu)
{
return -1;
}

static inline __attribute__((always_inline))
int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
intptr_t *v2, intptr_t expect2,
intptr_t newv, int cpu)
{
return -1;
}

static inline __attribute__((always_inline))
int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
void *dst, void *src, size_t len,
intptr_t newv, int cpu)
{
return -1;
}

static inline __attribute__((always_inline))
int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
void *dst, void *src, size_t len,
intptr_t newv, int cpu)
{
return -1;
}

@@ -0,0 +1,117 @@
// SPDX-License-Identifier: LGPL-2.1
/*
* rseq.c
*
* Copyright (C) 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; only
* version 2.1 of the License.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*/

#define _GNU_SOURCE
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <syscall.h>
#include <assert.h>
#include <signal.h>

#include "rseq.h"

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

__attribute__((tls_model("initial-exec"))) __thread
volatile struct rseq __rseq_abi = {
.cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
};

static __attribute__((tls_model("initial-exec"))) __thread
volatile int refcount;

static void signal_off_save(sigset_t *oldset)
{
sigset_t set;
int ret;

sigfillset(&set);
ret = pthread_sigmask(SIG_BLOCK, &set, oldset);
if (ret)
abort();
}

static void signal_restore(sigset_t oldset)
{
int ret;

ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
if (ret)
abort();
}

static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len,
int flags, uint32_t sig)
{
return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
}

int rseq_register_current_thread(void)
{
int rc, ret = 0;
sigset_t oldset;

signal_off_save(&oldset);
if (refcount++)
goto end;
rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
if (!rc) {
assert(rseq_current_cpu_raw() >= 0);
goto end;
}
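	/*
	 * Registration failed: flag it in the TLS area with
	 * RSEQ_CPU_ID_REGISTRATION_FAILED (-2), unless the kernel
	 * reports that rseq is already registered for this thread
	 * (EBUSY).
	 */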
if (errno != EBUSY)
__rseq_abi.cpu_id = -2;
ret = -1;
refcount--;
end:
signal_restore(oldset);
return ret;
}

int rseq_unregister_current_thread(void)
{
int rc, ret = 0;
sigset_t oldset;

signal_off_save(&oldset);
if (--refcount)
goto end;
rc = sys_rseq(&__rseq_abi, sizeof(struct rseq),
RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
if (!rc)
goto end;
ret = -1;
end:
signal_restore(oldset);
return ret;
}

int32_t rseq_fallback_current_cpu(void)
{
int32_t cpu;

cpu = sched_getcpu();
if (cpu < 0) {
perror("sched_getcpu()");
abort();
}
return cpu;
}
@@ -0,0 +1,147 @@
/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
/*
* rseq.h
*
* (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
*/

#ifndef RSEQ_H
#define RSEQ_H

#include <stdint.h>
#include <stdbool.h>
#include <pthread.h>
#include <signal.h>
#include <sched.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sched.h>
#include <linux/rseq.h>

/*
* Empty code injection macros, override when testing.
* It is important to consider that the ASM injection macros need to be
* fully reentrant (e.g. do not modify the stack).
*/
#ifndef RSEQ_INJECT_ASM
#define RSEQ_INJECT_ASM(n)
#endif

#ifndef RSEQ_INJECT_C
#define RSEQ_INJECT_C(n)
#endif

#ifndef RSEQ_INJECT_INPUT
#define RSEQ_INJECT_INPUT
#endif

#ifndef RSEQ_INJECT_CLOBBER
#define RSEQ_INJECT_CLOBBER
#endif

#ifndef RSEQ_INJECT_FAILED
#define RSEQ_INJECT_FAILED
#endif

extern __thread volatile struct rseq __rseq_abi;

#define rseq_likely(x) __builtin_expect(!!(x), 1)
#define rseq_unlikely(x) __builtin_expect(!!(x), 0)
#define rseq_barrier() __asm__ __volatile__("" : : : "memory")

#define RSEQ_ACCESS_ONCE(x) (*(__volatile__ __typeof__(x) *)&(x))
#define RSEQ_WRITE_ONCE(x, v) __extension__ ({ RSEQ_ACCESS_ONCE(x) = (v); })
#define RSEQ_READ_ONCE(x) RSEQ_ACCESS_ONCE(x)

#define __rseq_str_1(x) #x
#define __rseq_str(x) __rseq_str_1(x)

#define rseq_log(fmt, args...) \
	fprintf(stderr, fmt " (in %s() at " __FILE__ ":" __rseq_str(__LINE__) ")\n", \
## args, __func__)

#define rseq_bug(fmt, args...) \
do { \
rseq_log(fmt, ##args); \
abort(); \
} while (0)

#if defined(__x86_64__) || defined(__i386__)
#include <rseq-x86.h>
#elif defined(__ARMEL__)
#include <rseq-arm.h>
#elif defined(__PPC__)
#include <rseq-ppc.h>
#else
#error unsupported target
#endif

/*
* Register rseq for the current thread. This needs to be called once
* by any thread which uses restartable sequences, before they start
* using restartable sequences, to ensure restartable sequences
* succeed. A restartable sequence executed from a non-registered
* thread will always fail.
*/
int rseq_register_current_thread(void);

/*
* Unregister rseq for current thread.
*/
int rseq_unregister_current_thread(void);

/*
* Restartable sequence fallback for reading the current CPU number.
*/
int32_t rseq_fallback_current_cpu(void);

/*
* Values returned can be either the current CPU number, -1 (rseq is
* uninitialized), or -2 (rseq initialization has failed).
*/
static inline int32_t rseq_current_cpu_raw(void)
{
return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id);
}

/*
* Returns a possible CPU number, which is typically the current CPU.
* The returned CPU number can be used to prepare for an rseq critical
* section, which will confirm whether the cpu number is indeed the
* current one, and whether rseq is initialized.
*
* The CPU number returned by rseq_cpu_start should always be validated
* by passing it to a rseq asm sequence, or by comparing it to the
* return value of rseq_current_cpu_raw() if the rseq asm sequence
* does not need to be invoked.
*/
static inline uint32_t rseq_cpu_start(void)
{
return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start);
}
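
/*
 * Minimal usage sketch (a hypothetical helper, not part of the rseq API):
 * increment a per-cpu counter through the arch-provided rseq_addv() fast
 * path, retrying on cpu mismatch or rseq abort. The asm sequence behind
 * rseq_addv() re-validates the cpu number, so rseq_cpu_start() only
 * provides the initial guess. Assumes the calling thread is registered
 * via rseq_register_current_thread() and that "counters" has at least
 * CPU_SETSIZE entries.
 */
static inline void rseq_example_percpu_inc(intptr_t *counters)
{
	int cpu;

	do {
		cpu = rseq_cpu_start();
		/* Retry while the critical section aborts or cpu changes. */
	} while (rseq_addv(&counters[cpu], 1, cpu));
}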

static inline uint32_t rseq_current_cpu(void)
{
int32_t cpu;

cpu = rseq_current_cpu_raw();
if (rseq_unlikely(cpu < 0))
cpu = rseq_fallback_current_cpu();
return cpu;
}

/*
* rseq_prepare_unload() should be invoked by each thread using rseq_finish*()
 * at least once between their last rseq_finish*() and unload of the
* library defining the rseq critical section (struct rseq_cs). This also
* applies to use of rseq in code generated by JIT: rseq_prepare_unload()
* should be invoked at least once by each thread using rseq_finish*() before
* reclaim of the memory holding the struct rseq_cs.
*/
static inline void rseq_prepare_unload(void)
{
__rseq_abi.rseq_cs = 0;
}

#endif /* RSEQ_H */
@@ -0,0 +1,121 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0+ OR MIT

EXTRA_ARGS=${@}

OLDIFS="$IFS"
IFS=$'\n'
TEST_LIST=(
"-T s"
"-T l"
"-T b"
"-T b -M"
"-T m"
"-T m -M"
"-T i"
)

TEST_NAME=(
"spinlock"
"list"
"buffer"
"buffer with barrier"
"memcpy"
"memcpy with barrier"
"increment"
)
IFS="$OLDIFS"

REPS=1000
SLOW_REPS=100

function do_tests()
{
local i=0
while [ "$i" -lt "${#TEST_LIST[@]}" ]; do
echo "Running test ${TEST_NAME[$i]}"
./param_test ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1
echo "Running compare-twice test ${TEST_NAME[$i]}"
./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1
let "i++"
done
}

echo "Default parameters"
do_tests

echo "Loop injection: 10000 loops"

OLDIFS="$IFS"
IFS=$'\n'
INJECT_LIST=(
"1"
"2"
"3"
"4"
"5"
"6"
"7"
"8"
"9"
)
IFS="$OLDIFS"

NR_LOOPS=10000

i=0
while [ "$i" -lt "${#INJECT_LIST[@]}" ]; do
echo "Injecting at <${INJECT_LIST[$i]}>"
do_tests -${INJECT_LIST[i]} ${NR_LOOPS}
let "i++"
done
NR_LOOPS=

function inject_blocking()
{
OLDIFS="$IFS"
IFS=$'\n'
INJECT_LIST=(
"7"
"8"
"9"
)
IFS="$OLDIFS"

NR_LOOPS=-1

i=0
while [ "$i" -lt "${#INJECT_LIST[@]}" ]; do
echo "Injecting at <${INJECT_LIST[$i]}>"
do_tests -${INJECT_LIST[i]} -1 ${@}
let "i++"
done
NR_LOOPS=
}

echo "Yield injection (25%)"
inject_blocking -m 4 -y

echo "Yield injection (50%)"
inject_blocking -m 2 -y

echo "Yield injection (100%)"
inject_blocking -m 1 -y

echo "Kill injection (25%)"
inject_blocking -m 4 -k

echo "Kill injection (50%)"
inject_blocking -m 2 -k

echo "Kill injection (100%)"
inject_blocking -m 1 -k

echo "Sleep injection (1ms, 25%)"
inject_blocking -m 4 -s 1

echo "Sleep injection (1ms, 50%)"
inject_blocking -m 2 -s 1

echo "Sleep injection (1ms, 100%)"
inject_blocking -m 1 -s 1