/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright (c) 2016 Cyril Hrubis <chrubis@suse.cz>
*/
/* The LTP library has some of its own atomic synchronisation primitives
* contained in this file. Generally speaking these should not be used
* directly in tests for synchronisation, instead use tst_checkpoint.h,
* tst_fuzzy_sync.h or the POSIX library.
*
* Notes on compile and runtime memory barriers and atomics.
*
* Within the LTP library we have three concerns when accessing variables
* shared by multiple threads or processes:
*
* (1) Removal or reordering of accesses by the compiler.
* (2) Atomicity of addition.
* (3) LOAD-STORE ordering between threads.
*
* The first (1) is the most likely to cause an error if not properly
* handled. We avoid it by using volatile variables and statements which will
* not be removed or reordered by the compiler during optimisation. This includes
* the __atomic and __sync intrinsics and volatile asm statements marked with
* "memory" as well as variables marked with volatile.
*
* On any platform Linux is likely to run on, a LOAD (fetch) or STORE of a
* 32-bit integer will be atomic. However fetching and adding to a variable is
* quite likely not; so for (2) we need to ensure we use atomic addition.
*
* Finally, for tst_fuzzy_sync at least, we need to ensure that LOADs and
* STOREs of any shared variables (including non-atomics) that are made
* between calls to tst_fzsync_wait are completed (globally visible) before
* tst_fzsync_wait completes. For this, runtime memory and instruction
* barriers are required in addition to compile time.
*
* We use full sequential ordering (__ATOMIC_SEQ_CST) for the sake of
* simplicity. LTP tests tend to be syscall heavy so any performance gain from
* using a weaker memory model is unlikely to result in a relatively large
* performance improvement while at the same time being a potent source of
* confusion.
*
* Likewise, for the fallback ASM, the simplest "definitely will work, always"
* approach is preferred over anything more performant.
*
* Also see Documentation/memory-barriers.txt in the kernel tree and
* https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html;
* note that terminology may vary between these sources.
*/
#ifndef TST_ATOMIC_H__
#define TST_ATOMIC_H__
#include "config.h"
#if HAVE_ATOMIC_MEMORY_MODEL == 1
static inline int tst_atomic_add_return(int i, int *v)
{
	/* Atomically add 'i' to '*v' and return the new value, with full
	 * sequential consistency (see the rationale at the top of this file).
	 */
	int new_val = __atomic_add_fetch(v, i, __ATOMIC_SEQ_CST);

	return new_val;
}
static inline int tst_atomic_load(int *v)
{
	/* Sequentially-consistent atomic read of '*v'. */
	int cur = __atomic_load_n(v, __ATOMIC_SEQ_CST);

	return cur;
}
static inline void tst_atomic_store(int i, int *v)
{
	/* Sequentially-consistent atomic write of 'i' into '*v'. */
	__atomic_store_n(v, i, __ATOMIC_SEQ_CST);
}
#elif HAVE_SYNC_ADD_AND_FETCH == 1
static inline int tst_atomic_add_return(int i, int *v)
{
	/* Atomic add-and-fetch; the __sync builtins imply a full barrier. */
	int sum = __sync_add_and_fetch(v, i);

	return sum;
}
static inline int tst_atomic_load(int *v)
{
	int val;

	/* Full barriers either side of a plain read emulate a SEQ_CST
	 * atomic load; aligned 32-bit loads are assumed naturally atomic
	 * (see the header comment of this file).
	 */
	__sync_synchronize();
	val = *v;
	__sync_synchronize();

	return val;
}
static inline void tst_atomic_store(int i, int *v)
{
	/* Barrier / plain store / barrier emulates a SEQ_CST atomic store;
	 * aligned 32-bit stores are assumed naturally atomic.
	 */
	__sync_synchronize();
	*v = i;
	__sync_synchronize();
}
#elif defined(__i386__) || defined(__x86_64__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1
/* Atomically add 'i' to '*v' and return the resulting value.
 * On x86 a LOCK-prefixed instruction also acts as a full memory barrier.
 */
static inline int tst_atomic_add_return(int i, int *v)
{
	int __ret = i;

	/*
	 * taken from arch/x86/include/asm/cmpxchg.h
	 */
	asm volatile ("lock; xaddl %0, %1\n"
		: "+r" (__ret), "+m" (*v) : : "memory", "cc");

	/* XADD exchanged the old value of *v into __ret; add i again to
	 * produce the post-addition value.
	 */
	return i + __ret;
}
#elif defined(__powerpc__) || defined(__powerpc64__)
/* Atomically add 'i' to '*v' and return the new value.
 * A lwarx/stwcx. reservation loop (retried via bne- when the reservation
 * is lost) bracketed by "sync" full barriers on both sides.
 */
static inline int tst_atomic_add_return(int i, int *v)
{
	int t;

	/* taken from arch/powerpc/include/asm/atomic.h */
	asm volatile(
		" sync\n"
		"1: lwarx %0,0,%2 # atomic_add_return\n"
		" add %0,%1,%0\n"
		" stwcx. %0,0,%2 \n"
		" bne- 1b\n"
		" sync\n"
		: "=&r" (t)
		: "r" (i), "r" (v)
		: "cc", "memory");

	/* t holds the value stored by stwcx., i.e. *v + i */
	return t;
}
/* SEQ_CST-style load: "sync" full barriers either side of a plain read;
 * aligned 32-bit loads are assumed naturally atomic (see file header).
 */
static inline int tst_atomic_load(int *v)
{
	int ret;

	asm volatile("sync\n" : : : "memory");
	ret = *v;
	asm volatile("sync\n" : : : "memory");

	return ret;
}
/* SEQ_CST-style store: "sync" full barriers either side of a plain write;
 * aligned 32-bit stores are assumed naturally atomic (see file header).
 */
static inline void tst_atomic_store(int i, int *v)
{
	asm volatile("sync\n" : : : "memory");
	*v = i;
	asm volatile("sync\n" : : : "memory");
}
#elif defined(__s390__) || defined(__s390x__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1
/* Atomically add 'i' to '*v' and return the new value via a compare-and-swap
 * (cs) retry loop.
 * NOTE(review): no explicit barriers here — presumably because cs is a
 * serializing operation on s390; confirm against the architecture manual.
 */
static inline int tst_atomic_add_return(int i, int *v)
{
	int old_val, new_val;

	/* taken from arch/s390/include/asm/atomic.h */
	asm volatile(
		" l %0,%2\n"
		"0: lr %1,%0\n"
		" ar %1,%3\n"
		" cs %0,%1,%2\n"
		" jl 0b"
		: "=&d" (old_val), "=&d" (new_val), "+Q" (*v)
		: "d" (i)
		: "cc", "memory");

	/* on exit old_val holds the value *v had before our successful cs */
	return old_val + i;
}
#elif defined(__arc__)
/* ARCv2 provides a real SMP barrier (dmb); ARC700 gets a compiler-only barrier */
#ifdef __ARC700__
#define smp_mb() asm volatile("" : : : "memory")
#else
#define smp_mb() asm volatile("dmb 3\n" : : : "memory")
#endif
/* Atomically add 'i' to '*v' and return the new value.
 * llock/scond retry loop (bnz re-tries while scond fails) bracketed by
 * smp_mb() on both sides for full ordering.
 */
static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned int val;

	smp_mb();

	asm volatile(
		"1: llock %[val], [%[ctr]] \n"
		" add %[val], %[val], %[i] \n"
		" scond %[val], [%[ctr]] \n"
		" bnz 1b \n"
		: [val] "=&r" (val)
		: [ctr] "r" (v),
		[i] "ir" (i)
		: "cc", "memory");

	smp_mb();

	/* val is the value we stored, i.e. the post-addition value */
	return val;
}
static inline int tst_atomic_load(int *v)
{
	int val;

	/* Full barriers either side of a plain read; aligned 32-bit loads
	 * are assumed naturally atomic (see the header comment).
	 */
	smp_mb();
	val = *v;
	smp_mb();

	return val;
}
static inline void tst_atomic_store(int i, int *v)
{
	/* Full barriers either side of a plain write; aligned 32-bit stores
	 * are assumed naturally atomic (see the header comment).
	 */
	smp_mb();
	*v = i;
	smp_mb();
}
#elif defined (__aarch64__)
/* Atomically add 'i' to '*v' and return the new value.
 * ldaxr/stlxr exclusive retry loop (cbnz re-tries while the store-exclusive
 * status flag in tmp is non-zero), followed by "dmb ish" for full ordering.
 */
static inline int tst_atomic_add_return(int i, int *v)
{
	unsigned long tmp;
	int result;

	__asm__ __volatile__(
		" prfm pstl1strm, %2 \n"
		"1: ldaxr %w0, %2 \n"
		" add %w0, %w0, %w3 \n"
		" stlxr %w1, %w0, %2 \n"
		" cbnz %w1, 1b \n"
		" dmb ish \n"
		: "=&r" (result), "=&r" (tmp), "+Q" (*v)
		: "Ir" (i)
		: "memory");

	/* result is the value we stored, i.e. *v + i */
	return result;
}
/* We are using load and store exclusive (ldaxr & stlxr) instructions to try
* and help prevent the tst_atomic_load and, more likely, tst_atomic_store
* functions from interfering with tst_atomic_add_return which takes advantage
* of exclusivity. It is not clear if this is a good idea or not, but does
* mean that all three functions are very similar.
*/
/* SEQ_CST-style load implemented with an exclusive load/store pair (see the
 * comment above): the value is read with ldaxr and immediately written back
 * with stlxr, retrying while tmp (the store-exclusive status) is non-zero,
 * then "dmb ish" for full ordering.
 */
static inline int tst_atomic_load(int *v)
{
	int ret;
	unsigned long tmp;

	asm volatile("//atomic_load \n"
		" prfm pstl1strm, %[v] \n"
		"1: ldaxr %w[ret], %[v] \n"
		" stlxr %w[tmp], %w[ret], %[v] \n"
		" cbnz %w[tmp], 1b \n"
		" dmb ish \n"
		: [tmp] "=&r" (tmp), [ret] "=&r" (ret), [v] "+Q" (*v)
		: : "memory");

	return ret;
}
/* SEQ_CST-style store via an exclusive load/store pair (see the comment
 * above tst_atomic_load). Note tmp does double duty: ldaxr loads the old
 * (discarded) value of *v into it, then stlxr overwrites it with the
 * store-exclusive status flag which cbnz tests for the retry.
 */
static inline void tst_atomic_store(int i, int *v)
{
	unsigned long tmp;

	asm volatile("//atomic_store \n"
		" prfm pstl1strm, %[v] \n"
		"1: ldaxr %w[tmp], %[v] \n"
		" stlxr %w[tmp], %w[i], %[v] \n"
		" cbnz %w[tmp], 1b \n"
		" dmb ish \n"
		: [tmp] "=&r" (tmp), [v] "+Q" (*v)
		: [i] "r" (i)
		: "memory");
}
#elif defined(__sparc__) && defined(__arch64__)
# define LTP_USE_GENERIC_LOAD_STORE_ASM 1
/* Atomically add 'i' to '*v' and return the new value via a cas retry loop:
 * reload *v into ret, compute ret + i into tmp, cas, and retry while the
 * comparison shows another CPU got in first. On success the delay-slot nop
 * runs and the final add recomputes the post-addition value into ret.
 * NOTE(review): the earlyclobber modifier is spelt "=r&" rather than the
 * conventional "=&r" — confirm the compiler honours this ordering.
 */
static inline int tst_atomic_add_return(int i, int *v)
{
	int ret, tmp;

	/* Based on arch/sparc/lib/atomic_64.S with the exponential backoff
	 * function removed because we are unlikely to have a large (>= 16?)
	 * number of cores continuously trying to update one variable.
	 */
	asm volatile("/*atomic_add_return*/ \n"
		"1: ldsw [%[v]], %[ret]; \n"
		" add %[ret], %[i], %[tmp]; \n"
		" cas [%[v]], %[ret], %[tmp]; \n"
		" cmp %[ret], %[tmp]; \n"
		" bne,pn %%icc, 1b; \n"
		" nop; \n"
		" add %[ret], %[i], %[ret]; \n"
		: [ret] "=r&" (ret), [tmp] "=r&" (tmp)
		: [i] "r" (i), [v] "r" (v)
		: "memory", "cc");

	return ret;
}
#else /* HAVE_SYNC_ADD_AND_FETCH == 1 */
# error Your compiler does not provide __atomic_add_fetch, __sync_add_and_fetch \
and an LTP implementation is missing for your architecture.
#endif
#ifdef LTP_USE_GENERIC_LOAD_STORE_ASM
static inline int tst_atomic_load(int *v)
{
	int val;

	/* Compiler barriers only: they stop the compiler removing or
	 * reordering the access, while the arch-specific add_return above
	 * supplies the runtime ordering.
	 */
	__asm__ __volatile__("" : : : "memory");
	val = *v;
	__asm__ __volatile__("" : : : "memory");

	return val;
}
static inline void tst_atomic_store(int i, int *v)
{
	/* Compiler barriers only: they stop the compiler removing or
	 * reordering the access, while the arch-specific add_return above
	 * supplies the runtime ordering.
	 */
	__asm__ __volatile__("" : : : "memory");
	*v = i;
	__asm__ __volatile__("" : : : "memory");
}
#endif
static inline int tst_atomic_inc(int *v)
{
	/* Convenience wrapper: atomically increment *v, return new value. */
	return tst_atomic_add_return(1, v);
}
static inline int tst_atomic_dec(int *v)
{
	/* Convenience wrapper: atomically decrement *v, return new value. */
	return tst_atomic_add_return(-1, v);
}
#endif /* TST_ATOMIC_H__ */