Skip to content

Commit

Permalink
Merge remote-tracking branch 'remotes/rth/tags/pull-atomic-20161026' …
Browse files Browse the repository at this point in the history
…into staging

cmpxchg emulation of atomics, v8

# gpg: Signature made Wed 26 Oct 2016 16:30:03 BST
# gpg:                using RSA key 0xAD1270CC4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"
# Primary key fingerprint: 9CB1 8DDA F8E8 49AD 2AFC  16A4 AD12 70CC 4DD0 279B

* remotes/rth/tags/pull-atomic-20161026: (37 commits)
  target-alpha: Emulate LL/SC using cmpxchg helpers
  target-alpha: Introduce MMU_PHYS_IDX
  target-arm: remove EXCP_STREX + cpu_exclusive_{test, info}
  linux-user: remove handling of aarch64's EXCP_STREX
  linux-user: remove handling of ARM's EXCP_STREX
  target-arm: emulate aarch64's LL/SC using cmpxchg helpers
  target-arm: emulate SWP with atomic_xchg helper
  target-arm: emulate LL/SC using cmpxchg helpers
  target-arm: Rearrange aa32 load and store functions
  tests: add atomic_add-bench
  target-i386: remove helper_lock()
  target-i386: emulate XCHG using atomic helper
  target-i386: emulate LOCK'ed BTX ops using atomic helpers
  target-i386: emulate LOCK'ed XADD using atomic helper
  target-i386: emulate LOCK'ed NEG using cmpxchg helper
  target-i386: emulate LOCK'ed NOT using atomic helper
  target-i386: emulate LOCK'ed INC using atomic helper
  target-i386: emulate LOCK'ed OP instructions using atomic helpers
  target-i386: emulate LOCK'ed cmpxchg using cmpxchg helpers
  tcg: Emit barriers with parallel_cpus
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
  • Loading branch information
pm215 committed Oct 27, 2016
2 parents 8f9d84d + ed28391 commit 5929d7e
Show file tree
Hide file tree
Showing 43 changed files with 2,345 additions and 1,090 deletions.
2 changes: 1 addition & 1 deletion Makefile.objs
Expand Up @@ -89,7 +89,7 @@ endif

#######################################################################
# Target-independent parts used in system and user emulation
common-obj-y += tcg-runtime.o cpus-common.o
common-obj-y += cpus-common.o
common-obj-y += hw/
common-obj-y += qom/
common-obj-y += disas/
Expand Down
1 change: 1 addition & 0 deletions Makefile.target
Expand Up @@ -94,6 +94,7 @@ obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
obj-y += fpu/softfloat.o
obj-y += target-$(TARGET_BASE_ARCH)/
obj-y += disas.o
obj-y += tcg-runtime.o
obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o

Expand Down
215 changes: 215 additions & 0 deletions atomic_template.h
@@ -0,0 +1,215 @@
/*
* Atomic helper templates
* Included from tcg-runtime.c and cputlb.c.
*
* Copyright (c) 2016 Red Hat, Inc
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/

#if DATA_SIZE == 16
# define SUFFIX o
# define DATA_TYPE Int128
# define BSWAP bswap128
#elif DATA_SIZE == 8
# define SUFFIX q
# define DATA_TYPE uint64_t
# define BSWAP bswap64
#elif DATA_SIZE == 4
# define SUFFIX l
# define DATA_TYPE uint32_t
# define BSWAP bswap32
#elif DATA_SIZE == 2
# define SUFFIX w
# define DATA_TYPE uint16_t
# define BSWAP bswap16
#elif DATA_SIZE == 1
# define SUFFIX b
# define DATA_TYPE uint8_t
# define BSWAP
#else
# error unsupported data size
#endif

#if DATA_SIZE >= 4
# define ABI_TYPE DATA_TYPE
#else
# define ABI_TYPE uint32_t
#endif

/* Define host-endian atomic operations. Note that END is used within
the ATOMIC_NAME macro, and redefined below. */
#if DATA_SIZE == 1
# define END
#elif defined(HOST_WORDS_BIGENDIAN)
# define END _be
#else
# define END _le
#endif

ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS)
{
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
return atomic_cmpxchg__nocheck(haddr, cmpv, newv);
}

#if DATA_SIZE >= 16
ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
{
DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
__atomic_load(haddr, &val, __ATOMIC_RELAXED);
return val;
}

void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
ABI_TYPE val EXTRA_ARGS)
{
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
__atomic_store(haddr, &val, __ATOMIC_RELAXED);
}
#else
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
ABI_TYPE val EXTRA_ARGS)
{
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
return atomic_xchg__nocheck(haddr, val);
}

#define GEN_ATOMIC_HELPER(X) \
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
ABI_TYPE val EXTRA_ARGS) \
{ \
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \
return atomic_##X(haddr, val); \
} \

GEN_ATOMIC_HELPER(fetch_add)
GEN_ATOMIC_HELPER(fetch_and)
GEN_ATOMIC_HELPER(fetch_or)
GEN_ATOMIC_HELPER(fetch_xor)
GEN_ATOMIC_HELPER(add_fetch)
GEN_ATOMIC_HELPER(and_fetch)
GEN_ATOMIC_HELPER(or_fetch)
GEN_ATOMIC_HELPER(xor_fetch)

#undef GEN_ATOMIC_HELPER
#endif /* DATA SIZE >= 16 */

#undef END

#if DATA_SIZE > 1

/* Define reverse-host-endian atomic operations. Note that END is used
within the ATOMIC_NAME macro. */
#ifdef HOST_WORDS_BIGENDIAN
# define END _le
#else
# define END _be
#endif

ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS)
{
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
return BSWAP(atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv)));
}

#if DATA_SIZE >= 16
ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS)
{
DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP;
__atomic_load(haddr, &val, __ATOMIC_RELAXED);
return BSWAP(val);
}

void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr,
ABI_TYPE val EXTRA_ARGS)
{
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
val = BSWAP(val);
__atomic_store(haddr, &val, __ATOMIC_RELAXED);
}
#else
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr,
ABI_TYPE val EXTRA_ARGS)
{
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
return BSWAP(atomic_xchg__nocheck(haddr, BSWAP(val)));
}

#define GEN_ATOMIC_HELPER(X) \
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
ABI_TYPE val EXTRA_ARGS) \
{ \
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \
return BSWAP(atomic_##X(haddr, BSWAP(val))); \
}

GEN_ATOMIC_HELPER(fetch_and)
GEN_ATOMIC_HELPER(fetch_or)
GEN_ATOMIC_HELPER(fetch_xor)
GEN_ATOMIC_HELPER(and_fetch)
GEN_ATOMIC_HELPER(or_fetch)
GEN_ATOMIC_HELPER(xor_fetch)

#undef GEN_ATOMIC_HELPER

/* Note that for addition, we need to use a separate cmpxchg loop instead
of bswaps for the reverse-host-endian helpers. */
ABI_TYPE ATOMIC_NAME(fetch_add)(CPUArchState *env, target_ulong addr,
ABI_TYPE val EXTRA_ARGS)
{
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
DATA_TYPE ldo, ldn, ret, sto;

ldo = atomic_read__nocheck(haddr);
while (1) {
ret = BSWAP(ldo);
sto = BSWAP(ret + val);
ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto);
if (ldn == ldo) {
return ret;
}
ldo = ldn;
}
}

ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
ABI_TYPE val EXTRA_ARGS)
{
DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
DATA_TYPE ldo, ldn, ret, sto;

ldo = atomic_read__nocheck(haddr);
while (1) {
ret = BSWAP(ldo) + val;
sto = BSWAP(ret);
ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto);
if (ldn == ldo) {
return ret;
}
ldo = ldn;
}
}
#endif /* DATA_SIZE >= 16 */

#undef END
#endif /* DATA_SIZE > 1 */

#undef BSWAP
#undef ABI_TYPE
#undef DATA_TYPE
#undef SUFFIX
#undef DATA_SIZE
62 changes: 61 additions & 1 deletion configure
Expand Up @@ -1216,7 +1216,10 @@ case "$cpu" in
cc_i386='$(CC) -m32'
;;
x86_64)
CPU_CFLAGS="-m64"
# ??? Only extremely old AMD cpus do not have cmpxchg16b.
# If we truly care, we should simply detect this case at
# runtime and generate the fallback to serial emulation.
CPU_CFLAGS="-m64 -mcx16"
LDFLAGS="-m64 $LDFLAGS"
cc_i386='$(CC) -m32'
;;
Expand Down Expand Up @@ -4521,6 +4524,55 @@ if compile_prog "" "" ; then
int128=yes
fi

#########################################
# See if 128-bit atomic operations are supported.

atomic128=no
if test "$int128" = "yes"; then
cat > $TMPC << EOF
int main(void)
{
unsigned __int128 x = 0, y = 0;
y = __atomic_load_16(&x, 0);
__atomic_store_16(&x, y, 0);
__atomic_compare_exchange_16(&x, &y, x, 0, 0, 0);
return 0;
}
EOF
if compile_prog "" "" ; then
atomic128=yes
fi
fi

#########################################
# See if 64-bit atomic operations are supported.
# Note that without __atomic builtins, we can only
# assume atomic loads/stores max at pointer size.

cat > $TMPC << EOF
#include <stdint.h>
int main(void)
{
uint64_t x = 0, y = 0;
#ifdef __ATOMIC_RELAXED
y = __atomic_load_8(&x, 0);
__atomic_store_8(&x, y, 0);
__atomic_compare_exchange_8(&x, &y, x, 0, 0, 0);
__atomic_exchange_8(&x, y, 0);
__atomic_fetch_add_8(&x, y, 0);
#else
typedef char is_host64[sizeof(void *) >= sizeof(uint64_t) ? 1 : -1];
__sync_lock_test_and_set(&x, y);
__sync_val_compare_and_swap(&x, y, 0);
__sync_fetch_and_add(&x, y);
#endif
return 0;
}
EOF
if compile_prog "" "" ; then
atomic64=yes
fi

########################################
# check if getauxval is available.

Expand Down Expand Up @@ -5483,6 +5535,14 @@ if test "$int128" = "yes" ; then
echo "CONFIG_INT128=y" >> $config_host_mak
fi

if test "$atomic128" = "yes" ; then
echo "CONFIG_ATOMIC128=y" >> $config_host_mak
fi

if test "$atomic64" = "yes" ; then
echo "CONFIG_ATOMIC64=y" >> $config_host_mak
fi

if test "$getauxval" = "yes" ; then
echo "CONFIG_GETAUXVAL=y" >> $config_host_mak
fi
Expand Down
6 changes: 6 additions & 0 deletions cpu-exec-common.c
Expand Up @@ -77,3 +77,9 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
}
siglongjmp(cpu->jmp_env, 1);
}

void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc)
{
cpu->exception_index = EXCP_ATOMIC;
cpu_loop_exit_restore(cpu, pc);
}
30 changes: 30 additions & 0 deletions cpu-exec.c
Expand Up @@ -216,6 +216,36 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
}
#endif

static void cpu_exec_step(CPUState *cpu)
{
CPUArchState *env = (CPUArchState *)cpu->env_ptr;
TranslationBlock *tb;
target_ulong cs_base, pc;
uint32_t flags;

cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
tb = tb_gen_code(cpu, pc, cs_base, flags,
1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
tb->orig_tb = NULL;
/* execute the generated code */
trace_exec_tb_nocache(tb, pc);
cpu_tb_exec(cpu, tb);
tb_phys_invalidate(tb, -1);
tb_free(tb);
}

void cpu_exec_step_atomic(CPUState *cpu)
{
start_exclusive();

/* Since we got here, we know that parallel_cpus must be true. */
parallel_cpus = false;
cpu_exec_step(cpu);
parallel_cpus = true;

end_exclusive();
}

struct tb_desc {
target_ulong pc;
target_ulong cs_base;
Expand Down
2 changes: 2 additions & 0 deletions cpus.c
Expand Up @@ -1497,6 +1497,8 @@ static void tcg_exec_all(void)
if (r == EXCP_DEBUG) {
cpu_handle_guest_debug(cpu);
break;
} else if (r == EXCP_ATOMIC) {
cpu_exec_step_atomic(cpu);
}
} else if (cpu->stop || cpu->stopped) {
if (cpu->unplug) {
Expand Down

0 comments on commit 5929d7e

Please sign in to comment.