diff --git a/README.md b/README.md index ee59e6ae8..42cc266ff 100644 --- a/README.md +++ b/README.md @@ -169,16 +169,3 @@ use those compilers, you probably need to configure with ******************************************************* -## NOTE FOR TILERA USERS -The Tilera cache coherency protocols, as of the TileGX boards, appear to be -somewhat buggy for large multithreaded programs. And by buggy I mean they cause -kernel panics (at least, I haven't been able to demonstrate data corruption -yet). Thankfully, you can pick from several cache coherency protocols, and one -of them is more stable than the default. What I have found that seems to be -*more* stable, if not perfectly stable, is to force the cache coherency -protocol to hashed. The way you do this is with a boot argument to the Tilera -kernel. The tile-monitor command I use is this: - - `tile-monitor --net --hvx ucache_hash=all --` - -Good luck! diff --git a/config/qthread_check_assembly.m4 b/config/qthread_check_assembly.m4 index dc34ae846..19620b557 100644 --- a/config/qthread_check_assembly.m4 +++ b/config/qthread_check_assembly.m4 @@ -79,30 +79,6 @@ unset qthread_assemble ])dnl -dnl ################################################################# -dnl -dnl QTHREAD_CHECK_SPARCV8PLUS -dnl -dnl ################################################################# -AC_DEFUN([QTHREAD_CHECK_SPARCV8PLUS],[ - AC_MSG_CHECKING([if have Sparc v8+/v9 support]) - sparc_result=0 - QTHREAD_TRY_ASSEMBLE([$qthread_cv_asm_text - casa [%o0] 0x80, %o1, %o2], - [sparc_result=1], - [sparc_result=0]) - if test "$sparc_result" = "1" ; then - AC_MSG_RESULT([yes]) - ifelse([$1],,:,[$1]) - else - AC_MSG_RESULT([no]) - ifelse([$2],,:,[$2]) - fi - - unset sparc_result -])dnl - - dnl ################################################################# dnl dnl QTHREAD_CHECK_INLINE_GCC @@ -179,23 +155,6 @@ AC_DEFUN([QTHREAD_CHECK_ASSEMBLY],[ qthread_gcc_inline_assign='"movl [$]0, %0" : "=&r"(ret)' ;; - ia64-*) - qthread_cv_asm_arch="IA64" - qthread_gcc_inline_assign='"mov %0=r0\n;;\n" : "=&r"(ret)' - ;; - - alpha-*|alphaev[[4-8]]-*|alphaev56-*|alphaev6[[78]]-*) - qthread_cv_asm_arch="ALPHA" - qthread_gcc_inline_assign='"bis [$]31,[$]31,%0" : "=&r"(ret)' - ;; - - tile-*) - AS_IF([test "$ac_cv_sizeof_long" = "4"], - [qthread_cv_asm_arch="TILE"], - [qthread_cv_asm_arch="TILEGX"]) - qthread_gcc_inline_assign='"movei %0, 5" : "=&r"(ret)' - ;; - armv7l-*) qthread_cv_asm_arch="ARM" qthread_gcc_inline_assign='"movt %0, #5" : "=&r"(ret)' @@ -205,13 +164,6 @@ AC_DEFUN([QTHREAD_CHECK_ASSEMBLY],[ qthread_cv_asm_arch="ARMV8_A64" ;; - mips-*|mips64-*) - # Should really find some way to make sure that we are on - # a MIPS III machine (r4000 and later) - qthread_cv_asm_arch="MIPS" - qthread_gcc_inline_assign='"or %0,[$]0,[$]0" : "=&r"(ret)' - ;; - powerpc*|powerpc64*) AS_IF([test "$ac_cv_sizeof_long" = "4"], [qthread_cv_asm_arch="POWERPC32"], @@ -236,15 +188,6 @@ AC_DEFUN([QTHREAD_CHECK_ASSEMBLY],[ qthread_gcc_inline_assign='"A_%=: li %0,0" : "=&r"(ret)' ;; - sparc*-*) - # SPARC v9 (and above) are the only ones with 64bit support - # if compiling 32 bit, see if we are v9 (aka v8plus) or - # earlier (casa is v8+/v9). - AS_IF([test "$ac_cv_sizeof_long" = "4"], - [QTHREAD_CHECK_SPARCV8PLUS([qthread_cv_asm_arch="SPARCV9_32"])], - [qthread_cv_asm_arch="SPARCV9_64"]) - qthread_gcc_inline_assign='"mov 0,%0" : "=&r"(ret)' - ;; esac # now that we know our architecture, try to inline assemble diff --git a/config/qthread_check_atomics.m4 b/config/qthread_check_atomics.m4 index df9544030..80ec3e97c 100644 --- a/config/qthread_check_atomics.m4 +++ b/config/qthread_check_atomics.m4 @@ -15,17 +15,9 @@ AS_IF([test "x$enable_builtin_atomics" != xno], [AS_IF([test "x$enable_builtin_atomics" = xyes], [AC_MSG_WARN([Disabling builtin atomics on IBM_XL, due to compiler design decision])]) enable_builtin_atomics=no])]) -AS_IF([test "x$enable_builtin_atomics" != xno], [ - AS_IF([test "x$qthread_cv_c_compiler_type" = xIntel -o "x$qthread_cv_cxx_compiler_type" = xIntel], - [AC_CHECK_HEADERS([ia64intrin.h ia32intrin.h])]) AC_CACHE_CHECK([whether compiler supports builtin atomic CAS-32], [qthread_cv_atomic_CAS32], [AC_LINK_IFELSE([AC_LANG_SOURCE([[ -#ifdef HAVE_IA64INTRIN_H -# include -#elif HAVE_IA32INTRIN_H -# include -#endif #include #include /* for uint32_t */ @@ -40,11 +32,6 @@ return (int)foo; AC_CACHE_CHECK([whether compiler supports builtin atomic CAS-64], [qthread_cv_atomic_CAS64], [AC_LINK_IFELSE([AC_LANG_SOURCE([[ -#ifdef HAVE_IA64INTRIN_H -# include -#elif HAVE_IA32INTRIN_H -# include -#endif #include #include /* for uint64_t */ @@ -59,11 +46,6 @@ return foo; AC_CACHE_CHECK([whether compiler supports builtin atomic CAS-ptr], [qthread_cv_atomic_CASptr], [AC_LINK_IFELSE([AC_LANG_SOURCE([[ -#ifdef HAVE_IA64INTRIN_H -# include -#elif HAVE_IA32INTRIN_H -# include -#endif #include int main(void) @@ -130,11 +112,6 @@ AC_CACHE_CHECK([whether compiler supports builtin atomic incr], [qthread_cv_atomic_incr], [AS_IF([test "$1" -eq 8], [AC_LINK_IFELSE([AC_LANG_SOURCE([[ -#ifdef HAVE_IA64INTRIN_H -# include -#elif HAVE_IA32INTRIN_H -# include -#endif #include #include /* for uint64_t */ @@ -147,11 +124,6 @@ return foo; [qthread_cv_atomic_incr="yes"], [qthread_cv_atomic_incr="no"])], [AC_LINK_IFELSE([AC_LANG_SOURCE([[ -#ifdef HAVE_IA64INTRIN_H -# include -#elif HAVE_IA32INTRIN_H -# include -#endif #include #include /* for uint32_t */ @@ -169,11 +141,6 @@ AS_IF([test "$qthread_cv_atomic_incr" = "yes"], [qt_cv_atomic_incr_works], [AS_IF([test "$1" -eq 8], [AC_RUN_IFELSE([AC_LANG_SOURCE([[ -#ifdef HAVE_IA64INTRIN_H -# include -#elif HAVE_IA32INTRIN_H -# include -#endif #include #include /* for uint64_t */ @@ -204,11 +171,6 @@ return 0; [qt_cv_atomic_incr_works="no"], [qt_cv_atomic_incr_works="assuming yes"])], [AC_RUN_IFELSE([AC_LANG_SOURCE([[ -#ifdef HAVE_IA64INTRIN_H -# include -#elif HAVE_IA32INTRIN_H -# include -#endif #include #include /* for uint32_t */ @@ -224,23 +186,6 @@ return 0; [qt_cv_atomic_incr_works="no"], [qt_cv_atomic_incr_works="assuming yes"])]) ])]) -AS_IF([test "$qthread_cv_atomic_CAS" = "yes"], - [AC_CACHE_CHECK([whether ia64intrin.h is required], - [qthread_cv_require_ia64intrin_h], - [AC_LINK_IFELSE([AC_LANG_SOURCE([[ -#include - -int main(void) -{ -long bar=1, old=1, new=2; -long foo = __sync_val_compare_and_swap(&bar, old, new); -return foo; -}]])], - [qthread_cv_require_ia64intrin_h="no"], - [qthread_cv_require_ia64intrin_h="yes"])])]) -]) -AS_IF([test "$qthread_cv_require_ia64intrin_h" = "yes"], - [AC_DEFINE([QTHREAD_NEEDS_IA64INTRIN],[1],[if this header is necessary for builtin atomics])]) AS_IF([test "x$qthread_cv_atomic_CASptr" = "xyes"], [AC_DEFINE([QTHREAD_ATOMIC_CAS_PTR],[1], [if the compiler supports __sync_val_compare_and_swap on pointers])]) diff --git a/config/qthread_check_attributes.m4 b/config/qthread_check_attributes.m4 index acb45f28d..ebdac55c6 100644 --- a/config/qthread_check_attributes.m4 +++ b/config/qthread_check_attributes.m4 @@ -140,9 +140,6 @@ AC_CACHE_CHECK([support for __sync_synchronize], POWERPC*) mdefstr='__asm__ __volatile__ ("sync":::"memory")' ;; - SPARCV9_32|SPARCV9_64) - mdefstr='__asm__ __volatile__ ("membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad":::"memory")' - ;; *) AC_MSG_ERROR([ASM $qthread_cv_asm_arch]) mdefstr="$cdefstr" diff --git a/config/qthread_check_tiletopo.m4 b/config/qthread_check_tiletopo.m4 deleted file mode 100644 index 93135e930..000000000 --- a/config/qthread_check_tiletopo.m4 +++ /dev/null @@ -1,23 +0,0 @@ -# -*- Autoconf -*- -# -# Copyright (c) 2010 Sandia Corporation -# - -# QTHREAD_CHECK_TILETOPO([action-if-found], [action-if-not-found]) -# ------------------------------------------------------------------------------ -AC_DEFUN([QTHREAD_CHECK_TILETOPO], [ - qt_allgoodsofar=yes - AC_CHECK_HEADERS([tmc/cpus.h],[], - [qt_allgoodsofar=no - break]) - AS_IF([test "x$qt_allgoodsofar" = xyes], - [AC_SEARCH_LIBS([tmc_cpus_set_task_cpu], - [ilib tmc], - [], - [qt_allgoodsofar=no])]) - - AS_IF([test "x$qt_allgoodsofar" = xyes], - [AC_DEFINE([QTHREAD_HAVE_TILETOPO],[1],[if the machine has a Tilera-style topology interface]) - $1], - [$2]) -]) diff --git a/config/qthread_detect_compiler_type.m4 b/config/qthread_detect_compiler_type.m4 index 01f9e0457..55f5c5d63 100644 --- a/config/qthread_detect_compiler_type.m4 +++ b/config/qthread_detect_compiler_type.m4 @@ -29,8 +29,6 @@ AC_CACHE_CHECK([what kind of C compiler $CC is], [AC_LANG_PUSH([C]) dnl These compilers have been caught pretending to be GNU GCC - AS_IF([test "x$qthread_cv_c_compiler_type" == x], - [_QTHREAD_CHECK_IFDEF([__TILECC__],[qthread_cv_c_compiler_type=TileCC])]) AS_IF([test "x$qthread_cv_c_compiler_type" == x], [_QTHREAD_CHECK_IFDEF([__INTEL_COMPILER],[qthread_cv_c_compiler_type=Intel])]) AS_IF([test "x$qthread_cv_c_compiler_type" == x], @@ -141,8 +139,6 @@ AC_CACHE_CHECK([what kind of C compiler $CC is], [_QTHREAD_CHECK_IFDEF([__HIGHC__],[qthread_cv_c_compiler_type=MetaWare])]) AS_IF([test "x$qthread_cv_c_compiler_type" == x], [_QTHREAD_CHECK_IFDEF([__MWERKS__],[qthread_cv_c_compiler_type=MetrowerksCodeWarrior])]) - AS_IF([test "x$qthread_cv_c_compiler_type" == x], - [_QTHREAD_CHECK_IFDEF([__sgi],[qthread_cv_c_compiler_type=MIPSpro])]) AS_IF([test "x$qthread_cv_c_compiler_type" == x], [_QTHREAD_CHECK_IFDEF([__MRC__],[qthread_cv_c_compiler_type=MPW])]) AS_IF([test "x$qthread_cv_c_compiler_type" == x], @@ -201,8 +197,6 @@ AC_CACHE_CHECK([what kind of C++ compiler $CXX is], [AC_LANG_PUSH([C++]) dnl These compilers have been caught pretending to be GNU G++ - AS_IF([test "x$qthread_cv_cxx_compiler_type" == x], - [_QTHREAD_CHECK_IFDEF([__TILECC__],[qthread_cv_cxx_compiler_type=TileCC])]) AS_IF([test "x$qthread_cv_cxx_compiler_type" == x], [_QTHREAD_CHECK_IFDEF([__INTEL_COMPILER],[qthread_cv_cxx_compiler_type=Intel])]) AS_IF([test "x$qthread_cv_cxx_compiler_type" == x], @@ -281,8 +275,6 @@ AC_CACHE_CHECK([what kind of C++ compiler $CXX is], [_QTHREAD_CHECK_IFDEF([__HIGHC__],[qthread_cv_cxx_compiler_type=MetaWare])]) AS_IF([test "x$qthread_cv_cxx_compiler_type" == x], [_QTHREAD_CHECK_IFDEF([__MWERKS__],[qthread_cv_cxx_compiler_type=MetrowerksCodeWarrior])]) - AS_IF([test "x$qthread_cv_cxx_compiler_type" == x], - [_QTHREAD_CHECK_IFDEF([__sgi],[qthread_cv_cxx_compiler_type=MIPSpro])]) AS_IF([test "x$qthread_cv_cxx_compiler_type" == x], [_QTHREAD_CHECK_IFDEF([__MRC__],[qthread_cv_cxx_compiler_type=MPW])]) AS_IF([test "x$qthread_cv_cxx_compiler_type" == x], diff --git a/config/qthread_ia_cacheline.m4 b/config/qthread_ia_cacheline.m4 index 2a73e7342..25d3bcfea 100644 --- a/config/qthread_ia_cacheline.m4 +++ b/config/qthread_ia_cacheline.m4 @@ -9,22 +9,14 @@ AC_CACHE_CHECK([for x86 cache line size], #define QTHREAD_UNSUPPORTED 0 #define QTHREAD_IA32 1 #define QTHREAD_AMD64 2 -#define QTHREAD_IA64 3 -#define QTHREAD_ALPHA 4 -#define QTHREAD_MIPS 5 #define QTHREAD_POWERPC32 6 #define QTHREAD_POWERPC64 7 -#define QTHREAD_SPARCV9_32 8 -#define QTHREAD_SPARCV9_64 9 -#define QTHREAD_TILEPRO 10 -#define QTHREAD_TILEGX 11 #define QTHREAD_ARM 12 #define QTHREAD_ARMV8_A64 13 ],[ int op = 1, eax, ebx, ecx, edx, cachelinesize; FILE *f; -#if QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32 || \ - QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64 +#if QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32 # ifdef __PIC__ __asm__("push %%ebx\n\t" "cpuid\n\t" @@ -45,8 +37,7 @@ __asm__("cpuid" cachelinesize = 8*((ebx>>8)&0xff); if (cachelinesize == 0) { op = 2; -#if QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32 || \ - QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64 +#if QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32 __asm__("push %%ebx\n\t" "cpuid\n\t" "mov %%ebx, %1\n\t" diff --git a/configure.ac b/configure.ac index 0b02abc9f..d2c56736d 100644 --- a/configure.ac +++ b/configure.ac @@ -294,11 +294,11 @@ AC_ARG_WITH([topology], [AS_HELP_STRING([--with-topology=[[topologylib]]], [specify which topology interface to use. Supported interfaces include no, hwloc, hwloc_v2, binders, lgrp, libnuma, - libnumaV2, mach, plpa, sys, and, tilera.])], + libnumaV2, mach, plpa, and sys.])], [AS_IF([test "x$with_topology" = xyes], [with_topology=none_specified]) case "$with_topology" in - hwloc|binders|hwloc_v2|lgrp|libnuma|libnumaV2|mach|no|plpa|sys|tilera) ;; + hwloc|binders|hwloc_v2|lgrp|libnuma|libnumaV2|mach|no|plpa|sys) ;; none_specified) ;; *) AC_MSG_ERROR([Unsupported topology library ($with_topology)]) @@ -327,8 +327,7 @@ AC_ARG_ENABLE([condwait-queue], [force the use of a pthread condwait queue, instead of a spin-based queue for inter-thread communication (important if spinning shepherds - interfere with each other). Default enabled on - sparc/solaris, but default disabled elsewhere.])]) + interfere with each other). Default disabled.])]) AC_ARG_ENABLE([third-party-benchmarks], [AS_HELP_STRING([--enable-third-party-benchmarks], @@ -402,16 +401,9 @@ dnl Test for this *before* AC_PROG_CC, to avoid getting the default CFLAGS dnl However, that means we don't know a ton about this machine or this compiler dnl yet, so we may have to reset it later. AS_IF([test "x$enable_debugging" = xyes], - [case "$build_cpu" in dnl ( - sparc) - CFLAGS="$CFLAGS -O0 -g3" - CXXFLAGS="$CXXFLAGS -O0 -g3" - ;; - *) - CFLAGS="$CFLAGS -O0 -g" - CXXFLAGS="$CXXFLAGS -O0 -g" - ;; - esac]) + [CFLAGS="$CFLAGS -O0 -g" + CXXFLAGS="$CXXFLAGS -O0 -g" + ]) AC_PROG_CC dnl We use system extensions. This includes setting _GNU_SOURCE AC_USE_SYSTEM_EXTENSIONS @@ -495,7 +487,7 @@ AS_IF([test "x$enable_picky" = xyes], QTHREAD_CHECK_ASSEMBLY([have_assembly=1], [have_assembly=0]) case "$qthread_cv_asm_arch" in - POWERPC32|SPARCV9_32) + POWERPC32) compile_compat_atomic=yes ;; esac @@ -735,14 +727,7 @@ AS_IF([test "x$enable_oversubscription" = "xyes"], AC_CHECK_FUNCS([sched_yield])]) AS_IF([test "x$enable_condwait_queue" = "x"], - [case "$host" in - sparc-sun-solaris*) - enable_condwait_queue="yes" - ;; - *) - enable_condwait_queue="no" - ;; - esac]) + [enable_condwait_queue="no"]) AS_IF([test "x$enable_condwait_queue" = "xyes"], [AC_DEFINE([QTHREAD_CONDWAIT_BLOCKING_QUEUE], [1], [use pthread-based condwait for lf queue])]) @@ -989,10 +974,6 @@ AS_IF([test "x$qthread_topo" != xno], [qthread_topo=libnumaV2], [AS_IF([test "x$qthread_topo" != xno], [AC_MSG_ERROR([Specified topology library ($qthread_topo) does not work.])])])]) - AS_IF([test "x$qthread_topo" = xno -o "x$qthread_topo" = xtilera], - [QTHREAD_CHECK_TILETOPO([qthread_topo=tilera], - [AS_IF([test "x$qthread_topo" != xno], - [AC_MSG_ERROR([Specified topology library ($qthread_topo) does not work.])])])]) # Third, check any others. AS_IF([test "x$qthread_topo" = xno -o "x$qthread_topo" = xmach], [QTHREAD_CHECK_MACHTOPO([qthread_topo=mach], diff --git a/include/Makefile.am b/include/Makefile.am index fb27e9547..50929a7a3 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -10,7 +10,6 @@ noinst_HEADERS = \ fastcontext/taskimpl.h \ fastcontext/power-ucontext.h \ fastcontext/386-ucontext.h \ - fastcontext/tile-ucontext.h \ net/net.h \ qthread_innards.h \ qloop_innards.h \ diff --git a/include/fastcontext/taskimpl.h b/include/fastcontext/taskimpl.h index 077fc6c26..e663a8d7d 100644 --- a/include/fastcontext/taskimpl.h +++ b/include/fastcontext/taskimpl.h @@ -7,16 +7,7 @@ #include "qthread/common.h" -#if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEGX)) -#ifdef HAVE_STDARG_H -#include -#endif -#include -#define NEEDTILEMAKECONTEXT -#define NEEDSWAPCONTEXT -#include "tile-ucontext.h" -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) +#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) #define NEEDX86MAKECONTEXT #define NEEDSWAPCONTEXT #include "386-ucontext.h" diff --git a/include/fastcontext/tile-ucontext.h b/include/fastcontext/tile-ucontext.h deleted file mode 100644 index c0a8408b4..000000000 --- a/include/fastcontext/tile-ucontext.h +++ /dev/null @@ -1,52 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include -#endif - -#include /* for size_t, per C89 */ - -#include "qt_visibility.h" - -#define setcontext(u) qt_setmctxt(&(u)->mc) -#define getcontext(u) qt_getmctxt(&(u)->mc) -typedef struct mctxt mctxt_t; -typedef struct uctxt uctxt_t; - -/* - * This struct defines the way the registers are stored on the stack during a - * system call/exception. It should be a multiple of 8 bytes to preserve - * normal stack alignment rules. - * - */ -struct mctxt { - /* Saved main processor registers; 56..63 are special. */ - /* tp, sp, and lr must immediately follow regs[] for aliasing. */ - unsigned long regs[23]; /* callee saves r30-r52 */ - unsigned long tp; /* thread-local data pointer (23*4) */ - unsigned long sp; /* stack pointer (grows DOWNWARD) (23*4)+4 */ - unsigned long lr; /* aka link register (where to go when returning from a - * function) (23*4)+(2*4) */ - - /* Saved special registers. */ - unsigned long pc; /* (23*4)+(3*4) */ - unsigned long r0; /* (23*4)+(4*4) */ - // unsigned long ex1; /* stored in EX_CONTEXT_1_1 (PL and ICS bit) */ - unsigned long arg0; /* (23*4)+(5*4) only used for first function invocation */ - unsigned long first; /* (23*4)+(6*4) */ -}; - -struct uctxt { - struct { - void *ss_sp; - size_t ss_size; - } uc_stack; - - // sigset_t uc_sigmask; - mctxt_t mc; - struct uctxt *uc_link; /* unused */ -}; - -int INTERNAL qt_swapctxt(uctxt_t *, uctxt_t *); -void INTERNAL qt_makectxt(uctxt_t *, void (*)(void), int, ...); -int INTERNAL qt_getmctxt(mctxt_t *); -void INTERNAL qt_setmctxt(mctxt_t *); -/* vim:set expandtab: */ diff --git a/include/qt_atomics.h b/include/qt_atomics.h index 70c71b73d..8b55ff864 100644 --- a/include/qt_atomics.h +++ b/include/qt_atomics.h @@ -4,14 +4,6 @@ #include #include -#ifdef QTHREAD_NEEDS_IA64INTRIN -#ifdef HAVE_IA64INTRIN_H -#include -#elif defined(HAVE_IA32INTRIN_H) -#include -#endif -#endif - #include #include @@ -429,41 +421,6 @@ qt_cas(void **const ptr, void *const oldv, void *const newv) { /*{{{*/ : "cc", "memory"); return result; -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) - void *nv = newv; - __asm__ __volatile__("cas [%1], %2, %0" - : "=&r"(nv) - : "r"(ptr), - "r"(oldv) -#if !defined(__SUNPRO_C) && !defined(__SUNPRO_CC) - , - "0"(nv) -#endif - : "cc", "memory"); - return nv; - -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) - void *nv = newv; - __asm__ __volatile__("casx [%1], %2, %0" - : "=&r"(nv) - : "r"(ptr), - "r"(oldv) -#if !defined(__SUNPRO_C) && !defined(__SUNPRO_CC) - , - "0"(nv) -#endif - : "cc", "memory"); - return nv; - -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) - void **retval; - __asm__ __volatile__("mov ar.ccv=%0;;" : : "rO"(oldv)); - __asm__ __volatile__("cmpxchg4.acq %0=[%1],%2,ar.ccv" - : "=r"(retval) - : "r"(ptr), "r"(newv) - : "memory"); - return retval; - #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \ (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) void **retval; @@ -624,110 +581,6 @@ static QINLINE aligned_t qthread_internal_incr_mod_( : "r"(operand), "r"(max) : "cc", "memory"); -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) || \ - ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) && \ - (QTHREAD_SIZEOF_ALIGNED_T == 4)) - - uint32_t oldval, newval; - - /* newval = *operand; */ - do { - /* you *should* be able to move the *operand reference outside the - * loop and use the output of the CAS (namely, newval) instead. - * However, there seems to be a bug in gcc 4.0.4 wherein, if you do - * that, the while() comparison uses a temporary register value for - * newval that has nothing to do with the output of the CAS - * instruction. (See how obviously wrong that is?) For some reason that - * I haven't been able to figure out, moving the *operand reference - * inside the loop fixes that problem, even at -O2 optimization. */ - retval = oldval = *operand; - newval = oldval + 1; - newval *= (newval < max); - - /* if (*operand == oldval) - * swap(newval, *operand) - * else - * newval = *operand - */ - __asm__ __volatile__("cas [%1] , %2, %0" /* */ - : "=&r"(newval) - : "r"(operand), "r"(oldval), "0"(newval) - : "memory"); - } while (oldval != newval); - -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) - aligned_t oldval, newval; - - /* newval = *operand; */ - do { - /* you *should* be able to move the *operand reference outside the - * loop and use the output of the CAS (namely, newval) instead. - * However, there seems to be a bug in gcc 4.0.4 wherein, if you do - * that, the while() comparison uses a temporary register value for - * newval that has nothing to do with the output of the CAS - * instruction. (See how obviously wrong that is?) For some reason that - * I haven't been able to figure out, moving the *operand reference - * inside the loop fixes that problem, even at -O2 optimization. */ - retval = oldval = *operand; - newval = oldval + 1; - newval *= (newval < max); - - /* if (*operand == oldval) - * swap(newval, *operand) - * else - * newval = *operand - */ - __asm__ __volatile__("casx [%1] , %2, %0" - : "=&r"(newval) - : "r"(operand), - "r"(oldval) -#if !defined(__SUNPRO_CC) && !defined(__SUNPRO_C) - , - "0"(newval) -#endif - : "memory"); - } while (oldval != newval); - -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) -#if QTHREAD_SIZEOF_ALIGNED_T == 8 - int64_t res, old, new; - - do { - old = *operand; /* atomic, because operand is aligned */ - new = old + 1; - new *= (new < max); - asm volatile("mov ar.ccv=%0;;" - : /* no output */ - : "rO"(old)); - - /* separate so the compiler can insert its junk */ - asm volatile("cmpxchg8.acq %0=[%1],%2,ar.ccv" - : "=r"(res) - : "r"(operand), "r"(new) - : "memory"); - } while (res != old); /* if res==old, new is out of date */ - retval = old; - -#else /* 32-bit aligned_t */ - int32_t res, old, new; - - do { - old = *operand; /* atomic, because operand is aligned */ - new = old + 1; - new *= (new < max); - asm volatile("mov ar.ccv=%0;;" - : /* no output */ - : "rO"(old)); - - /* separate so the compiler can insert its junk */ - asm volatile("cmpxchg4.acq %0=[%1],%2,ar.ccv" - : "=r"(res) - : "r"(operand), "r"(new) - : "memory"); - } while (res != old); /* if res==old, new is out of date */ - retval = old; -#endif /* if QTHREAD_SIZEOF_ALIGNED_T == 8 */ - #elif ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) && \ (QTHREAD_SIZEOF_ALIGNED_T == 4)) || \ ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) && \ diff --git a/include/qthread/common.h.in b/include/qthread/common.h.in index 2ea1bbede..ed26ef4f6 100644 --- a/include/qthread/common.h.in +++ b/include/qthread/common.h.in @@ -59,12 +59,6 @@ /* builtin incr supported */ #undef QTHREAD_ATOMIC_INCR -/* ia64intrin.h available */ -#undef HAVE_IA64INTRIN_H - -/* if ia64intrin is needed */ -#undef QTHREAD_NEEDS_IA64INTRIN - /* specifying data alignment is allowed */ #undef QTHREAD_ALIGNEDDATA_ALLOWED @@ -113,15 +107,8 @@ #define QTHREAD_UNSUPPORTED 0 #define QTHREAD_IA32 1 #define QTHREAD_AMD64 2 -#define QTHREAD_IA64 3 -#define QTHREAD_ALPHA 4 -#define QTHREAD_MIPS 5 #define QTHREAD_POWERPC32 6 #define QTHREAD_POWERPC64 7 -#define QTHREAD_SPARCV9_32 8 -#define QTHREAD_SPARCV9_64 9 -#define QTHREAD_TILEPRO 10 -#define QTHREAD_TILEGX 11 #define QTHREAD_ARM 12 #define QTHREAD_ARMV8_A64 13 diff --git a/include/qthread/qthread.h b/include/qthread/qthread.h index 4152d577a..2464d4ec7 100644 --- a/include/qthread/qthread.h +++ b/include/qthread/qthread.h @@ -22,14 +22,6 @@ using std::memory_order_relaxed; #include /* for fprintf() */ #endif -#ifdef QTHREAD_NEEDS_IA64INTRIN -#ifdef HAVE_IA64INTRIN_H -#include -#elif defined(HAVE_IA32INTRIN_H) -#include -#endif -#endif - #include "common.h" #include "qthread-int.h" @@ -676,9 +668,7 @@ int qthread_spinlocks_destroy(qthread_spinlock_t *a); int qthread_lock_init(aligned_t const *a, bool const is_recursive); int qthread_lock_destroy(aligned_t *a); -#if defined(QTHREAD_MUTEX_INCREMENT) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) +#if defined(QTHREAD_MUTEX_INCREMENT) || QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32 uint32_t qthread_incr32_(uint32_t *, int32_t); uint64_t qthread_incr64_(uint64_t *, int64_t); float qthread_fincr_(float *, float); @@ -699,8 +689,7 @@ static QINLINE float qthread_fincr(float *operand, float incr) { /*{{{ */ #if defined(QTHREAD_MUTEX_INCREMENT) return qthread_fincr_(operand, incr); -#elif QTHREAD_ATOMIC_CAS && (!defined(HAVE_GCC_INLINE_ASSEMBLY) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEGX)) +#elif QTHREAD_ATOMIC_CAS && !defined(HAVE_GCC_INLINE_ASSEMBLY) union { float f; uint32_t i; @@ -752,51 +741,6 @@ static QINLINE float qthread_fincr(float *operand, float incr) { /*{{{ */ return retval.f; -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) - union { - float f; - uint32_t i; - } oldval, newval; - - /* newval.f = *operand; */ - do { - /* you *should* be able to move the *operand reference outside the - * loop and use the output of the CAS (namely, newval) instead. - * However, there seems to be a bug in gcc 4.0.4 wherein, if you do - * that, the while() comparison uses a temporary register value for - * newval that has nothing to do with the output of the CAS - * instruction. (See how obviously wrong that is?) For some reason that - * I haven't been able to figure out, moving the *operand reference - * inside the loop fixes that problem, even at -O2 optimization. */ - oldval.f = *(float volatile *)operand; - newval.f = oldval.f + incr; - __asm__ __volatile__( - "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t" - "cas [%1], %2, %0" - : "+r"(newval.i) - : "r"(operand), "r"(oldval.i) - : "cc", "memory"); - } while (oldval.i != newval.i); - return oldval.f; - -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) - union { - float f; - uint32_t i; - } oldval, newval, res; - - do { - oldval.f = *(float volatile *)operand; - newval.f = oldval.f + incr; - __asm__ __volatile__("mov ar.ccv=%0;;" ::"rO"(oldval.i)); - __asm__ __volatile__("cmpxchg4.acq %0=[%1],%2,ar.ccv" - : "=r"(res.i) - : "r"(operand), "r"(newval.i) - : "memory"); - } while (res.i != oldval.i); /* if res!=old, the calc is out of date */ - return oldval.f; - #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \ (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) union { @@ -860,9 +804,7 @@ static QINLINE double qthread_dincr(double *operand, double incr) { /*{{{ */ (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) return qthread_dincr_(operand, incr); -#elif QTHREAD_ATOMIC_CAS && (!defined(HAVE_GCC_INLINE_ASSEMBLY) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEGX) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)) +#elif QTHREAD_ATOMIC_CAS && !defined(HAVE_GCC_INLINE_ASSEMBLY) union { uint64_t i; double d; @@ -916,82 +858,6 @@ static QINLINE double qthread_dincr(double *operand, double incr) { /*{{{ */ return retval.d; -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) - union { - uint64_t i; - double d; - } oldval, newval; - - newval.d = *(double volatile *)operand; - do { - /* this allows the compiler to be as flexible as possible with register - * assignments */ - uint64_t tmp1; - uint64_t tmp2; - - oldval.d = newval.d; - newval.d += incr; - __asm__ __volatile__( - "ldd %0, %1\n\t" - "ldx %4, %2\n\t" - "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t" - "sllx %1, 0x20, %1\n\t" - "sllx %2, 0x20, %2\n\t" - "casx [%3], %2, %1\n\t" - "srlx %1, 0x20, %1\n\t" - "std %1, %0" - /* h means 64-BIT REGISTER - * (probably unnecessary, but why take chances?) */ - : "=m"(newval.i), "=h"(tmp1), "=h"(tmp2) - : "r"(operand), "m"(oldval.i) - : "memory"); - } while (oldval.i != newval.i); - return oldval.d; - -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) - union { - uint64_t i; - double d; - } oldval, newval; - - /*newval.d = *operand; */ - do { - /* you *should* be able to move the *operand reference outside the - * loop and use the output of the CAS (namely, newval) instead. - * However, there seems to be a bug in gcc 4.0.4 wherein, if you do - * that, the while() comparison uses a temporary register value for - * newval that has nothing to do with the output of the CAS - * instruction. (See how obviously wrong that is?) For some reason that - * I haven't been able to figure out, moving the *operand reference - * inside the loop fixes that problem, even at -O2 optimization. */ - oldval.d = *(double volatile *)operand; - newval.d = oldval.d + incr; - __asm__ __volatile__( - "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t" - "casx [%1], %2, %0" - : "+r"(newval.i) - : "r"(operand), "r"(oldval.i) - : "memory"); - } while (oldval.d != newval.d); - return oldval.d; - -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) - union { - uint64_t i; - double d; - } oldval, newval, res; - - do { - oldval.d = *(double volatile *)operand; - newval.d = oldval.d + incr; - __asm__ __volatile__("mov ar.ccv=%0;;" ::"rO"(oldval.i)); - __asm__ __volatile__("cmpxchg8.acq %0=[%1],%2,ar.ccv" - : "=r"(res.i) - : "r"(operand), "r"(newval.i) - : "memory"); - } while (res.i != oldval.i); /* if res!=old, the calc is out of date */ - return oldval.d; - #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) union { double d; @@ -1172,57 +1038,6 @@ static QINLINE uint32_t qthread_incr32(uint32_t *operand, return retval; -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) - uint32_t oldval, newval; - - /* newval = *operand; */ - do { - /* you *should* be able to move the *operand reference outside the - * loop and use the output of the CAS (namely, newval) instead. - * However, there seems to be a bug in gcc 4.0.4 wherein, if you do - * that, the while() comparison uses a temporary register value for - * newval that has nothing to do with the output of the CAS - * instruction. (See how obviously wrong that is?) For some reason that - * I haven't been able to figure out, moving the *operand reference - * inside the loop fixes that problem, even at -O2 optimization. */ - oldval = *operand; - newval = oldval + incr; - /* newval always gets the value of *operand; if it's - * the same as oldval, then the swap was successful */ - __asm__ __volatile__( - "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t" - "cas [%1] , %2, %0" - : "+r"(newval) - : "r"(operand), "r"(oldval) - : "cc", "memory"); - } while (oldval != newval); - return oldval; - -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) - uint32_t res; - - if (incr == 1) { - asm volatile("fetchadd4.rel %0=[%1],1" : "=r"(res) : "r"(operand)); - } else { - uint32_t old, newval; - - do { - old = *operand; /* atomic, because operand is aligned */ - newval = old + incr; - asm volatile("mov ar.ccv=%0;;" - : /* no output */ - : "rO"(old)); - - /* separate so the compiler can insert its junk */ - asm volatile("cmpxchg4.acq %0=[%1],%2,ar.ccv" - : "=r"(res) - : "r"(operand), "r"(newval) - : "memory"); - } while (res != old); /* if res!=old, the calc is out of date */ - } - return res; - #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) || \ (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) @@ -1245,8 +1060,7 @@ static QINLINE uint32_t qthread_incr32(uint32_t *operand, static QINLINE uint64_t qthread_incr64(uint64_t *operand, uint64_t incr) { /*{{{ */ #if defined(QTHREAD_MUTEX_INCREMENT) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) + (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) return qthread_incr64_(operand, incr); #elif defined(QTHREAD_ATOMIC_INCR) @@ -1263,8 +1077,7 @@ static QINLINE uint64_t qthread_incr64(uint64_t *operand, #elif !defined(HAVE_GCC_INLINE_ASSEMBLY) #error Qthreads requires either mutex increments, inline assembly, or compiler atomic builtins -#else // if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH == - // QTHREAD_POWERPC32) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) +#else // if defined(QTHREAD_MUTEX_INCREMENT) || QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32 #if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) uint64_t retval; uint64_t incrd = incrd; /* no initializing */ @@ -1280,90 +1093,6 @@ static QINLINE uint64_t qthread_incr64(uint64_t *operand, return retval; -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) - uint64_t oldval, newval = *operand; - - do { - /* this allows the compiler to be as flexible as possible with register - * assignments */ - uint64_t tmp1 = tmp1; - uint64_t tmp2 = tmp2; - - oldval = newval; - newval += incr; - /* newval always gets the value of *operand; if it's - * the same as oldval, then the swap was successful */ - __asm__ __volatile__( - "ldx %0, %1\n\t" - "ldx %4, %2\n\t" - "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t" - "casx [%3] , %2, %1\n\t" - "stx %1, %0" - /* h means 64-BIT REGISTER - * (probably unnecessary, but why take chances?) */ - : "=m"(newval), "=&h"(tmp1), "=&h"(tmp2) - : "r"(operand), "m"(oldval) - : "cc", "memory"); - } while (oldval != newval); - return oldval; - -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) - uint64_t oldval, newval; - -#ifdef QTHREAD_ATOMIC_CAS - newval = *operand; - do { - oldval = newval; - newval = __sync_val_compare_and_swap(operand, oldval, oldval + incr); - } while (oldval != newval); -#else - do { - /* you *should* be able to move the *operand reference outside the - * loop and use the output of the CAS (namely, newval) instead. - * However, there seems to be a bug in gcc 4.0.4 wherein, if you do - * that, the while() comparison uses a temporary register value for - * newval that has nothing to do with the output of the CAS - * instruction. (See how obviously wrong that is?) For some reason that - * I haven't been able to figure out, moving the *operand reference - * inside the loop fixes that problem, even at -O2 optimization. */ - oldval = *operand; - newval = oldval + incr; - /* newval always gets the value of *operand; if it's - * the same as oldval, then the swap was successful */ - __asm__ __volatile__( - "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t" - "casx [%1] , %2, %0" - : "+r"(newval) - : "r"(operand), "r"(oldval) - : "cc", "memory"); - } while (oldval != newval); -#endif // ifdef QTHREAD_ATOMIC_CAS - return oldval; - -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) - uint64_t res; - - if (incr == 1) { - asm volatile("fetchadd8.rel %0=%1,1" : "=r"(res) : "m"(*operand)); - } else { - uint64_t old, newval; - - do { - old = *operand; /* atomic, because operand is aligned */ - newval = old + incr; - asm volatile("mov ar.ccv=%0;;" - : /* no output */ - : "rO"(old)); - - /* separate so the compiler can insert its junk */ - asm volatile("cmpxchg8.acq %0=[%1],%2,ar.ccv" - : "=r"(res) - : "r"(operand), "r"(newval) - : "memory"); - } while (res != old); /* if res!=old, the calc is out of date */ - } - return res; - #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) union { uint64_t i; @@ -1451,8 +1180,7 @@ static QINLINE uint64_t qthread_incr64(uint64_t *operand, #else // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) #error Unimplemented assembly architecture for qthread_incr64 #endif // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) -#endif // if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH == - // QTHREAD_POWERPC32) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) +#endif // if defined(QTHREAD_MUTEX_INCREMENT) || QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32 } /*}}} */ static QINLINE int64_t qthread_incr_xx(void *addr, @@ -1495,25 +1223,6 @@ static QINLINE uint32_t qthread_cas32(uint32_t *operand, : "cc", "memory"); return result; -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) - uint32_t newv = newval; - __asm__ __volatile__("membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t" - "cas [%1], %2, %0" - : "+r"(newv) - : "r"(operand), "r"(oldval) - : "cc", "memory"); - return newv; - -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) - uint32_t retval; - __asm__ __volatile__("mov ar.ccv=%0;;" : : "rO"(oldval)); - __asm__ __volatile__("cmpxchg4.acq %0=[%1],%2,ar.ccv" - : "=r"(retval) - : "r"(operand), "r"(newval) - : "memory"); - return retval; - #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \ (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) uint32_t retval; @@ -1559,40 +1268,6 @@ static QINLINE uint64_t qthread_cas64(uint64_t *operand, : "cc", "memory"); return result; -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) - uint64_t tmp1 = tmp1; - uint64_t tmp2 = tmp2; - uint64_t newv = newval; - __asm__ __volatile__("ldx %0, %1\n\t" - "ldx %4, %2\n\t" - "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t" - "casx [%3], %2, %1\n\t" - "stx %1, %0" - /* h means 64-BIT REGISTER - * (probably unneecessary, but why take chances?) */ - : "+m"(newv), "=&h"(tmp1), "=&h"(tmp2) - : "r"(operand), "m"(oldval) - : "cc", "memory"); - return newv; - -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) - uint64_t newv = newval; - __asm__ __volatile__("membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t" - "casx [%1], %2, %0" - : "+r"(newv) - : "r"(operand), "r"(oldval) - : "cc", "memory"); - return newv; - -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) - uint32_t retval; - __asm__ __volatile__("mov ar.ccv=%0;;" : : "rO"(oldval)); - __asm__ __volatile__("cmpxchg8.acq %0=[%1],%2,ar.ccv" - : "=r"(retval) - : "r"(operand), "r"(newval) - : "memory"); - return retval; - #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) union { uint64_t i; diff --git a/include/qthread_innards.h b/include/qthread_innards.h index 7c4f18cfd..4c819e41b 100644 --- a/include/qthread_innards.h +++ b/include/qthread_innards.h @@ -109,9 +109,7 @@ typedef struct qlib_s { aligned_t sched_shepherd; QTHREAD_FASTLOCK_TYPE sched_shepherd_lock; -#if defined(QTHREAD_MUTEX_INCREMENT) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) +#if defined(QTHREAD_MUTEX_INCREMENT) || QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32 QTHREAD_FASTLOCK_TYPE *atomic_locks; #ifdef QTHREAD_COUNT_THREADS aligned_t *atomic_stripes; diff --git a/src/Makefile.am b/src/Makefile.am index da882059f..982cdb7f1 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -118,7 +118,6 @@ EXTRA_DIST += \ affinity/libnuma.c \ affinity/libnumaV2.c \ affinity/mach.c \ - affinity/tilera.c \ affinity/plpa.c \ affinity/lgrp.c \ affinity/shepcomp.h diff --git a/src/affinity/tilera.c b/src/affinity/tilera.c deleted file mode 100644 index f1459271c..000000000 --- a/src/affinity/tilera.c +++ /dev/null @@ -1,99 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#ifdef HAVE_TMC_CPUS_H -#include -#endif - -#include - -#include "qt_affinity.h" -#include "qt_asserts.h" -#include "qt_debug.h" // for MALLOC() -#include "shepcomp.h" -#include "shufflesheps.h" - -qthread_shepherd_id_t guess_num_shepherds(void); -qthread_worker_id_t -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds); - -void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds, - qthread_worker_id_t *nbworkers, - size_t *hw_par) { /*{{{ */ - if (*nbshepherds == 0) { - *nbshepherds = guess_num_shepherds(); - if (*nbshepherds <= 0) { *nbshepherds = 1; } - } - if (*nbworkers == 0) { - *nbworkers = guess_num_workers_per_shep(*nbshepherds); - if (*nbworkers <= 0) { *nbworkers = 1; } - } -} /*}}} */ - -qthread_shepherd_id_t INTERNAL guess_num_shepherds(void) { /*{{{ */ - cpu_set_t online_cpus; - - qassert(tmc_cpus_get_online_cpus(&online_cpus), 0); - return tmc_cpus_count(&online_cpus); -} /*}}} */ - -void INTERNAL qt_affinity_set(qthread_worker_t *me, - unsigned int Q_UNUSED(nw)) { /*{{{ */ - if (tmc_cpus_set_my_cpu(me->packed_worker_id) < 0) { - perror("tmc_cpus_set_my_affinity() failed"); - fprintf(stderr, "\tnode = %i\n", (int)me->packed_worker_id); - } -} /*}}} */ - -qthread_worker_id_t INTERNAL -guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds) { /*{{{ */ - return 1; -} /*}}} */ - -int INTERNAL qt_affinity_gendists(qthread_shepherd_t *sheps, - qthread_shepherd_id_t nshepherds) { /*{{{ */ - cpu_set_t online_cpus; - unsigned int *cpu_array; - size_t cpu_count, offset; - -#warning The logic for node assignment is completely wrong for multithreaded shepherds - qassert(tmc_cpus_get_online_cpus(&online_cpus), 0); - cpu_count = tmc_cpus_count(&online_cpus); - assert(cpu_count > 0); - /* assign nodes */ - cpu_array = MALLOC(sizeof(unsigned int) * cpu_count); - assert(cpu_array != NULL); - qassert(tmc_cpus_to_array(&online_cpus, cpu_array, cpu_count), cpu_count); - offset = 0; - for (qthread_shepherd_id_t i = 0; i < nshepherds; i++) { - sheps[i].node = cpu_array[offset]; - offset++; - offset *= (offset < cpu_count); - } - FREE(cpu_array, sizeof(unsigned int) * cpu_count); - for (qthread_shepherd_id_t i = 0; i < nshepherds; i++) { - size_t j, k; - unsigned int ix, iy; - sheps[i].shep_dists = qt_calloc(nshepherds, sizeof(unsigned int)); - sheps[i].sorted_sheplist = - qt_calloc(nshepherds - 1, sizeof(qthread_shepherd_id_t)); - assert(sheps[i].shep_dists); - assert(sheps[i].sorted_sheplist); - tmc_cpus_grid_cpu_to_tile(sheps[i].node, &ix, &iy); - for (j = 0; j < nshepherds; j++) { - unsigned int jx, jy; - tmc_cpus_grid_cpu_to_tile(sheps[j].node, &jx, &jy); - sheps[i].shep_dists[j] = abs((int)ix - (int)jx) + abs((int)iy - (int)jy); - } - for (j = k = 0; j < nshepherds; j++) { - if (j != i) { sheps[i].sorted_sheplist[k++] = j; } - } - if (nshepherds > 1) { - sort_sheps(sheps[i].shep_dists, sheps[i].sorted_sheplist, nshepherds); - } - } - return QTHREAD_SUCCESS; -} /*}}} */ - -/* vim:set expandtab: */ diff --git a/src/cacheline.c b/src/cacheline.c index f05faac61..4c40ed940 100644 --- a/src/cacheline.c +++ b/src/cacheline.c @@ -205,18 +205,6 @@ static void figure_out_cacheline_size(void) { /*{{{ */ } else { cacheline_bytes = 128; // G5 } -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) - cacheline_bytes = 128; -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) -#ifdef DEBUG_CPUID - printf("IA64 does not support CPUID; but is usually 128\n"); -#endif - cacheline_bytes = 128; // Itanium L2/L3 are 128, L1 is 64 -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO) - cacheline_bytes = 64; -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEGX) - cacheline_bytes = 64; #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) || \ (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) #if !defined(HAVE_GCC_INLINE_ASSEMBLY) diff --git a/src/compat_atomics.c b/src/compat_atomics.c index 06afca9b8..09b5eb3bd 100644 --- a/src/compat_atomics.c +++ b/src/compat_atomics.c @@ -14,8 +14,7 @@ extern unsigned int QTHREAD_LOCKING_STRIPES; (((size_t)addr >> 4) & (QTHREAD_LOCKING_STRIPES - 1)) #if defined(QTHREAD_MUTEX_INCREMENT) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) + (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) uint32_t qthread_incr32_(uint32_t *op, int32_t const incr) { /*{{{ */ unsigned int stripe = QTHREAD_CHOOSE_STRIPE(op); uint32_t retval; @@ -110,9 +109,7 @@ uint64_t qthread_cas64_(uint64_t *operand, return retval; } /*}}} */ -#else /* if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH == \ - QTHREAD_POWERPC32) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) \ - */ +#else /* if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) */ #error Building this file erroneously. #endif /* if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH == \ QTHREAD_POWERPC32) */ diff --git a/src/ds/qarray.c b/src/ds/qarray.c index 2ab52807c..3329e04f6 100644 --- a/src/ds/qarray.c +++ b/src/ds/qarray.c @@ -38,8 +38,7 @@ qarray_internal_segment_shep(qarray const *a, char *ptr = (((char *)segment_head) + (a->segment_size * a->unit_size)); qassert_ret(a->dist_type == DIST, NULL); - /* ensure that it's 4-byte aligned - * (mandatory on Sparc, good idea elsewhere) */ + /* ensure that it's 4-byte aligned */ if (((uintptr_t)ptr) & 3) { ptr += 4 - (((uintptr_t)ptr) & 3); } /* first, do we have the space? */ qassert_ret((((ptr + sizeof(qthread_shepherd_id_t) - 1) < @@ -419,24 +418,12 @@ static qarray *qarray_create_internal(size_t const count, } /*}}} */ qarray *qarray_create(size_t const count, size_t const obj_size) { /*{{{ */ -#if QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32 || \ - QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64 - return qarray_create_internal(count, obj_size, DIST_STRIPES, 0, 0); - -#else return qarray_create_internal(count, obj_size, FIXED_HASH, 0, 0); -#endif } /*}}} */ qarray *qarray_create_tight(size_t const count, size_t const obj_size) { /*{{{ */ -#if QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32 || \ - QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64 - return qarray_create_internal(count, obj_size, DIST_STRIPES, 1, 0); - -#else return qarray_create_internal(count, obj_size, FIXED_HASH, 1, 0); -#endif } /*}}} */ qarray *qarray_create_configured(size_t const count, diff --git a/src/fastcontext/asm.S b/src/fastcontext/asm.S index 4ef4392c6..0472fc000 100644 --- a/src/fastcontext/asm.S +++ b/src/fastcontext/asm.S @@ -59,14 +59,6 @@ # define NEEDARMA64CONTEXT 1 # define SET qt_setmctxt # define GET qt_getmctxt -# elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO) -# define NEEDTILEPROCONTEXT 1 -# define SET _qt_setmctxt -# define GET _qt_getmctxt -# elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEGX) -# define NEEDTILEGXCONTEXT 1 -# define SET qt_setmctxt -# define GET qt_getmctxt # elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) # define NEEDX86CONTEXT 1 # define SET qt_setmctxt @@ -215,332 +207,6 @@ GET: ret #endif -#ifdef NEEDTILEPROCONTEXT -.text -.align 2 - -.type GET,@function -.globl GET -GET: - ## .frame $sp, 8, $sp - # .caller_lr = 8 - # .caller_caller_sp = 12 - addli r23, sp, -8 _(the arg) - sw r23, r0 - _(/*) prologue end */) - _(/*) setup the pointer */) - addli r1, sp, -8 - lw r1, r1 - _(/* note that each of these uses different temporary - * registers, to allow efficient scheduling */) - addi r2, r1, (0*4) - sw r2, r30 - addi r3, r1, (1*4) - sw r3, r31 - addi r4, r1, (2*4) - sw r4, r32 - addi r5, r1, (3*4) - sw r5, r33 - addi r6, r1, (4*4) - sw r6, r34 - addi r7, r1, (5*4) - sw r7, r35 - addi r8, r1, (6*4) - sw r8, r36 - addi r9, r1, (7*4) - sw r9, r37 - addi r10, r1, (8*4) - sw r10, r38 - addi r11, r1, (9*4) - sw r11, r39 - addi r12, r1, (10*4) - sw r12, r40 - addi r13, r1, (11*4) - sw r13, r41 - addi r14, r1, (12*4) - sw r14, r42 - addi r15, r1, (13*4) - sw r15, r43 - addi r16, r1, (14*4) - sw r16, r44 - addi r17, r1, (15*4) - sw r17, r45 - addi r18, r1, (16*4) - sw r18, r46 - addi r19, r1, (17*4) - sw r19, r47 - addi r20, r1, (18*4) - sw r20, r48 - addi r21, r1, (19*4) - sw r21, r49 - addi r22, r1, (20*4) - sw r22, r50 - addi r23, r1, (21*4) - sw r23, r51 - addi r24, r1, (22*4) - sw r24, r52 - _(/*) gotten contexts are not function calls */) - addi r6, r1, (23*4)+(6*4) - sw r6, zero - _(/*) store the link register as the new pc */) - move r25, lr - addi r27, r1, (23*4)+(3*4) - sw r27, r25 - _(/*) store the stack pointer */) - addi r27, sp, 0 - addi r28, r1, (23*4)+(1*4) - sw r28, r27 - _(/*) store the return for swapcontext */) - addi r3, r1, (23*4)+(4*4) - movei r4, 1 - sw r3, r4 - _(/*) return value */) - move r0, zero _(/*) success! */) - jrp lr -.type SET,@function -.globl SET -SET: - ## .frame $sp, 8, $sp - # .caller_lr = 8 - # .caller_caller_sp = 12 - addli r6, sp, -8 - sw r6, r0 - _(/*) prologue end */) - _(/*) setup the pointer */) - addli r1, sp, -8 - lw r1, r1 - _(/* note that each of these uses different temporary - * registers, to allow efficient scheduling */) - addi r2, r1, (0*4) - lw r30, r2 - addi r3, r1, (1*4) - lw r31, r3 - addi r4, r1, (2*4) - lw r32, r4 - addi r5, r1, (3*4) - lw r33, r5 - addi r6, r1, (4*4) - lw r34, r6 - addi r7, r1, (5*4) - lw r35, r7 - addi r8, r1, (6*4) - lw r36, r8 - addi r9, r1, (7*4) - lw r37, r9 - addi r10, r1, (8*4) - lw r38, r10 - addi r11, r1, (9*4) - lw r39, r11 - addi r12, r1, (10*4) - lw r40, r12 - addi r13, r1, (11*4) - lw r41, r13 - addi r14, r1, (12*4) - lw r42, r14 - addi r15, r1, (13*4) - lw r43, r15 - addi r16, r1, (14*4) - lw r44, r17 - addi r18, r1, (15*4) - lw r45, r18 - addi r19, r1, (16*4) - lw r46, r19 - addi r20, r1, (17*4) - lw r47, r20 - addi r21, r1, (18*4) - lw r48, r21 - addi r22, r1, (19*4) - lw r49, r22 - addi r23, r1, (20*4) - lw r50, r23 - addi r24, r1, (21*4) - lw r51, r24 - addi r25, r1, (22*4) - lw r52, r25 - _(/*) fiddle with the stack */) - addi r2, r1, (23*4)+(1*4) - lw r3, r2 - move sp, r3 - _(/*) retrieve the new PC */) - addi r6, r1, (23*4)+(3*4) - lw r7, r6 - _(/*) first argument? */) - addi r4, r1, (23*4)+(6*4) - lw r5, r4 - bz r5, 1f - addi r0, r1, (23*4)+(5*4) - lw r0, r0 - jf 2f -1: - addi r0, r1, (23*4)+(4*4) - lw r0, r0 -2: - jrp r7 -#endif - -#ifdef NEEDTILEGXCONTEXT -.text -.align 2 - -.type GET,@function -.globl GET -GET: - _("## .frame $sp, 8, $sp") - _(# .caller_lr = 8) - _(# .caller_caller_sp = 12) - addli r23, sp, -16 _(the arg) - st r23, r0 - _(/*) prologue end */) - _(/*) setup the pointer */) - addli r1, sp, -16 - ld r1, r1 - _(/* note that each of these uses different temporary - * registers, to allow efficient scheduling */) - addi r2, r1, (0*8) - st r2, r30 - addi r3, r1, (1*8) - st r3, r31 - addi r4, r1, (2*8) - st r4, r32 - addi r5, r1, (3*8) - st r5, r33 - addi r6, r1, (4*8) - st r6, r34 - addi r7, r1, (5*8) - st r7, r35 - addi r8, r1, (6*8) - st r8, r36 - addi r9, r1, (7*8) - st r9, r37 - addi r10, r1, (8*8) - st r10, r38 - addi r11, r1, (9*8) - st r11, r39 - addi r12, r1, (10*8) - st r12, r40 - addi r13, r1, (11*8) - st r13, r41 - addi r14, r1, (12*8) - st r14, r42 - addi r15, r1, (13*8) - st r15, r43 - addi r16, r1, (14*8) - st r16, r44 - addi r17, r1, (15*8) - st r17, r45 - addli r18, r1, (16*8) - st r18, r46 - addli r19, r1, (17*8) - st r19, r47 - addli r20, r1, (18*8) - st r20, r48 - addli r21, r1, (19*8) - st r21, r49 - addli r22, r1, (20*8) - st r22, r50 - addli r23, r1, (21*8) - st r23, r51 - addli r24, r1, (22*8) - st r24, r52 - _(/*) gotten contexts are not function calls */) - addli r6, r1, (23*8)+(6*8) - st r6, zero - _(/*) store the link register as the new pc */) - move r25, lr - addli r27, r1, (23*8)+(3*8) - st r27, r25 - _(/*) store the stack pointer */) - addli r27, sp, 0 - addli r28, r1, (23*8)+(1*8) - st r28, r27 - _(/*) store the return for swapcontext */) - addli r3, r1, (23*8)+(4*8) - movei r4, 1 - st r3, r4 - _(/*) return value */) - move r0, zero _(/*) success! */) - jrp lr -.type SET,@function -.globl SET -SET: - _("## .frame $sp, 8, $sp") - _(# .caller_lr = 8) - _(# .caller_caller_sp = 12) - addli r6, sp, -16 - st r6, r0 - _(/*) prologue end */) - _(/*) setup the pointer */) - addli r1, sp, -16 - ld r1, r1 - _(/* note that each of these uses different temporary - * registers, to allow efficient scheduling */) - addi r2, r1, (0*8) - ld r30, r2 - addi r3, r1, (1*8) - ld r31, r3 - addi r4, r1, (2*8) - ld r32, r4 - addi r5, r1, (3*8) - ld r33, r5 - addi r6, r1, (4*8) - ld r34, r6 - addi r7, r1, (5*8) - ld r35, r7 - addi r8, r1, (6*8) - ld r36, r8 - addi r9, r1, (7*8) - ld r37, r9 - addi r10, r1, (8*8) - ld r38, r10 - addi r11, r1, (9*8) - ld r39, r11 - addi r12, r1, (10*8) - ld r40, r12 - addi r13, r1, (11*8) - ld r41, r13 - addi r14, r1, (12*8) - ld r42, r14 - addi r15, r1, (13*8) - ld r43, r15 - addi r16, r1, (14*8) - ld r44, r16 - addi r17, r1, (15*8) - ld r45, r17 - addli r18, r1, (16*8) - ld r46, r18 - addli r19, r1, (17*8) - ld r47, r19 - addli r20, r1, (18*8) - ld r48, r20 - addli r21, r1, (19*8) - ld r49, r21 - addli r22, r1, (20*8) - ld r50, r22 - addli r23, r1, (21*8) - ld r51, r23 - addli r24, r1, (22*8) - ld r52, r24 - _(/*) fiddle with the stack */) - addli r2, r1, (23*8)+(1*8) - ld r3, r2 - move sp, r3 - _(/*) retrieve the new PC */) - addli r6, r1, (23*8)+(3*8) - ld r7, r6 - _(/*) first argument? */) - addli r4, r1, (23*8)+(6*8) - ld r5, r4 - beqz r5, 1f - addli r0, r1, (23*8)+(5*8) - ld r0, r0 - j 2f -1: - addli r0, r1, (23*8)+(4*8) - ld r0, r0 -2: - jrp r7 -#endif - #ifdef NEEDPOWERCONTEXT /* get FPR and VR use flags with sc 0x7FF3 */ /* get vsave with mfspr reg, 256 */ diff --git a/src/fastcontext/context.c b/src/fastcontext/context.c index d7835bfbc..aa33f137e 100644 --- a/src/fastcontext/context.c +++ b/src/fastcontext/context.c @@ -83,33 +83,6 @@ void INTERNAL qt_makectxt(uctxt_t *ucp, void (*func)(void), int argc, ...) { ucp->mc.mc_esp = (long)sp; } -#elif defined(NEEDTILEMAKECONTEXT) -/* This function is entirely copyright Sandia National Laboratories */ -void INTERNAL qt_makectxt(uctxt_t *ucp, void (*func)(void), int argc, ...) { - unsigned long *sp; - unsigned long *tos = ucp->uc_stack.ss_sp; - int i; - va_list arg; - - tos += ucp->uc_stack.ss_size / sizeof(unsigned long); - tos -= 1; // allow space for an incoming lr - sp = tos - argc; // allow space for arguments - sp = (void *)((unsigned long)sp - - (unsigned long)sp % 64); /* 64-align for Tilera */ - /* now copy from my arg list to the function's arglist (yes, I know this is - * voodoo) */ - // memmove(sp, &argc + 1, argc * sizeof(void*)); - /* The function may also expect to pull args from up to nine registers */ - va_start(arg, argc); - for (i = 0; i < argc; i++) { - if (i == 0) { ucp->mc.arg0 = va_arg(arg, unsigned long); } - } - ucp->mc.pc = (unsigned long)func; - ucp->mc.sp = (unsigned long)sp; - ucp->mc.first = 1; - va_end(arg); -} - #elif defined(NEEDARMMAKECONTEXT) /* This function is entirely copyright Sandia National Laboratories */ void INTERNAL qt_makectxt(uctxt_t *ucp, void (*func)(void), int argc, ...) { @@ -177,8 +150,7 @@ QT_SKIP_THREAD_SANITIZER int INTERNAL qt_swapctxt(uctxt_t *oucp, uctxt_t *ucp) { Q_PREFETCH(ucp, 0, 0); if (getcontext(oucp) == 0) { #if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)) + (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64)) Q_PREFETCH((void *)ucp->mc.mc_esp, 1, 3); #endif setcontext(ucp); diff --git a/src/qthread.c b/src/qthread.c index 112caa2a1..32550ef63 100644 --- a/src/qthread.c +++ b/src/qthread.c @@ -88,8 +88,7 @@ #if !(defined(HAVE_GCC_INLINE_ASSEMBLY) && \ (QTHREAD_SIZEOF_ALIGNED_T == 4 || \ - (QTHREAD_ASSEMBLY_ARCH != QTHREAD_POWERPC32 && \ - QTHREAD_ASSEMBLY_ARCH != QTHREAD_SPARCV9_32))) && \ + QTHREAD_ASSEMBLY_ARCH != QTHREAD_POWERPC32)) && \ !defined(QTHREAD_ATOMIC_CAS) && !defined(QTHREAD_MUTEX_INCREMENT) #warning QTHREAD_MUTEX_INCREMENT not defined. It probably should be. #define QTHREAD_MUTEX_INCREMENT 1 diff --git a/src/syncvar.c b/src/syncvar.c index d9803a564..07f283773 100644 --- a/src/syncvar.c +++ b/src/syncvar.c @@ -103,12 +103,9 @@ extern unsigned int QTHREAD_LOCKING_STRIPES; BUILD_UNLOCKED_SYNCVAR(val, state), \ memory_order_relaxed); \ } while (0) -#elif ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO)) +#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) || \ + (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) || \ + (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) #define UNLOCK_THIS_UNMODIFIED_SYNCVAR(addr, unlocked) \ do { \ atomic_store_explicit( \ @@ -141,8 +138,7 @@ static uint64_t qthread_mwaitc(syncvar_t *restrict const addr, unsigned char const statemask, unsigned int timeout, eflags_t *restrict const err) { /*{{{ */ -#if ((QTHREAD_ASSEMBLY_ARCH != QTHREAD_TILEPRO) && \ - (QTHREAD_ASSEMBLY_ARCH != QTHREAD_POWERPC32)) +#if (QTHREAD_ASSEMBLY_ARCH != QTHREAD_POWERPC32) syncvar_t unlocked; #endif syncvar_t locked; @@ -156,21 +152,7 @@ static uint64_t qthread_mwaitc(syncvar_t *restrict const addr, e.zf = 0; e.cf = 1; do { -#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO) - uint32_t low, high; - int32_t *addrptr = (int32_t *)addr; - /* note that the tilera is little-endian, otherwise this would be - * addrptr+1 */ - while ((low = __insn_tns(addrptr)) == 1) { - if (timeout-- <= 0) { goto errexit; } - SPINLOCK_BODY(); - } - /* now addrptr[0] is 1 and low is the "real" (unlocked) addrptr[0] - * value. */ - high = addrptr[1]; - locked.u.w = (((uint64_t)high) << 32) | low; - MACHINE_FENCE; -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) +#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) /* This applies for any 32-bit architecture with a valid 32-bit CAS * (though I'm making some big-endian assumptions at the moment) */ uint32_t low_unlocked, low_locked; @@ -191,7 +173,7 @@ static uint64_t qthread_mwaitc(syncvar_t *restrict const addr, if (timeout-- <= 0) { goto errexit; } } while (1); locked.u.w = addr->u.w; // I locked it, so I can read it -#else /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO) */ +#else /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) */ { syncvar_t tmp; loop_start: @@ -213,7 +195,7 @@ static uint64_t qthread_mwaitc(syncvar_t *restrict const addr, if (timeout-- <= 0) { goto errexit; } } while (1); } -#endif /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO) */ +#endif /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) */ /*************************************************** * now locked == unlocked, and the lock bit is set * ***************************************************/ @@ -229,10 +211,7 @@ static uint64_t qthread_mwaitc(syncvar_t *restrict const addr, return locked.u.s.data; } else { /* this is NOT a state of interest, so unlock the locked bit */ -#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO) - MACHINE_FENCE; - addrptr[0] = low; -#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) +#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) MACHINE_FENCE; addrptr[1] = low_unlocked; #else @@ -404,12 +383,10 @@ int API_FUNC qthread_syncvar_readFF(uint64_t *restrict dest, QTHREAD_FEB_UNIQUERECORD(feb, src, me); QTHREAD_FEB_TIMER_START(febblock); -#if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64) +#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \ + (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) || \ + (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) || \ + (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64) { /* I'm being optimistic here; this only works if a basic 64-bit load is * atomic (on most platforms it is). Thus, if I've done an atomic read @@ -426,11 +403,10 @@ int API_FUNC qthread_syncvar_readFF(uint64_t *restrict dest, return QTHREAD_SUCCESS; } } -#endif /* if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) || (QTHREAD_ASSEMBLY_ARCH == \ - QTHREAD_POWERPC64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) \ - || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) || (QTHREAD_ASSEMBLY_ARCH \ - == QTHREAD_ARMV8_A64)) */ +#endif /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \ + (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) || \ + (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) || \ + (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64)) */ ret = qthread_mwaitc(src, SYNCFEB_FULL, INITIAL_TIMEOUT, &e); qthread_debug(SYNCVAR_DETAILS, "2 src(%p) = %x, ret = %x\n", @@ -549,12 +525,10 @@ int API_FUNC qthread_syncvar_readFF_nb(uint64_t *restrict dest, if (!me) { return qthread_syncvar_blocker_func(dest, src, READFF_NB); } -#if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64)) +#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \ + (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) || \ + (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) || \ + (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64) { /* I'm being optimistic here; this only works if a basic 64-bit load is * atomic (on most platforms it is). Thus, if I've done an atomic read @@ -569,11 +543,10 @@ int API_FUNC qthread_syncvar_readFF_nb(uint64_t *restrict dest, return QTHREAD_SUCCESS; } } -#endif /* if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) || (QTHREAD_ASSEMBLY_ARCH == \ - QTHREAD_POWERPC64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) \ - || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) || (QTHREAD_ASSEMBLY_ARCH \ - == QTHREAD_ARMV8_A64)) */ +#endif /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \ + (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) || \ + (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) || \ + (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64) */ ret = qthread_mwaitc(src, SYNCFEB_FULL, 1, &e); qthread_debug(SYNCVAR_DETAILS, "2 src(%p) = %x, ret = %x\n", diff --git a/src/threadqueues/sherwood_threadqueues.c b/src/threadqueues/sherwood_threadqueues.c index 43e639b34..642937475 100644 --- a/src/threadqueues/sherwood_threadqueues.c +++ b/src/threadqueues/sherwood_threadqueues.c @@ -223,10 +223,8 @@ void INTERNAL qt_threadqueue_subsystem_init(void) { /*{{{*/ #endif /* if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) */ ssize_t INTERNAL qt_threadqueue_advisory_queuelen(qt_threadqueue_t *q) { /*{{{*/ -#if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)) +#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \ + (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) /* only works if a basic load is atomic */ return q->qlength; @@ -237,9 +235,8 @@ ssize_t INTERNAL qt_threadqueue_advisory_queuelen(qt_threadqueue_t *q) { /*{{{*/ tmp = q->qlength; QTHREAD_TRYLOCK_UNLOCK(&q->qlock); return tmp; -#endif /* if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \ - (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) || (QTHREAD_ASSEMBLY_ARCH == \ - QTHREAD_POWERPC64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)) \ +#endif /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \ + (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) */ } /*}}}*/ diff --git a/test/basics/qthread_stackleft.c b/test/basics/qthread_stackleft.c index df323cca2..24b96b0cb 100644 --- a/test/basics/qthread_stackleft.c +++ b/test/basics/qthread_stackleft.c @@ -38,11 +38,7 @@ static aligned_t alldone; static STACKLEFT_NOINLINE size_t thread2(size_t left, size_t depth) { size_t foo = qthread_stackleft(); iprintf("leveli%i: %zu bytes left\n", (int)depth, foo); -#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) - assert(foo <= left); -#else assert(foo < left); -#endif if (depth < 5) { thread2(foo, depth + 1); } return 1; }