diff --git a/README.md b/README.md
index ee59e6ae8..79f39f17e 100644
--- a/README.md
+++ b/README.md
@@ -134,51 +134,3 @@ For example, `Collect<mt_loop_traits::Add>` is rougly equivalent to the followin
     retval += function(args);
   }
 ```
-******************************************************
-
-##NOTE FOR PGI USERS
-pgcc needs the `-c9x` flag in order to correctly process variadic macros (which
-are used in qthread.c) and the PRIuMAX format definitions (used in `qalloc.c`).
-Use the CFLAGS variable to add this flag. Note that pgcc's support for the full
-C90/C99 standards is lousy, so most C90/C99 features that COULD be used are
-avoided.
-
-******************************************************
-
-##NOTE FOR IBM XL USERS
-make check will probably fail with the error:
-
-`xlc++: 1501-210 command option t contains an incorrect subargument`
-`.../.libs/libqthread.so: could not read symbols: Invalid operation`
-
-This does not mean that the library did not compile correctly, but instead
-means that your libtool is probably broken (most are). The problem seems to be
-that the wrapper script (testloop) is created with incorrect arguments to
-xlc++. The other wrapper scripts (e.g. test1/test2/test3/testq) all have the
-correct arguments, and if you modify testloop so that $relink_command uses the
-`-Wl,--rpath -Wl,directory` syntax rather than the `-rpath,directory` syntax,
-it would work just fine.
-
-*******************************************************
-
-## NOTE FOR IBM BLUEGENE/P GCC USERS
-Old versions of GCC do not handle builtin atomics correctly on this platform.
-The non-existence of `__sync_fetch_and_add()` cannot be reliably detected, so to
-use those compilers, you probably need to configure with
-`--disable-internal-spinlock`.
-
-*******************************************************
-
-## NOTE FOR TILERA USERS
-The Tilera cache coherency protocols, as of the TileGX boards, appear to be
-somewhat buggy for large multithreaded programs. And by buggy I mean they cause
-kernel panics (at least, I haven't been able to demonstrate data corruption
-yet). Thankfully, you can pick from several cache coherency protocols, and one
-of them is more stable than the default. What I have found that seems to be
-*more* stable, if not perfectly stable, is to force the cache coherency
-protocol to hashed. The way you do this is with a boot argument to the Tilera
-kernel. The tile-monitor command I use is this:
-
-	`tile-monitor --net <tilera> --hvx ucache_hash=all --`
-
-Good luck!
diff --git a/config/ax_c_restrict.m4 b/config/ax_c_restrict.m4
index 5eb3fb4a8..9bfe58d59 100644
--- a/config/ax_c_restrict.m4
+++ b/config/ax_c_restrict.m4
@@ -68,8 +68,6 @@
 # the same family, and in the presence of varying compiler options.  If only
 # plain "restrict" works, do nothing.  Here are some variants:
 # - GCC supports both __restrict and __restrict__
-# - older DEC Alpha C compilers support only __restrict
-# - _Restrict is the only spelling accepted by Sun WorkShop 6 update 2 C
 # Otherwise, define "restrict" to be empty.
 AN_IDENTIFIER([restrict], [AX_C_RESTRICT])
 AC_DEFUN([AX_C_RESTRICT],
@@ -95,15 +93,7 @@ AC_DEFUN([AX_C_RESTRICT],
    nothing if this is not supported.  Do not define if restrict is
    supported directly.  */
 #undef restrict
-/* Work around a bug in Sun C++: it does not support _Restrict or
-   __restrict__, even though the corresponding Sun C compiler ends up with
-   "#define restrict _Restrict" or "#define restrict __restrict__" in the
-   previous line.  Perhaps some future version of Sun C++ will work with
-   restrict; if so, hopefully it defines __RESTRICT like Sun C does.  */
-#if defined __SUNPRO_CC && !defined __RESTRICT
-# define _Restrict
-# define __restrict__
-#endif])
+])
  case $ac_cv_c_restrict in
    restrict) ;;
    no) AC_DEFINE([restrict], []) ;;
diff --git a/config/ax_openmp.m4 b/config/ax_openmp.m4
index 7ea794be0..72bc809e6 100644
--- a/config/ax_openmp.m4
+++ b/config/ax_openmp.m4
@@ -72,9 +72,9 @@ AC_PREREQ(2.59) dnl for _AC_LANG_PREFIX
 
 AC_CACHE_CHECK([for OpenMP flag of _AC_LANG compiler], ax_cv_[]_AC_LANG_ABBREV[]_openmp, [save[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
 ax_cv_[]_AC_LANG_ABBREV[]_openmp=unknown
-# Flags to try:  -fopenmp (gcc), -openmp (icc), -mp (SGI & PGI),
-#                -xopenmp (Sun), -omp (Tru64), -qsmp=omp (AIX), none
-ax_openmp_flags="-fopenmp -openmp -mp -xopenmp -omp -qsmp=omp none"
+# Flags to try:  -fopenmp (gcc), -openmp (icc), -mp (SGI),
+#                -omp (Tru64), -qsmp=omp (AIX), none
+ax_openmp_flags="-fopenmp -openmp -mp -omp -qsmp=omp none"
 if test "x$OPENMP_[]_AC_LANG_PREFIX[]FLAGS" != x; then
   ax_openmp_flags="$OPENMP_[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flags"
 fi
diff --git a/config/qthread_check_assembly.m4 b/config/qthread_check_assembly.m4
index dc34ae846..da7cdcc16 100644
--- a/config/qthread_check_assembly.m4
+++ b/config/qthread_check_assembly.m4
@@ -78,92 +78,6 @@ rm -rf conftest*
 unset qthread_assemble
 ])dnl
 
-
-dnl #################################################################
-dnl
-dnl QTHREAD_CHECK_SPARCV8PLUS
-dnl
-dnl #################################################################
-AC_DEFUN([QTHREAD_CHECK_SPARCV8PLUS],[
-    AC_MSG_CHECKING([if have Sparc v8+/v9 support])
-    sparc_result=0
-    QTHREAD_TRY_ASSEMBLE([$qthread_cv_asm_text
-	casa [%o0] 0x80, %o1, %o2],
-                [sparc_result=1],
-                [sparc_result=0])
-    if test "$sparc_result" = "1" ; then
-        AC_MSG_RESULT([yes])
-        ifelse([$1],,:,[$1])
-    else
-        AC_MSG_RESULT([no])
-        ifelse([$2],,:,[$2])
-    fi
-
-    unset sparc_result
-])dnl
-
-
-dnl #################################################################
-dnl
-dnl QTHREAD_CHECK_INLINE_GCC
-dnl
-dnl Check if the compiler is capable of doing GCC-style inline
-dnl assembly.  Some compilers emit a warning and ignore the inline
-dnl assembly (xlc on OS X) and compile without error.  Therefore,
-dnl the test attempts to run the emited code to check that the
-dnl assembly is actually run.  To run this test, one argument to
-dnl the macro must be an assembly instruction in gcc format to move 
-dnl the value 0 into the register containing the variable ret.  
-dnl For PowerPC, this would be:
-dnl
-dnl   "li %0,0" : "=&r"(ret)
-dnl
-dnl DEFINE QTHREAD_GCC_INLINE_ASSEMBLY to 0 or 1 depending on GCC
-dnl                support
-dnl
-dnl #################################################################
-AC_DEFUN([QTHREAD_CHECK_INLINE_C_GCC],[
-  AC_CACHE_CHECK([support for __asm__ __volatile__],
-	             [qt_cv_asm_volatile],
-				 [AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[__asm__ __volatile__ ("":::"memory");]])],
-					             [qt_cv_asm_volatile=yes],
-								 [qt_cv_asm_volatile=no])])
-  AC_CACHE_CHECK([$CC support for GCC inline assembly],[qt_cv_gcc_inline_assembly],[
-  assembly="$1"
-  asm_result="unknown"
-  AS_IF([test ! "$assembly" = ""],
-        [AC_RUN_IFELSE([AC_LANG_SOURCE([[
-int main(void) {
-int ret = 1;
-__asm__ __volatile__ ($assembly);
-return ret;
-}]])],
-      [asm_result="yes"], [asm_result="no"], 
-      [asm_result="unknown"])],
-	    [assembly="test skipped - assuming no"])
-
-  # if we're cross compiling, just try to compile and figure good enough
-  AS_IF([test "$asm_result" = "unknown"],
-    [AC_LINK_IFELSE([AC_LANG_SOURCE([[
-int main(void)
-{
-int ret = 1;
-__asm__ __volatile__ ($assembly);
-return ret;
-}]])],
-      [asm_result="yes"], [asm_result="no"])
-  ])
-  qt_cv_gcc_inline_assembly="$asm_result"
-  unset assembly asm_result])
-
-
-  AS_IF([test "x$qt_cv_gcc_inline_assembly" = "xyes"],
-    [AC_DEFINE([HAVE_GCC_INLINE_ASSEMBLY], [1],
-       [Whether C compiler supports GCC style inline assembly])
-     $2], [$3])
-])dnl
-
-
 AC_DEFUN([QTHREAD_CHECK_ASSEMBLY],[
   AC_REQUIRE([AM_PROG_AS])
   AC_CHECK_SIZEOF([long])
@@ -179,23 +93,6 @@ AC_DEFUN([QTHREAD_CHECK_ASSEMBLY],[
       qthread_gcc_inline_assign='"movl [$]0, %0" : "=&r"(ret)'
     ;;
 
-    ia64-*)
-      qthread_cv_asm_arch="IA64"
-      qthread_gcc_inline_assign='"mov %0=r0\n;;\n" : "=&r"(ret)'
-    ;;
-
-    alpha-*|alphaev[[4-8]]-*|alphaev56-*|alphaev6[[78]]-*)
-      qthread_cv_asm_arch="ALPHA"
-      qthread_gcc_inline_assign='"bis [$]31,[$]31,%0" : "=&r"(ret)'
-    ;;
-
-	tile-*)
-      AS_IF([test "$ac_cv_sizeof_long" = "4"],
-            [qthread_cv_asm_arch="TILE"],
-            [qthread_cv_asm_arch="TILEGX"])
-	  qthread_gcc_inline_assign='"movei %0, 5" : "=&r"(ret)'
-	;;
-
 	armv7l-*)
 	  qthread_cv_asm_arch="ARM"
 	  qthread_gcc_inline_assign='"movt %0, #5" : "=&r"(ret)'
@@ -205,13 +102,6 @@ AC_DEFUN([QTHREAD_CHECK_ASSEMBLY],[
 		qthread_cv_asm_arch="ARMV8_A64"
 	;;
 
-    mips-*|mips64-*)
-      # Should really find some way to make sure that we are on
-      # a MIPS III machine (r4000 and later)
-      qthread_cv_asm_arch="MIPS"
-      qthread_gcc_inline_assign='"or %0,[$]0,[$]0" : "=&r"(ret)'
-    ;;
-
     powerpc*|powerpc64*)
       AS_IF([test "$ac_cv_sizeof_long" = "4"],
             [qthread_cv_asm_arch="POWERPC32"],
@@ -236,20 +126,8 @@ AC_DEFUN([QTHREAD_CHECK_ASSEMBLY],[
       qthread_gcc_inline_assign='"A_%=: li %0,0" : "=&r"(ret)'
     ;;
 
-    sparc*-*)
-      # SPARC v9 (and above) are the only ones with 64bit support
-      # if compiling 32 bit, see if we are v9 (aka v8plus) or
-      # earlier (casa is v8+/v9). 
-      AS_IF([test "$ac_cv_sizeof_long" = "4"],
-            [QTHREAD_CHECK_SPARCV8PLUS([qthread_cv_asm_arch="SPARCV9_32"])],
-            [qthread_cv_asm_arch="SPARCV9_64"])
-      qthread_gcc_inline_assign='"mov 0,%0" : "=&r"(ret)'
-    ;;
   esac
 
-  # now that we know our architecture, try to inline assemble
-  QTHREAD_CHECK_INLINE_C_GCC([$qthread_gcc_inline_assign], [$1], [$2])
-
   AC_MSG_CHECKING([for asssembly architecture])
   AC_MSG_RESULT([$qthread_cv_asm_arch])
   result="QTHREAD_$qthread_cv_asm_arch"
diff --git a/config/qthread_check_atomics.m4 b/config/qthread_check_atomics.m4
index df9544030..cd0628a8f 100644
--- a/config/qthread_check_atomics.m4
+++ b/config/qthread_check_atomics.m4
@@ -10,22 +10,9 @@ AC_REQUIRE([QTHREAD_DETECT_COMPILER_TYPE])
 AC_ARG_ENABLE([builtin-atomics],
      [AS_HELP_STRING([--disable-builtin-atomics],
 	                 [force the use of inline-assembly (if possible) rather than compiler-builtins for atomics. This is useful for working around some compiler bugs; normally, it's preferable to use compiler builtins.])])
-AS_IF([test "x$enable_builtin_atomics" != xno],
-      [AS_IF([test "x$qthread_cv_c_compiler_type" = xIBM_XL -o "x$qthread_cv_cxx_compiler_type" = xIBM_XL],
-		     [AS_IF([test "x$enable_builtin_atomics" = xyes],
-				    [AC_MSG_WARN([Disabling builtin atomics on IBM_XL, due to compiler design decision])])
-			  enable_builtin_atomics=no])])
-AS_IF([test "x$enable_builtin_atomics" != xno], [
-  AS_IF([test "x$qthread_cv_c_compiler_type" = xIntel -o "x$qthread_cv_cxx_compiler_type" = xIntel],
-	    [AC_CHECK_HEADERS([ia64intrin.h ia32intrin.h])])
 AC_CACHE_CHECK([whether compiler supports builtin atomic CAS-32],
   [qthread_cv_atomic_CAS32],
   [AC_LINK_IFELSE([AC_LANG_SOURCE([[
-#ifdef HAVE_IA64INTRIN_H
-# include <ia64intrin.h>
-#elif HAVE_IA32INTRIN_H
-# include <ia32intrin.h>
-#endif
 #include <stdlib.h>
 #include <stdint.h> /* for uint32_t */
 
@@ -40,11 +27,6 @@ return (int)foo;
 AC_CACHE_CHECK([whether compiler supports builtin atomic CAS-64],
   [qthread_cv_atomic_CAS64],
   [AC_LINK_IFELSE([AC_LANG_SOURCE([[
-#ifdef HAVE_IA64INTRIN_H
-# include <ia64intrin.h>
-#elif HAVE_IA32INTRIN_H
-# include <ia32intrin.h>
-#endif
 #include <stdlib.h>
 #include <stdint.h> /* for uint64_t */
 
@@ -59,11 +41,6 @@ return foo;
 AC_CACHE_CHECK([whether compiler supports builtin atomic CAS-ptr],
   [qthread_cv_atomic_CASptr],
   [AC_LINK_IFELSE([AC_LANG_SOURCE([[
-#ifdef HAVE_IA64INTRIN_H
-# include <ia64intrin.h>
-#elif HAVE_IA32INTRIN_H
-# include <ia32intrin.h>
-#endif
 #include <stdlib.h>
 
 int main(void)
@@ -130,11 +107,6 @@ AC_CACHE_CHECK([whether compiler supports builtin atomic incr],
   [qthread_cv_atomic_incr],
   [AS_IF([test "$1" -eq 8],
          [AC_LINK_IFELSE([AC_LANG_SOURCE([[
-#ifdef HAVE_IA64INTRIN_H
-# include <ia64intrin.h>
-#elif HAVE_IA32INTRIN_H
-# include <ia32intrin.h>
-#endif
 #include <stdlib.h>
 #include <stdint.h> /* for uint64_t */
 
@@ -147,11 +119,6 @@ return foo;
 		   [qthread_cv_atomic_incr="yes"],
 		   [qthread_cv_atomic_incr="no"])],
          [AC_LINK_IFELSE([AC_LANG_SOURCE([[
-#ifdef HAVE_IA64INTRIN_H
-# include <ia64intrin.h>
-#elif HAVE_IA32INTRIN_H
-# include <ia32intrin.h>
-#endif
 #include <stdlib.h>
 #include <stdint.h> /* for uint32_t */
 
@@ -169,11 +136,6 @@ AS_IF([test "$qthread_cv_atomic_incr" = "yes"],
 	      [qt_cv_atomic_incr_works],
 		  [AS_IF([test "$1" -eq 8],
          [AC_RUN_IFELSE([AC_LANG_SOURCE([[
-#ifdef HAVE_IA64INTRIN_H
-# include <ia64intrin.h>
-#elif HAVE_IA32INTRIN_H
-# include <ia32intrin.h>
-#endif
 #include <stdlib.h>
 #include <stdint.h> /* for uint64_t */
 
@@ -204,11 +166,6 @@ return 0;
 		   [qt_cv_atomic_incr_works="no"],
 		   [qt_cv_atomic_incr_works="assuming yes"])],
          [AC_RUN_IFELSE([AC_LANG_SOURCE([[
-#ifdef HAVE_IA64INTRIN_H
-# include <ia64intrin.h>
-#elif HAVE_IA32INTRIN_H
-# include <ia32intrin.h>
-#endif
 #include <stdlib.h>
 #include <stdint.h> /* for uint32_t */
 
@@ -224,23 +181,6 @@ return 0;
 		   [qt_cv_atomic_incr_works="no"],
 		   [qt_cv_atomic_incr_works="assuming yes"])])
    ])])
-AS_IF([test "$qthread_cv_atomic_CAS" = "yes"],
-	  [AC_CACHE_CHECK([whether ia64intrin.h is required],
-	    [qthread_cv_require_ia64intrin_h],
-		[AC_LINK_IFELSE([AC_LANG_SOURCE([[
-#include <stdlib.h>
-
-int main(void)
-{
-long bar=1, old=1, new=2;
-long foo = __sync_val_compare_and_swap(&bar, old, new);
-return foo;
-}]])],
-		[qthread_cv_require_ia64intrin_h="no"],
-		[qthread_cv_require_ia64intrin_h="yes"])])])
-])
-AS_IF([test "$qthread_cv_require_ia64intrin_h" = "yes"],
-	  [AC_DEFINE([QTHREAD_NEEDS_IA64INTRIN],[1],[if this header is necessary for builtin atomics])])
 AS_IF([test "x$qthread_cv_atomic_CASptr" = "xyes"],
       [AC_DEFINE([QTHREAD_ATOMIC_CAS_PTR],[1],
 	  	[if the compiler supports __sync_val_compare_and_swap on pointers])])
diff --git a/config/qthread_check_attributes.m4 b/config/qthread_check_attributes.m4
index acb45f28d..2ea7d76d4 100644
--- a/config/qthread_check_attributes.m4
+++ b/config/qthread_check_attributes.m4
@@ -100,14 +100,12 @@ AC_DEFUN([QTHREAD_BUILTIN_PREFETCH],[dnl
 AC_CACHE_CHECK(
  [support for __builtin_prefetch],
  [qt_cv_builtin_prefetch],
- [AS_IF([test "$qthread_cv_c_compiler_type" == PortlandGroup],
-        [qt_cv_builtin_prefetch=no],
-		[AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <stdlib.h>
+ [AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <stdlib.h>
 int x;]],[[
 __builtin_prefetch(&x, 0, 0);
 return malloc(x)?1:0;]])],
  [qt_cv_builtin_prefetch=yes],
- [qt_cv_builtin_prefetch=no])])])
+ [qt_cv_builtin_prefetch=no])])
  AS_IF([test "x$qt_cv_builtin_prefetch" = xyes],
  	   [AC_DEFINE([HAS_BUILTIN_PREFETCH], [1], [define if compiler supports __builtin_prefetch])
 	   $1], [$2])
@@ -140,9 +138,6 @@ AC_CACHE_CHECK([support for __sync_synchronize],
 		     POWERPC*)
                        mdefstr='__asm__ __volatile__ ("sync":::"memory")'
 		       ;;
-		     SPARCV9_32|SPARCV9_64)
-                       mdefstr='__asm__ __volatile__ ("membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad":::"memory")'
-                       ;;
 		    *)
 				 AC_MSG_ERROR([ASM $qthread_cv_asm_arch])
                        mdefstr="$cdefstr"
diff --git a/config/qthread_check_tiletopo.m4 b/config/qthread_check_tiletopo.m4
deleted file mode 100644
index 93135e930..000000000
--- a/config/qthread_check_tiletopo.m4
+++ /dev/null
@@ -1,23 +0,0 @@
-# -*- Autoconf -*-
-#
-# Copyright (c)      2010  Sandia Corporation
-#
-
-# QTHREAD_CHECK_TILETOPO([action-if-found], [action-if-not-found])
-# ------------------------------------------------------------------------------
-AC_DEFUN([QTHREAD_CHECK_TILETOPO], [
-  qt_allgoodsofar=yes
-  AC_CHECK_HEADERS([tmc/cpus.h],[],
-  			       [qt_allgoodsofar=no
-				    break])
-  AS_IF([test "x$qt_allgoodsofar" = xyes],
-        [AC_SEARCH_LIBS([tmc_cpus_set_task_cpu],
-		               [ilib tmc],
-					   [],
-					   [qt_allgoodsofar=no])])
-  
-  AS_IF([test "x$qt_allgoodsofar" = xyes],
-	    [AC_DEFINE([QTHREAD_HAVE_TILETOPO],[1],[if the machine has a Tilera-style topology interface])
-		 $1],
-		[$2])
-])
diff --git a/config/qthread_detect_compiler_type.m4 b/config/qthread_detect_compiler_type.m4
index 01f9e0457..242cbd91d 100644
--- a/config/qthread_detect_compiler_type.m4
+++ b/config/qthread_detect_compiler_type.m4
@@ -29,8 +29,6 @@ AC_CACHE_CHECK([what kind of C compiler $CC is],
   [AC_LANG_PUSH([C])
 
    dnl These compilers have been caught pretending to be GNU GCC
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__TILECC__],[qthread_cv_c_compiler_type=TileCC])])
    AS_IF([test "x$qthread_cv_c_compiler_type" == x],
      [_QTHREAD_CHECK_IFDEF([__INTEL_COMPILER],[qthread_cv_c_compiler_type=Intel])])
    AS_IF([test "x$qthread_cv_c_compiler_type" == x],
@@ -42,10 +40,6 @@ AC_CACHE_CHECK([what kind of C compiler $CC is],
 		AS_IF([test "x$qthread_cv_c_compiler_type" = "xLLVM"],
 		      [_QTHREAD_CHECK_IFDEF([__APPLE_CC__],[qthread_cv_c_compiler_type=Apple-LLVM])])
 		])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__CONVEY],[qthread_cv_c_compiler_type=Convey])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__PATHCC__],[qthread_cv_c_compiler_type=EKOPath])])
 
    dnl GCC is one of the most common
    AS_IF([test "x$qthread_cv_c_compiler_type" == x],
@@ -70,128 +64,6 @@ AC_CACHE_CHECK([what kind of C compiler $CC is],
 			  ])
 	 ])])
 
-   dnl A few common compilers (to detect quickly)
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__SUNPRO_C],[qthread_cv_c_compiler_type=SunStudio])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__OPEN64__],[qthread_cv_c_compiler_type=Open64])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__xlc__],[qthread_cv_c_compiler_type=IBM_XL])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__PGI],[qthread_cv_c_compiler_type=PortlandGroup])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__CYGWIN__],[qthread_cv_c_compiler_type=Cygwin])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__MINGW32__],[qthread_cv_c_compiler_type=MinGW32])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__MINGW64__],[qthread_cv_c_compiler_type=MinGW64])])
-
-   dnl Now detect the rarer ones
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([_ACC_],[qthread_cv_c_compiler_type=ACC])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__CMB__],[qthread_cv_c_compiler_type=AltiumMicroBlaze])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__CHC__],[qthread_cv_c_compiler_type=AltiumCtoHardware])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__ACK__],[qthread_cv_c_compiler_type=AmsterdamCompilerKit])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__CC_ARM],[qthread_cv_c_compiler_type=ARM])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__AZTEC_C__],[qthread_cv_c_compiler_type=Aztec])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__TURBO_C__],[qthread_cv_c_compiler_type=Borland])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__DECC],[qthread_cv_c_compiler_type=Compaq])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__convexc__],[qthread_cv_c_compiler_type=Convex])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([_CRAYC],[qthread_cv_c_compiler_type=Cray])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__CC65__],[qthread_cv_c_compiler_type=CC65])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([_DICE],[qthread_cv_c_compiler_type=Dice])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__DCC__],[qthread_cv_c_compiler_type=Diab])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__DMC__],[qthread_cv_c_compiler_type=DigitalMars])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__SYSC__],[qthread_cv_c_compiler_type=Dignus])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__DJGPP__],[qthread_cv_c_compiler_type=DJGPP])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__ghs__],[qthread_cv_c_compiler_type=GreenHill])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__HP_cc],[qthread_cv_c_compiler_type=HP])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__IAR_SYSTEMS_ICC__],[qthread_cv_c_compiler_type=IAR])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__IBMC__],[qthread_cv_c_compiler_type=IBM_zOS])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__IMAGECRAFT__],[qthread_cv_c_compiler_type=ImageCraft])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__KEIL__],[qthread_cv_c_compiler_type=KeilCARM])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__C166__],[qthread_cv_c_compiler_type=KeilC166])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__C51__],[qthread_cv_c_compiler_type=KeilC51])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__LCC__],[qthread_cv_c_compiler_type=LCC])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__HIGHC__],[qthread_cv_c_compiler_type=MetaWare])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__MWERKS__],[qthread_cv_c_compiler_type=MetrowerksCodeWarrior])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__sgi],[qthread_cv_c_compiler_type=MIPSpro])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__MRC__],[qthread_cv_c_compiler_type=MPW])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([_MSC_VER],[qthread_cv_c_compiler_type=MicrosoftVisual])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([_MRI],[qthread_cv_c_compiler_type=Microtec])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__NDPC__],[qthread_cv_c_compiler_type=MicrowayNDP])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([MIRACLE],[qthread_cv_cxx_compiler_type=Miracle])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__CC_NORCROFT],[qthread_cv_c_compiler_type=Norcroft])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__NWCC__],[qthread_cv_c_compiler_type=NWCC])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__PACIFIC__],[qthread_cv_c_compiler_type=Pacific])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([_PACC_VER],[qthread_cv_c_compiler_type=Palm])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__POCC__],[qthread_cv_c_compiler_type=Pelles])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__RENESAS__],[qthread_cv_c_compiler_type=Renesas])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__SASC],[qthread_cv_c_compiler_type=SAS])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([_SCO_DS],[qthread_cv_c_compiler_type=SCO])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([SDCC],[qthread_cv_c_compiler_type=SmallDevice])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__SNC__],[qthread_cv_c_compiler_type=SN])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__VOSC__],[qthread_cv_c_compiler_type=StratusVOS])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__TenDRA__],[qthread_cv_c_compiler_type=TenDRA])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__TI_COMPILER_VERSION__],[qthread_cv_c_compiler_type=TexasInstruments])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([THINKC3],[qthread_cv_c_compiler_type=THINK])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([THINKC4],[qthread_cv_c_compiler_type=THINK])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__TINYC__],[qthread_cv_c_compiler_type=TinyC])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__TURBOC__],[qthread_cv_c_compiler_type=Turbo])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([_UCC],[qthread_cv_c_compiler_type=Ultimate])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__USLC__],[qthread_cv_c_compiler_type=USL])])
-
    AS_IF([test "x$qthread_cv_c_compiler_type" == x],
      [qthread_cv_c_compiler_type=unknown])
    AC_LANG_POP([C])
@@ -201,18 +73,12 @@ AC_CACHE_CHECK([what kind of C++ compiler $CXX is],
   [AC_LANG_PUSH([C++])
 
    dnl These compilers have been caught pretending to be GNU G++
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__TILECC__],[qthread_cv_cxx_compiler_type=TileCC])])
    AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
      [_QTHREAD_CHECK_IFDEF([__INTEL_COMPILER],[qthread_cv_cxx_compiler_type=Intel])])
    AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
      [_QTHREAD_CHECK_IFDEF([__clang__],[qthread_cv_cxx_compiler_type=Clang])])
    AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
      [_QTHREAD_CHECK_IFDEF([__llvm__],[qthread_cv_cxx_compiler_type=LLVM])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__CONVEY],[qthread_cv_cxx_compiler_type=Convey])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__PATHCC__],[qthread_cv_cxx_compiler_type=EKOPath])])
 
    dnl GCC is one of the most common
    AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
@@ -226,93 +92,6 @@ AC_CACHE_CHECK([what kind of C++ compiler $CXX is],
 			  [_QTHREAD_CHECK_IFDEF_EQ([__GNUC__],[4],[qthread_cv_cxx_compiler_type=GNU4])])
 		])])
 
-   dnl A few common compilers (to detect quickly)
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__SUNPRO_CC],[qthread_cv_cxx_compiler_type=SunStudio])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__OPEN64__],[qthread_cv_cxx_compiler_type=Open64])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__xlC__],[qthread_cv_cxx_compiler_type=IBM_XL])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__PGI],[qthread_cv_cxx_compiler_type=PortlandGroup])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__CYGWIN__],[qthread_cv_cxx_compiler_type=Cygwin])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__MINGW32__],[qthread_cv_cxx_compiler_type=MinGW32])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__MINGW64__],[qthread_cv_cxx_compiler_type=MinGW64])])
-
-   dnl Now detect the rarer ones
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__BORLANDC__],[qthread_cv_cxx_compiler_type=Borland])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__COMO__],[qthread_cv_cxx_compiler_type=Comeau])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__DECCXX__],[qthread_cv_cxx_compiler_type=Compaq])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__DCC__],[qthread_cv_cxx_compiler_type=Diab])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__DMC__],[qthread_cv_cxx_compiler_type=DigitalMars])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__SYSC__],[qthread_cv_cxx_compiler_type=Dignus])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__DJGPP__],[qthread_cv_cxx_compiler_type=DJGPP])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__FCC_VERSION],[qthread_cv_cxx_compiler_type=Fujitsu])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__ghs__],[qthread_cv_cxx_compiler_type=GreenHill])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__HP_aCC],[qthread_cv_cxx_compiler_type=HP])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__IAR_SYSTEMS_ICC__],[qthread_cv_cxx_compiler_type=IAR])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__IBMCPP__],[qthread_cv_cxx_compiler_type=IBM_zOS])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__KCC],[qthread_cv_cxx_compiler_type=KAI])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__KEIL__],[qthread_cv_cxx_compiler_type=KeilCARM])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__C166__],[qthread_cv_cxx_compiler_type=KeilC166])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__C51__],[qthread_cv_cxx_compiler_type=KeilC51])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__LCC__],[qthread_cv_cxx_compiler_type=LCC])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__HIGHC__],[qthread_cv_cxx_compiler_type=MetaWare])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__MWERKS__],[qthread_cv_cxx_compiler_type=MetrowerksCodeWarrior])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__sgi],[qthread_cv_cxx_compiler_type=MIPSpro])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__MRC__],[qthread_cv_cxx_compiler_type=MPW])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([_MSC_VER],[qthread_cv_cxx_compiler_type=MicrosoftVisual])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([_MRI],[qthread_cv_cxx_compiler_type=Microtec])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([_PACC_VER],[qthread_cv_cxx_compiler_type=Palm])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__RENESAS__],[qthread_cv_cxx_compiler_type=Renesas])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([_SCO_DS],[qthread_cv_cxx_compiler_type=SCO])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__SC__],[qthread_cv_cxx_compiler_type=Symantec])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__TenDRA__],[qthread_cv_cxx_compiler_type=TenDRA])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__TI_COMPILER_VERSION__],[qthread_cv_cxx_compiler_type=TexasInstruments])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__TURBOC__],[qthread_cv_cxx_compiler_type=Turbo])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([_UCC],[qthread_cv_cxx_compiler_type=Ultimate])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__WATCOMC__],[qthread_cv_cxx_compiler_type=Watcom])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__ZTC__],[qthread_cv_cxx_compiler_type=Zortech])])
-
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__EDG__],[qthread_cv_cxx_compiler_type=EDG_FrontEnd])])
-
    AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
      [qthread_cv_cxx_compiler_type=unknown])
    AC_LANG_POP([C++])
diff --git a/config/qthread_ia_cacheline.m4 b/config/qthread_ia_cacheline.m4
index 2a73e7342..25d3bcfea 100644
--- a/config/qthread_ia_cacheline.m4
+++ b/config/qthread_ia_cacheline.m4
@@ -9,22 +9,14 @@ AC_CACHE_CHECK([for x86 cache line size],
 #define QTHREAD_UNSUPPORTED 0
 #define QTHREAD_IA32        1
 #define QTHREAD_AMD64       2
-#define QTHREAD_IA64        3
-#define QTHREAD_ALPHA       4
-#define QTHREAD_MIPS        5
 #define QTHREAD_POWERPC32   6
 #define QTHREAD_POWERPC64   7
-#define QTHREAD_SPARCV9_32  8
-#define QTHREAD_SPARCV9_64  9
-#define QTHREAD_TILEPRO	    10
-#define QTHREAD_TILEGX	    11
 #define QTHREAD_ARM         12
 #define QTHREAD_ARMV8_A64   13
 ],[
 int op = 1, eax, ebx, ecx, edx, cachelinesize;
 FILE *f;
-#if QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32 || \
-    QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64
+#if QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32
 # ifdef __PIC__
 __asm__("push %%ebx\n\t"
 "cpuid\n\t"
@@ -45,8 +37,7 @@ __asm__("cpuid"
 cachelinesize = 8*((ebx>>8)&0xff);
 if (cachelinesize == 0) {
 	op = 2;
-#if QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32 || \
-    QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64
+#if QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32
 __asm__("push %%ebx\n\t"
 "cpuid\n\t"
 "mov %%ebx, %1\n\t"
diff --git a/configure.ac b/configure.ac
index 0b02abc9f..89939cc41 100644
--- a/configure.ac
+++ b/configure.ac
@@ -294,11 +294,11 @@ AC_ARG_WITH([topology],
             [AS_HELP_STRING([--with-topology=[[topologylib]]],
                             [specify which topology interface to use. Supported
                              interfaces include no, hwloc, hwloc_v2, binders, lgrp, libnuma,
-                             libnumaV2, mach, plpa, sys, and, tilera.])],
+                             libnumaV2, mach, plpa, and sys.])],
             [AS_IF([test "x$with_topology" = xyes],
                    [with_topology=none_specified])
              case "$with_topology" in
-                 hwloc|binders|hwloc_v2|lgrp|libnuma|libnumaV2|mach|no|plpa|sys|tilera) ;;
+                 hwloc|binders|hwloc_v2|lgrp|libnuma|libnumaV2|mach|no|plpa|sys) ;;
                  none_specified) ;;
                  *)
                  AC_MSG_ERROR([Unsupported topology library ($with_topology)])
@@ -327,8 +327,7 @@ AC_ARG_ENABLE([condwait-queue],
                               [force the use of a pthread condwait queue,
                                instead of a spin-based queue for inter-thread
                                communication (important if spinning shepherds
-                               interfere with each other). Default enabled on
-                               sparc/solaris, but default disabled elsewhere.])])
+                               interfere with each other). Default disabled.])])
 
 AC_ARG_ENABLE([third-party-benchmarks],
               [AS_HELP_STRING([--enable-third-party-benchmarks],
@@ -402,16 +401,9 @@ dnl Test for this *before* AC_PROG_CC, to avoid getting the default CFLAGS
 dnl However, that means we don't know a ton about this machine or this compiler
 dnl yet, so we may have to reset it later.
 AS_IF([test "x$enable_debugging" = xyes],
-      [case "$build_cpu" in dnl (
-         sparc)
-           CFLAGS="$CFLAGS -O0 -g3"
-           CXXFLAGS="$CXXFLAGS -O0 -g3"
-           ;;
-         *)
-           CFLAGS="$CFLAGS -O0 -g"
-           CXXFLAGS="$CXXFLAGS -O0 -g"
-           ;;
-       esac])
+      [CFLAGS="$CFLAGS -O0 -g"
+       CXXFLAGS="$CXXFLAGS -O0 -g"
+      ])
 AC_PROG_CC
 dnl We use system extensions.  This includes setting _GNU_SOURCE
 AC_USE_SYSTEM_EXTENSIONS
@@ -453,18 +445,6 @@ AS_IF([test "x$enable_picky" = xyes],
          Intel)
            CFLAGS="-Wall -wd981 -wd1572 -wd869 $CFLAGS"
            ;;
-         SunStudio)
-           # This compiler defaults to enabling all warnings
-           ;;
-         PortlandGroup)
-           CFLAGS="-Minform=inform $CFLAGS"
-           ;;
-         IBM_XL)
-           CFLAGS="-q64 $CFLAGS"
-           ;;
-         Convey|EKOPath)
-           CFLAGS="-W -Wall $CFLAGS"
-           ;;
          *)
            CFLAGS="-Wall $CFLAGS"
            ;;
@@ -476,18 +456,6 @@ AS_IF([test "x$enable_picky" = xyes],
          Intel)
            CXXFLAGS="-Wall -wd981 $CXXFLAGS"
            ;;
-         SunStudio)
-           # This compiler defaults to enabling all warnings
-           ;;
-         PortlandGroup)
-           CXXFLAGS="-Minform=inform $CXXFLAGS"
-           ;;
-         IBM_XL)
-           CXXFLAGS="-q64 $CXXFLAGS"
-           ;;
-         Convey|EKOPath)
-           CXXFLAGS="-W -Wall $CXXFLAGS"
-           ;;
          *)
            CXXFLAGS="-Wall $CXXFLAGS"
            ;;
@@ -495,7 +463,7 @@ AS_IF([test "x$enable_picky" = xyes],
 
 QTHREAD_CHECK_ASSEMBLY([have_assembly=1], [have_assembly=0])
 case "$qthread_cv_asm_arch" in
-    POWERPC32|SPARCV9_32)
+    POWERPC32)
     compile_compat_atomic=yes
     ;;
 esac
@@ -735,14 +703,7 @@ AS_IF([test "x$enable_oversubscription" = "xyes"],
        AC_CHECK_FUNCS([sched_yield])])
 
 AS_IF([test "x$enable_condwait_queue" = "x"],
-      [case "$host" in
-         sparc-sun-solaris*)
-           enable_condwait_queue="yes"
-           ;;
-         *)
-           enable_condwait_queue="no"
-           ;;
-       esac])
+      [enable_condwait_queue="no"])
 AS_IF([test "x$enable_condwait_queue" = "xyes"],
       [AC_DEFINE([QTHREAD_CONDWAIT_BLOCKING_QUEUE], [1], [use pthread-based condwait for lf queue])])
 
@@ -989,10 +950,6 @@ AS_IF([test "x$qthread_topo" != xno],
                                     [qthread_topo=libnumaV2],
                                     [AS_IF([test "x$qthread_topo" != xno],
                                            [AC_MSG_ERROR([Specified topology library ($qthread_topo) does not work.])])])])
-       AS_IF([test "x$qthread_topo" = xno -o "x$qthread_topo" = xtilera],
-             [QTHREAD_CHECK_TILETOPO([qthread_topo=tilera],
-                                     [AS_IF([test "x$qthread_topo" != xno],
-                                            [AC_MSG_ERROR([Specified topology library ($qthread_topo) does not work.])])])])
        # Third, check any others.
        AS_IF([test "x$qthread_topo" = xno -o "x$qthread_topo" = xmach],
              [QTHREAD_CHECK_MACHTOPO([qthread_topo=mach],
diff --git a/include/Makefile.am b/include/Makefile.am
index fb27e9547..50929a7a3 100644
--- a/include/Makefile.am
+++ b/include/Makefile.am
@@ -10,7 +10,6 @@ noinst_HEADERS = \
 	fastcontext/taskimpl.h \
 	fastcontext/power-ucontext.h \
 	fastcontext/386-ucontext.h \
-	fastcontext/tile-ucontext.h \
 	net/net.h \
 	qthread_innards.h \
 	qloop_innards.h \
diff --git a/include/fastcontext/taskimpl.h b/include/fastcontext/taskimpl.h
index 077fc6c26..e663a8d7d 100644
--- a/include/fastcontext/taskimpl.h
+++ b/include/fastcontext/taskimpl.h
@@ -7,16 +7,7 @@
 
 #include "qthread/common.h"
 
-#if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO) ||                             \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEGX))
-#ifdef HAVE_STDARG_H
-#include <stdarg.h>
-#endif
-#include <stddef.h>
-#define NEEDTILEMAKECONTEXT
-#define NEEDSWAPCONTEXT
-#include "tile-ucontext.h"
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
+#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
 #define NEEDX86MAKECONTEXT
 #define NEEDSWAPCONTEXT
 #include "386-ucontext.h"
diff --git a/include/fastcontext/tile-ucontext.h b/include/fastcontext/tile-ucontext.h
deleted file mode 100644
index c0a8408b4..000000000
--- a/include/fastcontext/tile-ucontext.h
+++ /dev/null
@@ -1,52 +0,0 @@
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stddef.h> /* for size_t, per C89 */
-
-#include "qt_visibility.h"
-
-#define setcontext(u) qt_setmctxt(&(u)->mc)
-#define getcontext(u) qt_getmctxt(&(u)->mc)
-typedef struct mctxt mctxt_t;
-typedef struct uctxt uctxt_t;
-
-/*
- * This struct defines the way the registers are stored on the stack during a
- * system call/exception.  It should be a multiple of 8 bytes to preserve
- * normal stack alignment rules.
- *
- */
-struct mctxt {
-  /* Saved main processor registers; 56..63 are special. */
-  /* tp, sp, and lr must immediately follow regs[] for aliasing. */
-  unsigned long regs[23]; /* callee saves r30-r52 */
-  unsigned long tp;       /* thread-local data pointer (23*4) */
-  unsigned long sp;       /* stack pointer (grows DOWNWARD) (23*4)+4 */
-  unsigned long lr; /* aka link register (where to go when returning from a
-                     * function) (23*4)+(2*4) */
-
-  /* Saved special registers. */
-  unsigned long pc; /* (23*4)+(3*4) */
-  unsigned long r0; /* (23*4)+(4*4) */
-  // unsigned long ex1;      /* stored in EX_CONTEXT_1_1 (PL and ICS bit) */
-  unsigned long arg0; /* (23*4)+(5*4) only used for first function invocation */
-  unsigned long first; /* (23*4)+(6*4) */
-};
-
-struct uctxt {
-  struct {
-    void *ss_sp;
-    size_t ss_size;
-  } uc_stack;
-
-  // sigset_t uc_sigmask;
-  mctxt_t mc;
-  struct uctxt *uc_link; /* unused */
-};
-
-int INTERNAL qt_swapctxt(uctxt_t *, uctxt_t *);
-void INTERNAL qt_makectxt(uctxt_t *, void (*)(void), int, ...);
-int INTERNAL qt_getmctxt(mctxt_t *);
-void INTERNAL qt_setmctxt(mctxt_t *);
-/* vim:set expandtab: */
diff --git a/include/qt_atomics.h b/include/qt_atomics.h
index 70c71b73d..79d8c4dfa 100644
--- a/include/qt_atomics.h
+++ b/include/qt_atomics.h
@@ -4,14 +4,6 @@
 #include <stdatomic.h>
 #include <sys/time.h>
 
-#ifdef QTHREAD_NEEDS_IA64INTRIN
-#ifdef HAVE_IA64INTRIN_H
-#include <ia64intrin.h>
-#elif defined(HAVE_IA32INTRIN_H)
-#include <ia32intrin.h>
-#endif
-#endif
-
 #include <qthread/common.h>
 #include <qthread/qthread.h>
 
@@ -56,21 +48,7 @@
   do { COMPILER_FENCE; } while (0)
 #endif // ifdef QTHREAD_OVERSUBSCRIPTION
 
-#if defined(__tile__)
-#include <tmc/sync.h>
-#define QTHREAD_FASTLOCK_ATTRVAR
-#define QTHREAD_FASTLOCK_SETUP()                                               \
-  do {                                                                         \
-  } while (0)
-#define QTHREAD_FASTLOCK_INIT(x) tmc_sync_mutex_init(&(x))
-#define QTHREAD_FASTLOCK_INIT_PTR(x) tmc_sync_mutex_init((x))
-#define QTHREAD_FASTLOCK_LOCK(x) tmc_sync_mutex_lock((x))
-#define QTHREAD_FASTLOCK_UNLOCK(x) tmc_sync_mutex_unlock((x))
-#define QTHREAD_FASTLOCK_DESTROY(x)
-#define QTHREAD_FASTLOCK_DESTROY_PTR(x)
-#define QTHREAD_FASTLOCK_TYPE tmc_sync_mutex_t
-#define QTHREAD_FASTLOCK_INITIALIZER TMC_SYNC_MUTEX_INIT
-#elif defined(USE_INTERNAL_SPINLOCK) && USE_INTERNAL_SPINLOCK
+#if defined(USE_INTERNAL_SPINLOCK) && USE_INTERNAL_SPINLOCK
 #define QTHREAD_FASTLOCK_SETUP()                                               \
   do {                                                                         \
   } while (0)
@@ -152,18 +130,6 @@ extern pthread_mutexattr_t _fastlock_attr;
 
 // Trylock declarations
 
-#if defined(__tile__)
-#include <tmc/sync.h>
-#define QTHREAD_TRYLOCK_INIT(x) tmc_sync_mutex_init(&(x))
-#define QTHREAD_TRYLOCK_INIT_PTR(x) tmc_sync_mutex_init((x))
-#define QTHREAD_TRYLOCK_LOCK(x) tmc_sync_mutex_lock((x))
-#define QTHREAD_TRYLOCK_TRY(x) (tmc_sync_mutex_trylock((x)) == 0)
-#define QTHREAD_TRYLOCK_UNLOCK(x) tmc_sync_mutex_unlock((x))
-#define QTHREAD_TRYLOCK_DESTROY(x)
-#define QTHREAD_TRYLOCK_DESTROY_PTR(x)
-#define QTHREAD_TRYLOCK_TYPE tmc_sync_mutex_t
-#define QTHREAD_TRYLOCK_INITIALIZER TMC_SYNC_MUTEX_INIT
-
 /* For the followimg implementation of try-locks,
  * it is necessary that qthread_incr() be defined on
  * haligned_t types. This requirement is satisfied when
@@ -171,7 +137,7 @@ extern pthread_mutexattr_t _fastlock_attr;
  * whether it is satisfied in some circumstances when
  * !defined(QTHREAD_ATOMIC_INCR).
  */
-#elif defined(USE_INTERNAL_SPINLOCK) && USE_INTERNAL_SPINLOCK &&               \
+#if defined(USE_INTERNAL_SPINLOCK) && USE_INTERNAL_SPINLOCK &&               \
   defined(QTHREAD_ATOMIC_INCR) && !defined(QTHREAD_MUTEX_INCREMENT)
 
 #define QTHREAD_TRYLOCK_TYPE qt_spin_trylock_t
@@ -242,7 +208,7 @@ extern pthread_mutexattr_t _fastlock_attr;
 #define QTHREAD_TRYLOCK_DESTROY_PTR(x) pthread_mutex_destroy((x))
 #define QTHREAD_TRYLOCK_TRY(x) (pthread_mutex_trylock((x)) == 0)
 
-#endif // if defined(__tile__)
+#endif
 
 #include <pthread.h>
 #define QTHREAD_COND_DECL(c)                                                   \
@@ -429,41 +395,6 @@ qt_cas(void **const ptr, void *const oldv, void *const newv) { /*{{{*/
                        : "cc", "memory");
   return result;
 
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
-  void *nv = newv;
-  __asm__ __volatile__("cas [%1], %2, %0"
-                       : "=&r"(nv)
-                       : "r"(ptr),
-                         "r"(oldv)
-#if !defined(__SUNPRO_C) && !defined(__SUNPRO_CC)
-                           ,
-                         "0"(nv)
-#endif
-                       : "cc", "memory");
-  return nv;
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)
-  void *nv = newv;
-  __asm__ __volatile__("casx [%1], %2, %0"
-                       : "=&r"(nv)
-                       : "r"(ptr),
-                         "r"(oldv)
-#if !defined(__SUNPRO_C) && !defined(__SUNPRO_CC)
-                           ,
-                         "0"(nv)
-#endif
-                       : "cc", "memory");
-  return nv;
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
-  void **retval;
-  __asm__ __volatile__("mov ar.ccv=%0;;" : : "rO"(oldv));
-  __asm__ __volatile__("cmpxchg4.acq %0=[%1],%2,ar.ccv"
-                       : "=r"(retval)
-                       : "r"(ptr), "r"(newv)
-                       : "memory");
-  return retval;
-
 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||                              \
   (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
   void **retval;
@@ -477,19 +408,10 @@ qt_cas(void **const ptr, void *const oldv, void *const newv) { /*{{{*/
    * instantiates cmpxchg for 8-byte registers, and IA32 never has 64-bit
    * pointers
    */
-#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) && defined(__PGI)
-  __asm__ __volatile__("lock; cmpxchg %1,(%2)\n\t"
-                       "mov %%rax,(%0)" ::"r"(&retval),
-                       "r"(newv),
-                       "r"(ptr),
-                       "a"(oldv) /* load into RAX */
-                       : "cc", "memory");
-#else
   __asm__ __volatile__("lock; cmpxchg %1,(%2)"
                        : "=a"(retval)                   /* store from RAX */
                        : "r"(newv), "r"(ptr), "a"(oldv) /* load into RAX */
                        : "cc", "memory");
-#endif /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) && defined(__PGI) */
   return retval;
 
 #else /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) */
@@ -624,110 +546,6 @@ static QINLINE aligned_t qthread_internal_incr_mod_(
                : "r"(operand), "r"(max)
                : "cc", "memory");
 
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) ||                         \
-  ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) &&                            \
-   (QTHREAD_SIZEOF_ALIGNED_T == 4))
-
-  uint32_t oldval, newval;
-
-  /* newval = *operand; */
-  do {
-    /* you *should* be able to move the *operand reference outside the
-     * loop and use the output of the CAS (namely, newval) instead.
-     * However, there seems to be a bug in gcc 4.0.4 wherein, if you do
-     * that, the while() comparison uses a temporary register value for
-     * newval that has nothing to do with the output of the CAS
-     * instruction. (See how obviously wrong that is?) For some reason that
-     * I haven't been able to figure out, moving the *operand reference
-     * inside the loop fixes that problem, even at -O2 optimization. */
-    retval = oldval = *operand;
-    newval = oldval + 1;
-    newval *= (newval < max);
-
-    /* if (*operand == oldval)
-     * swap(newval, *operand)
-     * else
-     * newval = *operand
-     */
-    __asm__ __volatile__("cas [%1] , %2, %0" /* */
-                         : "=&r"(newval)
-                         : "r"(operand), "r"(oldval), "0"(newval)
-                         : "memory");
-  } while (oldval != newval);
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)
-  aligned_t oldval, newval;
-
-  /* newval = *operand; */
-  do {
-    /* you *should* be able to move the *operand reference outside the
-     * loop and use the output of the CAS (namely, newval) instead.
-     * However, there seems to be a bug in gcc 4.0.4 wherein, if you do
-     * that, the while() comparison uses a temporary register value for
-     * newval that has nothing to do with the output of the CAS
-     * instruction. (See how obviously wrong that is?) For some reason that
-     * I haven't been able to figure out, moving the *operand reference
-     * inside the loop fixes that problem, even at -O2 optimization. */
-    retval = oldval = *operand;
-    newval = oldval + 1;
-    newval *= (newval < max);
-
-    /* if (*operand == oldval)
-     * swap(newval, *operand)
-     * else
-     * newval = *operand
-     */
-    __asm__ __volatile__("casx [%1] , %2, %0"
-                         : "=&r"(newval)
-                         : "r"(operand),
-                           "r"(oldval)
-#if !defined(__SUNPRO_CC) && !defined(__SUNPRO_C)
-                             ,
-                           "0"(newval)
-#endif
-                         : "memory");
-  } while (oldval != newval);
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
-#if QTHREAD_SIZEOF_ALIGNED_T == 8
-  int64_t res, old, new;
-
-  do {
-    old = *operand; /* atomic, because operand is aligned */
-    new = old + 1;
-    new *= (new < max);
-    asm volatile("mov ar.ccv=%0;;"
-                 : /* no output */
-                 : "rO"(old));
-
-    /* separate so the compiler can insert its junk */
-    asm volatile("cmpxchg8.acq %0=[%1],%2,ar.ccv"
-                 : "=r"(res)
-                 : "r"(operand), "r"(new)
-                 : "memory");
-  } while (res != old); /* if res==old, new is out of date */
-  retval = old;
-
-#else  /* 32-bit aligned_t */
-  int32_t res, old, new;
-
-  do {
-    old = *operand; /* atomic, because operand is aligned */
-    new = old + 1;
-    new *= (new < max);
-    asm volatile("mov ar.ccv=%0;;"
-                 : /* no output */
-                 : "rO"(old));
-
-    /* separate so the compiler can insert its junk */
-    asm volatile("cmpxchg4.acq %0=[%1],%2,ar.ccv"
-                 : "=r"(res)
-                 : "r"(operand), "r"(new)
-                 : "memory");
-  } while (res != old); /* if res==old, new is out of date */
-  retval = old;
-#endif /* if QTHREAD_SIZEOF_ALIGNED_T == 8 */
-
 #elif ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) &&                              \
        (QTHREAD_SIZEOF_ALIGNED_T == 4)) ||                                     \
   ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) &&                                 \
diff --git a/include/qthread/common.h.in b/include/qthread/common.h.in
index 2ea1bbede..f75ffe142 100644
--- a/include/qthread/common.h.in
+++ b/include/qthread/common.h.in
@@ -59,12 +59,6 @@
 /* builtin incr supported */
 #undef QTHREAD_ATOMIC_INCR
 
-/* ia64intrin.h available */
-#undef HAVE_IA64INTRIN_H
-
-/* if ia64intrin is needed */
-#undef QTHREAD_NEEDS_IA64INTRIN
-
 /* specifying data alignment is allowed */
 #undef QTHREAD_ALIGNEDDATA_ALLOWED
 
@@ -83,15 +77,6 @@
 #ifndef restrict
 #undef restrict
 #endif
-/* Work around a bug in Sun C++: it does not support _Restrict or
-   __restrict__, even though the corresponding Sun C compiler ends up with
-   "#define restrict _Restrict" or "#define restrict __restrict__" in the
-   previous line.  Perhaps some future version of Sun C++ will work with
-   restrict; if so, hopefully it defines __RESTRICT like Sun C does.  */
-#if defined __SUNPRO_CC && !defined __RESTRICT
-# define _Restrict
-# define __restrict__
-#endif
 
 /* If __builtin_trap can be used */
 #undef QTHREAD_TRAP_OKAY
@@ -102,26 +87,12 @@
 # define QTHREAD_TRAP() *(volatile int *)0 = 0
 #endif
 
-/* Work around another bug in Sun C++: it does not support __asm__, even though
- * their C compiler DOES. */
-#if defined(__SUNPRO_CC)
-# define __asm__ asm
-# define __volatile__ volatile
-#endif
-
 /* Architecture defines */
 #define QTHREAD_UNSUPPORTED 0
 #define QTHREAD_IA32        1
 #define QTHREAD_AMD64       2
-#define QTHREAD_IA64        3
-#define QTHREAD_ALPHA       4
-#define QTHREAD_MIPS        5
 #define QTHREAD_POWERPC32   6
 #define QTHREAD_POWERPC64   7
-#define QTHREAD_SPARCV9_32  8
-#define QTHREAD_SPARCV9_64  9
-#define QTHREAD_TILEPRO	    10
-#define QTHREAD_TILEGX	    11
 #define QTHREAD_ARM         12
 #define QTHREAD_ARMV8_A64   13
 
diff --git a/include/qthread/qthread.h b/include/qthread/qthread.h
index 4152d577a..c066747b4 100644
--- a/include/qthread/qthread.h
+++ b/include/qthread/qthread.h
@@ -22,14 +22,6 @@ using std::memory_order_relaxed;
 #include <stdio.h> /* for fprintf() */
 #endif
 
-#ifdef QTHREAD_NEEDS_IA64INTRIN
-#ifdef HAVE_IA64INTRIN_H
-#include <ia64intrin.h>
-#elif defined(HAVE_IA32INTRIN_H)
-#include <ia32intrin.h>
-#endif
-#endif
-
 #include "common.h"
 #include "qthread-int.h"
 
@@ -676,9 +668,7 @@ int qthread_spinlocks_destroy(qthread_spinlock_t *a);
 int qthread_lock_init(aligned_t const *a, bool const is_recursive);
 int qthread_lock_destroy(aligned_t *a);
 
-#if defined(QTHREAD_MUTEX_INCREMENT) ||                                        \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) ||                              \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
+#if defined(QTHREAD_MUTEX_INCREMENT) || QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32
 uint32_t qthread_incr32_(uint32_t *, int32_t);
 uint64_t qthread_incr64_(uint64_t *, int64_t);
 float qthread_fincr_(float *, float);
@@ -699,8 +689,7 @@ static QINLINE float qthread_fincr(float *operand, float incr) { /*{{{ */
 #if defined(QTHREAD_MUTEX_INCREMENT)
   return qthread_fincr_(operand, incr);
 
-#elif QTHREAD_ATOMIC_CAS && (!defined(HAVE_GCC_INLINE_ASSEMBLY) ||             \
-                             (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEGX))
+#elif QTHREAD_ATOMIC_CAS && !defined(HAVE_GCC_INLINE_ASSEMBLY)
   union {
     float f;
     uint32_t i;
@@ -752,51 +741,6 @@ static QINLINE float qthread_fincr(float *operand, float incr) { /*{{{ */
 
   return retval.f;
 
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) ||                         \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
-  union {
-    float f;
-    uint32_t i;
-  } oldval, newval;
-
-  /* newval.f = *operand; */
-  do {
-    /* you *should* be able to move the *operand reference outside the
-     * loop and use the output of the CAS (namely, newval) instead.
-     * However, there seems to be a bug in gcc 4.0.4 wherein, if you do
-     * that, the while() comparison uses a temporary register value for
-     * newval that has nothing to do with the output of the CAS
-     * instruction. (See how obviously wrong that is?) For some reason that
-     * I haven't been able to figure out, moving the *operand reference
-     * inside the loop fixes that problem, even at -O2 optimization. */
-    oldval.f = *(float volatile *)operand;
-    newval.f = oldval.f + incr;
-    __asm__ __volatile__(
-      "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
-      "cas [%1], %2, %0"
-      : "+r"(newval.i)
-      : "r"(operand), "r"(oldval.i)
-      : "cc", "memory");
-  } while (oldval.i != newval.i);
-  return oldval.f;
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
-  union {
-    float f;
-    uint32_t i;
-  } oldval, newval, res;
-
-  do {
-    oldval.f = *(float volatile *)operand;
-    newval.f = oldval.f + incr;
-    __asm__ __volatile__("mov ar.ccv=%0;;" ::"rO"(oldval.i));
-    __asm__ __volatile__("cmpxchg4.acq %0=[%1],%2,ar.ccv"
-                         : "=r"(res.i)
-                         : "r"(operand), "r"(newval.i)
-                         : "memory");
-  } while (res.i != oldval.i); /* if res!=old, the calc is out of date */
-  return oldval.f;
-
 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||                              \
   (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
   union {
@@ -860,9 +804,7 @@ static QINLINE double qthread_dincr(double *operand, double incr) { /*{{{ */
   (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32)
   return qthread_dincr_(operand, incr);
 
-#elif QTHREAD_ATOMIC_CAS && (!defined(HAVE_GCC_INLINE_ASSEMBLY) ||             \
-                             (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEGX) ||      \
-                             (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32))
+#elif QTHREAD_ATOMIC_CAS && !defined(HAVE_GCC_INLINE_ASSEMBLY)
   union {
     uint64_t i;
     double d;
@@ -916,82 +858,6 @@ static QINLINE double qthread_dincr(double *operand, double incr) { /*{{{ */
 
   return retval.d;
 
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
-  union {
-    uint64_t i;
-    double d;
-  } oldval, newval;
-
-  newval.d = *(double volatile *)operand;
-  do {
-    /* this allows the compiler to be as flexible as possible with register
-     * assignments */
-    uint64_t tmp1;
-    uint64_t tmp2;
-
-    oldval.d = newval.d;
-    newval.d += incr;
-    __asm__ __volatile__(
-      "ldd %0, %1\n\t"
-      "ldx %4, %2\n\t"
-      "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
-      "sllx %1, 0x20, %1\n\t"
-      "sllx %2, 0x20, %2\n\t"
-      "casx [%3], %2, %1\n\t"
-      "srlx %1, 0x20, %1\n\t"
-      "std %1, %0"
-      /* h means 64-BIT REGISTER
-       * (probably unnecessary, but why take chances?) */
-      : "=m"(newval.i), "=h"(tmp1), "=h"(tmp2)
-      : "r"(operand), "m"(oldval.i)
-      : "memory");
-  } while (oldval.i != newval.i);
-  return oldval.d;
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)
-  union {
-    uint64_t i;
-    double d;
-  } oldval, newval;
-
-  /*newval.d = *operand; */
-  do {
-    /* you *should* be able to move the *operand reference outside the
-     * loop and use the output of the CAS (namely, newval) instead.
-     * However, there seems to be a bug in gcc 4.0.4 wherein, if you do
-     * that, the while() comparison uses a temporary register value for
-     * newval that has nothing to do with the output of the CAS
-     * instruction. (See how obviously wrong that is?) For some reason that
-     * I haven't been able to figure out, moving the *operand reference
-     * inside the loop fixes that problem, even at -O2 optimization. */
-    oldval.d = *(double volatile *)operand;
-    newval.d = oldval.d + incr;
-    __asm__ __volatile__(
-      "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
-      "casx [%1], %2, %0"
-      : "+r"(newval.i)
-      : "r"(operand), "r"(oldval.i)
-      : "memory");
-  } while (oldval.d != newval.d);
-  return oldval.d;
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
-  union {
-    uint64_t i;
-    double d;
-  } oldval, newval, res;
-
-  do {
-    oldval.d = *(double volatile *)operand;
-    newval.d = oldval.d + incr;
-    __asm__ __volatile__("mov ar.ccv=%0;;" ::"rO"(oldval.i));
-    __asm__ __volatile__("cmpxchg8.acq %0=[%1],%2,ar.ccv"
-                         : "=r"(res.i)
-                         : "r"(operand), "r"(newval.i)
-                         : "memory");
-  } while (res.i != oldval.i); /* if res!=old, the calc is out of date */
-  return oldval.d;
-
 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64)
   union {
     double d;
@@ -1011,19 +877,10 @@ static QINLINE double qthread_dincr(double *operand, double incr) { /*{{{ */
      * unit to use). */
     oldval.d = *(double volatile *)operand;
     newval.d = oldval.d + incr;
-#ifdef __PGI
-    __asm__ __volatile__("lock; cmpxchgq %1, (%2)\n\t"
-                         "mov %%rax,(%0)" ::"r"(&retval.i),
-                         "r"(newval.i),
-                         "r"(operand),
-                         "a"(oldval.i)
-                         : "memory");
-#else
     __asm__ __volatile__("lock; cmpxchgq %1, (%2)"
                          : "=a"(retval.i)
                          : "r"(newval.i), "r"(operand), "0"(oldval.i)
                          : "memory");
-#endif // ifdef __PGI
   } while (retval.i != oldval.i);
   return oldval.d;
 
@@ -1172,57 +1029,6 @@ static QINLINE uint32_t qthread_incr32(uint32_t *operand,
 
   return retval;
 
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) ||                         \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)
-  uint32_t oldval, newval;
-
-  /* newval = *operand; */
-  do {
-    /* you *should* be able to move the *operand reference outside the
-     * loop and use the output of the CAS (namely, newval) instead.
-     * However, there seems to be a bug in gcc 4.0.4 wherein, if you do
-     * that, the while() comparison uses a temporary register value for
-     * newval that has nothing to do with the output of the CAS
-     * instruction. (See how obviously wrong that is?) For some reason that
-     * I haven't been able to figure out, moving the *operand reference
-     * inside the loop fixes that problem, even at -O2 optimization. */
-    oldval = *operand;
-    newval = oldval + incr;
-    /* newval always gets the value of *operand; if it's
-     * the same as oldval, then the swap was successful */
-    __asm__ __volatile__(
-      "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
-      "cas [%1] , %2, %0"
-      : "+r"(newval)
-      : "r"(operand), "r"(oldval)
-      : "cc", "memory");
-  } while (oldval != newval);
-  return oldval;
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
-  uint32_t res;
-
-  if (incr == 1) {
-    asm volatile("fetchadd4.rel %0=[%1],1" : "=r"(res) : "r"(operand));
-  } else {
-    uint32_t old, newval;
-
-    do {
-      old = *operand; /* atomic, because operand is aligned */
-      newval = old + incr;
-      asm volatile("mov ar.ccv=%0;;"
-                   : /* no output */
-                   : "rO"(old));
-
-      /* separate so the compiler can insert its junk */
-      asm volatile("cmpxchg4.acq %0=[%1],%2,ar.ccv"
-                   : "=r"(res)
-                   : "r"(operand), "r"(newval)
-                   : "memory");
-    } while (res != old); /* if res!=old, the calc is out of date */
-  }
-  return res;
-
 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) ||                               \
   (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64)
 
@@ -1245,8 +1051,7 @@ static QINLINE uint32_t qthread_incr32(uint32_t *operand,
 static QINLINE uint64_t qthread_incr64(uint64_t *operand,
                                        uint64_t incr) { /*{{{ */
 #if defined(QTHREAD_MUTEX_INCREMENT) ||                                        \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) ||                              \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
+  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32)
   return qthread_incr64_(operand, incr);
 
 #elif defined(QTHREAD_ATOMIC_INCR)
@@ -1263,8 +1068,7 @@ static QINLINE uint64_t qthread_incr64(uint64_t *operand,
 
 #elif !defined(HAVE_GCC_INLINE_ASSEMBLY)
 #error Qthreads requires either mutex increments, inline assembly, or compiler atomic builtins
-#else // if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH ==
-      // QTHREAD_POWERPC32) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
+#else // if defined(QTHREAD_MUTEX_INCREMENT) || QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32
 #if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64)
   uint64_t retval;
   uint64_t incrd = incrd; /* no initializing */
@@ -1280,90 +1084,6 @@ static QINLINE uint64_t qthread_incr64(uint64_t *operand,
 
   return retval;
 
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
-  uint64_t oldval, newval = *operand;
-
-  do {
-    /* this allows the compiler to be as flexible as possible with register
-     * assignments */
-    uint64_t tmp1 = tmp1;
-    uint64_t tmp2 = tmp2;
-
-    oldval = newval;
-    newval += incr;
-    /* newval always gets the value of *operand; if it's
-     * the same as oldval, then the swap was successful */
-    __asm__ __volatile__(
-      "ldx %0, %1\n\t"
-      "ldx %4, %2\n\t"
-      "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
-      "casx [%3] , %2, %1\n\t"
-      "stx %1, %0"
-      /* h means 64-BIT REGISTER
-       * (probably unnecessary, but why take chances?) */
-      : "=m"(newval), "=&h"(tmp1), "=&h"(tmp2)
-      : "r"(operand), "m"(oldval)
-      : "cc", "memory");
-  } while (oldval != newval);
-  return oldval;
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)
-  uint64_t oldval, newval;
-
-#ifdef QTHREAD_ATOMIC_CAS
-  newval = *operand;
-  do {
-    oldval = newval;
-    newval = __sync_val_compare_and_swap(operand, oldval, oldval + incr);
-  } while (oldval != newval);
-#else
-  do {
-    /* you *should* be able to move the *operand reference outside the
-     * loop and use the output of the CAS (namely, newval) instead.
-     * However, there seems to be a bug in gcc 4.0.4 wherein, if you do
-     * that, the while() comparison uses a temporary register value for
-     * newval that has nothing to do with the output of the CAS
-     * instruction. (See how obviously wrong that is?) For some reason that
-     * I haven't been able to figure out, moving the *operand reference
-     * inside the loop fixes that problem, even at -O2 optimization. */
-    oldval = *operand;
-    newval = oldval + incr;
-    /* newval always gets the value of *operand; if it's
-     * the same as oldval, then the swap was successful */
-    __asm__ __volatile__(
-      "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
-      "casx [%1] , %2, %0"
-      : "+r"(newval)
-      : "r"(operand), "r"(oldval)
-      : "cc", "memory");
-  } while (oldval != newval);
-#endif // ifdef QTHREAD_ATOMIC_CAS
-  return oldval;
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
-  uint64_t res;
-
-  if (incr == 1) {
-    asm volatile("fetchadd8.rel %0=%1,1" : "=r"(res) : "m"(*operand));
-  } else {
-    uint64_t old, newval;
-
-    do {
-      old = *operand; /* atomic, because operand is aligned */
-      newval = old + incr;
-      asm volatile("mov ar.ccv=%0;;"
-                   : /* no output */
-                   : "rO"(old));
-
-      /* separate so the compiler can insert its junk */
-      asm volatile("cmpxchg8.acq %0=[%1],%2,ar.ccv"
-                   : "=r"(res)
-                   : "r"(operand), "r"(newval)
-                   : "memory");
-    } while (res != old); /* if res!=old, the calc is out of date */
-  }
-  return res;
-
 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
   union {
     uint64_t i;
@@ -1431,28 +1151,16 @@ static QINLINE uint64_t qthread_incr64(uint64_t *operand,
 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64)
   uint64_t retval = incr;
 
-#ifdef __PGI
-  /* this is a workaround for a bug in the PGI compiler where the width of
-   * retval is not respected and %eax is used instead of %rax */
-  __asm__ __volatile__("lock xaddq %0, (%2)\n\t"
-                       "mov %0,(%1)" ::"r"(incr),
-                       "r"(&retval),
-                       "r"(operand)
-                       : "memory");
-#else
   __asm__ __volatile__("lock ; xaddq %0, (%1);"
                        : "+r"(retval)
                        : "r"(operand)
                        : "memory");
-#endif // ifdef __PGI
-
   return retval;
 
 #else // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64)
 #error Unimplemented assembly architecture for qthread_incr64
 #endif // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64)
-#endif // if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH ==
-       // QTHREAD_POWERPC32) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
+#endif // if defined(QTHREAD_MUTEX_INCREMENT) || QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32
 } /*}}} */
 
 static QINLINE int64_t qthread_incr_xx(void *addr,
@@ -1495,25 +1203,6 @@ static QINLINE uint32_t qthread_cas32(uint32_t *operand,
                        : "cc", "memory");
   return result;
 
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) ||                         \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)
-  uint32_t newv = newval;
-  __asm__ __volatile__("membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
-                       "cas [%1], %2, %0"
-                       : "+r"(newv)
-                       : "r"(operand), "r"(oldval)
-                       : "cc", "memory");
-  return newv;
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
-  uint32_t retval;
-  __asm__ __volatile__("mov ar.ccv=%0;;" : : "rO"(oldval));
-  __asm__ __volatile__("cmpxchg4.acq %0=[%1],%2,ar.ccv"
-                       : "=r"(retval)
-                       : "r"(operand), "r"(newval)
-                       : "memory");
-  return retval;
-
 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||                              \
   (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
   uint32_t retval;
@@ -1559,40 +1248,6 @@ static QINLINE uint64_t qthread_cas64(uint64_t *operand,
                        : "cc", "memory");
   return result;
 
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
-  uint64_t tmp1 = tmp1;
-  uint64_t tmp2 = tmp2;
-  uint64_t newv = newval;
-  __asm__ __volatile__("ldx %0, %1\n\t"
-                       "ldx %4, %2\n\t"
-                       "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
-                       "casx [%3], %2, %1\n\t"
-                       "stx %1, %0"
-                       /* h means 64-BIT REGISTER
-                        * (probably unneecessary, but why take chances?) */
-                       : "+m"(newv), "=&h"(tmp1), "=&h"(tmp2)
-                       : "r"(operand), "m"(oldval)
-                       : "cc", "memory");
-  return newv;
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)
-  uint64_t newv = newval;
-  __asm__ __volatile__("membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
-                       "casx [%1], %2, %0"
-                       : "+r"(newv)
-                       : "r"(operand), "r"(oldval)
-                       : "cc", "memory");
-  return newv;
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
-  uint32_t retval;
-  __asm__ __volatile__("mov ar.ccv=%0;;" : : "rO"(oldval));
-  __asm__ __volatile__("cmpxchg8.acq %0=[%1],%2,ar.ccv"
-                       : "=r"(retval)
-                       : "r"(operand), "r"(newval)
-                       : "memory");
-  return retval;
-
 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
   union {
     uint64_t i;
@@ -1639,19 +1294,6 @@ static QINLINE uint64_t qthread_cas64(uint64_t *operand,
    * [lock] cmpxchg reg, reg/mem
    *                src, dest
    */
-#ifdef __PGI
-  /* this is a workaround for a bug in the PGI compiler where the width of
-   * retval is not respected and %eax is used instead of %rax */
-  uint64_t retval;
-  __asm__ __volatile__("lock cmpxchg %1,(%2)\n\t"
-                       "mov %%rax,(%0)" ::"r"(&retval),
-                       "r"(newval),
-                       "r"(operand),
-                       "a"(oldval) /* load into RAX */
-                       : "cc", "memory");
-  return retval;
-
-#else
   uint64_t retval;
   __asm__ __volatile__(
     "lock; cmpxchg %1,(%2)"
@@ -1659,7 +1301,6 @@ static QINLINE uint64_t qthread_cas64(uint64_t *operand,
     : "r"(newval), "r"(operand), "a"(oldval) /* load into RAX */
     : "cc", "memory");
   return retval;
-#endif // ifdef __PGI
 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32)
   /* In general, RISC doesn't provide a way to do 64 bit operations from 32
    * bit code. Sorry! */
diff --git a/include/qthread_innards.h b/include/qthread_innards.h
index 7c4f18cfd..4c819e41b 100644
--- a/include/qthread_innards.h
+++ b/include/qthread_innards.h
@@ -109,9 +109,7 @@ typedef struct qlib_s {
   aligned_t sched_shepherd;
   QTHREAD_FASTLOCK_TYPE sched_shepherd_lock;
 
-#if defined(QTHREAD_MUTEX_INCREMENT) ||                                        \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) ||                              \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
+#if defined(QTHREAD_MUTEX_INCREMENT) || QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32
   QTHREAD_FASTLOCK_TYPE *atomic_locks;
 #ifdef QTHREAD_COUNT_THREADS
   aligned_t *atomic_stripes;
diff --git a/src/Makefile.am b/src/Makefile.am
index da882059f..982cdb7f1 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -118,7 +118,6 @@ EXTRA_DIST += \
 			 affinity/libnuma.c \
 			 affinity/libnumaV2.c \
 			 affinity/mach.c \
-			 affinity/tilera.c \
 			 affinity/plpa.c \
 			 affinity/lgrp.c \
 			 affinity/shepcomp.h
diff --git a/src/affinity/tilera.c b/src/affinity/tilera.c
deleted file mode 100644
index f1459271c..000000000
--- a/src/affinity/tilera.c
+++ /dev/null
@@ -1,99 +0,0 @@
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#ifdef HAVE_TMC_CPUS_H
-#include <tmc/cpus.h>
-#endif
-
-#include <stdio.h>
-
-#include "qt_affinity.h"
-#include "qt_asserts.h"
-#include "qt_debug.h" // for MALLOC()
-#include "shepcomp.h"
-#include "shufflesheps.h"
-
-qthread_shepherd_id_t guess_num_shepherds(void);
-qthread_worker_id_t
-guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds);
-
-void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds,
-                               qthread_worker_id_t *nbworkers,
-                               size_t *hw_par) { /*{{{ */
-  if (*nbshepherds == 0) {
-    *nbshepherds = guess_num_shepherds();
-    if (*nbshepherds <= 0) { *nbshepherds = 1; }
-  }
-  if (*nbworkers == 0) {
-    *nbworkers = guess_num_workers_per_shep(*nbshepherds);
-    if (*nbworkers <= 0) { *nbworkers = 1; }
-  }
-} /*}}} */
-
-qthread_shepherd_id_t INTERNAL guess_num_shepherds(void) { /*{{{ */
-  cpu_set_t online_cpus;
-
-  qassert(tmc_cpus_get_online_cpus(&online_cpus), 0);
-  return tmc_cpus_count(&online_cpus);
-} /*}}} */
-
-void INTERNAL qt_affinity_set(qthread_worker_t *me,
-                              unsigned int Q_UNUSED(nw)) { /*{{{ */
-  if (tmc_cpus_set_my_cpu(me->packed_worker_id) < 0) {
-    perror("tmc_cpus_set_my_affinity() failed");
-    fprintf(stderr, "\tnode = %i\n", (int)me->packed_worker_id);
-  }
-} /*}}} */
-
-qthread_worker_id_t INTERNAL
-guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds) { /*{{{ */
-  return 1;
-} /*}}} */
-
-int INTERNAL qt_affinity_gendists(qthread_shepherd_t *sheps,
-                                  qthread_shepherd_id_t nshepherds) { /*{{{ */
-  cpu_set_t online_cpus;
-  unsigned int *cpu_array;
-  size_t cpu_count, offset;
-
-#warning The logic for node assignment is completely wrong for multithreaded shepherds
-  qassert(tmc_cpus_get_online_cpus(&online_cpus), 0);
-  cpu_count = tmc_cpus_count(&online_cpus);
-  assert(cpu_count > 0);
-  /* assign nodes */
-  cpu_array = MALLOC(sizeof(unsigned int) * cpu_count);
-  assert(cpu_array != NULL);
-  qassert(tmc_cpus_to_array(&online_cpus, cpu_array, cpu_count), cpu_count);
-  offset = 0;
-  for (qthread_shepherd_id_t i = 0; i < nshepherds; i++) {
-    sheps[i].node = cpu_array[offset];
-    offset++;
-    offset *= (offset < cpu_count);
-  }
-  FREE(cpu_array, sizeof(unsigned int) * cpu_count);
-  for (qthread_shepherd_id_t i = 0; i < nshepherds; i++) {
-    size_t j, k;
-    unsigned int ix, iy;
-    sheps[i].shep_dists = qt_calloc(nshepherds, sizeof(unsigned int));
-    sheps[i].sorted_sheplist =
-      qt_calloc(nshepherds - 1, sizeof(qthread_shepherd_id_t));
-    assert(sheps[i].shep_dists);
-    assert(sheps[i].sorted_sheplist);
-    tmc_cpus_grid_cpu_to_tile(sheps[i].node, &ix, &iy);
-    for (j = 0; j < nshepherds; j++) {
-      unsigned int jx, jy;
-      tmc_cpus_grid_cpu_to_tile(sheps[j].node, &jx, &jy);
-      sheps[i].shep_dists[j] = abs((int)ix - (int)jx) + abs((int)iy - (int)jy);
-    }
-    for (j = k = 0; j < nshepherds; j++) {
-      if (j != i) { sheps[i].sorted_sheplist[k++] = j; }
-    }
-    if (nshepherds > 1) {
-      sort_sheps(sheps[i].shep_dists, sheps[i].sorted_sheplist, nshepherds);
-    }
-  }
-  return QTHREAD_SUCCESS;
-} /*}}} */
-
-/* vim:set expandtab: */
diff --git a/src/cacheline.c b/src/cacheline.c
index f05faac61..4c40ed940 100644
--- a/src/cacheline.c
+++ b/src/cacheline.c
@@ -205,18 +205,6 @@ static void figure_out_cacheline_size(void) { /*{{{ */
   } else {
     cacheline_bytes = 128; // G5
   }
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) ||                         \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)
-  cacheline_bytes = 128;
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
-#ifdef DEBUG_CPUID
-  printf("IA64 does not support CPUID; but is usually 128\n");
-#endif
-  cacheline_bytes = 128; // Itanium L2/L3 are 128, L1 is 64
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO)
-  cacheline_bytes = 64;
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEGX)
-  cacheline_bytes = 64;
 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) ||                               \
   (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64)
 #if !defined(HAVE_GCC_INLINE_ASSEMBLY)
diff --git a/src/compat_atomics.c b/src/compat_atomics.c
index 06afca9b8..09b5eb3bd 100644
--- a/src/compat_atomics.c
+++ b/src/compat_atomics.c
@@ -14,8 +14,7 @@ extern unsigned int QTHREAD_LOCKING_STRIPES;
   (((size_t)addr >> 4) & (QTHREAD_LOCKING_STRIPES - 1))
 
 #if defined(QTHREAD_MUTEX_INCREMENT) ||                                        \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) ||                              \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
+  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32)
 uint32_t qthread_incr32_(uint32_t *op, int32_t const incr) { /*{{{ */
   unsigned int stripe = QTHREAD_CHOOSE_STRIPE(op);
   uint32_t retval;
@@ -110,9 +109,7 @@ uint64_t qthread_cas64_(uint64_t *operand,
   return retval;
 } /*}}} */
 
-#else /* if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH ==      \
-         QTHREAD_POWERPC32) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)   \
-       */
+#else /* if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) */
 #error Building this file erroneously.
 #endif /* if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH ==     \
           QTHREAD_POWERPC32) */
diff --git a/src/ds/qarray.c b/src/ds/qarray.c
index 2ab52807c..3329e04f6 100644
--- a/src/ds/qarray.c
+++ b/src/ds/qarray.c
@@ -38,8 +38,7 @@ qarray_internal_segment_shep(qarray const *a,
   char *ptr = (((char *)segment_head) + (a->segment_size * a->unit_size));
 
   qassert_ret(a->dist_type == DIST, NULL);
-  /* ensure that it's 4-byte aligned
-   * (mandatory on Sparc, good idea elsewhere) */
+  /* ensure that it's 4-byte aligned */
   if (((uintptr_t)ptr) & 3) { ptr += 4 - (((uintptr_t)ptr) & 3); }
   /* first, do we have the space? */
   qassert_ret((((ptr + sizeof(qthread_shepherd_id_t) - 1) <
@@ -419,24 +418,12 @@ static qarray *qarray_create_internal(size_t const count,
 } /*}}} */
 
 qarray *qarray_create(size_t const count, size_t const obj_size) { /*{{{ */
-#if QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32 ||                             \
-  QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64
-  return qarray_create_internal(count, obj_size, DIST_STRIPES, 0, 0);
-
-#else
   return qarray_create_internal(count, obj_size, FIXED_HASH, 0, 0);
-#endif
 } /*}}} */
 
 qarray *qarray_create_tight(size_t const count,
                             size_t const obj_size) { /*{{{ */
-#if QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32 ||                             \
-  QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64
-  return qarray_create_internal(count, obj_size, DIST_STRIPES, 1, 0);
-
-#else
   return qarray_create_internal(count, obj_size, FIXED_HASH, 1, 0);
-#endif
 } /*}}} */
 
 qarray *qarray_create_configured(size_t const count,
diff --git a/src/fastcontext/asm.S b/src/fastcontext/asm.S
index 4ef4392c6..afdacfd7d 100644
--- a/src/fastcontext/asm.S
+++ b/src/fastcontext/asm.S
@@ -59,14 +59,6 @@
 #  define NEEDARMA64CONTEXT 1
 #  define SET qt_setmctxt
 #  define GET qt_getmctxt
-# elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO)
-#  define NEEDTILEPROCONTEXT 1
-#  define SET _qt_setmctxt
-#  define GET _qt_getmctxt
-# elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEGX)
-#  define NEEDTILEGXCONTEXT 1
-#  define SET qt_setmctxt
-#  define GET qt_getmctxt
 # elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
 #  define NEEDX86CONTEXT 1
 #  define SET qt_setmctxt
@@ -215,332 +207,6 @@ GET:
         ret
 #endif
 
-#ifdef NEEDTILEPROCONTEXT
-.text
-.align 2
-
-.type  GET,@function
-.globl GET
-GET:
-        ## .frame $sp, 8, $sp
-        # .caller_lr = 8
-        # .caller_caller_sp = 12
-        addli   r23, sp, -8     _(the arg)
-        sw              r23, r0
-        _(/*) prologue end */)
-        _(/*) setup the pointer */)
-        addli   r1, sp, -8
-        lw              r1, r1
-        _(/* note that each of these uses different temporary
-           * registers, to allow efficient scheduling */)
-        addi    r2, r1, (0*4)
-        sw              r2, r30
-        addi    r3, r1, (1*4)
-        sw              r3, r31
-        addi    r4, r1, (2*4)
-        sw              r4, r32
-        addi    r5, r1, (3*4)
-        sw              r5, r33
-        addi    r6, r1, (4*4)
-        sw              r6, r34
-        addi    r7, r1, (5*4)
-        sw              r7, r35
-        addi    r8, r1, (6*4)
-        sw              r8, r36
-        addi    r9, r1, (7*4)
-        sw              r9, r37
-        addi    r10, r1, (8*4)
-        sw              r10, r38
-        addi    r11, r1, (9*4)
-        sw              r11, r39
-        addi    r12, r1, (10*4)
-        sw              r12, r40
-        addi    r13, r1, (11*4)
-        sw              r13, r41
-        addi    r14, r1, (12*4)
-        sw              r14, r42
-        addi    r15, r1, (13*4)
-        sw              r15, r43
-        addi    r16, r1, (14*4)
-        sw              r16, r44
-        addi    r17, r1, (15*4)
-        sw              r17, r45
-        addi    r18, r1, (16*4)
-        sw              r18, r46
-        addi    r19, r1, (17*4)
-        sw              r19, r47
-        addi    r20, r1, (18*4)
-        sw              r20, r48
-        addi    r21, r1, (19*4)
-        sw              r21, r49
-        addi    r22, r1, (20*4)
-        sw              r22, r50
-        addi    r23, r1, (21*4)
-        sw              r23, r51
-        addi    r24, r1, (22*4)
-        sw              r24, r52
-        _(/*) gotten contexts are not function calls */)
-        addi    r6, r1, (23*4)+(6*4)
-        sw              r6, zero
-        _(/*) store the link register as the new pc */)
-        move    r25, lr
-        addi    r27, r1, (23*4)+(3*4)
-        sw              r27, r25
-        _(/*) store the stack pointer */)
-        addi    r27, sp, 0
-        addi    r28, r1, (23*4)+(1*4)
-        sw              r28, r27
-        _(/*) store the return for swapcontext */)
-        addi    r3, r1, (23*4)+(4*4)
-        movei   r4, 1
-        sw              r3, r4
-        _(/*) return value */)
-        move    r0, zero _(/*) success! */)
-        jrp     lr
-.type  SET,@function
-.globl SET
-SET:
-        ## .frame $sp, 8, $sp
-        # .caller_lr = 8
-        # .caller_caller_sp = 12
-        addli   r6, sp, -8
-        sw              r6, r0
-        _(/*) prologue end */)
-        _(/*) setup the pointer */)
-        addli   r1, sp, -8
-        lw              r1, r1
-        _(/* note that each of these uses different temporary
-           * registers, to allow efficient scheduling */)
-        addi    r2, r1, (0*4)
-        lw              r30, r2
-        addi    r3, r1, (1*4)
-        lw              r31, r3
-        addi    r4, r1, (2*4)
-        lw              r32, r4
-        addi    r5, r1, (3*4)
-        lw              r33, r5
-        addi    r6, r1, (4*4)
-        lw              r34, r6
-        addi    r7, r1, (5*4)
-        lw              r35, r7
-        addi    r8, r1, (6*4)
-        lw              r36, r8
-        addi    r9, r1, (7*4)
-        lw              r37, r9
-        addi    r10, r1, (8*4)
-        lw              r38, r10
-        addi    r11, r1, (9*4)
-        lw              r39, r11
-        addi    r12, r1, (10*4)
-        lw              r40, r12
-        addi    r13, r1, (11*4)
-        lw              r41, r13
-        addi    r14, r1, (12*4)
-        lw              r42, r14
-        addi    r15, r1, (13*4)
-        lw              r43, r15
-        addi    r16, r1, (14*4)
-        lw              r44, r17
-        addi    r18, r1, (15*4)
-        lw              r45, r18
-        addi    r19, r1, (16*4)
-        lw              r46, r19
-        addi    r20, r1, (17*4)
-        lw              r47, r20
-        addi    r21, r1, (18*4)
-        lw              r48, r21
-        addi    r22, r1, (19*4)
-        lw              r49, r22
-        addi    r23, r1, (20*4)
-        lw              r50, r23
-        addi    r24, r1, (21*4)
-        lw              r51, r24
-        addi    r25, r1, (22*4)
-        lw              r52, r25
-        _(/*) fiddle with the stack */)
-        addi    r2, r1, (23*4)+(1*4)
-        lw              r3, r2
-        move    sp, r3
-        _(/*) retrieve the new PC */)
-        addi    r6, r1, (23*4)+(3*4)
-        lw              r7, r6
-        _(/*) first argument? */)
-        addi    r4, r1, (23*4)+(6*4)
-        lw              r5, r4
-        bz              r5, 1f
-        addi    r0, r1, (23*4)+(5*4)
-        lw              r0, r0
-        jf              2f
-1:
-        addi    r0, r1, (23*4)+(4*4)
-        lw              r0, r0
-2:
-        jrp     r7
-#endif
-
-#ifdef NEEDTILEGXCONTEXT
-.text
-.align 2
-
-.type  GET,@function
-.globl GET
-GET:
-        _("## .frame $sp, 8, $sp")
-        _(# .caller_lr = 8)
-        _(# .caller_caller_sp = 12)
-        addli   r23, sp, -16     _(the arg)
-        st              r23, r0
-        _(/*) prologue end */)
-        _(/*) setup the pointer */)
-        addli   r1, sp, -16
-        ld              r1, r1
-        _(/* note that each of these uses different temporary
-           * registers, to allow efficient scheduling */)
-        addi    r2, r1, (0*8)
-        st      r2, r30
-        addi    r3, r1, (1*8)
-        st      r3, r31
-        addi    r4, r1, (2*8)
-        st      r4, r32
-        addi    r5, r1, (3*8)
-        st      r5, r33
-        addi    r6, r1, (4*8)
-        st      r6, r34
-        addi    r7, r1, (5*8)
-        st      r7, r35
-        addi    r8, r1, (6*8)
-        st      r8, r36
-        addi    r9, r1, (7*8)
-        st      r9, r37
-        addi    r10, r1, (8*8)
-        st      r10, r38
-        addi    r11, r1, (9*8)
-        st      r11, r39
-        addi    r12, r1, (10*8)
-        st      r12, r40
-        addi    r13, r1, (11*8)
-        st      r13, r41
-        addi    r14, r1, (12*8)
-        st      r14, r42
-        addi    r15, r1, (13*8)
-        st      r15, r43
-        addi    r16, r1, (14*8)
-        st      r16, r44
-        addi    r17, r1, (15*8)
-        st      r17, r45
-        addli    r18, r1, (16*8)
-        st      r18, r46
-        addli    r19, r1, (17*8)
-        st      r19, r47
-        addli    r20, r1, (18*8)
-        st      r20, r48
-        addli    r21, r1, (19*8)
-        st      r21, r49
-        addli    r22, r1, (20*8)
-        st      r22, r50
-        addli    r23, r1, (21*8)
-        st      r23, r51
-        addli    r24, r1, (22*8)
-        st      r24, r52
-        _(/*) gotten contexts are not function calls */)
-        addli    r6, r1, (23*8)+(6*8)
-        st      r6, zero
-        _(/*) store the link register as the new pc */)
-        move    r25, lr
-        addli    r27, r1, (23*8)+(3*8)
-        st      r27, r25
-        _(/*) store the stack pointer */)
-        addli    r27, sp, 0
-        addli    r28, r1, (23*8)+(1*8)
-        st      r28, r27
-        _(/*) store the return for swapcontext */)
-        addli    r3, r1, (23*8)+(4*8)
-        movei   r4, 1
-        st      r3, r4
-        _(/*) return value */)
-        move    r0, zero _(/*) success! */)
-        jrp     lr
-.type  SET,@function
-.globl SET
-SET:
-        _("## .frame $sp, 8, $sp")
-        _(# .caller_lr = 8)
-        _(# .caller_caller_sp = 12)
-        addli   r6, sp, -16
-        st      r6, r0
-        _(/*) prologue end */)
-        _(/*) setup the pointer */)
-        addli   r1, sp, -16
-        ld      r1, r1
-        _(/* note that each of these uses different temporary
-           * registers, to allow efficient scheduling */)
-        addi    r2, r1, (0*8)
-        ld      r30, r2
-        addi    r3, r1, (1*8)
-        ld      r31, r3
-        addi    r4, r1, (2*8)
-        ld      r32, r4
-        addi    r5, r1, (3*8)
-        ld      r33, r5
-        addi    r6, r1, (4*8)
-        ld      r34, r6
-        addi    r7, r1, (5*8)
-        ld      r35, r7
-        addi    r8, r1, (6*8)
-        ld      r36, r8
-        addi    r9, r1, (7*8)
-        ld      r37, r9
-        addi    r10, r1, (8*8)
-        ld      r38, r10
-        addi    r11, r1, (9*8)
-        ld      r39, r11
-        addi    r12, r1, (10*8)
-        ld      r40, r12
-        addi    r13, r1, (11*8)
-        ld      r41, r13
-        addi    r14, r1, (12*8)
-        ld      r42, r14
-        addi    r15, r1, (13*8)
-        ld      r43, r15
-        addi    r16, r1, (14*8)
-        ld      r44, r16
-        addi    r17, r1, (15*8)
-        ld      r45, r17
-        addli    r18, r1, (16*8)
-        ld      r46, r18
-        addli    r19, r1, (17*8)
-        ld      r47, r19
-        addli    r20, r1, (18*8)
-        ld      r48, r20
-        addli    r21, r1, (19*8)
-        ld      r49, r21
-        addli    r22, r1, (20*8)
-        ld      r50, r22
-        addli    r23, r1, (21*8)
-        ld      r51, r23
-        addli    r24, r1, (22*8)
-        ld      r52, r24
-        _(/*) fiddle with the stack */)
-        addli    r2, r1, (23*8)+(1*8)
-        ld      r3, r2
-        move    sp, r3
-        _(/*) retrieve the new PC */)
-        addli    r6, r1, (23*8)+(3*8)
-        ld      r7, r6
-        _(/*) first argument? */)
-        addli    r4, r1, (23*8)+(6*8)
-        ld      r5, r4
-        beqz    r5, 1f
-        addli    r0, r1, (23*8)+(5*8)
-        ld      r0, r0
-        j       2f
-1:
-        addli    r0, r1, (23*8)+(4*8)
-        ld      r0, r0
-2:
-        jrp     r7
-#endif
-
 #ifdef NEEDPOWERCONTEXT
 /* get FPR and VR use flags with sc 0x7FF3 */
 /* get vsave with mfspr reg, 256 */
@@ -975,6 +641,6 @@ SET:
 	bx      lr
 #endif
 
-#if defined(__ELF__) && !defined(__SUNPRO_C)
+#if defined(__ELF__)
 .section .note.GNU-stack,"",%progbits
 #endif
diff --git a/src/fastcontext/context.c b/src/fastcontext/context.c
index d7835bfbc..aa33f137e 100644
--- a/src/fastcontext/context.c
+++ b/src/fastcontext/context.c
@@ -83,33 +83,6 @@ void INTERNAL qt_makectxt(uctxt_t *ucp, void (*func)(void), int argc, ...) {
   ucp->mc.mc_esp = (long)sp;
 }
 
-#elif defined(NEEDTILEMAKECONTEXT)
-/* This function is entirely copyright Sandia National Laboratories */
-void INTERNAL qt_makectxt(uctxt_t *ucp, void (*func)(void), int argc, ...) {
-  unsigned long *sp;
-  unsigned long *tos = ucp->uc_stack.ss_sp;
-  int i;
-  va_list arg;
-
-  tos += ucp->uc_stack.ss_size / sizeof(unsigned long);
-  tos -= 1;        // allow space for an incoming lr
-  sp = tos - argc; // allow space for arguments
-  sp = (void *)((unsigned long)sp -
-                (unsigned long)sp % 64); /* 64-align for Tilera */
-  /* now copy from my arg list to the function's arglist (yes, I know this is
-   * voodoo) */
-  // memmove(sp, &argc + 1, argc * sizeof(void*));
-  /* The function may also expect to pull args from up to nine registers */
-  va_start(arg, argc);
-  for (i = 0; i < argc; i++) {
-    if (i == 0) { ucp->mc.arg0 = va_arg(arg, unsigned long); }
-  }
-  ucp->mc.pc = (unsigned long)func;
-  ucp->mc.sp = (unsigned long)sp;
-  ucp->mc.first = 1;
-  va_end(arg);
-}
-
 #elif defined(NEEDARMMAKECONTEXT)
 /* This function is entirely copyright Sandia National Laboratories */
 void INTERNAL qt_makectxt(uctxt_t *ucp, void (*func)(void), int argc, ...) {
@@ -177,8 +150,7 @@ QT_SKIP_THREAD_SANITIZER int INTERNAL qt_swapctxt(uctxt_t *oucp, uctxt_t *ucp) {
   Q_PREFETCH(ucp, 0, 0);
   if (getcontext(oucp) == 0) {
 #if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) ||                                \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||                               \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64))
+     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64))
     Q_PREFETCH((void *)ucp->mc.mc_esp, 1, 3);
 #endif
     setcontext(ucp);
diff --git a/src/qthread.c b/src/qthread.c
index 112caa2a1..32550ef63 100644
--- a/src/qthread.c
+++ b/src/qthread.c
@@ -88,8 +88,7 @@
 
 #if !(defined(HAVE_GCC_INLINE_ASSEMBLY) &&                                     \
       (QTHREAD_SIZEOF_ALIGNED_T == 4 ||                                        \
-       (QTHREAD_ASSEMBLY_ARCH != QTHREAD_POWERPC32 &&                          \
-        QTHREAD_ASSEMBLY_ARCH != QTHREAD_SPARCV9_32))) &&                      \
+       QTHREAD_ASSEMBLY_ARCH != QTHREAD_POWERPC32)) &&                      \
   !defined(QTHREAD_ATOMIC_CAS) && !defined(QTHREAD_MUTEX_INCREMENT)
 #warning QTHREAD_MUTEX_INCREMENT not defined. It probably should be.
 #define QTHREAD_MUTEX_INCREMENT 1
diff --git a/src/syncvar.c b/src/syncvar.c
index d9803a564..07f283773 100644
--- a/src/syncvar.c
+++ b/src/syncvar.c
@@ -103,12 +103,9 @@ extern unsigned int QTHREAD_LOCKING_STRIPES;
                           BUILD_UNLOCKED_SYNCVAR(val, state),                  \
                           memory_order_relaxed);                               \
   } while (0)
-#elif ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) ||                         \
-       (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) ||                         \
-       (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) ||                              \
-       (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) ||                              \
-       (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) ||                        \
-       (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO))
+#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) ||                         \
+      (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) ||                         \
+      (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
 #define UNLOCK_THIS_UNMODIFIED_SYNCVAR(addr, unlocked)                         \
   do {                                                                         \
     atomic_store_explicit(                                                     \
@@ -141,8 +138,7 @@ static uint64_t qthread_mwaitc(syncvar_t *restrict const addr,
                                unsigned char const statemask,
                                unsigned int timeout,
                                eflags_t *restrict const err) { /*{{{ */
-#if ((QTHREAD_ASSEMBLY_ARCH != QTHREAD_TILEPRO) &&                             \
-     (QTHREAD_ASSEMBLY_ARCH != QTHREAD_POWERPC32))
+#if (QTHREAD_ASSEMBLY_ARCH != QTHREAD_POWERPC32)
   syncvar_t unlocked;
 #endif
   syncvar_t locked;
@@ -156,21 +152,7 @@ static uint64_t qthread_mwaitc(syncvar_t *restrict const addr,
   e.zf = 0;
   e.cf = 1;
   do {
-#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO)
-    uint32_t low, high;
-    int32_t *addrptr = (int32_t *)addr;
-    /* note that the tilera is little-endian, otherwise this would be
-     * addrptr+1 */
-    while ((low = __insn_tns(addrptr)) == 1) {
-      if (timeout-- <= 0) { goto errexit; }
-      SPINLOCK_BODY();
-    }
-    /* now addrptr[0] is 1 and low is the "real" (unlocked) addrptr[0]
-     * value. */
-    high = addrptr[1];
-    locked.u.w = (((uint64_t)high) << 32) | low;
-    MACHINE_FENCE;
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32)
+#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32)
     /* This applies for any 32-bit architecture with a valid 32-bit CAS
      * (though I'm making some big-endian assumptions at the moment) */
     uint32_t low_unlocked, low_locked;
@@ -191,7 +173,7 @@ static uint64_t qthread_mwaitc(syncvar_t *restrict const addr,
       if (timeout-- <= 0) { goto errexit; }
     } while (1);
     locked.u.w = addr->u.w; // I locked it, so I can read it
-#else  /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO) */
+#else  /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) */
     {
       syncvar_t tmp;
     loop_start:
@@ -213,7 +195,7 @@ static uint64_t qthread_mwaitc(syncvar_t *restrict const addr,
         if (timeout-- <= 0) { goto errexit; }
       } while (1);
     }
-#endif /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO) */
+#endif /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) */
     /***************************************************
      * now locked == unlocked, and the lock bit is set *
      ***************************************************/
@@ -229,10 +211,7 @@ static uint64_t qthread_mwaitc(syncvar_t *restrict const addr,
       return locked.u.s.data;
     } else {
       /* this is NOT a state of interest, so unlock the locked bit */
-#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO)
-      MACHINE_FENCE;
-      addrptr[0] = low;
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32)
+#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32)
       MACHINE_FENCE;
       addrptr[1] = low_unlocked;
 #else
@@ -404,12 +383,10 @@ int API_FUNC qthread_syncvar_readFF(uint64_t *restrict dest,
   QTHREAD_FEB_UNIQUERECORD(feb, src, me);
   QTHREAD_FEB_TIMER_START(febblock);
 
-#if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||                               \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) ||                                \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) ||                           \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)) ||                         \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) ||                                    \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64)
+#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \
+    (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) || \
+    (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) || \
+    (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64)
   {
     /* I'm being optimistic here; this only works if a basic 64-bit load is
      * atomic (on most platforms it is). Thus, if I've done an atomic read
@@ -426,11 +403,10 @@ int API_FUNC qthread_syncvar_readFF(uint64_t *restrict dest,
       return QTHREAD_SUCCESS;
     }
   }
-#endif /* if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||                      \
-          (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) || (QTHREAD_ASSEMBLY_ARCH == \
-          QTHREAD_POWERPC64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)  \
-          || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) || (QTHREAD_ASSEMBLY_ARCH  \
-          == QTHREAD_ARMV8_A64)) */
+#endif /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \
+             (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) || \
+             (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) || \
+             (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64) */
   ret = qthread_mwaitc(src, SYNCFEB_FULL, INITIAL_TIMEOUT, &e);
   qthread_debug(SYNCVAR_DETAILS,
                 "2 src(%p) = %x, ret = %x\n",
@@ -549,12 +525,10 @@ int API_FUNC qthread_syncvar_readFF_nb(uint64_t *restrict dest,
 
   if (!me) { return qthread_syncvar_blocker_func(dest, src, READFF_NB); }
 
-#if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||                               \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) ||                                \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) ||                           \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) ||                          \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) ||                                 \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64))
+#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||      \
+    (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) ||  \
+    (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) ||        \
+    (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64)
   {
     /* I'm being optimistic here; this only works if a basic 64-bit load is
      * atomic (on most platforms it is). Thus, if I've done an atomic read
@@ -569,11 +543,10 @@ int API_FUNC qthread_syncvar_readFF_nb(uint64_t *restrict dest,
       return QTHREAD_SUCCESS;
     }
   }
-#endif /* if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||                      \
-          (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) || (QTHREAD_ASSEMBLY_ARCH == \
-          QTHREAD_POWERPC64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)  \
-          || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) || (QTHREAD_ASSEMBLY_ARCH  \
-          == QTHREAD_ARMV8_A64)) */
+#endif /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||       \
+             (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) ||   \
+             (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) ||         \
+             (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64) */
   ret = qthread_mwaitc(src, SYNCFEB_FULL, 1, &e);
   qthread_debug(SYNCVAR_DETAILS,
                 "2 src(%p) = %x, ret = %x\n",
diff --git a/src/threadqueues/sherwood_threadqueues.c b/src/threadqueues/sherwood_threadqueues.c
index 43e639b34..642937475 100644
--- a/src/threadqueues/sherwood_threadqueues.c
+++ b/src/threadqueues/sherwood_threadqueues.c
@@ -223,10 +223,8 @@ void INTERNAL qt_threadqueue_subsystem_init(void) { /*{{{*/
 #endif /* if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) */
 
 ssize_t INTERNAL qt_threadqueue_advisory_queuelen(qt_threadqueue_t *q) { /*{{{*/
-#if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||                               \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) ||                                \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) ||                           \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64))
+#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \
+    (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64)
   /* only works if a basic load is atomic */
   return q->qlength;
 
@@ -237,9 +235,8 @@ ssize_t INTERNAL qt_threadqueue_advisory_queuelen(qt_threadqueue_t *q) { /*{{{*/
   tmp = q->qlength;
   QTHREAD_TRYLOCK_UNLOCK(&q->qlock);
   return tmp;
-#endif /* if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||                      \
-          (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) || (QTHREAD_ASSEMBLY_ARCH == \
-          QTHREAD_POWERPC64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)) \
+#endif /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \
+             (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64)
         */
 } /*}}}*/
 
diff --git a/test/argparsing.h b/test/argparsing.h
index d12ae02c9..eb9f9946a 100644
--- a/test/argparsing.h
+++ b/test/argparsing.h
@@ -101,8 +101,6 @@ static ARGP_Atomic(int) verbose;
 #if defined(SILENT_ARGPARSING)
 #warning Silencing iprintf() output.
 #define iprintf(...)
-#elif defined(__tile__) || defined(__CYGWIN32__)
-#define iprintf printf
 #else
 static void iprintf(char const *restrict format, ...) {
   if (atomic_load_explicit(&verbose, memory_order_relaxed)) {
@@ -115,7 +113,7 @@ static void iprintf(char const *restrict format, ...) {
   }
 }
 
-#endif // if defined(__tile__) || defined(__CYGWIN32__)
+#endif // if defined(SILENT_ARGPARSING)
 
 #endif // ifndef TEST_ARGPARSING_H
 /* vim:set expandtab: */
diff --git a/test/basics/qthread_stackleft.c b/test/basics/qthread_stackleft.c
index df323cca2..24b96b0cb 100644
--- a/test/basics/qthread_stackleft.c
+++ b/test/basics/qthread_stackleft.c
@@ -38,11 +38,7 @@ static aligned_t alldone;
 static STACKLEFT_NOINLINE size_t thread2(size_t left, size_t depth) {
   size_t foo = qthread_stackleft();
   iprintf("leveli%i: %zu bytes left\n", (int)depth, foo);
-#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
-  assert(foo <= left);
-#else
   assert(foo < left);
-#endif
   if (depth < 5) { thread2(foo, depth + 1); }
   return 1;
 }