diff --git a/README.md b/README.md
index ee59e6ae8..42cc266ff 100644
--- a/README.md
+++ b/README.md
@@ -169,16 +169,3 @@ use those compilers, you probably need to configure with
 
 *******************************************************
 
-## NOTE FOR TILERA USERS
-The Tilera cache coherency protocols, as of the TileGX boards, appear to be
-somewhat buggy for large multithreaded programs. And by buggy I mean they cause
-kernel panics (at least, I haven't been able to demonstrate data corruption
-yet). Thankfully, you can pick from several cache coherency protocols, and one
-of them is more stable than the default. What I have found that seems to be
-*more* stable, if not perfectly stable, is to force the cache coherency
-protocol to hashed. The way you do this is with a boot argument to the Tilera
-kernel. The tile-monitor command I use is this:
-
-	`tile-monitor --net <tilera> --hvx ucache_hash=all --`
-
-Good luck!
diff --git a/config/qthread_check_assembly.m4 b/config/qthread_check_assembly.m4
index dc34ae846..19620b557 100644
--- a/config/qthread_check_assembly.m4
+++ b/config/qthread_check_assembly.m4
@@ -79,30 +79,6 @@ unset qthread_assemble
 ])dnl
 
 
-dnl #################################################################
-dnl
-dnl QTHREAD_CHECK_SPARCV8PLUS
-dnl
-dnl #################################################################
-AC_DEFUN([QTHREAD_CHECK_SPARCV8PLUS],[
-    AC_MSG_CHECKING([if have Sparc v8+/v9 support])
-    sparc_result=0
-    QTHREAD_TRY_ASSEMBLE([$qthread_cv_asm_text
-	casa [%o0] 0x80, %o1, %o2],
-                [sparc_result=1],
-                [sparc_result=0])
-    if test "$sparc_result" = "1" ; then
-        AC_MSG_RESULT([yes])
-        ifelse([$1],,:,[$1])
-    else
-        AC_MSG_RESULT([no])
-        ifelse([$2],,:,[$2])
-    fi
-
-    unset sparc_result
-])dnl
-
-
 dnl #################################################################
 dnl
 dnl QTHREAD_CHECK_INLINE_GCC
@@ -179,23 +155,6 @@ AC_DEFUN([QTHREAD_CHECK_ASSEMBLY],[
       qthread_gcc_inline_assign='"movl [$]0, %0" : "=&r"(ret)'
     ;;
 
-    ia64-*)
-      qthread_cv_asm_arch="IA64"
-      qthread_gcc_inline_assign='"mov %0=r0\n;;\n" : "=&r"(ret)'
-    ;;
-
-    alpha-*|alphaev[[4-8]]-*|alphaev56-*|alphaev6[[78]]-*)
-      qthread_cv_asm_arch="ALPHA"
-      qthread_gcc_inline_assign='"bis [$]31,[$]31,%0" : "=&r"(ret)'
-    ;;
-
-	tile-*)
-      AS_IF([test "$ac_cv_sizeof_long" = "4"],
-            [qthread_cv_asm_arch="TILE"],
-            [qthread_cv_asm_arch="TILEGX"])
-	  qthread_gcc_inline_assign='"movei %0, 5" : "=&r"(ret)'
-	;;
-
 	armv7l-*)
 	  qthread_cv_asm_arch="ARM"
 	  qthread_gcc_inline_assign='"movt %0, #5" : "=&r"(ret)'
@@ -205,13 +164,6 @@ AC_DEFUN([QTHREAD_CHECK_ASSEMBLY],[
 		qthread_cv_asm_arch="ARMV8_A64"
 	;;
 
-    mips-*|mips64-*)
-      # Should really find some way to make sure that we are on
-      # a MIPS III machine (r4000 and later)
-      qthread_cv_asm_arch="MIPS"
-      qthread_gcc_inline_assign='"or %0,[$]0,[$]0" : "=&r"(ret)'
-    ;;
-
     powerpc*|powerpc64*)
       AS_IF([test "$ac_cv_sizeof_long" = "4"],
             [qthread_cv_asm_arch="POWERPC32"],
@@ -236,15 +188,6 @@ AC_DEFUN([QTHREAD_CHECK_ASSEMBLY],[
       qthread_gcc_inline_assign='"A_%=: li %0,0" : "=&r"(ret)'
     ;;
 
-    sparc*-*)
-      # SPARC v9 (and above) are the only ones with 64bit support
-      # if compiling 32 bit, see if we are v9 (aka v8plus) or
-      # earlier (casa is v8+/v9). 
-      AS_IF([test "$ac_cv_sizeof_long" = "4"],
-            [QTHREAD_CHECK_SPARCV8PLUS([qthread_cv_asm_arch="SPARCV9_32"])],
-            [qthread_cv_asm_arch="SPARCV9_64"])
-      qthread_gcc_inline_assign='"mov 0,%0" : "=&r"(ret)'
-    ;;
   esac
 
   # now that we know our architecture, try to inline assemble
diff --git a/config/qthread_check_atomics.m4 b/config/qthread_check_atomics.m4
index df9544030..80ec3e97c 100644
--- a/config/qthread_check_atomics.m4
+++ b/config/qthread_check_atomics.m4
@@ -15,17 +15,10 @@ AS_IF([test "x$enable_builtin_atomics" != xno],
 		     [AS_IF([test "x$enable_builtin_atomics" = xyes],
 				    [AC_MSG_WARN([Disabling builtin atomics on IBM_XL, due to compiler design decision])])
 			  enable_builtin_atomics=no])])
 AS_IF([test "x$enable_builtin_atomics" != xno], [
-  AS_IF([test "x$qthread_cv_c_compiler_type" = xIntel -o "x$qthread_cv_cxx_compiler_type" = xIntel],
-	    [AC_CHECK_HEADERS([ia64intrin.h ia32intrin.h])])
 AC_CACHE_CHECK([whether compiler supports builtin atomic CAS-32],
   [qthread_cv_atomic_CAS32],
   [AC_LINK_IFELSE([AC_LANG_SOURCE([[
-#ifdef HAVE_IA64INTRIN_H
-# include <ia64intrin.h>
-#elif HAVE_IA32INTRIN_H
-# include <ia32intrin.h>
-#endif
 #include <stdlib.h>
 #include <stdint.h> /* for uint32_t */
 
@@ -40,11 +33,6 @@ return (int)foo;
 AC_CACHE_CHECK([whether compiler supports builtin atomic CAS-64],
   [qthread_cv_atomic_CAS64],
   [AC_LINK_IFELSE([AC_LANG_SOURCE([[
-#ifdef HAVE_IA64INTRIN_H
-# include <ia64intrin.h>
-#elif HAVE_IA32INTRIN_H
-# include <ia32intrin.h>
-#endif
 #include <stdlib.h>
 #include <stdint.h> /* for uint64_t */
 
@@ -59,11 +47,6 @@ return foo;
 AC_CACHE_CHECK([whether compiler supports builtin atomic CAS-ptr],
   [qthread_cv_atomic_CASptr],
   [AC_LINK_IFELSE([AC_LANG_SOURCE([[
-#ifdef HAVE_IA64INTRIN_H
-# include <ia64intrin.h>
-#elif HAVE_IA32INTRIN_H
-# include <ia32intrin.h>
-#endif
 #include <stdlib.h>
 
 int main(void)
@@ -130,11 +113,6 @@ AC_CACHE_CHECK([whether compiler supports builtin atomic incr],
   [qthread_cv_atomic_incr],
   [AS_IF([test "$1" -eq 8],
          [AC_LINK_IFELSE([AC_LANG_SOURCE([[
-#ifdef HAVE_IA64INTRIN_H
-# include <ia64intrin.h>
-#elif HAVE_IA32INTRIN_H
-# include <ia32intrin.h>
-#endif
 #include <stdlib.h>
 #include <stdint.h> /* for uint64_t */
 
@@ -147,11 +125,6 @@ return foo;
 		   [qthread_cv_atomic_incr="yes"],
 		   [qthread_cv_atomic_incr="no"])],
          [AC_LINK_IFELSE([AC_LANG_SOURCE([[
-#ifdef HAVE_IA64INTRIN_H
-# include <ia64intrin.h>
-#elif HAVE_IA32INTRIN_H
-# include <ia32intrin.h>
-#endif
 #include <stdlib.h>
 #include <stdint.h> /* for uint32_t */
 
@@ -169,11 +142,6 @@ AS_IF([test "$qthread_cv_atomic_incr" = "yes"],
 	      [qt_cv_atomic_incr_works],
 		  [AS_IF([test "$1" -eq 8],
          [AC_RUN_IFELSE([AC_LANG_SOURCE([[
-#ifdef HAVE_IA64INTRIN_H
-# include <ia64intrin.h>
-#elif HAVE_IA32INTRIN_H
-# include <ia32intrin.h>
-#endif
 #include <stdlib.h>
 #include <stdint.h> /* for uint64_t */
 
@@ -204,11 +172,6 @@ return 0;
 		   [qt_cv_atomic_incr_works="no"],
 		   [qt_cv_atomic_incr_works="assuming yes"])],
          [AC_RUN_IFELSE([AC_LANG_SOURCE([[
-#ifdef HAVE_IA64INTRIN_H
-# include <ia64intrin.h>
-#elif HAVE_IA32INTRIN_H
-# include <ia32intrin.h>
-#endif
 #include <stdlib.h>
 #include <stdint.h> /* for uint32_t */
 
@@ -224,23 +187,7 @@ return 0;
 		   [qt_cv_atomic_incr_works="no"],
 		   [qt_cv_atomic_incr_works="assuming yes"])])
    ])])
-AS_IF([test "$qthread_cv_atomic_CAS" = "yes"],
-	  [AC_CACHE_CHECK([whether ia64intrin.h is required],
-	    [qthread_cv_require_ia64intrin_h],
-		[AC_LINK_IFELSE([AC_LANG_SOURCE([[
-#include <stdlib.h>
-
-int main(void)
-{
-long bar=1, old=1, new=2;
-long foo = __sync_val_compare_and_swap(&bar, old, new);
-return foo;
-}]])],
-		[qthread_cv_require_ia64intrin_h="no"],
-		[qthread_cv_require_ia64intrin_h="yes"])])])
 ])
-AS_IF([test "$qthread_cv_require_ia64intrin_h" = "yes"],
-	  [AC_DEFINE([QTHREAD_NEEDS_IA64INTRIN],[1],[if this header is necessary for builtin atomics])])
 AS_IF([test "x$qthread_cv_atomic_CASptr" = "xyes"],
       [AC_DEFINE([QTHREAD_ATOMIC_CAS_PTR],[1],
 	  	[if the compiler supports __sync_val_compare_and_swap on pointers])])
diff --git a/config/qthread_check_attributes.m4 b/config/qthread_check_attributes.m4
index acb45f28d..ebdac55c6 100644
--- a/config/qthread_check_attributes.m4
+++ b/config/qthread_check_attributes.m4
@@ -140,9 +140,6 @@ AC_CACHE_CHECK([support for __sync_synchronize],
 		     POWERPC*)
                        mdefstr='__asm__ __volatile__ ("sync":::"memory")'
 		       ;;
-		     SPARCV9_32|SPARCV9_64)
-                       mdefstr='__asm__ __volatile__ ("membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad":::"memory")'
-                       ;;
 		    *)
 				 AC_MSG_ERROR([ASM $qthread_cv_asm_arch])
                        mdefstr="$cdefstr"
diff --git a/config/qthread_check_tiletopo.m4 b/config/qthread_check_tiletopo.m4
deleted file mode 100644
index 93135e930..000000000
--- a/config/qthread_check_tiletopo.m4
+++ /dev/null
@@ -1,23 +0,0 @@
-# -*- Autoconf -*-
-#
-# Copyright (c)      2010  Sandia Corporation
-#
-
-# QTHREAD_CHECK_TILETOPO([action-if-found], [action-if-not-found])
-# ------------------------------------------------------------------------------
-AC_DEFUN([QTHREAD_CHECK_TILETOPO], [
-  qt_allgoodsofar=yes
-  AC_CHECK_HEADERS([tmc/cpus.h],[],
-  			       [qt_allgoodsofar=no
-				    break])
-  AS_IF([test "x$qt_allgoodsofar" = xyes],
-        [AC_SEARCH_LIBS([tmc_cpus_set_task_cpu],
-		               [ilib tmc],
-					   [],
-					   [qt_allgoodsofar=no])])
-  
-  AS_IF([test "x$qt_allgoodsofar" = xyes],
-	    [AC_DEFINE([QTHREAD_HAVE_TILETOPO],[1],[if the machine has a Tilera-style topology interface])
-		 $1],
-		[$2])
-])
diff --git a/config/qthread_detect_compiler_type.m4 b/config/qthread_detect_compiler_type.m4
index 01f9e0457..55f5c5d63 100644
--- a/config/qthread_detect_compiler_type.m4
+++ b/config/qthread_detect_compiler_type.m4
@@ -29,8 +29,6 @@ AC_CACHE_CHECK([what kind of C compiler $CC is],
   [AC_LANG_PUSH([C])
 
    dnl These compilers have been caught pretending to be GNU GCC
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__TILECC__],[qthread_cv_c_compiler_type=TileCC])])
    AS_IF([test "x$qthread_cv_c_compiler_type" == x],
      [_QTHREAD_CHECK_IFDEF([__INTEL_COMPILER],[qthread_cv_c_compiler_type=Intel])])
    AS_IF([test "x$qthread_cv_c_compiler_type" == x],
@@ -141,8 +139,6 @@ AC_CACHE_CHECK([what kind of C compiler $CC is],
      [_QTHREAD_CHECK_IFDEF([__HIGHC__],[qthread_cv_c_compiler_type=MetaWare])])
    AS_IF([test "x$qthread_cv_c_compiler_type" == x],
      [_QTHREAD_CHECK_IFDEF([__MWERKS__],[qthread_cv_c_compiler_type=MetrowerksCodeWarrior])])
-   AS_IF([test "x$qthread_cv_c_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__sgi],[qthread_cv_c_compiler_type=MIPSpro])])
    AS_IF([test "x$qthread_cv_c_compiler_type" == x],
      [_QTHREAD_CHECK_IFDEF([__MRC__],[qthread_cv_c_compiler_type=MPW])])
    AS_IF([test "x$qthread_cv_c_compiler_type" == x],
@@ -201,8 +197,6 @@ AC_CACHE_CHECK([what kind of C++ compiler $CXX is],
   [AC_LANG_PUSH([C++])
 
    dnl These compilers have been caught pretending to be GNU G++
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__TILECC__],[qthread_cv_cxx_compiler_type=TileCC])])
    AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
      [_QTHREAD_CHECK_IFDEF([__INTEL_COMPILER],[qthread_cv_cxx_compiler_type=Intel])])
    AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
@@ -281,8 +275,6 @@ AC_CACHE_CHECK([what kind of C++ compiler $CXX is],
      [_QTHREAD_CHECK_IFDEF([__HIGHC__],[qthread_cv_cxx_compiler_type=MetaWare])])
    AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
      [_QTHREAD_CHECK_IFDEF([__MWERKS__],[qthread_cv_cxx_compiler_type=MetrowerksCodeWarrior])])
-   AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
-     [_QTHREAD_CHECK_IFDEF([__sgi],[qthread_cv_cxx_compiler_type=MIPSpro])])
    AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
      [_QTHREAD_CHECK_IFDEF([__MRC__],[qthread_cv_cxx_compiler_type=MPW])])
    AS_IF([test "x$qthread_cv_cxx_compiler_type" == x],
diff --git a/config/qthread_ia_cacheline.m4 b/config/qthread_ia_cacheline.m4
index 2a73e7342..25d3bcfea 100644
--- a/config/qthread_ia_cacheline.m4
+++ b/config/qthread_ia_cacheline.m4
@@ -9,22 +9,14 @@ AC_CACHE_CHECK([for x86 cache line size],
 #define QTHREAD_UNSUPPORTED 0
 #define QTHREAD_IA32        1
 #define QTHREAD_AMD64       2
-#define QTHREAD_IA64        3
-#define QTHREAD_ALPHA       4
-#define QTHREAD_MIPS        5
 #define QTHREAD_POWERPC32   6
 #define QTHREAD_POWERPC64   7
-#define QTHREAD_SPARCV9_32  8
-#define QTHREAD_SPARCV9_64  9
-#define QTHREAD_TILEPRO	    10
-#define QTHREAD_TILEGX	    11
 #define QTHREAD_ARM         12
 #define QTHREAD_ARMV8_A64   13
 ],[
 int op = 1, eax, ebx, ecx, edx, cachelinesize;
 FILE *f;
-#if QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32 || \
-    QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64
+#if QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32
 # ifdef __PIC__
 __asm__("push %%ebx\n\t"
 "cpuid\n\t"
@@ -45,8 +37,7 @@ __asm__("cpuid"
 cachelinesize = 8*((ebx>>8)&0xff);
 if (cachelinesize == 0) {
 	op = 2;
-#if QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32 || \
-    QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64
+#if QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32
 __asm__("push %%ebx\n\t"
 "cpuid\n\t"
 "mov %%ebx, %1\n\t"
diff --git a/configure.ac b/configure.ac
index 0b02abc9f..d2c56736d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -294,11 +294,11 @@ AC_ARG_WITH([topology],
             [AS_HELP_STRING([--with-topology=[[topologylib]]],
                             [specify which topology interface to use. Supported
                              interfaces include no, hwloc, hwloc_v2, binders, lgrp, libnuma,
-                             libnumaV2, mach, plpa, sys, and, tilera.])],
+                             libnumaV2, mach, plpa, and sys.])],
             [AS_IF([test "x$with_topology" = xyes],
                    [with_topology=none_specified])
              case "$with_topology" in
-                 hwloc|binders|hwloc_v2|lgrp|libnuma|libnumaV2|mach|no|plpa|sys|tilera) ;;
+                 hwloc|binders|hwloc_v2|lgrp|libnuma|libnumaV2|mach|no|plpa|sys) ;;
                  none_specified) ;;
                  *)
                  AC_MSG_ERROR([Unsupported topology library ($with_topology)])
@@ -327,8 +327,7 @@ AC_ARG_ENABLE([condwait-queue],
                               [force the use of a pthread condwait queue,
                                instead of a spin-based queue for inter-thread
                                communication (important if spinning shepherds
-                               interfere with each other). Default enabled on
-                               sparc/solaris, but default disabled elsewhere.])])
+                               interfere with each other). Default disabled.])])
 
 AC_ARG_ENABLE([third-party-benchmarks],
               [AS_HELP_STRING([--enable-third-party-benchmarks],
@@ -402,16 +401,9 @@ dnl Test for this *before* AC_PROG_CC, to avoid getting the default CFLAGS
 dnl However, that means we don't know a ton about this machine or this compiler
 dnl yet, so we may have to reset it later.
 AS_IF([test "x$enable_debugging" = xyes],
-      [case "$build_cpu" in dnl (
-         sparc)
-           CFLAGS="$CFLAGS -O0 -g3"
-           CXXFLAGS="$CXXFLAGS -O0 -g3"
-           ;;
-         *)
-           CFLAGS="$CFLAGS -O0 -g"
-           CXXFLAGS="$CXXFLAGS -O0 -g"
-           ;;
-       esac])
+      [CFLAGS="$CFLAGS -O0 -g"
+       CXXFLAGS="$CXXFLAGS -O0 -g"
+      ])
 AC_PROG_CC
 dnl We use system extensions.  This includes setting _GNU_SOURCE
 AC_USE_SYSTEM_EXTENSIONS
@@ -495,7 +487,7 @@ AS_IF([test "x$enable_picky" = xyes],
 
 QTHREAD_CHECK_ASSEMBLY([have_assembly=1], [have_assembly=0])
 case "$qthread_cv_asm_arch" in
-    POWERPC32|SPARCV9_32)
+    POWERPC32)
     compile_compat_atomic=yes
     ;;
 esac
@@ -735,14 +727,7 @@ AS_IF([test "x$enable_oversubscription" = "xyes"],
        AC_CHECK_FUNCS([sched_yield])])
 
 AS_IF([test "x$enable_condwait_queue" = "x"],
-      [case "$host" in
-         sparc-sun-solaris*)
-           enable_condwait_queue="yes"
-           ;;
-         *)
-           enable_condwait_queue="no"
-           ;;
-       esac])
+      [enable_condwait_queue="no"])
 AS_IF([test "x$enable_condwait_queue" = "xyes"],
       [AC_DEFINE([QTHREAD_CONDWAIT_BLOCKING_QUEUE], [1], [use pthread-based condwait for lf queue])])
 
@@ -989,10 +974,6 @@ AS_IF([test "x$qthread_topo" != xno],
                                     [qthread_topo=libnumaV2],
                                     [AS_IF([test "x$qthread_topo" != xno],
                                            [AC_MSG_ERROR([Specified topology library ($qthread_topo) does not work.])])])])
-       AS_IF([test "x$qthread_topo" = xno -o "x$qthread_topo" = xtilera],
-             [QTHREAD_CHECK_TILETOPO([qthread_topo=tilera],
-                                     [AS_IF([test "x$qthread_topo" != xno],
-                                            [AC_MSG_ERROR([Specified topology library ($qthread_topo) does not work.])])])])
        # Third, check any others.
        AS_IF([test "x$qthread_topo" = xno -o "x$qthread_topo" = xmach],
              [QTHREAD_CHECK_MACHTOPO([qthread_topo=mach],
diff --git a/include/Makefile.am b/include/Makefile.am
index fb27e9547..50929a7a3 100644
--- a/include/Makefile.am
+++ b/include/Makefile.am
@@ -10,7 +10,6 @@ noinst_HEADERS = \
 	fastcontext/taskimpl.h \
 	fastcontext/power-ucontext.h \
 	fastcontext/386-ucontext.h \
-	fastcontext/tile-ucontext.h \
 	net/net.h \
 	qthread_innards.h \
 	qloop_innards.h \
diff --git a/include/fastcontext/taskimpl.h b/include/fastcontext/taskimpl.h
index 077fc6c26..e663a8d7d 100644
--- a/include/fastcontext/taskimpl.h
+++ b/include/fastcontext/taskimpl.h
@@ -7,16 +7,7 @@
 
 #include "qthread/common.h"
 
-#if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO) ||                             \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEGX))
-#ifdef HAVE_STDARG_H
-#include <stdarg.h>
-#endif
-#include <stddef.h>
-#define NEEDTILEMAKECONTEXT
-#define NEEDSWAPCONTEXT
-#include "tile-ucontext.h"
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
+#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
 #define NEEDX86MAKECONTEXT
 #define NEEDSWAPCONTEXT
 #include "386-ucontext.h"
diff --git a/include/fastcontext/tile-ucontext.h b/include/fastcontext/tile-ucontext.h
deleted file mode 100644
index c0a8408b4..000000000
--- a/include/fastcontext/tile-ucontext.h
+++ /dev/null
@@ -1,52 +0,0 @@
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stddef.h> /* for size_t, per C89 */
-
-#include "qt_visibility.h"
-
-#define setcontext(u) qt_setmctxt(&(u)->mc)
-#define getcontext(u) qt_getmctxt(&(u)->mc)
-typedef struct mctxt mctxt_t;
-typedef struct uctxt uctxt_t;
-
-/*
- * This struct defines the way the registers are stored on the stack during a
- * system call/exception.  It should be a multiple of 8 bytes to preserve
- * normal stack alignment rules.
- *
- */
-struct mctxt {
-  /* Saved main processor registers; 56..63 are special. */
-  /* tp, sp, and lr must immediately follow regs[] for aliasing. */
-  unsigned long regs[23]; /* callee saves r30-r52 */
-  unsigned long tp;       /* thread-local data pointer (23*4) */
-  unsigned long sp;       /* stack pointer (grows DOWNWARD) (23*4)+4 */
-  unsigned long lr; /* aka link register (where to go when returning from a
-                     * function) (23*4)+(2*4) */
-
-  /* Saved special registers. */
-  unsigned long pc; /* (23*4)+(3*4) */
-  unsigned long r0; /* (23*4)+(4*4) */
-  // unsigned long ex1;      /* stored in EX_CONTEXT_1_1 (PL and ICS bit) */
-  unsigned long arg0; /* (23*4)+(5*4) only used for first function invocation */
-  unsigned long first; /* (23*4)+(6*4) */
-};
-
-struct uctxt {
-  struct {
-    void *ss_sp;
-    size_t ss_size;
-  } uc_stack;
-
-  // sigset_t uc_sigmask;
-  mctxt_t mc;
-  struct uctxt *uc_link; /* unused */
-};
-
-int INTERNAL qt_swapctxt(uctxt_t *, uctxt_t *);
-void INTERNAL qt_makectxt(uctxt_t *, void (*)(void), int, ...);
-int INTERNAL qt_getmctxt(mctxt_t *);
-void INTERNAL qt_setmctxt(mctxt_t *);
-/* vim:set expandtab: */
diff --git a/include/qt_atomics.h b/include/qt_atomics.h
index 70c71b73d..8b55ff864 100644
--- a/include/qt_atomics.h
+++ b/include/qt_atomics.h
@@ -4,14 +4,6 @@
 #include <stdatomic.h>
 #include <sys/time.h>
 
-#ifdef QTHREAD_NEEDS_IA64INTRIN
-#ifdef HAVE_IA64INTRIN_H
-#include <ia64intrin.h>
-#elif defined(HAVE_IA32INTRIN_H)
-#include <ia32intrin.h>
-#endif
-#endif
-
 #include <qthread/common.h>
 #include <qthread/qthread.h>
 
@@ -429,41 +421,6 @@ qt_cas(void **const ptr, void *const oldv, void *const newv) { /*{{{*/
                        : "cc", "memory");
   return result;
 
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
-  void *nv = newv;
-  __asm__ __volatile__("cas [%1], %2, %0"
-                       : "=&r"(nv)
-                       : "r"(ptr),
-                         "r"(oldv)
-#if !defined(__SUNPRO_C) && !defined(__SUNPRO_CC)
-                           ,
-                         "0"(nv)
-#endif
-                       : "cc", "memory");
-  return nv;
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)
-  void *nv = newv;
-  __asm__ __volatile__("casx [%1], %2, %0"
-                       : "=&r"(nv)
-                       : "r"(ptr),
-                         "r"(oldv)
-#if !defined(__SUNPRO_C) && !defined(__SUNPRO_CC)
-                           ,
-                         "0"(nv)
-#endif
-                       : "cc", "memory");
-  return nv;
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
-  void **retval;
-  __asm__ __volatile__("mov ar.ccv=%0;;" : : "rO"(oldv));
-  __asm__ __volatile__("cmpxchg4.acq %0=[%1],%2,ar.ccv"
-                       : "=r"(retval)
-                       : "r"(ptr), "r"(newv)
-                       : "memory");
-  return retval;
-
 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||                              \
   (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
   void **retval;
@@ -624,110 +581,6 @@ static QINLINE aligned_t qthread_internal_incr_mod_(
                : "r"(operand), "r"(max)
                : "cc", "memory");
 
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) ||                         \
-  ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) &&                            \
-   (QTHREAD_SIZEOF_ALIGNED_T == 4))
-
-  uint32_t oldval, newval;
-
-  /* newval = *operand; */
-  do {
-    /* you *should* be able to move the *operand reference outside the
-     * loop and use the output of the CAS (namely, newval) instead.
-     * However, there seems to be a bug in gcc 4.0.4 wherein, if you do
-     * that, the while() comparison uses a temporary register value for
-     * newval that has nothing to do with the output of the CAS
-     * instruction. (See how obviously wrong that is?) For some reason that
-     * I haven't been able to figure out, moving the *operand reference
-     * inside the loop fixes that problem, even at -O2 optimization. */
-    retval = oldval = *operand;
-    newval = oldval + 1;
-    newval *= (newval < max);
-
-    /* if (*operand == oldval)
-     * swap(newval, *operand)
-     * else
-     * newval = *operand
-     */
-    __asm__ __volatile__("cas [%1] , %2, %0" /* */
-                         : "=&r"(newval)
-                         : "r"(operand), "r"(oldval), "0"(newval)
-                         : "memory");
-  } while (oldval != newval);
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)
-  aligned_t oldval, newval;
-
-  /* newval = *operand; */
-  do {
-    /* you *should* be able to move the *operand reference outside the
-     * loop and use the output of the CAS (namely, newval) instead.
-     * However, there seems to be a bug in gcc 4.0.4 wherein, if you do
-     * that, the while() comparison uses a temporary register value for
-     * newval that has nothing to do with the output of the CAS
-     * instruction. (See how obviously wrong that is?) For some reason that
-     * I haven't been able to figure out, moving the *operand reference
-     * inside the loop fixes that problem, even at -O2 optimization. */
-    retval = oldval = *operand;
-    newval = oldval + 1;
-    newval *= (newval < max);
-
-    /* if (*operand == oldval)
-     * swap(newval, *operand)
-     * else
-     * newval = *operand
-     */
-    __asm__ __volatile__("casx [%1] , %2, %0"
-                         : "=&r"(newval)
-                         : "r"(operand),
-                           "r"(oldval)
-#if !defined(__SUNPRO_CC) && !defined(__SUNPRO_C)
-                             ,
-                           "0"(newval)
-#endif
-                         : "memory");
-  } while (oldval != newval);
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
-#if QTHREAD_SIZEOF_ALIGNED_T == 8
-  int64_t res, old, new;
-
-  do {
-    old = *operand; /* atomic, because operand is aligned */
-    new = old + 1;
-    new *= (new < max);
-    asm volatile("mov ar.ccv=%0;;"
-                 : /* no output */
-                 : "rO"(old));
-
-    /* separate so the compiler can insert its junk */
-    asm volatile("cmpxchg8.acq %0=[%1],%2,ar.ccv"
-                 : "=r"(res)
-                 : "r"(operand), "r"(new)
-                 : "memory");
-  } while (res != old); /* if res==old, new is out of date */
-  retval = old;
-
-#else  /* 32-bit aligned_t */
-  int32_t res, old, new;
-
-  do {
-    old = *operand; /* atomic, because operand is aligned */
-    new = old + 1;
-    new *= (new < max);
-    asm volatile("mov ar.ccv=%0;;"
-                 : /* no output */
-                 : "rO"(old));
-
-    /* separate so the compiler can insert its junk */
-    asm volatile("cmpxchg4.acq %0=[%1],%2,ar.ccv"
-                 : "=r"(res)
-                 : "r"(operand), "r"(new)
-                 : "memory");
-  } while (res != old); /* if res==old, new is out of date */
-  retval = old;
-#endif /* if QTHREAD_SIZEOF_ALIGNED_T == 8 */
-
 #elif ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) &&                              \
        (QTHREAD_SIZEOF_ALIGNED_T == 4)) ||                                     \
   ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) &&                                 \
diff --git a/include/qthread/common.h.in b/include/qthread/common.h.in
index 2ea1bbede..ed26ef4f6 100644
--- a/include/qthread/common.h.in
+++ b/include/qthread/common.h.in
@@ -59,12 +59,6 @@
 /* builtin incr supported */
 #undef QTHREAD_ATOMIC_INCR
 
-/* ia64intrin.h available */
-#undef HAVE_IA64INTRIN_H
-
-/* if ia64intrin is needed */
-#undef QTHREAD_NEEDS_IA64INTRIN
-
 /* specifying data alignment is allowed */
 #undef QTHREAD_ALIGNEDDATA_ALLOWED
 
@@ -113,15 +107,8 @@
 #define QTHREAD_UNSUPPORTED 0
 #define QTHREAD_IA32        1
 #define QTHREAD_AMD64       2
-#define QTHREAD_IA64        3
-#define QTHREAD_ALPHA       4
-#define QTHREAD_MIPS        5
 #define QTHREAD_POWERPC32   6
 #define QTHREAD_POWERPC64   7
-#define QTHREAD_SPARCV9_32  8
-#define QTHREAD_SPARCV9_64  9
-#define QTHREAD_TILEPRO	    10
-#define QTHREAD_TILEGX	    11
 #define QTHREAD_ARM         12
 #define QTHREAD_ARMV8_A64   13
 
diff --git a/include/qthread/qthread.h b/include/qthread/qthread.h
index 4152d577a..2464d4ec7 100644
--- a/include/qthread/qthread.h
+++ b/include/qthread/qthread.h
@@ -22,14 +22,6 @@ using std::memory_order_relaxed;
 #include <stdio.h> /* for fprintf() */
 #endif
 
-#ifdef QTHREAD_NEEDS_IA64INTRIN
-#ifdef HAVE_IA64INTRIN_H
-#include <ia64intrin.h>
-#elif defined(HAVE_IA32INTRIN_H)
-#include <ia32intrin.h>
-#endif
-#endif
-
 #include "common.h"
 #include "qthread-int.h"
 
@@ -676,9 +668,7 @@ int qthread_spinlocks_destroy(qthread_spinlock_t *a);
 int qthread_lock_init(aligned_t const *a, bool const is_recursive);
 int qthread_lock_destroy(aligned_t *a);
 
-#if defined(QTHREAD_MUTEX_INCREMENT) ||                                        \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) ||                              \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
+#if defined(QTHREAD_MUTEX_INCREMENT) || QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32
 uint32_t qthread_incr32_(uint32_t *, int32_t);
 uint64_t qthread_incr64_(uint64_t *, int64_t);
 float qthread_fincr_(float *, float);
@@ -699,8 +689,7 @@ static QINLINE float qthread_fincr(float *operand, float incr) { /*{{{ */
 #if defined(QTHREAD_MUTEX_INCREMENT)
   return qthread_fincr_(operand, incr);
 
-#elif QTHREAD_ATOMIC_CAS && (!defined(HAVE_GCC_INLINE_ASSEMBLY) ||             \
-                             (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEGX))
+#elif QTHREAD_ATOMIC_CAS && !defined(HAVE_GCC_INLINE_ASSEMBLY)
   union {
     float f;
     uint32_t i;
@@ -752,51 +741,6 @@ static QINLINE float qthread_fincr(float *operand, float incr) { /*{{{ */
 
   return retval.f;
 
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) ||                         \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
-  union {
-    float f;
-    uint32_t i;
-  } oldval, newval;
-
-  /* newval.f = *operand; */
-  do {
-    /* you *should* be able to move the *operand reference outside the
-     * loop and use the output of the CAS (namely, newval) instead.
-     * However, there seems to be a bug in gcc 4.0.4 wherein, if you do
-     * that, the while() comparison uses a temporary register value for
-     * newval that has nothing to do with the output of the CAS
-     * instruction. (See how obviously wrong that is?) For some reason that
-     * I haven't been able to figure out, moving the *operand reference
-     * inside the loop fixes that problem, even at -O2 optimization. */
-    oldval.f = *(float volatile *)operand;
-    newval.f = oldval.f + incr;
-    __asm__ __volatile__(
-      "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
-      "cas [%1], %2, %0"
-      : "+r"(newval.i)
-      : "r"(operand), "r"(oldval.i)
-      : "cc", "memory");
-  } while (oldval.i != newval.i);
-  return oldval.f;
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
-  union {
-    float f;
-    uint32_t i;
-  } oldval, newval, res;
-
-  do {
-    oldval.f = *(float volatile *)operand;
-    newval.f = oldval.f + incr;
-    __asm__ __volatile__("mov ar.ccv=%0;;" ::"rO"(oldval.i));
-    __asm__ __volatile__("cmpxchg4.acq %0=[%1],%2,ar.ccv"
-                         : "=r"(res.i)
-                         : "r"(operand), "r"(newval.i)
-                         : "memory");
-  } while (res.i != oldval.i); /* if res!=old, the calc is out of date */
-  return oldval.f;
-
 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||                              \
   (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
   union {
@@ -860,9 +804,7 @@ static QINLINE double qthread_dincr(double *operand, double incr) { /*{{{ */
   (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32)
   return qthread_dincr_(operand, incr);
 
-#elif QTHREAD_ATOMIC_CAS && (!defined(HAVE_GCC_INLINE_ASSEMBLY) ||             \
-                             (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEGX) ||      \
-                             (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32))
+#elif QTHREAD_ATOMIC_CAS && !defined(HAVE_GCC_INLINE_ASSEMBLY)
   union {
     uint64_t i;
     double d;
@@ -916,82 +858,6 @@ static QINLINE double qthread_dincr(double *operand, double incr) { /*{{{ */
 
   return retval.d;
 
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
-  union {
-    uint64_t i;
-    double d;
-  } oldval, newval;
-
-  newval.d = *(double volatile *)operand;
-  do {
-    /* this allows the compiler to be as flexible as possible with register
-     * assignments */
-    uint64_t tmp1;
-    uint64_t tmp2;
-
-    oldval.d = newval.d;
-    newval.d += incr;
-    __asm__ __volatile__(
-      "ldd %0, %1\n\t"
-      "ldx %4, %2\n\t"
-      "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
-      "sllx %1, 0x20, %1\n\t"
-      "sllx %2, 0x20, %2\n\t"
-      "casx [%3], %2, %1\n\t"
-      "srlx %1, 0x20, %1\n\t"
-      "std %1, %0"
-      /* h means 64-BIT REGISTER
-       * (probably unnecessary, but why take chances?) */
-      : "=m"(newval.i), "=h"(tmp1), "=h"(tmp2)
-      : "r"(operand), "m"(oldval.i)
-      : "memory");
-  } while (oldval.i != newval.i);
-  return oldval.d;
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)
-  union {
-    uint64_t i;
-    double d;
-  } oldval, newval;
-
-  /*newval.d = *operand; */
-  do {
-    /* you *should* be able to move the *operand reference outside the
-     * loop and use the output of the CAS (namely, newval) instead.
-     * However, there seems to be a bug in gcc 4.0.4 wherein, if you do
-     * that, the while() comparison uses a temporary register value for
-     * newval that has nothing to do with the output of the CAS
-     * instruction. (See how obviously wrong that is?) For some reason that
-     * I haven't been able to figure out, moving the *operand reference
-     * inside the loop fixes that problem, even at -O2 optimization. */
-    oldval.d = *(double volatile *)operand;
-    newval.d = oldval.d + incr;
-    __asm__ __volatile__(
-      "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
-      "casx [%1], %2, %0"
-      : "+r"(newval.i)
-      : "r"(operand), "r"(oldval.i)
-      : "memory");
-  } while (oldval.d != newval.d);
-  return oldval.d;
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
-  union {
-    uint64_t i;
-    double d;
-  } oldval, newval, res;
-
-  do {
-    oldval.d = *(double volatile *)operand;
-    newval.d = oldval.d + incr;
-    __asm__ __volatile__("mov ar.ccv=%0;;" ::"rO"(oldval.i));
-    __asm__ __volatile__("cmpxchg8.acq %0=[%1],%2,ar.ccv"
-                         : "=r"(res.i)
-                         : "r"(operand), "r"(newval.i)
-                         : "memory");
-  } while (res.i != oldval.i); /* if res!=old, the calc is out of date */
-  return oldval.d;
-
 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64)
   union {
     double d;
@@ -1172,57 +1038,6 @@ static QINLINE uint32_t qthread_incr32(uint32_t *operand,
 
   return retval;
 
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) ||                         \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)
-  uint32_t oldval, newval;
-
-  /* newval = *operand; */
-  do {
-    /* you *should* be able to move the *operand reference outside the
-     * loop and use the output of the CAS (namely, newval) instead.
-     * However, there seems to be a bug in gcc 4.0.4 wherein, if you do
-     * that, the while() comparison uses a temporary register value for
-     * newval that has nothing to do with the output of the CAS
-     * instruction. (See how obviously wrong that is?) For some reason that
-     * I haven't been able to figure out, moving the *operand reference
-     * inside the loop fixes that problem, even at -O2 optimization. */
-    oldval = *operand;
-    newval = oldval + incr;
-    /* newval always gets the value of *operand; if it's
-     * the same as oldval, then the swap was successful */
-    __asm__ __volatile__(
-      "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
-      "cas [%1] , %2, %0"
-      : "+r"(newval)
-      : "r"(operand), "r"(oldval)
-      : "cc", "memory");
-  } while (oldval != newval);
-  return oldval;
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
-  uint32_t res;
-
-  if (incr == 1) {
-    asm volatile("fetchadd4.rel %0=[%1],1" : "=r"(res) : "r"(operand));
-  } else {
-    uint32_t old, newval;
-
-    do {
-      old = *operand; /* atomic, because operand is aligned */
-      newval = old + incr;
-      asm volatile("mov ar.ccv=%0;;"
-                   : /* no output */
-                   : "rO"(old));
-
-      /* separate so the compiler can insert its junk */
-      asm volatile("cmpxchg4.acq %0=[%1],%2,ar.ccv"
-                   : "=r"(res)
-                   : "r"(operand), "r"(newval)
-                   : "memory");
-    } while (res != old); /* if res!=old, the calc is out of date */
-  }
-  return res;
-
 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) ||                               \
   (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64)
 
@@ -1245,8 +1060,7 @@ static QINLINE uint32_t qthread_incr32(uint32_t *operand,
 static QINLINE uint64_t qthread_incr64(uint64_t *operand,
                                        uint64_t incr) { /*{{{ */
 #if defined(QTHREAD_MUTEX_INCREMENT) ||                                        \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) ||                              \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
+  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32)
   return qthread_incr64_(operand, incr);
 
 #elif defined(QTHREAD_ATOMIC_INCR)
@@ -1263,8 +1077,7 @@ static QINLINE uint64_t qthread_incr64(uint64_t *operand,
 
 #elif !defined(HAVE_GCC_INLINE_ASSEMBLY)
 #error Qthreads requires either mutex increments, inline assembly, or compiler atomic builtins
-#else // if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH ==
-      // QTHREAD_POWERPC32) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
+#else // if defined(QTHREAD_MUTEX_INCREMENT) || QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32
 #if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64)
   uint64_t retval;
   uint64_t incrd = incrd; /* no initializing */
@@ -1280,90 +1093,6 @@ static QINLINE uint64_t qthread_incr64(uint64_t *operand,
 
   return retval;
 
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
-  uint64_t oldval, newval = *operand;
-
-  do {
-    /* this allows the compiler to be as flexible as possible with register
-     * assignments */
-    uint64_t tmp1 = tmp1;
-    uint64_t tmp2 = tmp2;
-
-    oldval = newval;
-    newval += incr;
-    /* newval always gets the value of *operand; if it's
-     * the same as oldval, then the swap was successful */
-    __asm__ __volatile__(
-      "ldx %0, %1\n\t"
-      "ldx %4, %2\n\t"
-      "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
-      "casx [%3] , %2, %1\n\t"
-      "stx %1, %0"
-      /* h means 64-BIT REGISTER
-       * (probably unnecessary, but why take chances?) */
-      : "=m"(newval), "=&h"(tmp1), "=&h"(tmp2)
-      : "r"(operand), "m"(oldval)
-      : "cc", "memory");
-  } while (oldval != newval);
-  return oldval;
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)
-  uint64_t oldval, newval;
-
-#ifdef QTHREAD_ATOMIC_CAS
-  newval = *operand;
-  do {
-    oldval = newval;
-    newval = __sync_val_compare_and_swap(operand, oldval, oldval + incr);
-  } while (oldval != newval);
-#else
-  do {
-    /* you *should* be able to move the *operand reference outside the
-     * loop and use the output of the CAS (namely, newval) instead.
-     * However, there seems to be a bug in gcc 4.0.4 wherein, if you do
-     * that, the while() comparison uses a temporary register value for
-     * newval that has nothing to do with the output of the CAS
-     * instruction. (See how obviously wrong that is?) For some reason that
-     * I haven't been able to figure out, moving the *operand reference
-     * inside the loop fixes that problem, even at -O2 optimization. */
-    oldval = *operand;
-    newval = oldval + incr;
-    /* newval always gets the value of *operand; if it's
-     * the same as oldval, then the swap was successful */
-    __asm__ __volatile__(
-      "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
-      "casx [%1] , %2, %0"
-      : "+r"(newval)
-      : "r"(operand), "r"(oldval)
-      : "cc", "memory");
-  } while (oldval != newval);
-#endif // ifdef QTHREAD_ATOMIC_CAS
-  return oldval;
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
-  uint64_t res;
-
-  if (incr == 1) {
-    asm volatile("fetchadd8.rel %0=%1,1" : "=r"(res) : "m"(*operand));
-  } else {
-    uint64_t old, newval;
-
-    do {
-      old = *operand; /* atomic, because operand is aligned */
-      newval = old + incr;
-      asm volatile("mov ar.ccv=%0;;"
-                   : /* no output */
-                   : "rO"(old));
-
-      /* separate so the compiler can insert its junk */
-      asm volatile("cmpxchg8.acq %0=[%1],%2,ar.ccv"
-                   : "=r"(res)
-                   : "r"(operand), "r"(newval)
-                   : "memory");
-    } while (res != old); /* if res!=old, the calc is out of date */
-  }
-  return res;
-
 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
   union {
     uint64_t i;
@@ -1451,8 +1180,7 @@ static QINLINE uint64_t qthread_incr64(uint64_t *operand,
 #else // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64)
 #error Unimplemented assembly architecture for qthread_incr64
 #endif // if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64)
-#endif // if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH ==
-       // QTHREAD_POWERPC32) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
+#endif // if defined(QTHREAD_MUTEX_INCREMENT) || QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32
 } /*}}} */
 
 static QINLINE int64_t qthread_incr_xx(void *addr,
@@ -1495,25 +1223,6 @@ static QINLINE uint32_t qthread_cas32(uint32_t *operand,
                        : "cc", "memory");
   return result;
 
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) ||                         \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)
-  uint32_t newv = newval;
-  __asm__ __volatile__("membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
-                       "cas [%1], %2, %0"
-                       : "+r"(newv)
-                       : "r"(operand), "r"(oldval)
-                       : "cc", "memory");
-  return newv;
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
-  uint32_t retval;
-  __asm__ __volatile__("mov ar.ccv=%0;;" : : "rO"(oldval));
-  __asm__ __volatile__("cmpxchg4.acq %0=[%1],%2,ar.ccv"
-                       : "=r"(retval)
-                       : "r"(operand), "r"(newval)
-                       : "memory");
-  return retval;
-
 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||                              \
   (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
   uint32_t retval;
@@ -1559,40 +1268,6 @@ static QINLINE uint64_t qthread_cas64(uint64_t *operand,
                        : "cc", "memory");
   return result;
 
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
-  uint64_t tmp1 = tmp1;
-  uint64_t tmp2 = tmp2;
-  uint64_t newv = newval;
-  __asm__ __volatile__("ldx %0, %1\n\t"
-                       "ldx %4, %2\n\t"
-                       "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
-                       "casx [%3], %2, %1\n\t"
-                       "stx %1, %0"
-                       /* h means 64-BIT REGISTER
-                        * (probably unneecessary, but why take chances?) */
-                       : "+m"(newv), "=&h"(tmp1), "=&h"(tmp2)
-                       : "r"(operand), "m"(oldval)
-                       : "cc", "memory");
-  return newv;
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)
-  uint64_t newv = newval;
-  __asm__ __volatile__("membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad\n\t"
-                       "casx [%1], %2, %0"
-                       : "+r"(newv)
-                       : "r"(operand), "r"(oldval)
-                       : "cc", "memory");
-  return newv;
-
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
-  uint32_t retval;
-  __asm__ __volatile__("mov ar.ccv=%0;;" : : "rO"(oldval));
-  __asm__ __volatile__("cmpxchg8.acq %0=[%1],%2,ar.ccv"
-                       : "=r"(retval)
-                       : "r"(operand), "r"(newval)
-                       : "memory");
-  return retval;
-
 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
   union {
     uint64_t i;
diff --git a/include/qthread_innards.h b/include/qthread_innards.h
index 7c4f18cfd..4c819e41b 100644
--- a/include/qthread_innards.h
+++ b/include/qthread_innards.h
@@ -109,9 +109,7 @@ typedef struct qlib_s {
   aligned_t sched_shepherd;
   QTHREAD_FASTLOCK_TYPE sched_shepherd_lock;
 
-#if defined(QTHREAD_MUTEX_INCREMENT) ||                                        \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) ||                              \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
+#if defined(QTHREAD_MUTEX_INCREMENT) || QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32
   QTHREAD_FASTLOCK_TYPE *atomic_locks;
 #ifdef QTHREAD_COUNT_THREADS
   aligned_t *atomic_stripes;
diff --git a/src/Makefile.am b/src/Makefile.am
index da882059f..982cdb7f1 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -118,7 +118,6 @@ EXTRA_DIST += \
 			 affinity/libnuma.c \
 			 affinity/libnumaV2.c \
 			 affinity/mach.c \
-			 affinity/tilera.c \
 			 affinity/plpa.c \
 			 affinity/lgrp.c \
 			 affinity/shepcomp.h
diff --git a/src/affinity/tilera.c b/src/affinity/tilera.c
deleted file mode 100644
index f1459271c..000000000
--- a/src/affinity/tilera.c
+++ /dev/null
@@ -1,99 +0,0 @@
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#ifdef HAVE_TMC_CPUS_H
-#include <tmc/cpus.h>
-#endif
-
-#include <stdio.h>
-
-#include "qt_affinity.h"
-#include "qt_asserts.h"
-#include "qt_debug.h" // for MALLOC()
-#include "shepcomp.h"
-#include "shufflesheps.h"
-
-qthread_shepherd_id_t guess_num_shepherds(void);
-qthread_worker_id_t
-guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds);
-
-void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds,
-                               qthread_worker_id_t *nbworkers,
-                               size_t *hw_par) { /*{{{ */
-  if (*nbshepherds == 0) {
-    *nbshepherds = guess_num_shepherds();
-    if (*nbshepherds <= 0) { *nbshepherds = 1; }
-  }
-  if (*nbworkers == 0) {
-    *nbworkers = guess_num_workers_per_shep(*nbshepherds);
-    if (*nbworkers <= 0) { *nbworkers = 1; }
-  }
-} /*}}} */
-
-qthread_shepherd_id_t INTERNAL guess_num_shepherds(void) { /*{{{ */
-  cpu_set_t online_cpus;
-
-  qassert(tmc_cpus_get_online_cpus(&online_cpus), 0);
-  return tmc_cpus_count(&online_cpus);
-} /*}}} */
-
-void INTERNAL qt_affinity_set(qthread_worker_t *me,
-                              unsigned int Q_UNUSED(nw)) { /*{{{ */
-  if (tmc_cpus_set_my_cpu(me->packed_worker_id) < 0) {
-    perror("tmc_cpus_set_my_affinity() failed");
-    fprintf(stderr, "\tnode = %i\n", (int)me->packed_worker_id);
-  }
-} /*}}} */
-
-qthread_worker_id_t INTERNAL
-guess_num_workers_per_shep(qthread_shepherd_id_t nshepherds) { /*{{{ */
-  return 1;
-} /*}}} */
-
-int INTERNAL qt_affinity_gendists(qthread_shepherd_t *sheps,
-                                  qthread_shepherd_id_t nshepherds) { /*{{{ */
-  cpu_set_t online_cpus;
-  unsigned int *cpu_array;
-  size_t cpu_count, offset;
-
-#warning The logic for node assignment is completely wrong for multithreaded shepherds
-  qassert(tmc_cpus_get_online_cpus(&online_cpus), 0);
-  cpu_count = tmc_cpus_count(&online_cpus);
-  assert(cpu_count > 0);
-  /* assign nodes */
-  cpu_array = MALLOC(sizeof(unsigned int) * cpu_count);
-  assert(cpu_array != NULL);
-  qassert(tmc_cpus_to_array(&online_cpus, cpu_array, cpu_count), cpu_count);
-  offset = 0;
-  for (qthread_shepherd_id_t i = 0; i < nshepherds; i++) {
-    sheps[i].node = cpu_array[offset];
-    offset++;
-    offset *= (offset < cpu_count);
-  }
-  FREE(cpu_array, sizeof(unsigned int) * cpu_count);
-  for (qthread_shepherd_id_t i = 0; i < nshepherds; i++) {
-    size_t j, k;
-    unsigned int ix, iy;
-    sheps[i].shep_dists = qt_calloc(nshepherds, sizeof(unsigned int));
-    sheps[i].sorted_sheplist =
-      qt_calloc(nshepherds - 1, sizeof(qthread_shepherd_id_t));
-    assert(sheps[i].shep_dists);
-    assert(sheps[i].sorted_sheplist);
-    tmc_cpus_grid_cpu_to_tile(sheps[i].node, &ix, &iy);
-    for (j = 0; j < nshepherds; j++) {
-      unsigned int jx, jy;
-      tmc_cpus_grid_cpu_to_tile(sheps[j].node, &jx, &jy);
-      sheps[i].shep_dists[j] = abs((int)ix - (int)jx) + abs((int)iy - (int)jy);
-    }
-    for (j = k = 0; j < nshepherds; j++) {
-      if (j != i) { sheps[i].sorted_sheplist[k++] = j; }
-    }
-    if (nshepherds > 1) {
-      sort_sheps(sheps[i].shep_dists, sheps[i].sorted_sheplist, nshepherds);
-    }
-  }
-  return QTHREAD_SUCCESS;
-} /*}}} */
-
-/* vim:set expandtab: */
diff --git a/src/cacheline.c b/src/cacheline.c
index f05faac61..4c40ed940 100644
--- a/src/cacheline.c
+++ b/src/cacheline.c
@@ -205,18 +205,6 @@ static void figure_out_cacheline_size(void) { /*{{{ */
   } else {
     cacheline_bytes = 128; // G5
   }
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32) ||                         \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)
-  cacheline_bytes = 128;
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
-#ifdef DEBUG_CPUID
-  printf("IA64 does not support CPUID; but is usually 128\n");
-#endif
-  cacheline_bytes = 128; // Itanium L2/L3 are 128, L1 is 64
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO)
-  cacheline_bytes = 64;
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEGX)
-  cacheline_bytes = 64;
 #elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) ||                               \
   (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64)
 #if !defined(HAVE_GCC_INLINE_ASSEMBLY)
diff --git a/src/compat_atomics.c b/src/compat_atomics.c
index 06afca9b8..09b5eb3bd 100644
--- a/src/compat_atomics.c
+++ b/src/compat_atomics.c
@@ -14,8 +14,7 @@ extern unsigned int QTHREAD_LOCKING_STRIPES;
   (((size_t)addr >> 4) & (QTHREAD_LOCKING_STRIPES - 1))
 
 #if defined(QTHREAD_MUTEX_INCREMENT) ||                                        \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) ||                              \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)
+  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32)
 uint32_t qthread_incr32_(uint32_t *op, int32_t const incr) { /*{{{ */
   unsigned int stripe = QTHREAD_CHOOSE_STRIPE(op);
   uint32_t retval;
@@ -110,9 +109,7 @@ uint64_t qthread_cas64_(uint64_t *operand,
   return retval;
 } /*}}} */
 
-#else /* if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH ==      \
-         QTHREAD_POWERPC32) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32)   \
-       */
+#else /* if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) */
 #error Building this file erroneously.
 #endif /* if defined(QTHREAD_MUTEX_INCREMENT) || (QTHREAD_ASSEMBLY_ARCH ==     \
           QTHREAD_POWERPC32) */
diff --git a/src/ds/qarray.c b/src/ds/qarray.c
index 2ab52807c..3329e04f6 100644
--- a/src/ds/qarray.c
+++ b/src/ds/qarray.c
@@ -38,8 +38,7 @@ qarray_internal_segment_shep(qarray const *a,
   char *ptr = (((char *)segment_head) + (a->segment_size * a->unit_size));
 
   qassert_ret(a->dist_type == DIST, NULL);
-  /* ensure that it's 4-byte aligned
-   * (mandatory on Sparc, good idea elsewhere) */
+  /* ensure that it's 4-byte aligned */
   if (((uintptr_t)ptr) & 3) { ptr += 4 - (((uintptr_t)ptr) & 3); }
   /* first, do we have the space? */
   qassert_ret((((ptr + sizeof(qthread_shepherd_id_t) - 1) <
@@ -419,24 +418,12 @@ static qarray *qarray_create_internal(size_t const count,
 } /*}}} */
 
 qarray *qarray_create(size_t const count, size_t const obj_size) { /*{{{ */
-#if QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32 ||                             \
-  QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64
-  return qarray_create_internal(count, obj_size, DIST_STRIPES, 0, 0);
-
-#else
   return qarray_create_internal(count, obj_size, FIXED_HASH, 0, 0);
-#endif
 } /*}}} */
 
 qarray *qarray_create_tight(size_t const count,
                             size_t const obj_size) { /*{{{ */
-#if QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_32 ||                             \
-  QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64
-  return qarray_create_internal(count, obj_size, DIST_STRIPES, 1, 0);
-
-#else
   return qarray_create_internal(count, obj_size, FIXED_HASH, 1, 0);
-#endif
 } /*}}} */
 
 qarray *qarray_create_configured(size_t const count,
diff --git a/src/fastcontext/asm.S b/src/fastcontext/asm.S
index 4ef4392c6..0472fc000 100644
--- a/src/fastcontext/asm.S
+++ b/src/fastcontext/asm.S
@@ -59,14 +59,6 @@
 #  define NEEDARMA64CONTEXT 1
 #  define SET qt_setmctxt
 #  define GET qt_getmctxt
-# elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO)
-#  define NEEDTILEPROCONTEXT 1
-#  define SET _qt_setmctxt
-#  define GET _qt_getmctxt
-# elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEGX)
-#  define NEEDTILEGXCONTEXT 1
-#  define SET qt_setmctxt
-#  define GET qt_getmctxt
 # elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
 #  define NEEDX86CONTEXT 1
 #  define SET qt_setmctxt
@@ -215,332 +207,6 @@ GET:
         ret
 #endif
 
-#ifdef NEEDTILEPROCONTEXT
-.text
-.align 2
-
-.type  GET,@function
-.globl GET
-GET:
-        ## .frame $sp, 8, $sp
-        # .caller_lr = 8
-        # .caller_caller_sp = 12
-        addli   r23, sp, -8     _(the arg)
-        sw              r23, r0
-        _(/*) prologue end */)
-        _(/*) setup the pointer */)
-        addli   r1, sp, -8
-        lw              r1, r1
-        _(/* note that each of these uses different temporary
-           * registers, to allow efficient scheduling */)
-        addi    r2, r1, (0*4)
-        sw              r2, r30
-        addi    r3, r1, (1*4)
-        sw              r3, r31
-        addi    r4, r1, (2*4)
-        sw              r4, r32
-        addi    r5, r1, (3*4)
-        sw              r5, r33
-        addi    r6, r1, (4*4)
-        sw              r6, r34
-        addi    r7, r1, (5*4)
-        sw              r7, r35
-        addi    r8, r1, (6*4)
-        sw              r8, r36
-        addi    r9, r1, (7*4)
-        sw              r9, r37
-        addi    r10, r1, (8*4)
-        sw              r10, r38
-        addi    r11, r1, (9*4)
-        sw              r11, r39
-        addi    r12, r1, (10*4)
-        sw              r12, r40
-        addi    r13, r1, (11*4)
-        sw              r13, r41
-        addi    r14, r1, (12*4)
-        sw              r14, r42
-        addi    r15, r1, (13*4)
-        sw              r15, r43
-        addi    r16, r1, (14*4)
-        sw              r16, r44
-        addi    r17, r1, (15*4)
-        sw              r17, r45
-        addi    r18, r1, (16*4)
-        sw              r18, r46
-        addi    r19, r1, (17*4)
-        sw              r19, r47
-        addi    r20, r1, (18*4)
-        sw              r20, r48
-        addi    r21, r1, (19*4)
-        sw              r21, r49
-        addi    r22, r1, (20*4)
-        sw              r22, r50
-        addi    r23, r1, (21*4)
-        sw              r23, r51
-        addi    r24, r1, (22*4)
-        sw              r24, r52
-        _(/*) gotten contexts are not function calls */)
-        addi    r6, r1, (23*4)+(6*4)
-        sw              r6, zero
-        _(/*) store the link register as the new pc */)
-        move    r25, lr
-        addi    r27, r1, (23*4)+(3*4)
-        sw              r27, r25
-        _(/*) store the stack pointer */)
-        addi    r27, sp, 0
-        addi    r28, r1, (23*4)+(1*4)
-        sw              r28, r27
-        _(/*) store the return for swapcontext */)
-        addi    r3, r1, (23*4)+(4*4)
-        movei   r4, 1
-        sw              r3, r4
-        _(/*) return value */)
-        move    r0, zero _(/*) success! */)
-        jrp     lr
-.type  SET,@function
-.globl SET
-SET:
-        ## .frame $sp, 8, $sp
-        # .caller_lr = 8
-        # .caller_caller_sp = 12
-        addli   r6, sp, -8
-        sw              r6, r0
-        _(/*) prologue end */)
-        _(/*) setup the pointer */)
-        addli   r1, sp, -8
-        lw              r1, r1
-        _(/* note that each of these uses different temporary
-           * registers, to allow efficient scheduling */)
-        addi    r2, r1, (0*4)
-        lw              r30, r2
-        addi    r3, r1, (1*4)
-        lw              r31, r3
-        addi    r4, r1, (2*4)
-        lw              r32, r4
-        addi    r5, r1, (3*4)
-        lw              r33, r5
-        addi    r6, r1, (4*4)
-        lw              r34, r6
-        addi    r7, r1, (5*4)
-        lw              r35, r7
-        addi    r8, r1, (6*4)
-        lw              r36, r8
-        addi    r9, r1, (7*4)
-        lw              r37, r9
-        addi    r10, r1, (8*4)
-        lw              r38, r10
-        addi    r11, r1, (9*4)
-        lw              r39, r11
-        addi    r12, r1, (10*4)
-        lw              r40, r12
-        addi    r13, r1, (11*4)
-        lw              r41, r13
-        addi    r14, r1, (12*4)
-        lw              r42, r14
-        addi    r15, r1, (13*4)
-        lw              r43, r15
-        addi    r16, r1, (14*4)
-        lw              r44, r17
-        addi    r18, r1, (15*4)
-        lw              r45, r18
-        addi    r19, r1, (16*4)
-        lw              r46, r19
-        addi    r20, r1, (17*4)
-        lw              r47, r20
-        addi    r21, r1, (18*4)
-        lw              r48, r21
-        addi    r22, r1, (19*4)
-        lw              r49, r22
-        addi    r23, r1, (20*4)
-        lw              r50, r23
-        addi    r24, r1, (21*4)
-        lw              r51, r24
-        addi    r25, r1, (22*4)
-        lw              r52, r25
-        _(/*) fiddle with the stack */)
-        addi    r2, r1, (23*4)+(1*4)
-        lw              r3, r2
-        move    sp, r3
-        _(/*) retrieve the new PC */)
-        addi    r6, r1, (23*4)+(3*4)
-        lw              r7, r6
-        _(/*) first argument? */)
-        addi    r4, r1, (23*4)+(6*4)
-        lw              r5, r4
-        bz              r5, 1f
-        addi    r0, r1, (23*4)+(5*4)
-        lw              r0, r0
-        jf              2f
-1:
-        addi    r0, r1, (23*4)+(4*4)
-        lw              r0, r0
-2:
-        jrp     r7
-#endif
-
-#ifdef NEEDTILEGXCONTEXT
-.text
-.align 2
-
-.type  GET,@function
-.globl GET
-GET:
-        _("## .frame $sp, 8, $sp")
-        _(# .caller_lr = 8)
-        _(# .caller_caller_sp = 12)
-        addli   r23, sp, -16     _(the arg)
-        st              r23, r0
-        _(/*) prologue end */)
-        _(/*) setup the pointer */)
-        addli   r1, sp, -16
-        ld              r1, r1
-        _(/* note that each of these uses different temporary
-           * registers, to allow efficient scheduling */)
-        addi    r2, r1, (0*8)
-        st      r2, r30
-        addi    r3, r1, (1*8)
-        st      r3, r31
-        addi    r4, r1, (2*8)
-        st      r4, r32
-        addi    r5, r1, (3*8)
-        st      r5, r33
-        addi    r6, r1, (4*8)
-        st      r6, r34
-        addi    r7, r1, (5*8)
-        st      r7, r35
-        addi    r8, r1, (6*8)
-        st      r8, r36
-        addi    r9, r1, (7*8)
-        st      r9, r37
-        addi    r10, r1, (8*8)
-        st      r10, r38
-        addi    r11, r1, (9*8)
-        st      r11, r39
-        addi    r12, r1, (10*8)
-        st      r12, r40
-        addi    r13, r1, (11*8)
-        st      r13, r41
-        addi    r14, r1, (12*8)
-        st      r14, r42
-        addi    r15, r1, (13*8)
-        st      r15, r43
-        addi    r16, r1, (14*8)
-        st      r16, r44
-        addi    r17, r1, (15*8)
-        st      r17, r45
-        addli    r18, r1, (16*8)
-        st      r18, r46
-        addli    r19, r1, (17*8)
-        st      r19, r47
-        addli    r20, r1, (18*8)
-        st      r20, r48
-        addli    r21, r1, (19*8)
-        st      r21, r49
-        addli    r22, r1, (20*8)
-        st      r22, r50
-        addli    r23, r1, (21*8)
-        st      r23, r51
-        addli    r24, r1, (22*8)
-        st      r24, r52
-        _(/*) gotten contexts are not function calls */)
-        addli    r6, r1, (23*8)+(6*8)
-        st      r6, zero
-        _(/*) store the link register as the new pc */)
-        move    r25, lr
-        addli    r27, r1, (23*8)+(3*8)
-        st      r27, r25
-        _(/*) store the stack pointer */)
-        addli    r27, sp, 0
-        addli    r28, r1, (23*8)+(1*8)
-        st      r28, r27
-        _(/*) store the return for swapcontext */)
-        addli    r3, r1, (23*8)+(4*8)
-        movei   r4, 1
-        st      r3, r4
-        _(/*) return value */)
-        move    r0, zero _(/*) success! */)
-        jrp     lr
-.type  SET,@function
-.globl SET
-SET:
-        _("## .frame $sp, 8, $sp")
-        _(# .caller_lr = 8)
-        _(# .caller_caller_sp = 12)
-        addli   r6, sp, -16
-        st      r6, r0
-        _(/*) prologue end */)
-        _(/*) setup the pointer */)
-        addli   r1, sp, -16
-        ld      r1, r1
-        _(/* note that each of these uses different temporary
-           * registers, to allow efficient scheduling */)
-        addi    r2, r1, (0*8)
-        ld      r30, r2
-        addi    r3, r1, (1*8)
-        ld      r31, r3
-        addi    r4, r1, (2*8)
-        ld      r32, r4
-        addi    r5, r1, (3*8)
-        ld      r33, r5
-        addi    r6, r1, (4*8)
-        ld      r34, r6
-        addi    r7, r1, (5*8)
-        ld      r35, r7
-        addi    r8, r1, (6*8)
-        ld      r36, r8
-        addi    r9, r1, (7*8)
-        ld      r37, r9
-        addi    r10, r1, (8*8)
-        ld      r38, r10
-        addi    r11, r1, (9*8)
-        ld      r39, r11
-        addi    r12, r1, (10*8)
-        ld      r40, r12
-        addi    r13, r1, (11*8)
-        ld      r41, r13
-        addi    r14, r1, (12*8)
-        ld      r42, r14
-        addi    r15, r1, (13*8)
-        ld      r43, r15
-        addi    r16, r1, (14*8)
-        ld      r44, r16
-        addi    r17, r1, (15*8)
-        ld      r45, r17
-        addli    r18, r1, (16*8)
-        ld      r46, r18
-        addli    r19, r1, (17*8)
-        ld      r47, r19
-        addli    r20, r1, (18*8)
-        ld      r48, r20
-        addli    r21, r1, (19*8)
-        ld      r49, r21
-        addli    r22, r1, (20*8)
-        ld      r50, r22
-        addli    r23, r1, (21*8)
-        ld      r51, r23
-        addli    r24, r1, (22*8)
-        ld      r52, r24
-        _(/*) fiddle with the stack */)
-        addli    r2, r1, (23*8)+(1*8)
-        ld      r3, r2
-        move    sp, r3
-        _(/*) retrieve the new PC */)
-        addli    r6, r1, (23*8)+(3*8)
-        ld      r7, r6
-        _(/*) first argument? */)
-        addli    r4, r1, (23*8)+(6*8)
-        ld      r5, r4
-        beqz    r5, 1f
-        addli    r0, r1, (23*8)+(5*8)
-        ld      r0, r0
-        j       2f
-1:
-        addli    r0, r1, (23*8)+(4*8)
-        ld      r0, r0
-2:
-        jrp     r7
-#endif
-
 #ifdef NEEDPOWERCONTEXT
 /* get FPR and VR use flags with sc 0x7FF3 */
 /* get vsave with mfspr reg, 256 */
diff --git a/src/fastcontext/context.c b/src/fastcontext/context.c
index d7835bfbc..aa33f137e 100644
--- a/src/fastcontext/context.c
+++ b/src/fastcontext/context.c
@@ -83,33 +83,6 @@ void INTERNAL qt_makectxt(uctxt_t *ucp, void (*func)(void), int argc, ...) {
   ucp->mc.mc_esp = (long)sp;
 }
 
-#elif defined(NEEDTILEMAKECONTEXT)
-/* This function is entirely copyright Sandia National Laboratories */
-void INTERNAL qt_makectxt(uctxt_t *ucp, void (*func)(void), int argc, ...) {
-  unsigned long *sp;
-  unsigned long *tos = ucp->uc_stack.ss_sp;
-  int i;
-  va_list arg;
-
-  tos += ucp->uc_stack.ss_size / sizeof(unsigned long);
-  tos -= 1;        // allow space for an incoming lr
-  sp = tos - argc; // allow space for arguments
-  sp = (void *)((unsigned long)sp -
-                (unsigned long)sp % 64); /* 64-align for Tilera */
-  /* now copy from my arg list to the function's arglist (yes, I know this is
-   * voodoo) */
-  // memmove(sp, &argc + 1, argc * sizeof(void*));
-  /* The function may also expect to pull args from up to nine registers */
-  va_start(arg, argc);
-  for (i = 0; i < argc; i++) {
-    if (i == 0) { ucp->mc.arg0 = va_arg(arg, unsigned long); }
-  }
-  ucp->mc.pc = (unsigned long)func;
-  ucp->mc.sp = (unsigned long)sp;
-  ucp->mc.first = 1;
-  va_end(arg);
-}
-
 #elif defined(NEEDARMMAKECONTEXT)
 /* This function is entirely copyright Sandia National Laboratories */
 void INTERNAL qt_makectxt(uctxt_t *ucp, void (*func)(void), int argc, ...) {
@@ -177,8 +150,7 @@ QT_SKIP_THREAD_SANITIZER int INTERNAL qt_swapctxt(uctxt_t *oucp, uctxt_t *ucp) {
   Q_PREFETCH(ucp, 0, 0);
   if (getcontext(oucp) == 0) {
 #if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) ||                                \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||                               \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64))
+     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64))
     Q_PREFETCH((void *)ucp->mc.mc_esp, 1, 3);
 #endif
     setcontext(ucp);
diff --git a/src/qthread.c b/src/qthread.c
index 112caa2a1..32550ef63 100644
--- a/src/qthread.c
+++ b/src/qthread.c
@@ -88,8 +88,7 @@
 
 #if !(defined(HAVE_GCC_INLINE_ASSEMBLY) &&                                     \
       (QTHREAD_SIZEOF_ALIGNED_T == 4 ||                                        \
-       (QTHREAD_ASSEMBLY_ARCH != QTHREAD_POWERPC32 &&                          \
-        QTHREAD_ASSEMBLY_ARCH != QTHREAD_SPARCV9_32))) &&                      \
+       QTHREAD_ASSEMBLY_ARCH != QTHREAD_POWERPC32)) &&                      \
   !defined(QTHREAD_ATOMIC_CAS) && !defined(QTHREAD_MUTEX_INCREMENT)
 #warning QTHREAD_MUTEX_INCREMENT not defined. It probably should be.
 #define QTHREAD_MUTEX_INCREMENT 1
diff --git a/src/syncvar.c b/src/syncvar.c
index d9803a564..07f283773 100644
--- a/src/syncvar.c
+++ b/src/syncvar.c
@@ -103,12 +103,9 @@ extern unsigned int QTHREAD_LOCKING_STRIPES;
                           BUILD_UNLOCKED_SYNCVAR(val, state),                  \
                           memory_order_relaxed);                               \
   } while (0)
-#elif ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) ||                         \
-       (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) ||                         \
-       (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32) ||                              \
-       (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) ||                              \
-       (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) ||                        \
-       (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO))
+#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) ||                         \
+      (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) ||                         \
+      (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA32)
 #define UNLOCK_THIS_UNMODIFIED_SYNCVAR(addr, unlocked)                         \
   do {                                                                         \
     atomic_store_explicit(                                                     \
@@ -141,8 +138,7 @@ static uint64_t qthread_mwaitc(syncvar_t *restrict const addr,
                                unsigned char const statemask,
                                unsigned int timeout,
                                eflags_t *restrict const err) { /*{{{ */
-#if ((QTHREAD_ASSEMBLY_ARCH != QTHREAD_TILEPRO) &&                             \
-     (QTHREAD_ASSEMBLY_ARCH != QTHREAD_POWERPC32))
+#if (QTHREAD_ASSEMBLY_ARCH != QTHREAD_POWERPC32)
   syncvar_t unlocked;
 #endif
   syncvar_t locked;
@@ -156,21 +152,7 @@ static uint64_t qthread_mwaitc(syncvar_t *restrict const addr,
   e.zf = 0;
   e.cf = 1;
   do {
-#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO)
-    uint32_t low, high;
-    int32_t *addrptr = (int32_t *)addr;
-    /* note that the tilera is little-endian, otherwise this would be
-     * addrptr+1 */
-    while ((low = __insn_tns(addrptr)) == 1) {
-      if (timeout-- <= 0) { goto errexit; }
-      SPINLOCK_BODY();
-    }
-    /* now addrptr[0] is 1 and low is the "real" (unlocked) addrptr[0]
-     * value. */
-    high = addrptr[1];
-    locked.u.w = (((uint64_t)high) << 32) | low;
-    MACHINE_FENCE;
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32)
+#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32)
     /* This applies for any 32-bit architecture with a valid 32-bit CAS
      * (though I'm making some big-endian assumptions at the moment) */
     uint32_t low_unlocked, low_locked;
@@ -191,7 +173,7 @@ static uint64_t qthread_mwaitc(syncvar_t *restrict const addr,
       if (timeout-- <= 0) { goto errexit; }
     } while (1);
     locked.u.w = addr->u.w; // I locked it, so I can read it
-#else  /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO) */
+#else  /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) */
     {
       syncvar_t tmp;
     loop_start:
@@ -213,7 +195,7 @@ static uint64_t qthread_mwaitc(syncvar_t *restrict const addr,
         if (timeout-- <= 0) { goto errexit; }
       } while (1);
     }
-#endif /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO) */
+#endif /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32) */
     /***************************************************
      * now locked == unlocked, and the lock bit is set *
      ***************************************************/
@@ -229,10 +211,7 @@ static uint64_t qthread_mwaitc(syncvar_t *restrict const addr,
       return locked.u.s.data;
     } else {
       /* this is NOT a state of interest, so unlock the locked bit */
-#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_TILEPRO)
-      MACHINE_FENCE;
-      addrptr[0] = low;
-#elif (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32)
+#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC32)
       MACHINE_FENCE;
       addrptr[1] = low_unlocked;
 #else
@@ -404,12 +383,10 @@ int API_FUNC qthread_syncvar_readFF(uint64_t *restrict dest,
   QTHREAD_FEB_UNIQUERECORD(feb, src, me);
   QTHREAD_FEB_TIMER_START(febblock);
 
-#if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||                               \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) ||                                \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) ||                           \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)) ||                         \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) ||                                    \
-  (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64)
+#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \
+    (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) || \
+    (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) || \
+    (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64)
   {
     /* I'm being optimistic here; this only works if a basic 64-bit load is
      * atomic (on most platforms it is). Thus, if I've done an atomic read
@@ -426,11 +403,10 @@ int API_FUNC qthread_syncvar_readFF(uint64_t *restrict dest,
       return QTHREAD_SUCCESS;
     }
   }
-#endif /* if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||                      \
-          (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) || (QTHREAD_ASSEMBLY_ARCH == \
-          QTHREAD_POWERPC64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)  \
-          || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) || (QTHREAD_ASSEMBLY_ARCH  \
-          == QTHREAD_ARMV8_A64)) */
+#endif /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \
+             (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) || \
+             (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) || \
+             (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64) */
   ret = qthread_mwaitc(src, SYNCFEB_FULL, INITIAL_TIMEOUT, &e);
   qthread_debug(SYNCVAR_DETAILS,
                 "2 src(%p) = %x, ret = %x\n",
@@ -549,12 +525,10 @@ int API_FUNC qthread_syncvar_readFF_nb(uint64_t *restrict dest,
 
   if (!me) { return qthread_syncvar_blocker_func(dest, src, READFF_NB); }
 
-#if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||                               \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) ||                                \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) ||                           \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64) ||                          \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) ||                                 \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64))
+#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||      \
+    (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) ||  \
+    (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) ||        \
+    (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64)
   {
     /* I'm being optimistic here; this only works if a basic 64-bit load is
      * atomic (on most platforms it is). Thus, if I've done an atomic read
@@ -569,11 +543,10 @@ int API_FUNC qthread_syncvar_readFF_nb(uint64_t *restrict dest,
       return QTHREAD_SUCCESS;
     }
   }
-#endif /* if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||                      \
-          (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) || (QTHREAD_ASSEMBLY_ARCH == \
-          QTHREAD_POWERPC64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)  \
-          || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) || (QTHREAD_ASSEMBLY_ARCH  \
-          == QTHREAD_ARMV8_A64)) */
+#endif /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||       \
+             (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) ||   \
+             (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARM) ||         \
+             (QTHREAD_ASSEMBLY_ARCH == QTHREAD_ARMV8_A64) */
   ret = qthread_mwaitc(src, SYNCFEB_FULL, 1, &e);
   qthread_debug(SYNCVAR_DETAILS,
                 "2 src(%p) = %x, ret = %x\n",
diff --git a/src/threadqueues/sherwood_threadqueues.c b/src/threadqueues/sherwood_threadqueues.c
index 43e639b34..642937475 100644
--- a/src/threadqueues/sherwood_threadqueues.c
+++ b/src/threadqueues/sherwood_threadqueues.c
@@ -223,10 +223,8 @@ void INTERNAL qt_threadqueue_subsystem_init(void) { /*{{{*/
 #endif /* if defined(UNPOOLED_QUEUES) || defined(UNPOOLED) */
 
 ssize_t INTERNAL qt_threadqueue_advisory_queuelen(qt_threadqueue_t *q) { /*{{{*/
-#if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||                               \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) ||                                \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64) ||                           \
-     (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64))
+#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \
+    (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64)
   /* only works if a basic load is atomic */
   return q->qlength;
 
@@ -237,9 +235,8 @@ ssize_t INTERNAL qt_threadqueue_advisory_queuelen(qt_threadqueue_t *q) { /*{{{*/
   tmp = q->qlength;
   QTHREAD_TRYLOCK_UNLOCK(&q->qlock);
   return tmp;
-#endif /* if ((QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) ||                      \
-          (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64) || (QTHREAD_ASSEMBLY_ARCH == \
-          QTHREAD_POWERPC64) || (QTHREAD_ASSEMBLY_ARCH == QTHREAD_SPARCV9_64)) \
+#endif /* if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_AMD64) || \
+             (QTHREAD_ASSEMBLY_ARCH == QTHREAD_POWERPC64)
         */
 } /*}}}*/
 
diff --git a/test/basics/qthread_stackleft.c b/test/basics/qthread_stackleft.c
index df323cca2..24b96b0cb 100644
--- a/test/basics/qthread_stackleft.c
+++ b/test/basics/qthread_stackleft.c
@@ -38,11 +38,7 @@ static aligned_t alldone;
 static STACKLEFT_NOINLINE size_t thread2(size_t left, size_t depth) {
   size_t foo = qthread_stackleft();
   iprintf("leveli%i: %zu bytes left\n", (int)depth, foo);
-#if (QTHREAD_ASSEMBLY_ARCH == QTHREAD_IA64)
-  assert(foo <= left);
-#else
   assert(foo < left);
-#endif
   if (depth < 5) { thread2(foo, depth + 1); }
   return 1;
 }