fix abd, have taskq_create_synced() wait for threads to be created (#24)

taskq_create_synced() did a VERIFY() that the taskq's thread count equalled the requested number, but taskq_create() can return early, because taskq_thread_create() is allowed to return once only two of the desired threads have been created. Fix this race, which caused a panic, by waiting until all requested threads exist. Also, taskq_create_synced() may fail if num_ecores is nonzero (on Apple Silicon), so add a flag, TASKQ_CREATE_SYNCED, that tells taskq_create_common() to use the full max_ncpus rather than the count deflated by num_ecores. Make boot_ncpus a variable set to MAX(1, (int)max_ncpus - (int)num_ecores); boot_ncpus is used in common code. Modify the alignments and quanta/import sizes of the abd kmem cache and vmem arena creations. Make DEBUG builds work with KMF_BUFTAG | KMF_LITE on the abd kmem caches. Signed-off-by: Sean Doran <smd@use.net>
openzfsonosx · Dec 6, 2023 · 4c607f0 · 4c607f0
1 parent 33ad46e
commit 4c607f0
Show file tree

Hide file tree

Showing 6 changed files with 43 additions and 23 deletions.
diff --git a/include/os/macos/spl/sys/sysmacros.h b/include/os/macos/spl/sys/sysmacros.h
@@ -89,7 +89,7 @@ extern "C" {
 #define	is_system_labeled()		0
 
 extern unsigned int max_ncpus;
-#define	boot_ncpus max_ncpus
+extern unsigned	int boot_ncpus;
 extern unsigned int num_ecores;
 
 #ifndef RLIM64_INFINITY

diff --git a/include/os/macos/spl/sys/taskq.h b/include/os/macos/spl/sys/taskq.h
@@ -61,6 +61,7 @@ struct taskq_ent;
 #ifdef __APPLE__
 #define	TASKQ_TIMESHARE		0x0020  /* macOS dynamic thread priority */
 #define	TASKQ_REALLY_DYNAMIC	0x0040  /* don't filter out TASKQ_DYNAMIC */
+#define	TASKQ_CREATE_SYNCED	0x0080	/* don't deflate ncpus */
 #endif
 
 /*

diff --git a/module/os/macos/spl/spl-osx.c b/module/os/macos/spl/spl-osx.c
@@ -49,6 +49,7 @@
 static utsname_t utsname_static = { { 0 } };
 
 unsigned int max_ncpus = 0;
+unsigned int boot_ncpus = 0;
 unsigned int num_ecores = 0;
 uint64_t  total_memory = 0;
 uint64_t  real_total_memory = 0;
@@ -495,6 +496,9 @@ spl_start(kmod_info_t *ki, void *d)
 
 #if defined(__arm64__)
 	num_ecores = (max_ncpus > 4) ? 4 : 0;
+	boot_ncpus = MAX(1, (int)max_ncpus - (int)num_ecores);
+#else
+	boot_ncpus = max_ncpus;
 #endif
 
 	/*

diff --git a/module/os/macos/spl/spl-seg_kmem.c b/module/os/macos/spl/spl-seg_kmem.c
@@ -281,31 +281,27 @@ segkmem_abd_init()
 	/*
 	 * OpenZFS does not segregate the abd kmem cache out of the general
 	 * heap, leading to large numbers of short-lived slabs exchanged
-	 * between the kmem cache and it's parent.  XNU absorbs this with a
-	 * qcache, following its history of absorbing the pre-ABD zio file and
-	 * metadata caches being qcached (which raises the exchanges with the
-	 * general heap from PAGESIZE to 256k).
+	 * between the kmem cache and its parent.  XNU absorbs this with a
+	 * large minimum request to the parent vmem_caches on large-memory
+	 * MacOS systems.
 	 */
 
 	extern vmem_t *spl_heap_arena;
 
-#define	BIG_SLAB 131072
-#ifdef __arm64__
-#define	BIG_BIG_SLAB (BIG_SLAB * 2)
-#else
-#define	BIG_BIG_SLAB BIG_SLAB
-#endif
+#define	BIG_SLAB (PAGESIZE * 16)
 
 #define	SMALL_RAM_MACHINE (4ULL * 1024ULL * 1024ULL * 1024ULL)
 
 	if (total_memory >= SMALL_RAM_MACHINE) {
 		abd_arena = vmem_create("abd_cache", NULL, 0,
-		    PAGESIZE, vmem_alloc_impl, vmem_free_impl, spl_heap_arena,
-		    BIG_BIG_SLAB, VM_SLEEP | VMC_NO_QCACHE);
+		    sizeof (void *),
+		    vmem_alloc_impl, vmem_free_impl, spl_heap_arena,
+		    BIG_SLAB, VM_SLEEP | VMC_NO_QCACHE);
 	} else {
 		abd_arena = vmem_create("abd_cache", NULL, 0,
-		    PAGESIZE, vmem_alloc_impl, vmem_free_impl, spl_heap_arena,
-		    131072, VM_SLEEP | VMC_NO_QCACHE);
+		    sizeof (void *),
+		    vmem_alloc_impl, vmem_free_impl, spl_heap_arena,
+		    PAGESIZE, VM_SLEEP | VMC_NO_QCACHE);
 	}
 
 	VERIFY3P(abd_arena, !=, NULL);
@@ -322,13 +318,15 @@ segkmem_abd_init()
 
 	if (total_memory >= SMALL_RAM_MACHINE) {
 		abd_subpage_arena = vmem_create("abd_subpage_cache", NULL, 0,
-		    sizeof (void *), vmem_alloc_impl, vmem_free_impl,
+		    sizeof (void *),
+		    vmem_alloc_impl, vmem_free_impl,
 		    spl_heap_arena,
 		    BIG_SLAB, VM_SLEEP | VMC_NO_QCACHE);
 	} else {
 		abd_subpage_arena = vmem_create("abd_subpage_cache", NULL, 0,
-		    512, vmem_alloc_impl, vmem_free_impl, abd_arena,
-		    131072, VM_SLEEP | VMC_NO_QCACHE);
+		    sizeof (void *),
+		    vmem_alloc_impl, vmem_free_impl, abd_arena,
+		    PAGESIZE, VM_SLEEP | VMC_NO_QCACHE);
 	}
 
 	VERIFY3P(abd_subpage_arena, !=, NULL);

diff --git a/module/os/macos/spl/spl-taskq.c b/module/os/macos/spl/spl-taskq.c
@@ -2401,7 +2401,9 @@ taskq_create_common(const char *name, int instance, int nthreads, pri_t pri,
 {
 	taskq_t *tq = kmem_cache_alloc(taskq_cache, KM_SLEEP);
 #ifdef __APPLE__
-	uint_t ncpus = max_ncpus - num_ecores;
+	uint_t ncpus = max_ncpus;
+	if (!(flags & TASKQ_CREATE_SYNCED))
+		ncpus = boot_ncpus; /* possibly deflated by num_ecores */
 #else
 	uint_t ncpus = ((boot_max_ncpus == -1) ? max_ncpus : boot_max_ncpus);
 #endif
@@ -2861,9 +2863,22 @@ taskq_create_synced(const char *name, int nthreads, pri_t pri,
 	flags &= ~(TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT | TASKQ_DC_BATCH);
 
 	tq = taskq_create(name, nthreads, minclsyspri, nthreads, INT_MAX,
-	    flags | TASKQ_PREPOPULATE);
+	    flags | TASKQ_PREPOPULATE | TASKQ_CREATE_SYNCED);
+
 	VERIFY(tq != NULL);
-	VERIFY(tq->tq_nthreads == nthreads);
+
+	/* wait until our minalloc (nthreads) threads are created */
+	mutex_enter(&tq->tq_lock);
+	for (int i = 1; tq->tq_nthreads != nthreads; i++) {
+		printf("SPL: %s:%d: waiting for tq_nthreads (%d)"
+		    " to be nthreads (%d), (target = %d, pass %d)\n",
+		    __func__, __LINE__,
+		    tq->tq_nthreads, tq->tq_nthreads_target,  nthreads, i);
+		cv_wait(&tq->tq_wait_cv, &tq->tq_lock);
+	}
+	mutex_exit(&tq->tq_lock);
+
+	VERIFY3U(tq->tq_nthreads, ==, nthreads);
 
 	/* spawn all syncthreads */
 	for (int i = 0; i < nthreads; i++) {

diff --git a/module/os/macos/zfs/abd_os.c b/module/os/macos/zfs/abd_os.c
@@ -401,15 +401,17 @@ abd_init(void)
 	 * const int cflags = KMF_BUFTAG | KMF_LITE;
 	 * or
 	 * const int cflags = KMC_ARENA_SLAB;
+	 * (the latter tests larger exchanges of memory with the kernel)
 	 */
 
-	int cflags = KMC_ARENA_SLAB;
+	int cflags = KMF_BUFTAG | KMF_LITE;
+	// int cflags = KMC_ARENA_SLAB;
 #else
 	int cflags = KMC_NOTOUCH;
 #endif
 
 	abd_chunk_cache = kmem_cache_create("abd_chunk", zfs_abd_chunk_size,
-	    ABD_PGSIZE,
+	    sizeof (void *),
 	    NULL, NULL, NULL, NULL, abd_arena, cflags);
 
 	wmsum_init(&abd_sums.abdstat_struct_size, 0);