Skip to content

Commit

Permalink
Use large stacks when available
Browse files Browse the repository at this point in the history
While stack size will vary by architecture it has historically defaulted to
8K on x86_64 systems.  However, as of Linux 3.15 the default thread stack
size was increased to 16K.  These kernels are now the default in most non-
enterprise distributions which means we no longer need to assume 8K stacks.

This patch takes advantage of that fact by appropriately reverting stack
conservation changes which were made to ensure stability.  Changes which
may have had a negative impact on performance for certain workloads.  This
also has the side effect of bringing the code slightly more in line with
upstream.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Yao <ryao@gentoo.org>
Closes #4059
  • Loading branch information
behlendorf committed Dec 7, 2015
1 parent f409267 commit b58986e
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 19 deletions.
3 changes: 2 additions & 1 deletion config/Rules.am
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ AM_CFLAGS += ${NO_BOOL_COMPARE}
AM_CFLAGS += -fno-strict-aliasing
AM_CPPFLAGS = -D_GNU_SOURCE -D__EXTENSIONS__ -D_REENTRANT
AM_CPPFLAGS += -D_POSIX_PTHREAD_SEMANTICS -D_FILE_OFFSET_BITS=64
AM_CPPFLAGS += -D_LARGEFILE64_SOURCE -DTEXT_DOMAIN=\"zfs-linux-user\"
AM_CPPFLAGS += -D_LARGEFILE64_SOURCE -DHAVE_LARGE_STACKS=1
AM_CPPFLAGS += -DTEXT_DOMAIN=\"zfs-linux-user\"
AM_CPPFLAGS += -DLIBEXECDIR=\"$(libexecdir)\"
AM_CPPFLAGS += -DRUNSTATEDIR=\"$(runstatedir)\"
AM_CPPFLAGS += -DSBINDIR=\"$(sbindir)\"
Expand Down
28 changes: 27 additions & 1 deletion config/kernel.m4
Original file line number Diff line number Diff line change
Expand Up @@ -460,9 +460,35 @@ AC_DEFUN([ZFS_AC_KERNEL_CONFIG], [
], [
])
ZFS_AC_KERNEL_CONFIG_THREAD_SIZE
ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC
])

dnl #
dnl # Check configured THREAD_SIZE
dnl #
dnl # The stack size will vary by architecture, but as of Linux 3.15 on x86_64
dnl # the default thread stack size was increased to 16K from 8K. Therefore,
dnl # on newer kernels and some architectures stack usage optimizations can be
dnl # conditionally applied to improve performance without negatively impacting
dnl # stability.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_CONFIG_THREAD_SIZE], [
AC_MSG_CHECKING([whether kernel was built with 16K or larger stacks])
ZFS_LINUX_TRY_COMPILE([
#include <linux/module.h>
],[
#if (THREAD_SIZE < 16384)
#error "THREAD_SIZE is less than 16K"
#endif
],[
AC_MSG_RESULT([yes])
AC_DEFINE(HAVE_LARGE_STACKS, 1, [kernel has large stacks])
],[
AC_MSG_RESULT([no])
])
])

dnl #
dnl # Check CONFIG_DEBUG_LOCK_ALLOC
dnl #
Expand Down Expand Up @@ -572,7 +598,7 @@ dnl #
dnl # ZFS_LINUX_CONFIG
dnl #
AC_DEFUN([ZFS_LINUX_CONFIG],
[AC_MSG_CHECKING([whether Linux was built with CONFIG_$1])
[AC_MSG_CHECKING([whether kernel was built with CONFIG_$1])
ZFS_LINUX_TRY_COMPILE([
#include <linux/module.h>
],[
Expand Down
8 changes: 6 additions & 2 deletions module/zfs/dmu_send.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ typedef struct dump_bytes_io {
} dump_bytes_io_t;

static void
dump_bytes_strategy(void *arg)
dump_bytes_cb(void *arg)
{
dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg;
dmu_sendarg_t *dsp = dbi->dbi_dsp;
Expand All @@ -94,14 +94,18 @@ dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
dbi.dbi_buf = buf;
dbi.dbi_len = len;

#if defined(HAVE_LARGE_STACKS)
dump_bytes_cb(&dbi);
#else
/*
* The vn_rdwr() call is performed in a taskq to ensure that there is
* always enough stack space to write safely to the target filesystem.
* The ZIO_TYPE_FREE threads are used because there can be a lot of
* them and they are used in vdev_file.c for a similar purpose.
*/
spa_taskq_dispatch_sync(dmu_objset_spa(dsp->dsa_os), ZIO_TYPE_FREE,
ZIO_TASKQ_ISSUE, dump_bytes_strategy, &dbi, TQ_SLEEP);
ZIO_TASKQ_ISSUE, dump_bytes_cb, &dbi, TQ_SLEEP);
#endif /* HAVE_LARGE_STACKS */

return (dsp->dsa_err);
}
Expand Down
47 changes: 32 additions & 15 deletions module/zfs/zio.c
Original file line number Diff line number Diff line change
Expand Up @@ -1401,6 +1401,31 @@ zio_execute(zio_t *zio)
spl_fstrans_unmark(cookie);
}

/*
* Used to determine if in the current context the stack is sized large
* enough to allow zio_execute() to be called recursively. A minimum
* stack size of 16K is required to avoid needing to re-dispatch the zio.
*/
boolean_t
zio_execute_stack_check(zio_t *zio)
{
#if !defined(HAVE_LARGE_STACKS)
dsl_pool_t *dp = spa_get_dsl(zio->io_spa);

/* Executing in txg_sync_thread() context. */
if (dp && curthread == dp->dp_tx.tx_sync_thread)
return (B_TRUE);

/* Pool initialization outside of zio_taskq context. */
if (dp && spa_is_initializing(dp->dp_spa) &&
!zio_taskq_member(zio, ZIO_TASKQ_ISSUE) &&
!zio_taskq_member(zio, ZIO_TASKQ_ISSUE_HIGH))
return (B_TRUE);
#endif /* HAVE_LARGE_STACKS */

return (B_FALSE);

This comment has been minimized.

Copy link
@adilger

adilger Nov 30, 2016

Contributor

A bit late in the game, given that this patch has already landed, but this check doesn't make sense to me - if HAVE_LARGE_STACKS is set, then this function always returns B_FALSE rather than B_TRUE as I would have expected?

This comment has been minimized.

Copy link
@adilger

adilger Nov 30, 2016

Contributor

My bad, B_FALSE means "execute on stack" even though the function name is zio_execute_stack_check(). It probably would be better to rename this function to something better like zio_execute_stack_is_small() or something that more clearly indicates what is being returned.

}

__attribute__((always_inline))
static inline void
__zio_execute(zio_t *zio)
Expand All @@ -1410,8 +1435,6 @@ __zio_execute(zio_t *zio)
while (zio->io_stage < ZIO_STAGE_DONE) {
enum zio_stage pipeline = zio->io_pipeline;
enum zio_stage stage = zio->io_stage;
dsl_pool_t *dp;
boolean_t cut;
int rv;

ASSERT(!MUTEX_HELD(&zio->io_lock));
Expand All @@ -1424,10 +1447,6 @@ __zio_execute(zio_t *zio)

ASSERT(stage <= ZIO_STAGE_DONE);

dp = spa_get_dsl(zio->io_spa);
cut = (stage == ZIO_STAGE_VDEV_IO_START) ?
zio_requeue_io_start_cut_in_line : B_FALSE;

/*
* If we are in interrupt context and this pipeline stage
* will grab a config lock that is held across I/O,
Expand All @@ -1439,21 +1458,19 @@ __zio_execute(zio_t *zio)
*/
if ((stage & ZIO_BLOCKING_STAGES) && zio->io_vd == NULL &&
zio_taskq_member(zio, ZIO_TASKQ_INTERRUPT)) {
boolean_t cut = (stage == ZIO_STAGE_VDEV_IO_START) ?
zio_requeue_io_start_cut_in_line : B_FALSE;
zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut);
return;
}

/*
* If we executing in the context of the tx_sync_thread,
* or we are performing pool initialization outside of a
* zio_taskq[ZIO_TASKQ_ISSUE|ZIO_TASKQ_ISSUE_HIGH] context.
* Then issue the zio asynchronously to minimize stack usage
* for these deep call paths.
* If the current context doesn't have large enough stacks
* the zio must be issued asynchronously to prevent overflow.
*/
if ((dp && curthread == dp->dp_tx.tx_sync_thread) ||
(dp && spa_is_initializing(dp->dp_spa) &&
!zio_taskq_member(zio, ZIO_TASKQ_ISSUE) &&
!zio_taskq_member(zio, ZIO_TASKQ_ISSUE_HIGH))) {
if (zio_execute_stack_check(zio)) {
boolean_t cut = (stage == ZIO_STAGE_VDEV_IO_START) ?
zio_requeue_io_start_cut_in_line : B_FALSE;
zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut);
return;
}
Expand Down

0 comments on commit b58986e

Please sign in to comment.