Prevent unnecessary resilver restarts
If a device is participating in an active resilver, then it will have a
non-empty DTL. Operations like vdev_{open,reopen,probe}() can cause the
resilver to be restarted (or deferred to be restarted later), which is
unnecessary if the DTL is still covered by the current scan range. This
is similar to the logic in vdev_dtl_should_excise(), where the DTL can
only be excised if its max txg is in the resilvered range.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: John Gallagher <john.gallagher@delphix.com>
Reviewed-by: Kjeld Schouten <kjeld@schouten-lebbing.nl>
Signed-off-by: John Poduska <jpoduska@datto.com>
Issue openzfs#840
Closes openzfs#9155
Closes openzfs#9378
Closes openzfs#9551
Closes openzfs#9588
jwpoduska authored and tonyhutter committed Jan 22, 2020
1 parent 8d26607 commit f5ff772
Showing 14 changed files with 409 additions and 87 deletions.
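
The heart of the change is the new dsl_scan_assess_vdev() gate added to module/zfs/dsl_scan.c below: a resilver restart (or deferral) is only requested when the vdev's DTL extends beyond the txg range the current scan already covers. The following standalone sketch models that decision; the scan_state/vdev_state structs and the assess_vdev() helper are illustrative stand-ins, not the real ZFS types or kernel code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct scan_state {
    bool resilvering;        /* a resilver is currently running */
    uint64_t max_txg;        /* highest txg covered by the current scan */
};

struct vdev_state {
    bool dtl_missing;        /* vdev has a non-empty DTL */
    uint64_t dtl_max_txg;    /* highest txg recorded in that DTL */
};

enum action { ACT_NONE, ACT_START, ACT_RESTART_OR_DEFER };

/* Decide what to do for one leaf vdev, mirroring the new gating logic. */
static enum action
assess_vdev(const struct scan_state *scan, const struct vdev_state *vd)
{
    if (!vd->dtl_missing)
        return (ACT_NONE);            /* nothing to resilver */
    if (!scan->resilvering)
        return (ACT_START);           /* no resilver running yet */
    if (vd->dtl_max_txg <= scan->max_txg)
        return (ACT_NONE);            /* DTL already covered: no restart */
    return (ACT_RESTART_OR_DEFER);    /* vdev missed newer txgs */
}

int
main(void)
{
    struct scan_state scan = { .resilvering = true, .max_txg = 1000 };
    struct vdev_state covered = { .dtl_missing = true, .dtl_max_txg = 900 };
    struct vdev_state stale = { .dtl_missing = true, .dtl_max_txg = 1500 };

    printf("covered vdev -> %d (expect 0: no restart)\n",
        assess_vdev(&scan, &covered));
    printf("stale vdev   -> %d (expect 2: restart or defer)\n",
        assess_vdev(&scan, &stale));
    return (0);
}

In the actual diff, the three outcomes correspond to doing nothing, requesting SPA_ASYNC_RESILVER, and either vdev_defer_resilver() or a restart request, depending on whether the RESILVER_DEFER feature is enabled.
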
1 change: 1 addition & 0 deletions configure.ac
@@ -323,6 +323,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/rename_dirs/Makefile
tests/zfs-tests/tests/functional/replacement/Makefile
tests/zfs-tests/tests/functional/reservation/Makefile
tests/zfs-tests/tests/functional/resilver/Makefile
tests/zfs-tests/tests/functional/rootpool/Makefile
tests/zfs-tests/tests/functional/rsend/Makefile
tests/zfs-tests/tests/functional/scrub_mirror/Makefile
6 changes: 4 additions & 2 deletions include/sys/dsl_scan.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright (c) 2017 Datto Inc.
* Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
*/

#ifndef _SYS_DSL_SCAN_H
@@ -164,10 +164,12 @@ void dsl_scan_fini(struct dsl_pool *dp);
void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
int dsl_scan_cancel(struct dsl_pool *);
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
void dsl_scan_assess_vdev(struct dsl_pool *dp, vdev_t *vd);
boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);
void dsl_resilver_restart(struct dsl_pool *, uint64_t txg);
void dsl_scan_restart_resilver(struct dsl_pool *, uint64_t txg);
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
boolean_t dsl_scan_resilver_scheduled(struct dsl_pool *dp);
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
void dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
ddt_entry_t *dde, dmu_tx_t *tx);
3 changes: 2 additions & 1 deletion include/sys/spa.h
@@ -26,7 +26,7 @@
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2017 Joyent, Inc.
* Copyright (c) 2017 Datto Inc.
* Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
*/

@@ -777,6 +777,7 @@ extern void spa_async_request(spa_t *spa, int flag);
extern void spa_async_unrequest(spa_t *spa, int flag);
extern void spa_async_suspend(spa_t *spa);
extern void spa_async_resume(spa_t *spa);
extern int spa_async_tasks(spa_t *spa);
extern spa_t *spa_inject_addref(char *pool);
extern void spa_inject_delref(spa_t *spa);
extern void spa_scan_stat_init(spa_t *spa);
4 changes: 3 additions & 1 deletion include/sys/vdev.h
@@ -23,6 +23,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019, Datto Inc. All rights reserved.
*/

#ifndef _SYS_VDEV_H
@@ -151,7 +152,8 @@ extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg);
extern void vdev_state_dirty(vdev_t *vd);
extern void vdev_state_clean(vdev_t *vd);

extern void vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd);
extern void vdev_defer_resilver(vdev_t *vd);
extern boolean_t vdev_clear_resilver_deferred(vdev_t *vd, dmu_tx_t *tx);

typedef enum vdev_config_flag {
VDEV_CONFIG_SPARE = 1 << 0,
100 changes: 49 additions & 51 deletions module/zfs/dsl_scan.c
@@ -22,7 +22,7 @@
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright 2016 Gary Mills
* Copyright (c) 2017 Datto Inc.
* Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
* Copyright 2019 Joyent, Inc.
*/

@@ -591,6 +591,13 @@ dsl_scan_restarting(dsl_scan_t *scn, dmu_tx_t *tx)
scn->scn_restart_txg <= tx->tx_txg);
}

boolean_t
dsl_scan_resilver_scheduled(dsl_pool_t *dp)
{
return ((dp->dp_scan && dp->dp_scan->scn_restart_txg != 0) ||
(spa_async_tasks(dp->dp_spa) & SPA_ASYNC_RESILVER));
}

boolean_t
dsl_scan_scrubbing(const dsl_pool_t *dp)
{
@@ -786,7 +793,7 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
(void) spa_vdev_state_exit(spa, NULL, 0);

if (func == POOL_SCAN_RESILVER) {
dsl_resilver_restart(spa->spa_dsl_pool, 0);
dsl_scan_restart_resilver(spa->spa_dsl_pool, 0);
return (0);
}

@@ -806,41 +813,6 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED));
}

/*
* Sets the resilver defer flag to B_FALSE on all leaf devs under vd. Returns
* B_TRUE if we have devices that need to be resilvered and are available to
* accept resilver I/Os.
*/
static boolean_t
dsl_scan_clear_deferred(vdev_t *vd, dmu_tx_t *tx)
{
boolean_t resilver_needed = B_FALSE;
spa_t *spa = vd->vdev_spa;

for (int c = 0; c < vd->vdev_children; c++) {
resilver_needed |=
dsl_scan_clear_deferred(vd->vdev_child[c], tx);
}

if (vd == spa->spa_root_vdev &&
spa_feature_is_active(spa, SPA_FEATURE_RESILVER_DEFER)) {
spa_feature_decr(spa, SPA_FEATURE_RESILVER_DEFER, tx);
vdev_config_dirty(vd);
spa->spa_resilver_deferred = B_FALSE;
return (resilver_needed);
}

if (!vdev_is_concrete(vd) || vd->vdev_aux ||
!vd->vdev_ops->vdev_op_leaf)
return (resilver_needed);

if (vd->vdev_resilver_deferred)
vd->vdev_resilver_deferred = B_FALSE;

return (!vdev_is_dead(vd) && !vd->vdev_offline &&
vdev_resilver_needed(vd, NULL, NULL));
}

/* ARGSUSED */
static void
dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
@@ -943,25 +915,21 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);

/*
* Clear any deferred_resilver flags in the config.
* Clear any resilver_deferred flags in the config.
* If there are drives that need resilvering, kick
* off an asynchronous request to start resilver.
* dsl_scan_clear_deferred() may update the config
* vdev_clear_resilver_deferred() may update the config
* before the resilver can restart. In the event of
* a crash during this period, the spa loading code
* will find the drives that need to be resilvered
* when the machine reboots and start the resilver then.
* and start the resilver then.
*/
if (spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) {
boolean_t resilver_needed =
dsl_scan_clear_deferred(spa->spa_root_vdev, tx);
if (resilver_needed) {
spa_history_log_internal(spa,
"starting deferred resilver", tx,
"errors=%llu",
(u_longlong_t)spa_get_errlog_size(spa));
spa_async_request(spa, SPA_ASYNC_RESILVER);
}
if (spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER) &&
vdev_clear_resilver_deferred(spa->spa_root_vdev, tx)) {
spa_history_log_internal(spa,
"starting deferred resilver", tx, "errors=%llu",
(u_longlong_t)spa_get_errlog_size(spa));
spa_async_request(spa, SPA_ASYNC_RESILVER);
}
}

@@ -1071,7 +1039,7 @@ dsl_scrub_set_pause_resume(const dsl_pool_t *dp, pool_scrub_cmd_t cmd)

/* start a new scan, or restart an existing one. */
void
dsl_resilver_restart(dsl_pool_t *dp, uint64_t txg)
dsl_scan_restart_resilver(dsl_pool_t *dp, uint64_t txg)
{
if (txg == 0) {
dmu_tx_t *tx;
@@ -4232,6 +4200,36 @@ dsl_scan_freed(spa_t *spa, const blkptr_t *bp)
dsl_scan_freed_dva(spa, bp, i);
}

/*
* Check if a vdev needs resilvering (non-empty DTL), if so, and resilver has
* not started, start it. Otherwise, only restart if max txg in DTL range is
* greater than the max txg in the current scan. If the DTL max is less than
* the scan max, then the vdev has not missed any new data since the resilver
* started, so a restart is not needed.
*/
void
dsl_scan_assess_vdev(dsl_pool_t *dp, vdev_t *vd)
{
uint64_t min, max;

if (!vdev_resilver_needed(vd, &min, &max))
return;

if (!dsl_scan_resilvering(dp)) {
spa_async_request(dp->dp_spa, SPA_ASYNC_RESILVER);
return;
}

if (max <= dp->dp_scan->scn_phys.scn_max_txg)
return;

/* restart is needed, check if it can be deferred */
if (spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER))
vdev_defer_resilver(vd);
else
spa_async_request(dp->dp_spa, SPA_ASYNC_RESILVER);
}

#if defined(_KERNEL)
/* CSTYLED */
module_param(zfs_scan_vdev_limit, ulong, 0644);
14 changes: 10 additions & 4 deletions module/zfs/spa.c
@@ -29,7 +29,7 @@
* Copyright 2016 Toomas Soome <tsoome@me.com>
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
* Copyright 2018 Joyent, Inc.
* Copyright (c) 2017 Datto Inc.
* Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
* Copyright 2017 Joyent, Inc.
* Copyright (c) 2017, Intel Corporation.
*/
@@ -6242,9 +6242,9 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
*/
if (dsl_scan_resilvering(spa_get_dsl(spa)) &&
spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
vdev_set_deferred_resilver(spa, newvd);
vdev_defer_resilver(newvd);
else
dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg);
dsl_scan_restart_resilver(spa->spa_dsl_pool, dtl_max_txg);

if (spa->spa_bootfs)
spa_event_notify(spa, newvd, NULL, ESC_ZFS_BOOTFS_VDEV_ATTACH);
@@ -7479,7 +7479,7 @@ spa_async_thread(void *arg)
if (tasks & SPA_ASYNC_RESILVER &&
(!dsl_scan_resilvering(dp) ||
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)))
dsl_resilver_restart(dp, 0);
dsl_scan_restart_resilver(dp, 0);

if (tasks & SPA_ASYNC_INITIALIZE_RESTART) {
mutex_enter(&spa_namespace_lock);
@@ -7595,6 +7595,12 @@ spa_async_request(spa_t *spa, int task)
mutex_exit(&spa->spa_async_lock);
}

int
spa_async_tasks(spa_t *spa)
{
return (spa->spa_async_tasks);
}

/*
* ==========================================================================
* SPA syncing routines
76 changes: 48 additions & 28 deletions module/zfs/vdev.c
@@ -27,6 +27,7 @@
* Copyright 2016 Toomas Soome <tsoome@me.com>
* Copyright 2017 Joyent, Inc.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019, Datto Inc. All rights reserved.
*/

#include <sys/zfs_context.h>
@@ -833,7 +834,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
&vd->vdev_resilver_txg);

if (nvlist_exists(nv, ZPOOL_CONFIG_RESILVER_DEFER))
vdev_set_deferred_resilver(spa, vd);
vdev_defer_resilver(vd);

/*
* In general, when importing a pool we want to ignore the
@@ -1863,18 +1864,12 @@ vdev_open(vdev_t *vd)
}

/*
* If a leaf vdev has a DTL, and seems healthy, then kick off a
* resilver. But don't do this if we are doing a reopen for a scrub,
* since this would just restart the scrub we are already doing.
* If this is a leaf vdev, assess whether a resilver is needed.
* But don't do this if we are doing a reopen for a scrub, since
* this would just restart the scrub we are already doing.
*/
if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen &&
vdev_resilver_needed(vd, NULL, NULL)) {
if (dsl_scan_resilvering(spa->spa_dsl_pool) &&
spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
vdev_set_deferred_resilver(spa, vd);
else
spa_async_request(spa, SPA_ASYNC_RESILVER);
}
if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen)
dsl_scan_assess_vdev(spa->spa_dsl_pool, vd);

return (0);
}
@@ -3693,14 +3688,11 @@ vdev_clear(spa_t *spa, vdev_t *vd)
if (vd != rvd && vdev_writeable(vd->vdev_top))
vdev_state_dirty(vd->vdev_top);

if (vd->vdev_aux == NULL && !vdev_is_dead(vd)) {
if (dsl_scan_resilvering(spa->spa_dsl_pool) &&
spa_feature_is_enabled(spa,
SPA_FEATURE_RESILVER_DEFER))
vdev_set_deferred_resilver(spa, vd);
else
spa_async_request(spa, SPA_ASYNC_RESILVER);
}
/* If a resilver isn't required, check if vdevs can be culled */
if (vd->vdev_aux == NULL && !vdev_is_dead(vd) &&
!dsl_scan_resilvering(spa->spa_dsl_pool) &&
!dsl_scan_resilver_scheduled(spa->spa_dsl_pool))
spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);

spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_CLEAR);
}
@@ -4693,18 +4685,46 @@ vdev_deadman(vdev_t *vd, char *tag)
}

void
vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd)
{
for (uint64_t i = 0; i < vd->vdev_children; i++)
vdev_set_deferred_resilver(spa, vd->vdev_child[i]);

if (!vd->vdev_ops->vdev_op_leaf || !vdev_writeable(vd) ||
range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) {
return;
}

vd->vdev_resilver_deferred = B_TRUE;
spa->spa_resilver_deferred = B_TRUE;
}
void
vdev_defer_resilver(vdev_t *vd)
{
ASSERT(vd->vdev_ops->vdev_op_leaf);

vd->vdev_resilver_deferred = B_TRUE;
vd->vdev_spa->spa_resilver_deferred = B_TRUE;
}

/*
* Clears the resilver deferred flag on all leaf devs under vd. Returns
* B_TRUE if we have devices that need to be resilvered and are available to
* accept resilver I/Os.
*/
boolean_t
vdev_clear_resilver_deferred(vdev_t *vd, dmu_tx_t *tx)
{
boolean_t resilver_needed = B_FALSE;
spa_t *spa = vd->vdev_spa;

for (int c = 0; c < vd->vdev_children; c++) {
vdev_t *cvd = vd->vdev_child[c];
resilver_needed |= vdev_clear_resilver_deferred(cvd, tx);
}

spa_feature_is_active(spa, SPA_FEATURE_RESILVER_DEFER)) {
spa_feature_decr(spa, SPA_FEATURE_RESILVER_DEFER, tx);
vdev_config_dirty(vd);
spa->spa_resilver_deferred = B_FALSE;
return (resilver_needed);
}

if (!vdev_is_concrete(vd) || vd->vdev_aux ||
!vd->vdev_ops->vdev_op_leaf)
return (resilver_needed);

vd->vdev_resilver_deferred = B_FALSE;

return (!vdev_is_dead(vd) && !vd->vdev_offline &&
vdev_resilver_needed(vd, NULL, NULL));
}

/*
4 changes: 4 additions & 0 deletions tests/runfiles/linux.run
@@ -796,6 +796,10 @@ tests = ['reservation_001_pos', 'reservation_002_pos', 'reservation_003_pos',
'reservation_022_pos']
tags = ['functional', 'reservation']

[tests/functional/resilver]
tests = ['resilver_restart_001']
tags = ['functional', 'resilver']

[tests/functional/rootpool]
tests = ['rootpool_002_neg', 'rootpool_003_neg', 'rootpool_007_pos']
tags = ['functional', 'rootpool']
1 change: 1 addition & 0 deletions tests/zfs-tests/tests/functional/Makefile.am
@@ -59,6 +59,7 @@ SUBDIRS = \
rename_dirs \
replacement \
reservation \
resilver \
rootpool \
rsend \
scrub_mirror \