Skip to content

Commit

Permalink
Implemented zpool scrub pause/resume
Browse files Browse the repository at this point in the history
Currently, there is no way to pause a scrub. Pausing may
be useful to preserve bandwidth when the pool is busy with
other I/O.

This patch adds the ability to pause and resume scrubbing.
This is achieved by maintaining a persistent on-disk scrub state.
While the state is 'paused' we do not scrub any more blocks.
We do however perform regular scan housekeeping such as
freeing async destroyed and deadlist blocks while paused.

Signed-off-by: Alek Pinchuk <apinchuk@datto.com>
  • Loading branch information
alek-p committed Jul 3, 2017
1 parent fe46eeb commit 388a9ab
Show file tree
Hide file tree
Showing 17 changed files with 362 additions and 124 deletions.
60 changes: 45 additions & 15 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ get_usage(zpool_help_t idx)
case HELP_REOPEN:
return (gettext("\treopen <pool>\n"));
case HELP_SCRUB:
return (gettext("\tscrub [-s] <pool> ...\n"));
return (gettext("\tscrub [-s | -p] <pool> ...\n"));
case HELP_STATUS:
return (gettext("\tstatus [-c [script1,script2,...]] [-gLPvxD]"
"[-T d|u] [pool] ... [interval [count]]\n"));
Expand Down Expand Up @@ -5759,6 +5759,7 @@ typedef struct scrub_cbdata {
int cb_type;
int cb_argc;
char **cb_argv;
pool_scrub_cmd_t cb_scrub_cmd;
} scrub_cbdata_t;

int
Expand All @@ -5776,15 +5777,16 @@ scrub_callback(zpool_handle_t *zhp, void *data)
return (1);
}

err = zpool_scan(zhp, cb->cb_type);
err = zpool_scan(zhp, cb->cb_type, cb->cb_scrub_cmd);

return (err != 0);
}

/*
* zpool scrub [-s] <pool> ...
* zpool scrub [-s | -p] <pool> ...
*
* -s Stop. Stops any in-progress scrub.
* -p Pause. Pause in-progress scrub.
*/
int
zpool_do_scrub(int argc, char **argv)
Expand All @@ -5793,20 +5795,31 @@ zpool_do_scrub(int argc, char **argv)
scrub_cbdata_t cb;

cb.cb_type = POOL_SCAN_SCRUB;
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;

/* check options */
while ((c = getopt(argc, argv, "s")) != -1) {
while ((c = getopt(argc, argv, "sp")) != -1) {
switch (c) {
case 's':
cb.cb_type = POOL_SCAN_NONE;
break;
case 'p':
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
usage(B_FALSE);
}
}

if (cb.cb_type == POOL_SCAN_NONE &&
cb.cb_scrub_cmd == POOL_SCRUB_PAUSE) {
(void) fprintf(stderr, gettext("invalid option combination: "
"-s and -p are mutually exclusive\n"));
usage(B_FALSE);
}

cb.cb_argc = argc;
cb.cb_argv = argv;
argc -= optind;
Expand All @@ -5826,7 +5839,7 @@ zpool_do_scrub(int argc, char **argv)
void
print_scan_status(pool_scan_stat_t *ps)
{
time_t start, end;
time_t start, end, pause;
uint64_t elapsed, mins_left, hours_left;
uint64_t pass_exam, examined, total;
uint_t rate;
Expand All @@ -5844,6 +5857,7 @@ print_scan_status(pool_scan_stat_t *ps)

start = ps->pss_start_time;
end = ps->pss_end_time;
pause = ps->pss_pass_scrub_pause;
zfs_nicebytes(ps->pss_processed, processed_buf, sizeof (processed_buf));

assert(ps->pss_func == POOL_SCAN_SCRUB ||
Expand Down Expand Up @@ -5886,8 +5900,17 @@ print_scan_status(pool_scan_stat_t *ps)
* Scan is in progress.
*/
if (ps->pss_func == POOL_SCAN_SCRUB) {
(void) printf(gettext("scrub in progress since %s"),
ctime(&start));
if (pause == 0) {
(void) printf(gettext("scrub in progress since %s"),
ctime(&start));
} else {
char buf[32];
struct tm *p = localtime(&pause);
(void) strftime(buf, sizeof (buf), "%a %b %e %T %Y", p);
(void) printf(gettext("scrub paused since %s\n"), buf);
(void) printf(gettext("\tscrub started on %s"),
ctime(&start));
}
} else if (ps->pss_func == POOL_SCAN_RESILVER) {
(void) printf(gettext("resilver in progress since %s"),
ctime(&start));
Expand All @@ -5899,6 +5922,7 @@ print_scan_status(pool_scan_stat_t *ps)

/* elapsed time for this pass */
elapsed = time(NULL) - ps->pss_pass_start;
elapsed -= ps->pss_pass_scrub_spent_paused;
elapsed = elapsed ? elapsed : 1;
pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1;
rate = pass_exam / elapsed;
Expand All @@ -5908,19 +5932,25 @@ print_scan_status(pool_scan_stat_t *ps)

zfs_nicebytes(examined, examined_buf, sizeof (examined_buf));
zfs_nicebytes(total, total_buf, sizeof (total_buf));
zfs_nicebytes(rate, rate_buf, sizeof (rate_buf));

/*
* do not print estimated time if hours_left is more than 30 days
* or we have a paused scrub
*/
(void) printf(gettext("\t%s scanned out of %s at %s/s"),
examined_buf, total_buf, rate_buf);
if (hours_left < (30 * 24)) {
(void) printf(gettext(", %lluh%um to go\n"),
(u_longlong_t)hours_left, (uint_t)(mins_left % 60));
if (pause == 0) {
zfs_nicebytes(rate, rate_buf, sizeof (rate_buf));
(void) printf(gettext("\t%s scanned out of %s at %s/s"),
examined_buf, total_buf, rate_buf);
if (hours_left < (30 * 24)) {
(void) printf(gettext(", %lluh%um to go\n"),
(u_longlong_t)hours_left, (uint_t)(mins_left % 60));
} else {
(void) printf(gettext(
", (scan is slow, no estimated time)\n"));
}
} else {
(void) printf(gettext(
", (scan is slow, no estimated time)\n"));
(void) printf(gettext("\t%s scanned out of %s\n"),
examined_buf, total_buf);
}

if (ps->pss_func == POOL_SCAN_RESILVER) {
Expand Down
3 changes: 2 additions & 1 deletion include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ typedef enum zfs_error {
EZFS_DIFF, /* general failure of zfs diff */
EZFS_DIFFDATA, /* bad zfs diff data */
EZFS_POOLREADONLY, /* pool is in read-only mode */
EZFS_SCRUB_PAUSED, /* scrub currently paused */
EZFS_UNKNOWN
} zfs_error_t;

Expand Down Expand Up @@ -260,7 +261,7 @@ typedef struct splitflags {
/*
* Functions to manipulate pool and vdev state
*/
extern int zpool_scan(zpool_handle_t *, pool_scan_func_t);
extern int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t);
extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *);
extern int zpool_reguid(zpool_handle_t *);
extern int zpool_reopen(zpool_handle_t *);
Expand Down
13 changes: 8 additions & 5 deletions include/sys/dsl_scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2017 Datto Inc.
*/

#ifndef _SYS_DSL_SCAN_H
Expand Down Expand Up @@ -70,6 +71,7 @@ typedef struct dsl_scan_phys {

/*
 * Scan flag bits. Every enumerator is a distinct single-bit value.
 */
typedef enum dsl_scan_flags {
	DSF_VISIT_DS_AGAIN = 0x1,	/* bit 0 */
	/* bit 1; presumably set while a scrub is paused -- TODO confirm */
	DSF_SCRUB_PAUSED = 0x2,
} dsl_scan_flags_t;

#define DSL_SCAN_FLAGS_MASK (DSF_VISIT_DS_AGAIN)
Expand All @@ -84,8 +86,8 @@ typedef enum dsl_scan_flags {
*
* The following members of this structure direct the behavior of the scan:
*
* scn_pausing - a scan that cannot be completed in a single txg or
* has exceeded its allotted time will need to pause.
* scn_suspending - a scan that cannot be completed in a single txg or
* has exceeded its allotted time will need to suspend.
* When this flag is set the scanner will stop traversing
* the pool and write out the current state to disk.
*
Expand All @@ -107,7 +109,7 @@ typedef enum dsl_scan_flags {
typedef struct dsl_scan {
struct dsl_pool *scn_dp;

boolean_t scn_pausing;
boolean_t scn_suspending;
uint64_t scn_restart_txg;
uint64_t scn_done_txg;
uint64_t scn_sync_start_time;
Expand All @@ -117,8 +119,6 @@ typedef struct dsl_scan {
boolean_t scn_is_bptree;
boolean_t scn_async_destroying;
boolean_t scn_async_stalled;

/* for debugging / information */
uint64_t scn_visited_this_txg;

dsl_scan_phys_t scn_phys;
Expand All @@ -129,6 +129,8 @@ void dsl_scan_fini(struct dsl_pool *dp);
void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
int dsl_scan_cancel(struct dsl_pool *);
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);
void dsl_resilver_restart(struct dsl_pool *, uint64_t txg);
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
Expand All @@ -139,6 +141,7 @@ void dsl_scan_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx);
void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
struct dmu_tx *tx);
boolean_t dsl_scan_active(dsl_scan_t *scn);
boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn);

#ifdef __cplusplus
}
Expand Down
13 changes: 13 additions & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -765,6 +765,16 @@ typedef enum pool_scan_func {
POOL_SCAN_FUNCS
} pool_scan_func_t;

/*
 * Scrub command passed along with a scrub request; it controls
 * pausing and resuming of a scrub.
 */
typedef enum pool_scrub_cmd {
	POOL_SCRUB_NORMAL = 0,		/* regular scrub request */
	POOL_SCRUB_PAUSE = 1,		/* pause the scrub */
	POOL_SCRUB_FLAGS_END = 2	/* presumably an end-of-list marker */
} pool_scrub_cmd_t;


/*
* ZIO types. Needed to interpret vdev statistics below.
*/
Expand Down Expand Up @@ -797,6 +807,9 @@ typedef struct pool_scan_stat {
/* values not stored on disk */
uint64_t pss_pass_exam; /* examined bytes per scan pass */
uint64_t pss_pass_start; /* start time of a scan pass */
uint64_t pss_pass_scrub_pause; /* pause time of a scrub pass */
/* cumulative time scrub spent paused, needed for rate calculation */
uint64_t pss_pass_scrub_spent_paused;
} pool_scan_stat_t;

typedef enum dsl_scan_state {
Expand Down
2 changes: 2 additions & 0 deletions include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2017 Datto Inc.
*/

#ifndef _SYS_SPA_H
Expand Down Expand Up @@ -657,6 +658,7 @@ extern void spa_l2cache_drop(spa_t *spa);
/* scanning */
extern int spa_scan(spa_t *spa, pool_scan_func_t func);
extern int spa_scan_stop(spa_t *spa);
extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag);

/* spa syncing */
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
Expand Down
3 changes: 3 additions & 0 deletions include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
* Copyright (c) 2017 Datto Inc.
*/

#ifndef _SYS_SPA_IMPL_H
Expand Down Expand Up @@ -193,6 +194,8 @@ struct spa {
uint8_t spa_scrub_started; /* started since last boot */
uint8_t spa_scrub_reopen; /* scrub doing vdev_reopen */
uint64_t spa_scan_pass_start; /* start time per pass/reboot */
uint64_t spa_scan_pass_scrub_pause; /* scrub pause time */
uint64_t spa_scan_pass_scrub_spent_paused; /* total paused */
uint64_t spa_scan_pass_exam; /* examined bytes per pass */
kmutex_t spa_async_lock; /* protect async state */
kthread_t *spa_async_thread; /* thread doing async task */
Expand Down
43 changes: 32 additions & 11 deletions lib/libzfs/libzfs_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -1898,22 +1898,39 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
* Scan the pool.
*/
int
zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func)
zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
{
zfs_cmd_t zc = {"\0"};
char msg[1024];
int err;
libzfs_handle_t *hdl = zhp->zpool_hdl;

(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
zc.zc_cookie = func;
zc.zc_flags = cmd;

if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0)
return (0);

err = errno;

if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 ||
(errno == ENOENT && func != POOL_SCAN_NONE))
/* ECANCELED on a scrub means we resumed a paused scrub */
if (err == ECANCELED && func == POOL_SCAN_SCRUB &&
cmd == POOL_SCRUB_NORMAL)
return (0);

if (err == ENOENT && func != POOL_SCAN_NONE && cmd == POOL_SCRUB_NORMAL)
return (0);

if (func == POOL_SCAN_SCRUB) {
(void) snprintf(msg, sizeof (msg),
dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
if (cmd == POOL_SCRUB_PAUSE) {
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
"cannot pause scrubbing %s"), zc.zc_name);
} else {
assert(cmd == POOL_SCRUB_NORMAL);
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
"cannot scrub %s"), zc.zc_name);
}
} else if (func == POOL_SCAN_NONE) {
(void) snprintf(msg, sizeof (msg),
dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
Expand All @@ -1922,7 +1939,7 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func)
assert(!"unexpected result");
}

if (errno == EBUSY) {
if (err == EBUSY) {
nvlist_t *nvroot;
pool_scan_stat_t *ps = NULL;
uint_t psc;
Expand All @@ -1931,14 +1948,18 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func)
ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
(void) nvlist_lookup_uint64_array(nvroot,
ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
if (ps && ps->pss_func == POOL_SCAN_SCRUB)
return (zfs_error(hdl, EZFS_SCRUBBING, msg));
else
if (ps && ps->pss_func == POOL_SCAN_SCRUB) {
if (cmd == POOL_SCRUB_PAUSE)
return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg));
else
return (zfs_error(hdl, EZFS_SCRUBBING, msg));
} else {
return (zfs_error(hdl, EZFS_RESILVERING, msg));
} else if (errno == ENOENT) {
}
} else if (err == ENOENT) {
return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
} else {
return (zpool_standard_error(hdl, errno, msg));
return (zpool_standard_error(hdl, err, msg));
}
}

Expand Down
4 changes: 4 additions & 0 deletions lib/libzfs/libzfs_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
* Copyright (c) 2017 Datto Inc.
*/

/*
Expand Down Expand Up @@ -246,6 +247,9 @@ libzfs_error_description(libzfs_handle_t *hdl)
case EZFS_POSTSPLIT_ONLINE:
return (dgettext(TEXT_DOMAIN, "disk was split from this pool "
"into a new one"));
case EZFS_SCRUB_PAUSED:
return (dgettext(TEXT_DOMAIN, "scrub is paused; "
"use 'zpool scrub' to resume"));
case EZFS_SCRUBBING:
return (dgettext(TEXT_DOMAIN, "currently scrubbing; "
"use 'zpool scrub -s' to cancel current scrub"));
Expand Down
Loading

0 comments on commit 388a9ab

Please sign in to comment.