Skip to content

Commit

Permalink
Implemented zpool scrub pause/resume
Browse files Browse the repository at this point in the history
Currently, there is no way to pause a scrub. Pausing may
be useful to preserve bandwidth when the pool is busy with
other I/O.

This patch adds the ability to pause and resume scrubbing.
This is achieved by maintaining an in-memory scrub state.
While the state is 'paused' we do not scrub any more blocks.
We do however perform regular scan housekeeping such as
freeing async destroyed and deadlist blocks while paused.

This patch also adds a -p option to 'zpool import', which allows
one to pause an ongoing scrub instead of resuming it on import.

Signed-off-by: Alek Pinchuk <apinchuk@datto.com>
  • Loading branch information
alek-p committed Jun 5, 2017
1 parent 4358afa commit bf05fdd
Show file tree
Hide file tree
Showing 20 changed files with 390 additions and 83 deletions.
70 changes: 52 additions & 18 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -312,13 +312,14 @@ get_usage(zpool_help_t idx)
return (gettext("\thistory [-il] [<pool>] ...\n"));
case HELP_IMPORT:
return (gettext("\timport [-d dir] [-D]\n"
"\timport [-d dir | -c cachefile] [-F [-n]] <pool | id>\n"
"\timport [-d dir | -c cachefile] [-F [-n]] [-p] "
"<pool | id>\n"
"\timport [-o mntopts] [-o property=value] ... \n"
"\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] "
"[-R root] [-F [-n]] -a\n"
"[-R root] [-F [-n]] [-p] -a\n"
"\timport [-o mntopts] [-o property=value] ... \n"
"\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] "
"[-R root] [-F [-n]]\n"
"[-R root] [-F [-n]] [-p]\n"
"\t <pool | id> [newpool]\n"));
case HELP_IOSTAT:
return (gettext("\tiostat [[[-c [script1,script2,...]"
Expand All @@ -342,7 +343,7 @@ get_usage(zpool_help_t idx)
case HELP_REOPEN:
return (gettext("\treopen <pool>\n"));
case HELP_SCRUB:
return (gettext("\tscrub [-s] <pool> ...\n"));
return (gettext("\tscrub [-s | -p] <pool> ...\n"));
case HELP_STATUS:
return (gettext("\tstatus [-c [script1,script2,...]] [-gLPvxD]"
"[-T d|u] [pool] ... [interval [count]]\n"));
Expand Down Expand Up @@ -2300,6 +2301,8 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
*
* -o Set property=value and/or temporary mount options (without '=').
*
* -p If the pool had an ongoing scrub - pause it instead of resuming.
*
* -s Scan using the default search path, the libblkid cache will
* not be consulted.
*
Expand Down Expand Up @@ -2339,7 +2342,7 @@ zpool_do_import(int argc, char **argv)
char *endptr;

/* check options */
while ((c = getopt(argc, argv, ":aCc:d:DEfFmnNo:R:stT:VX")) != -1) {
while ((c = getopt(argc, argv, ":aCc:d:DEfFmnNo:pR:stT:VX")) != -1) {
switch (c) {
case 'a':
do_all = B_TRUE;
Expand Down Expand Up @@ -2389,6 +2392,9 @@ zpool_do_import(int argc, char **argv)
mntopts = optarg;
}
break;
case 'p':
flags |= ZFS_IMPORT_SCRUB_PAUSE;
break;
case 'R':
if (add_prop_list(zpool_prop_to_name(
ZPOOL_PROP_ALTROOT), optarg, &props, B_TRUE))
Expand Down Expand Up @@ -5737,6 +5743,7 @@ typedef struct scrub_cbdata {
int cb_type;
int cb_argc;
char **cb_argv;
uint8_t cb_scrub_cmd;
} scrub_cbdata_t;

int
Expand All @@ -5754,15 +5761,16 @@ scrub_callback(zpool_handle_t *zhp, void *data)
return (1);
}

err = zpool_scan(zhp, cb->cb_type);
err = zpool_scan(zhp, cb->cb_type, cb->cb_scrub_cmd);

return (err != 0);
}

/*
* zpool scrub [-s] <pool> ...
* zpool scrub [-s | -p] <pool> ...
*
* -s Stop. Stops any in-progress scrub.
* -p Pause. Pause in-progress scrub.
*/
int
zpool_do_scrub(int argc, char **argv)
Expand All @@ -5771,20 +5779,31 @@ zpool_do_scrub(int argc, char **argv)
scrub_cbdata_t cb;

cb.cb_type = POOL_SCAN_SCRUB;
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;

/* check options */
while ((c = getopt(argc, argv, "s")) != -1) {
while ((c = getopt(argc, argv, "sp")) != -1) {
switch (c) {
case 's':
cb.cb_type = POOL_SCAN_NONE;
break;
case 'p':
cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
usage(B_FALSE);
}
}

if (cb.cb_type == POOL_SCAN_NONE &&
cb.cb_scrub_cmd == POOL_SCRUB_PAUSE) {
(void) fprintf(stderr, gettext("invalid option combination: "
"-s and -p are mutually exclusive\n"));
usage(B_FALSE);
}

cb.cb_argc = argc;
cb.cb_argv = argv;
argc -= optind;
Expand All @@ -5804,7 +5823,7 @@ zpool_do_scrub(int argc, char **argv)
void
print_scan_status(pool_scan_stat_t *ps)
{
time_t start, end;
time_t start, end, pause;
uint64_t elapsed, mins_left, hours_left;
uint64_t pass_exam, examined, total;
uint_t rate;
Expand All @@ -5822,6 +5841,7 @@ print_scan_status(pool_scan_stat_t *ps)

start = ps->pss_start_time;
end = ps->pss_end_time;
pause = ps->pss_pause_time;
zfs_nicebytes(ps->pss_processed, processed_buf, sizeof (processed_buf));

assert(ps->pss_func == POOL_SCAN_SCRUB ||
Expand Down Expand Up @@ -5864,8 +5884,16 @@ print_scan_status(pool_scan_stat_t *ps)
* Scan is in progress.
*/
if (ps->pss_func == POOL_SCAN_SCRUB) {
(void) printf(gettext("scrub in progress since %s"),
ctime(&start));
if (pause == 0) {
(void) printf(gettext("scrub in progress since %s"),
ctime(&start));
} else {
char buf[32];
struct tm *p = localtime(&pause);
(void) strftime(buf, sizeof (buf), "%a %b %e %T %Y", p);
(void) printf(gettext("scrub paused since %s; started "
"on %s"), buf, ctime(&start));
}
} else if (ps->pss_func == POOL_SCAN_RESILVER) {
(void) printf(gettext("resilver in progress since %s"),
ctime(&start));
Expand All @@ -5890,15 +5918,21 @@ print_scan_status(pool_scan_stat_t *ps)

/*
* do not print estimated time if hours_left is more than 30 days
* or we have a paused scrub
*/
(void) printf(gettext("\t%s scanned out of %s at %s/s"),
examined_buf, total_buf, rate_buf);
if (hours_left < (30 * 24)) {
(void) printf(gettext(", %lluh%um to go\n"),
(u_longlong_t)hours_left, (uint_t)(mins_left % 60));
if (pause == 0) {
(void) printf(gettext("\t%s scanned out of %s at %s/s"),
examined_buf, total_buf, rate_buf);
if (hours_left < (30 * 24)) {
(void) printf(gettext(", %lluh%um to go\n"),
(u_longlong_t)hours_left, (uint_t)(mins_left % 60));
} else {
(void) printf(gettext(
", (scan is slow, no estimated time)\n"));
}
} else {
(void) printf(gettext(
", (scan is slow, no estimated time)\n"));
(void) printf(gettext("\t%s scanned out of %s\n"),
examined_buf, total_buf);
}

if (ps->pss_func == POOL_SCAN_RESILVER) {
Expand Down
3 changes: 2 additions & 1 deletion include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ typedef enum zfs_error {
EZFS_DIFF, /* general failure of zfs diff */
EZFS_DIFFDATA, /* bad zfs diff data */
EZFS_POOLREADONLY, /* pool is in read-only mode */
EZFS_SCRUB_PAUSED, /* scrub currently paused */
EZFS_UNKNOWN
} zfs_error_t;

Expand Down Expand Up @@ -260,7 +261,7 @@ typedef struct splitflags {
/*
* Functions to manipulate pool and vdev state
*/
extern int zpool_scan(zpool_handle_t *, pool_scan_func_t);
extern int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t);
extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *);
extern int zpool_reguid(zpool_handle_t *);
extern int zpool_reopen(zpool_handle_t *);
Expand Down
16 changes: 11 additions & 5 deletions include/sys/dsl_scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2017 Datto Inc.
*/

#ifndef _SYS_DSL_SCAN_H
Expand Down Expand Up @@ -84,8 +85,8 @@ typedef enum dsl_scan_flags {
*
* The following members of this structure direct the behavior of the scan:
*
* scn_pausing - a scan that cannot be completed in a single txg or
* has exceeded its allotted time will need to pause.
* scn_suspending - a scan that cannot be completed in a single txg or
* has exceeded its allotted time will need to suspend.
* When this flag is set the scanner will stop traversing
* the pool and write out the current state to disk.
*
Expand All @@ -98,6 +99,8 @@ typedef enum dsl_scan_flags {
* the scan but have not yet been processed (i.e deferred
* frees) are accounted for.
*
* scn_scrub_state - state of the scrub, can be either normal or paused
*
* This structure also maintains information about deferred frees which are
* a special kind of traversal. Deferred free can exist in either a bptree or
* a bpobj structure. The scn_is_bptree flag will indicate the type of
Expand All @@ -107,7 +110,9 @@ typedef enum dsl_scan_flags {
typedef struct dsl_scan {
struct dsl_pool *scn_dp;

boolean_t scn_pausing;
boolean_t scn_suspending;
uint8_t scn_scrub_state;
uint64_t scn_scrub_pause_time;
uint64_t scn_restart_txg;
uint64_t scn_done_txg;
uint64_t scn_sync_start_time;
Expand All @@ -117,8 +122,6 @@ typedef struct dsl_scan {
boolean_t scn_is_bptree;
boolean_t scn_async_destroying;
boolean_t scn_async_stalled;

/* for debugging / information */
uint64_t scn_visited_this_txg;

dsl_scan_phys_t scn_phys;
Expand All @@ -129,6 +132,8 @@ void dsl_scan_fini(struct dsl_pool *dp);
void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
int dsl_scan_cancel(struct dsl_pool *);
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);
void dsl_resilver_restart(struct dsl_pool *, uint64_t txg);
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
Expand All @@ -139,6 +144,7 @@ void dsl_scan_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx);
void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
struct dmu_tx *tx);
boolean_t dsl_scan_active(dsl_scan_t *scn);
boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn);

#ifdef __cplusplus
}
Expand Down
12 changes: 12 additions & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -765,6 +765,16 @@ typedef enum pool_scan_func {
POOL_SCAN_FUNCS
} pool_scan_func_t;

/*
 * Scrub control commands.  One of these accompanies a scan request to
 * select between running the scrub normally and pausing it.
 */
typedef enum pool_scrub_cmd {
	POOL_SCRUB_NORMAL = 0,	/* run (or resume) the scrub as usual */
	POOL_SCRUB_PAUSE = 1,	/* pause the in-progress scrub */
	POOL_SCRUB_FLAGS_END	/* presumably an end-of-list sentinel; confirm */
} pool_scrub_cmd_t;


/*
* ZIO types. Needed to interpret vdev statistics below.
*/
Expand All @@ -788,6 +798,7 @@ typedef struct pool_scan_stat {
uint64_t pss_state; /* dsl_scan_state_t */
uint64_t pss_start_time; /* scan start time */
uint64_t pss_end_time; /* scan end time */
uint64_t pss_pause_time; /* scrub pause time */
uint64_t pss_to_examine; /* total bytes to scan */
uint64_t pss_examined; /* total examined bytes */
uint64_t pss_to_process; /* total bytes to process */
Expand Down Expand Up @@ -1104,6 +1115,7 @@ typedef enum {
#define ZFS_IMPORT_MISSING_LOG 0x4
#define ZFS_IMPORT_ONLY 0x8
#define ZFS_IMPORT_TEMP_NAME 0x10
#define ZFS_IMPORT_SCRUB_PAUSE 0x20

/*
* Sysevent payload members. ZFS will generate the following sysevents with the
Expand Down
2 changes: 2 additions & 0 deletions include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2017 Datto Inc.
*/

#ifndef _SYS_SPA_H
Expand Down Expand Up @@ -657,6 +658,7 @@ extern void spa_l2cache_drop(spa_t *spa);
/* scanning */
extern int spa_scan(spa_t *spa, pool_scan_func_t func);
extern int spa_scan_stop(spa_t *spa);
extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag);

/* spa syncing */
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
Expand Down
27 changes: 20 additions & 7 deletions lib/libzfs/libzfs_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -1898,22 +1898,31 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
* Scan the pool.
*/
int
zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func)
zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
{
zfs_cmd_t zc = {"\0"};
char msg[1024];
libzfs_handle_t *hdl = zhp->zpool_hdl;

(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
zc.zc_cookie = func;
zc.zc_flags = cmd;

/* ERESTART on a scrub means we resumed a paused scrub */
if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 ||
(errno == ENOENT && func != POOL_SCAN_NONE))
(errno == ERESTART && func == POOL_SCAN_SCRUB) ||
(errno == ENOENT && func != POOL_SCAN_NONE &&
cmd == POOL_SCRUB_NORMAL))
return (0);

if (func == POOL_SCAN_SCRUB) {
(void) snprintf(msg, sizeof (msg),
dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
if (cmd == POOL_SCRUB_PAUSE) {
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
"cannot pause scrubbing %s"), zc.zc_name);
} else {
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
"cannot scrub %s"), zc.zc_name);
}
} else if (func == POOL_SCAN_NONE) {
(void) snprintf(msg, sizeof (msg),
dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
Expand All @@ -1931,10 +1940,14 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func)
ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
(void) nvlist_lookup_uint64_array(nvroot,
ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
if (ps && ps->pss_func == POOL_SCAN_SCRUB)
return (zfs_error(hdl, EZFS_SCRUBBING, msg));
else
if (ps && ps->pss_func == POOL_SCAN_SCRUB) {
if (cmd == POOL_SCRUB_PAUSE)
return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg));
else
return (zfs_error(hdl, EZFS_SCRUBBING, msg));
} else {
return (zfs_error(hdl, EZFS_RESILVERING, msg));
}
} else if (errno == ENOENT) {
return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
} else {
Expand Down
4 changes: 4 additions & 0 deletions lib/libzfs/libzfs_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
* Copyright (c) 2017 Datto Inc.
*/

/*
Expand Down Expand Up @@ -246,6 +247,9 @@ libzfs_error_description(libzfs_handle_t *hdl)
case EZFS_POSTSPLIT_ONLINE:
return (dgettext(TEXT_DOMAIN, "disk was split from this pool "
"into a new one"));
case EZFS_SCRUB_PAUSED:
return (dgettext(TEXT_DOMAIN, "scrub currently paused; "
"use 'zpool scrub' to resume scrubbing"));
case EZFS_SCRUBBING:
return (dgettext(TEXT_DOMAIN, "currently scrubbing; "
"use 'zpool scrub -s' to cancel current scrub"));
Expand Down
Loading

0 comments on commit bf05fdd

Please sign in to comment.