Skip to content

Commit

Permalink
Cleanup raidz expand pause variable usage
Browse files Browse the repository at this point in the history
Signed-off-by: Don Brady <dev.fs.zfs@gmail.com>
  • Loading branch information
don-brady committed Aug 29, 2023
1 parent e661c08 commit 34cdb40
Show file tree
Hide file tree
Showing 8 changed files with 74 additions and 58 deletions.
29 changes: 16 additions & 13 deletions cmd/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,8 @@ extern uint_t dmu_object_alloc_chunk_shift;
extern boolean_t zfs_force_some_double_word_sm_entries;
extern unsigned long zio_decompress_fail_fraction;
extern unsigned long zfs_reconstruct_indirect_damage_fraction;
extern uint64_t raidz_expand_max_offset_pause;
/*
 * Defined in module/zfs/vdev_raidz.c as "unsigned long" (it backs a ULONG
 * module parameter), so it must be declared with that same type here.
 */
extern unsigned long raidz_expand_max_reflow_bytes;
extern uint_t raidz_expand_pause_point;


static ztest_shared_opts_t *ztest_shared_opts;
Expand Down Expand Up @@ -1226,21 +1227,21 @@ ztest_kill(ztest_shared_t *zs)
* Before we kill ourselves, make sure that the config is updated.
* See comment above spa_write_cachefile().
*/
if (raidz_expand_max_offset_pause) {
if (raidz_expand_pause_point != RAIDZ_EXPAND_PAUSE_NONE) {
if (mutex_tryenter(&spa_namespace_lock)) {
spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE,
B_FALSE);
mutex_exit(&spa_namespace_lock);

ztest_scratch_state->zs_raidz_scratch_verify_pause =
raidz_expand_max_offset_pause;
raidz_expand_pause_point;
} else {
/*
* Do not verify the scratch object if
* spa_namespace_lock cannot be acquired,
* as that can cause a deadlock in spa_config_update().
*/
raidz_expand_max_offset_pause = 0;
raidz_expand_pause_point = RAIDZ_EXPAND_PAUSE_NONE;

return;
}
Expand Down Expand Up @@ -3949,7 +3950,7 @@ raidz_scratch_verify(void)
vdev_raidz_expand_t *vre;
vdev_t *raidvd;

ASSERT(raidz_expand_max_offset_pause == 0);
ASSERT(raidz_expand_pause_point == RAIDZ_EXPAND_PAUSE_NONE);

if (ztest_scratch_state->zs_raidz_scratch_verify_pause == 0)
return;
Expand Down Expand Up @@ -4031,7 +4032,7 @@ ztest_scratch_thread(void *arg)

/* wait up to 10 seconds */
for (int t = 100; t > 0; t -= 1) {
if (raidz_expand_max_offset_pause == 0)
if (raidz_expand_pause_point == RAIDZ_EXPAND_PAUSE_NONE)
thread_exit();

(void) poll(NULL, 0, 100);
Expand Down Expand Up @@ -4113,14 +4114,14 @@ ztest_vdev_raidz_attach(ztest_ds_t *zd, uint64_t id)
0, 0, 1);

/*
* 50% of the time, set raidz_expand_max_offset_pause to cause
* 50% of the time, set raidz_expand_pause_point to cause
* raidz_reflow_scratch_sync() and vdev_raidz_reflow_copy_scratch()
* to pause at a certain point and then kill the test after 10
* seconds so raidz_scratch_verify() can confirm consistency when
* the pool is imported.
*/
if (ztest_random(2) == 0 && expected_error == 0) {
raidz_expand_max_offset_pause =
raidz_expand_pause_point =
ztest_random(RAIDZ_EXPAND_PAUSE_SCRATCH_NOT_IN_USE) + 1;
scratch_thread = thread_create(NULL, 0, ztest_scratch_thread,
ztest_shared, 0, NULL, TS_RUN | TS_JOINABLE, defclsyspri);
Expand All @@ -4140,13 +4141,13 @@ ztest_vdev_raidz_attach(ztest_ds_t *zd, uint64_t id)
newpath, newsize, error, expected_error);
}

if (raidz_expand_max_offset_pause) {
if (raidz_expand_pause_point) {
if (error != 0) {
/*
* Do not verify scratch object in case of error
* returned by vdev attaching.
*/
raidz_expand_max_offset_pause = 0;
raidz_expand_pause_point = RAIDZ_EXPAND_PAUSE_NONE;
}

VERIFY0(thread_join(scratch_thread));
Expand Down Expand Up @@ -7635,8 +7636,10 @@ ztest_thread(void *arg)
/*
* See if it's time to force a crash.
*/
if (now > zs->zs_thread_kill && !raidz_expand_max_offset_pause)
if (now > zs->zs_thread_kill &&
raidz_expand_pause_point == RAIDZ_EXPAND_PAUSE_NONE) {
ztest_kill(zs);
}

/*
* If we're getting ENOSPC with some regularity, stop.
Expand Down Expand Up @@ -8110,7 +8113,7 @@ ztest_raidz_expand_run(ztest_shared_t *zs, spa_t *spa)
/* Set our reflow target to 25%, 50% or 75% of allocated size */
uint_t multiple = ztest_random(3) + 1;
uint64_t reflow_max = (rzvd->vdev_stat.vs_alloc * multiple) / 4;
raidz_expand_max_offset_pause = reflow_max;
raidz_expand_max_reflow_bytes = reflow_max;

if (ztest_opts.zo_verbose >= 1) {
(void) printf("running raidz expansion test, killing when "
Expand Down Expand Up @@ -8194,7 +8197,7 @@ ztest_raidz_expand_run(ztest_shared_t *zs, spa_t *spa)
}

/* Reset the reflow pause before killing */
raidz_expand_max_offset_pause = 0;
raidz_expand_max_reflow_bytes = 0;

if (ztest_opts.zo_verbose >= 1) {
(void) printf("killing raidz expansion test after reflow "
Expand Down
10 changes: 9 additions & 1 deletion include/sys/vdev_raidz.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,18 @@ extern void spa_start_raidz_expansion_thread(spa_t *);
extern int spa_raidz_expand_get_stats(spa_t *, pool_raidz_expand_stat_t *);
extern int vdev_raidz_load(vdev_t *);

/* Some of raidz scratch area states */
/*
 * RAIDZ scratch area pause points (for testing)
 *
 * raidz_expand_pause_point holds one of these values, and the expansion code
 * passes one of them to raidz_expand_pause() at the corresponding step.
 * RAIDZ_EXPAND_PAUSE_NONE (0) means no pause is requested.
 *
 * The numeric order is significant: raidz_expand_pause() waits while the
 * requested point is nonzero and <= the point being passed, so a request
 * stops the expansion at the first step whose value is >= the request.
 *
 * ztest derives its random pause point from
 * RAIDZ_EXPAND_PAUSE_SCRATCH_NOT_IN_USE, so that is presumably expected to
 * stay the largest value -- confirm before adding new points.
 */
#define RAIDZ_EXPAND_PAUSE_NONE 0
#define RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_1 1
#define RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_2 2
#define RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_3 3
#define RAIDZ_EXPAND_PAUSE_SCRATCH_VALID 4
#define RAIDZ_EXPAND_PAUSE_SCRATCH_REFLOWED 5
#define RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_1 6
#define RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_2 7
#define RAIDZ_EXPAND_PAUSE_SCRATCH_CRASH_COPY_1 8
#define RAIDZ_EXPAND_PAUSE_SCRATCH_CRASH_COPY_2 9
#define RAIDZ_EXPAND_PAUSE_SCRATCH_CRASH_COPY_3 10
#define RAIDZ_EXPAND_PAUSE_SCRATCH_NOT_IN_USE 11

#ifdef __cplusplus
Expand Down
4 changes: 2 additions & 2 deletions man/man4/zfs.4
Original file line number Diff line number Diff line change
Expand Up @@ -428,8 +428,8 @@ active.
Max amount of memory to use for RAID-Z expansion I/O.
This limits how much I/O can be outstanding at once.
.
.It Sy raidz_expand_max_offset_pause Ns = Ns Sy 0 Pq ulong
For testing, pause RAID-Z expansion at this offset.
.It Sy raidz_expand_max_reflow_bytes Ns = Ns Sy 0 Pq ulong
For testing, pause RAID-Z expansion when the amount of reflowed data reaches
this value (in bytes).
.
.It Sy raidz_io_aggregate_rows Ns = Ns Sy 4 Pq ulong
For expanded RAID-Z, aggregate reads that have more rows than this.
Expand Down
47 changes: 26 additions & 21 deletions module/zfs/vdev_raidz.c
Original file line number Diff line number Diff line change
Expand Up @@ -142,13 +142,18 @@
}

/*
* For testing only: logical offset at which to pause the raidz expansion.
* For testing only: pause the raidz expansion after reflowing this amount.
* (accessed by ZTS and ztest)
*/
#ifdef _KERNEL
static
#endif /* _KERNEL */
unsigned long raidz_expand_max_offset_pause = 0;
unsigned long raidz_expand_max_reflow_bytes = 0;

/*
* For testing only: pause the raidz expansion at a certain point.
*/
uint_t raidz_expand_pause_point = 0;

/*
* Maximum amount of copy io's outstanding at once.
Expand Down Expand Up @@ -3771,13 +3776,13 @@ raidz_reflow_impl(vdev_t *vd, vdev_raidz_expand_t *vre, range_tree_t *rt,
}

/*
* For testing.
* For testing (ztest specific): wait here for as long as
* raidz_expand_pause_point is nonzero and <= the given pause_point.
*/
static void
raidz_expand_pause(uint64_t progress)
raidz_expand_pause(uint_t pause_point)
{
while (raidz_expand_max_offset_pause != 0 &&
raidz_expand_max_offset_pause <= progress)
while (raidz_expand_pause_point != 0 &&
raidz_expand_pause_point <= pause_point)
delay(hz);
}

Expand Down Expand Up @@ -3834,7 +3839,7 @@ raidz_reflow_scratch_sync(void *arg, dmu_tx_t *tx)
abds[i] = abd_alloc_linear(read_size, B_FALSE);
}

raidz_expand_pause(1);
raidz_expand_pause(RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_1);

/*
* If we have already written the scratch area then we must read from
Expand Down Expand Up @@ -3893,10 +3898,11 @@ raidz_reflow_scratch_sync(void *arg, dmu_tx_t *tx)
return;
}

raidz_expand_pause(RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_2);

/*
* Reflow in memory.
*/
raidz_expand_pause(2);
uint64_t logical_sectors = logical_size >> ashift;
for (int i = raidvd->vdev_children - 1; i < logical_sectors; i++) {
int oldchild = i % (raidvd->vdev_children - 1);
Expand Down Expand Up @@ -3946,7 +3952,8 @@ raidz_reflow_scratch_sync(void *arg, dmu_tx_t *tx)

zfs_dbgmsg("reflow: wrote %llu bytes (logical) to scratch area",
(long long)logical_size);
raidz_expand_pause(3);

raidz_expand_pause(RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_3);

/*
* Update uberblock to indicate that scratch space is valid. This is
Expand Down Expand Up @@ -4032,7 +4039,7 @@ raidz_reflow_scratch_sync(void *arg, dmu_tx_t *tx)
(long long)logical_size,
(long long)spa->spa_ubsync.ub_timestamp);

raidz_expand_pause(6);
raidz_expand_pause(RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_1);

/*
* Update progress.
Expand All @@ -4050,7 +4057,7 @@ raidz_reflow_scratch_sync(void *arg, dmu_tx_t *tx)
*/
raidz_reflow_sync(spa, tx);

raidz_expand_pause(7);
raidz_expand_pause(RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_2);
}

/*
Expand Down Expand Up @@ -4080,7 +4087,7 @@ vdev_raidz_reflow_copy_scratch(spa_t *spa)
abds[i] = abd_alloc_linear(write_size, B_FALSE);
}

raidz_expand_pause(8);
raidz_expand_pause(RAIDZ_EXPAND_PAUSE_SCRATCH_CRASH_COPY_1);
pio = zio_root(spa, NULL, NULL, 0);
for (int i = 0; i < raidvd->vdev_children; i++) {
/*
Expand All @@ -4095,7 +4102,7 @@ vdev_raidz_reflow_copy_scratch(spa_t *spa)
raidz_scratch_child_done, pio));
}
zio_wait(pio);
raidz_expand_pause(9);
raidz_expand_pause(RAIDZ_EXPAND_PAUSE_SCRATCH_CRASH_COPY_2);

/*
* Overwrite real location with reflow'ed data.
Expand All @@ -4118,7 +4125,7 @@ vdev_raidz_reflow_copy_scratch(spa_t *spa)
for (int i = 0; i < raidvd->vdev_children; i++)
abd_free(abds[i]);
kmem_free(abds, raidvd->vdev_children * sizeof (abd_t *));
raidz_expand_pause(10);
raidz_expand_pause(RAIDZ_EXPAND_PAUSE_SCRATCH_CRASH_COPY_3);

/*
* Update uberblock.
Expand Down Expand Up @@ -4272,14 +4279,12 @@ spa_raidz_expand_cb(void *arg, zthr_t *zthr)

/*
* If requested, pause the reflow when the amount
* specified by raidz_expand_max_offset_pause is reached
* specified by raidz_expand_max_reflow_bytes is reached
*
* This pause is only used during testing or debugging.
*
* XXX Rename once we confirm that we want bytes
*/
while (raidz_expand_max_offset_pause != 0 &&
raidz_expand_max_offset_pause <=
while (raidz_expand_max_reflow_bytes != 0 &&
raidz_expand_max_reflow_bytes <=
vre->vre_bytes_copied && !zthr_iscancelled(zthr)) {
delay(hz);
}
Expand Down Expand Up @@ -4775,8 +4780,8 @@ vdev_ops_t vdev_raidz_ops = {
.vdev_op_leaf = B_FALSE /* not a leaf vdev */
};

ZFS_MODULE_PARAM(zfs_vdev, raidz_, expand_max_offset_pause, ULONG, ZMOD_RW,
"For testing, pause RAIDZ expansion at this offset");
ZFS_MODULE_PARAM(zfs_vdev, raidz_, expand_max_reflow_bytes, ULONG, ZMOD_RW,
"For testing, pause RAIDZ expansion after reflowing this many bytes");
ZFS_MODULE_PARAM(zfs_vdev, raidz_, expand_max_copy_bytes, ULONG, ZMOD_RW,
"Max amount of concurrent i/o for RAIDZ expansion");
ZFS_MODULE_PARAM(zfs_vdev, raidz_, io_aggregate_rows, ULONG, ZMOD_RW,
Expand Down
2 changes: 1 addition & 1 deletion tests/zfs-tests/include/tunables.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ MULTIHOST_IMPORT_INTERVALS multihost.import_intervals zfs_multihost_import_inter
MULTIHOST_INTERVAL multihost.interval zfs_multihost_interval
OVERRIDE_ESTIMATE_RECORDSIZE send.override_estimate_recordsize zfs_override_estimate_recordsize
PREFETCH_DISABLE prefetch.disable zfs_prefetch_disable
RAIDZ_EXPAND_MAX_OFFSET_PAUSE vdev.expand_max_offset_pause raidz_expand_max_offset_pause
RAIDZ_EXPAND_MAX_REFLOW_BYTES vdev.expand_max_reflow_bytes raidz_expand_max_reflow_bytes
REBUILD_SCRUB_ENABLED rebuild_scrub_enabled zfs_rebuild_scrub_enabled
REMOVAL_SUSPEND_PROGRESS removal_suspend_progress zfs_removal_suspend_progress
REMOVE_MAX_SEGMENT remove_max_segment zfs_remove_max_segment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ function cleanup
done

log_must set_tunable32 PREFETCH_DISABLE $prefetch_disable
log_must set_tunable64 RAIDZ_EXPAND_MAX_OFFSET_PAUSE 0
log_must set_tunable64 RAIDZ_EXPAND_MAX_REFLOW_BYTES 0
}

function wait_expand_paused
Expand Down Expand Up @@ -144,7 +144,7 @@ function test_scrub # <pool> <parity> <dir>

randbyte=$(( ((RANDOM<<15) + RANDOM) % \
(dev_size_mb * (devs-1) * 1024 * 1024) ))
log_must set_tunable64 RAIDZ_EXPAND_MAX_OFFSET_PAUSE $randbyte
log_must set_tunable64 RAIDZ_EXPAND_MAX_REFLOW_BYTES $randbyte
log_must zpool attach $TESTPOOL ${raid}-0 $dir/dev-$devs
wait_expand_paused

Expand Down Expand Up @@ -175,7 +175,7 @@ function test_scrub # <pool> <parity> <dir>
log_must check_pool_status $pool "errors" "No known data errors"

log_must zpool clear $pool
log_must set_tunable64 RAIDZ_EXPAND_MAX_OFFSET_PAUSE 0
log_must set_tunable64 RAIDZ_EXPAND_MAX_REFLOW_BYTES 0
log_must zpool wait -t raidz_expand $TESTPOOL
}

Expand Down
14 changes: 7 additions & 7 deletions tests/zfs-tests/tests/functional/raidz/raidz_expand_003_pos.ksh
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,13 @@
# 2. For each parity value [1..3]
# - create raidz pool with minimum block device files required
# - create a couple of datasets with different recordsizes and fill them
# - set raidz expand offset pause
# - set raidz expand maximum reflow bytes
# - start randwritecomp on one of the datasets files
# - attach new device to the pool
# - wait until reflow offset is equal to raidz expand pause offset
# - wait for reflow bytes to reach the maximum
# - kill randwritecomp
# - verify pool
# - set raidz expand offset to max value to complete raidz expansion
# - set reflow bytes to max value to complete the expansion

typeset -r devs=10
typeset -r dev_size_mb=128
Expand All @@ -60,7 +60,7 @@ function cleanup
done

log_must set_tunable32 EMBEDDED_SLOG_MIN_MS $embedded_slog_min_ms
log_must set_tunable64 RAIDZ_EXPAND_MAX_OFFSET_PAUSE 0
log_must set_tunable64 RAIDZ_EXPAND_MAX_REFLOW_BYTES 0
}

function wait_expand_paused
Expand Down Expand Up @@ -105,7 +105,7 @@ for nparity in 1 2 3; do
pool_size=$(get_pool_prop size $pool)
# Pause after reflowing a random number of bytes (less than the pool size)
pause=$((((RANDOM << 15) + RANDOM) % pool_size))
log_must set_tunable64 RAIDZ_EXPAND_MAX_OFFSET_PAUSE $pause
log_must set_tunable64 RAIDZ_EXPAND_MAX_REFLOW_BYTES $pause

log_bkgrnd randwritecomp /$pool/fs/file
pid0=$!
Expand All @@ -125,9 +125,9 @@ for nparity in 1 2 3; do
log_must check_pool_status $pool "scan" "with 0 errors"
log_must check_pool_status $pool "scan" "repaired 0B"

# Set pause past largest possible offset for this pool
# Set pause past largest possible value for this pool
pause=$((devs*dev_size_mb*1024*1024))
log_must set_tunable64 RAIDZ_EXPAND_MAX_OFFSET_PAUSE $pause
log_must set_tunable64 RAIDZ_EXPAND_MAX_REFLOW_BYTES $pause

log_must zpool wait -t raidz_expand $pool
done
Expand Down

0 comments on commit 34cdb40

Please sign in to comment.