Skip to content
Permalink
Browse files

5960 zfs recv should prefetch indirect blocks

5925 zfs receive -o origin=
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
  • Loading branch information...
pcd1193182 authored and ahrens committed Jul 17, 2015
1 parent e47aa97 commit a2cdcdd260232b58202b11a9bfc0103c9449ed52
Showing 40 changed files with 1,388 additions and 384 deletions.
  1. +2 −2 usr/src/cmd/ndmpd/ndmp/ndmpd_zfs.c
  2. +4 −1 usr/src/cmd/zdb/zdb.c
  3. +23 −6 usr/src/cmd/zfs/zfs_main.c
  4. +6 −3 usr/src/cmd/ztest/ztest.c
  5. +2 −4 usr/src/lib/libbe/common/be_create.c
  6. +2 −2 usr/src/lib/libzfs/common/libzfs.h
  7. +2 −2 usr/src/lib/libzfs/common/libzfs_pool.c
  8. +37 −18 usr/src/lib/libzfs/common/libzfs_sendrecv.c
  9. +11 −1 usr/src/lib/libzpool/common/sys/zfs_context.h
  10. +8 −0 usr/src/man/man1m/zfs.1m
  11. +4 −1 usr/src/uts/common/Makefile.files
  12. +1 −1 usr/src/uts/common/fs/zfs/bptree.c
  13. +111 −0 usr/src/uts/common/fs/zfs/bqueue.c
  14. +226 −35 usr/src/uts/common/fs/zfs/dbuf.c
  15. +25 −19 usr/src/uts/common/fs/zfs/dmu.c
  16. +1 −1 usr/src/uts/common/fs/zfs/dmu_diff.c
  17. +5 −0 usr/src/uts/common/fs/zfs/dmu_object.c
  18. +610 −199 usr/src/uts/common/fs/zfs/dmu_send.c
  19. +26 −2 usr/src/uts/common/fs/zfs/dmu_traverse.c
  20. +4 −2 usr/src/uts/common/fs/zfs/dmu_tx.c
  21. +2 −1 usr/src/uts/common/fs/zfs/dmu_zfetch.c
  22. +9 −9 usr/src/uts/common/fs/zfs/dnode.c
  23. +3 −3 usr/src/uts/common/fs/zfs/dnode_sync.c
  24. +15 −13 usr/src/uts/common/fs/zfs/dsl_dataset.c
  25. +1 −1 usr/src/uts/common/fs/zfs/dsl_destroy.c
  26. +2 −1 usr/src/uts/common/fs/zfs/dsl_scan.c
  27. +1 −1 usr/src/uts/common/fs/zfs/spa.c
  28. +2 −2 usr/src/uts/common/fs/zfs/space_map.c
  29. +54 −0 usr/src/uts/common/fs/zfs/sys/bqueue.h
  30. +5 −4 usr/src/uts/common/fs/zfs/sys/dbuf.h
  31. +3 −2 usr/src/uts/common/fs/zfs/sys/dmu.h
  32. +1 −1 usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
  33. +9 −13 usr/src/uts/common/fs/zfs/sys/zio.h
  34. +1 −1 usr/src/uts/common/fs/zfs/sys/zio_checksum.h
  35. +40 −0 usr/src/uts/common/fs/zfs/sys/zio_priority.h
  36. +8 −5 usr/src/uts/common/fs/zfs/zap.c
  37. +2 −2 usr/src/uts/common/fs/zfs/zfs_vfsops.c
  38. +2 −1 usr/src/uts/common/fs/zfs/zfs_vnops.c
  39. +117 −24 usr/src/uts/common/fs/zfs/zio.c
  40. +1 −1 usr/src/uts/common/fs/zfs/zvol.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
*/

/*
@@ -916,7 +916,7 @@ ndmpd_zfs_restore_recv_write(ndmpd_zfs_args_t *ndmpd_zfs_args)
flags.force = B_TRUE;

err = zfs_receive(ndmpd_zfs_args->nz_zlibh, ndmpd_zfs_args->nz_dataset,
&flags, ndmpd_zfs_args->nz_pipe_fd[PIPE_ZFS], NULL);
NULL, &flags, ndmpd_zfs_args->nz_pipe_fd[PIPE_ZFS], NULL);

if (err && !session->ns_data.dd_abort)
NDMPD_ZFS_LOG_ZERR(ndmpd_zfs_args, "zfs_receive: %d", err);
@@ -2403,6 +2403,9 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
dmu_object_type_t type;
boolean_t is_metadata;

if (bp == NULL)
return (0);

if (dump_opt['b'] >= 5 && bp->blk_birth > 0) {
char blkbuf[BP_SPRINTF_LEN];
snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
@@ -2892,7 +2895,7 @@ zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
avl_index_t where;
zdb_ddt_entry_t *zdde, zdde_search;

if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
return (0);

if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
@@ -247,8 +247,9 @@ get_usage(zfs_help_t idx)
return (gettext("\tpromote <clone-filesystem>\n"));
case HELP_RECEIVE:
return (gettext("\treceive [-vnFu] <filesystem|volume|"
"snapshot>\n"
"\treceive [-vnFu] [-d | -e] <filesystem>\n"));
"snapshot>\n"
"\treceive [-vnFu] [-o origin=<snapshot>] [-d | -e] "
"<filesystem>\n"));
case HELP_RENAME:
return (gettext("\trename [-f] <filesystem|volume|snapshot> "
"<filesystem|volume|snapshot>\n"
@@ -751,7 +752,7 @@ zfs_do_create(int argc, char **argv)
nomem();
break;
case 'o':
if (parseprop(props, optarg))
if (parseprop(props, optarg) != 0)
goto error;
break;
case 's':
@@ -3590,7 +3591,7 @@ zfs_do_snapshot(int argc, char **argv)
while ((c = getopt(argc, argv, "ro:")) != -1) {
switch (c) {
case 'o':
if (parseprop(props, optarg))
if (parseprop(props, optarg) != 0)
return (1);
break;
case 'r':
@@ -3849,10 +3850,19 @@ zfs_do_receive(int argc, char **argv)
{
int c, err;
recvflags_t flags = { 0 };
nvlist_t *props;
nvpair_t *nvp = NULL;

if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
nomem();

/* check options */
while ((c = getopt(argc, argv, ":denuvF")) != -1) {
while ((c = getopt(argc, argv, ":o:denuvF")) != -1) {
switch (c) {
case 'o':
if (parseprop(props, optarg) != 0)
return (1);
break;
case 'd':
flags.isprefix = B_TRUE;
break;
@@ -3897,6 +3907,13 @@ zfs_do_receive(int argc, char **argv)
usage(B_FALSE);
}

while ((nvp = nvlist_next_nvpair(props, nvp))) {
if (strcmp(nvpair_name(nvp), "origin") != 0) {
(void) fprintf(stderr, gettext("invalid option"));
usage(B_FALSE);
}
}

if (isatty(STDIN_FILENO)) {
(void) fprintf(stderr,
gettext("Error: Backup stream can not be read "
@@ -3905,7 +3922,7 @@ zfs_do_receive(int argc, char **argv)
return (1);
}

err = zfs_receive(g_zfs, argv[0], &flags, STDIN_FILENO, NULL);
err = zfs_receive(g_zfs, argv[0], props, &flags, STDIN_FILENO, NULL);

return (err != 0);
}
@@ -3584,7 +3584,8 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id)
*/
n = ztest_random(regions) * stride + ztest_random(width);
s = 1 + ztest_random(2 * width - 1);
dmu_prefetch(os, bigobj, n * chunksize, s * chunksize);
dmu_prefetch(os, bigobj, 0, n * chunksize, s * chunksize,
ZIO_PRIORITY_SYNC_READ);

/*
* Pick a random index and compute the offsets into packobj and bigobj.
@@ -5703,8 +5704,10 @@ ztest_run(ztest_shared_t *zs)
* Right before closing the pool, kick off a bunch of async I/O;
* spa_close() should wait for it to complete.
*/
for (uint64_t object = 1; object < 50; object++)
dmu_prefetch(spa->spa_meta_objset, object, 0, 1ULL << 20);
for (uint64_t object = 1; object < 50; object++) {
dmu_prefetch(spa->spa_meta_objset, object, 0, 0, 1ULL << 20,
ZIO_PRIORITY_SYNC_READ);
}

spa_close(spa, FTAG);

@@ -21,10 +21,8 @@

/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*/

/*
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 by Delphix. All rights reserved.
*/

/*
@@ -2480,7 +2478,7 @@ be_send_fs_callback(zfs_handle_t *zhp, void *data)
(void) close(srpipe[1]);

/* Receive dataset */
if (zfs_receive(g_zfs, clone_ds, &flags, srpipe[0], NULL) != 0) {
if (zfs_receive(g_zfs, clone_ds, NULL, &flags, srpipe[0], NULL) != 0) {
be_print_err(gettext("be_send_fs_callback: failed to "
"recv dataset (%s)\n"), clone_ds);
}
@@ -651,8 +651,8 @@ typedef struct recvflags {
boolean_t nomount;
} recvflags_t;

extern int zfs_receive(libzfs_handle_t *, const char *, recvflags_t *,
int, avl_tree_t *);
extern int zfs_receive(libzfs_handle_t *, const char *, nvlist_t *,
recvflags_t *, int, avl_tree_t *);

typedef enum diff_flags {
ZFS_DIFF_PARSEABLE = 0x1,
@@ -3398,7 +3398,7 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
}

static int
zbookmark_compare(const void *a, const void *b)
zbookmark_mem_compare(const void *a, const void *b)
{
return (memcmp(a, b, sizeof (zbookmark_phys_t)));
}
@@ -3461,7 +3461,7 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
zc.zc_nvlist_dst_size;
count -= zc.zc_nvlist_dst_size;

qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_compare);
qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare);

verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);

@@ -55,8 +55,9 @@
/* in libzfs_dataset.c */
extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);

static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t *,
int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *);
static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *,
recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **, int,
uint64_t *);

static const zio_cksum_t zero_cksum = { 0 };

@@ -2465,7 +2466,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
* zfs_receive_one() will take care of it (ie,
* recv_skip() and return 0).
*/
error = zfs_receive_impl(hdl, destname, flags, fd,
error = zfs_receive_impl(hdl, destname, NULL, flags, fd,
sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd,
action_handlep);
if (error == ENODATA) {
@@ -2598,9 +2599,9 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
*/
static int
zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
recvflags_t *flags, dmu_replay_record_t *drr,
dmu_replay_record_t *drr_noswap, const char *sendfs,
nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
const char *originsnap, recvflags_t *flags, dmu_replay_record_t *drr,
dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv,
avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
uint64_t *action_handlep)
{
zfs_cmd_t zc = { 0 };
@@ -2756,10 +2757,15 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
}
if (flags->verbose)
(void) printf("found clone origin %s\n", zc.zc_string);
} else if (originsnap) {
(void) strncpy(zc.zc_string, originsnap, ZFS_MAXNAMELEN);
if (flags->verbose)
(void) printf("using provided clone origin %s\n",
zc.zc_string);
}

stream_wantsnewfs = (drrb->drr_fromguid == NULL ||
(drrb->drr_flags & DRR_FLAG_CLONE));
(drrb->drr_flags & DRR_FLAG_CLONE) || originsnap);

if (stream_wantsnewfs) {
/*
@@ -3137,9 +3143,10 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
}

static int
zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags,
int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl,
char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap,
const char *originsnap, recvflags_t *flags, int infd, const char *sendfs,
nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
uint64_t *action_handlep)
{
int err;
dmu_replay_record_t drr, drr_noswap;
@@ -3158,6 +3165,12 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags,
"(%s) does not exist"), tosnap);
return (zfs_error(hdl, EZFS_NOENT, errbuf));
}
if (originsnap &&
!zfs_dataset_exists(hdl, originsnap, ZFS_TYPE_DATASET)) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified origin fs "
"(%s) does not exist"), originsnap);
return (zfs_error(hdl, EZFS_NOENT, errbuf));
}

/* read in the BEGIN record */
if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
@@ -3230,14 +3243,14 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags,
*cp = '\0';
sendfs = nonpackage_sendfs;
}
return (zfs_receive_one(hdl, infd, tosnap, flags,
&drr, &drr_noswap, sendfs, stream_nv, stream_avl,
top_zfs, cleanup_fd, action_handlep));
return (zfs_receive_one(hdl, infd, tosnap, originsnap, flags,
&drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs,
cleanup_fd, action_handlep));
} else {
assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
DMU_COMPOUNDSTREAM);
return (zfs_receive_package(hdl, infd, tosnap, flags,
&drr, &zcksum, top_zfs, cleanup_fd, action_handlep));
return (zfs_receive_package(hdl, infd, tosnap, flags, &drr,
&zcksum, top_zfs, cleanup_fd, action_handlep));
}
}

@@ -3248,18 +3261,24 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags,
* (-1 will override -2).
*/
int
zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags,
int infd, avl_tree_t *stream_avl)
zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props,
recvflags_t *flags, int infd, avl_tree_t *stream_avl)
{
char *top_zfs = NULL;
int err;
int cleanup_fd;
uint64_t action_handle = 0;
char *originsnap = NULL;
if (props) {
err = nvlist_lookup_string(props, "origin", &originsnap);
if (err && err != ENOENT)
return (err);
}

cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
VERIFY(cleanup_fd >= 0);

err = zfs_receive_impl(hdl, tosnap, flags, infd, NULL, NULL,
err = zfs_receive_impl(hdl, tosnap, originsnap, flags, infd, NULL, NULL,
stream_avl, &top_zfs, cleanup_fd, &action_handle);

VERIFY(0 == close(cleanup_fd));
@@ -116,8 +116,18 @@ extern int aok;

/*
* DTrace SDT probes have different signatures in userland than they do in
* kernel. If they're being used in kernel code, re-define them out of
* the kernel. If they're being used in kernel code, re-define them out of
* existence for their counterparts in libzpool.
*
* Here's an example of how to use the set-error probes in userland:
* zfs$target:::set-error /arg0 == EBUSY/ {stack();}
*
* Here's an example of how to use DTRACE_PROBE probes in userland:
* If there is a probe declared as follows:
* DTRACE_PROBE2(zfs__probe_name, uint64_t, blkid, dnode_t *, dn);
* Then you can use it as follows:
* zfs$target:::probe2 /copyinstr(arg0) == "zfs__probe_name"/
* {printf("%u %p\n", arg1, arg2);}
*/

#ifdef DTRACE_PROBE
@@ -175,11 +175,13 @@
.Nm
.Cm receive
.Op Fl Fnuv
.Op Fl o Sy origin Ns = Ns Ar snapshot
.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
.Nm
.Cm receive
.Op Fl Fnuv
.Op Fl d Ns | Ns Fl e
.Op Fl o Sy origin Ns = Ns Ar snapshot
.Ar filesystem
.Nm
.Cm allow
@@ -2635,12 +2637,14 @@ origin, etc.
.Nm
.Cm receive
.Op Fl Fnuv
.Op Fl o Sy origin Ns = Ns Ar snapshot
.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
.br
.Nm
.Cm receive
.Op Fl Fnuv
.Op Fl d Ns | Ns Fl e
.Op Fl o Sy origin Ns = Ns Ar snapshot
.Ar filesystem
.Xc
Creates a snapshot whose contents are as specified in the stream provided on
@@ -2730,6 +2734,10 @@ snapshot as described in the paragraph above.
Do not actually receive the stream. This can be useful in conjunction with the
.Fl v
option to verify the name the receive operation would use.
.It Fl o Sy origin Ns = Ns Ar snapshot
Forces the stream to be received as a clone of the given snapshot.
This is only valid if the stream is an incremental stream whose source
is the same as the provided origin.
.It Fl u
The file system associated with the received stream is not mounted.
.It Fl v
@@ -21,7 +21,9 @@

#
# Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2013 by Delphix. All rights reserved.
# Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved.
# Copyright (c) 2012 Joyent, Inc. All rights reserved.
# Copyright (c) 2011, 2014 by Delphix. All rights reserved.
# Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
# Copyright 2014 Nexenta Systems, Inc. All rights reserved.
#
@@ -1328,6 +1330,7 @@ ZFS_COMMON_OBJS += \
bplist.o \
bpobj.o \
bptree.o \
bqueue.o \
dbuf.o \
ddt.o \
ddt_zap.o \
@@ -154,7 +154,7 @@ bptree_visit_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
int err;
struct bptree_args *ba = arg;

if (BP_IS_HOLE(bp))
if (bp == NULL || BP_IS_HOLE(bp))
return (0);

err = ba->ba_func(ba->ba_arg, bp, ba->ba_tx);

0 comments on commit a2cdcdd

Please sign in to comment.
You can’t perform that action at this time.