Skip to content
This repository has been archived by the owner on Nov 7, 2019. It is now read-only.

Commit

Permalink
9166 zfs storage pool checkpoint
Browse files Browse the repository at this point in the history
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: John Kennedy <john.kennedy@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Approved by: Richard Lowe <richlowe@richlowe.net>

Details about the motivation of this feature and its usage can
be found in this blogpost:
https://sdimitro.github.io/post/zpool-checkpoint/

A lightning talk of this feature can be found here:
https://www.youtube.com/watch?v=fPQA8K40jAM

Implementation details can be found in the big block comment of
spa_checkpoint.c

Side-changes that are relevant to this commit but not explained
elsewhere:
* renames metaslab trees to be shorter without losing meaning
* space_map_{alloc,truncate}() accept a block size as a
  parameter. The reason is that in the current state all space
  maps that we allocate through the DMU use a global tunable
  (space_map_blksz) which defaults to 4KB. This is ok for
  metaslab space maps in terms of bandwidth since they are
  scattered all over the disk. But for other space maps this
  default is probably not what we want. Examples are device
  removal's vdev_obsolete_sm or vdev_checkpoint_sm from this
  review. Both of these have a 1:1 relationship with each vdev
  and could benefit from a bigger block size.

Closes #560
  • Loading branch information
sdimitro authored and prakashsurya committed Mar 14, 2018
1 parent 0e776dc commit 7159fdb
Show file tree
Hide file tree
Showing 114 changed files with 6,299 additions and 849 deletions.
67 changes: 38 additions & 29 deletions usr/src/cmd/mdb/common/modules/zfs/zfs.c
Expand Up @@ -21,8 +21,8 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright (c) 2017, Joyent, Inc. All rights reserved.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
*/

/* Portions Copyright 2010 Robert Milkowski */
Expand Down Expand Up @@ -1316,22 +1316,23 @@ typedef struct mdb_metaslab {
int64_t ms_deferspace;
uint64_t ms_fragmentation;
uint64_t ms_weight;
uintptr_t ms_alloctree[TXG_SIZE];
uintptr_t ms_freeingtree;
uintptr_t ms_freedtree;
uintptr_t ms_tree;
uintptr_t ms_allocating[TXG_SIZE];
uintptr_t ms_checkpointing;
uintptr_t ms_freeing;
uintptr_t ms_freed;
uintptr_t ms_allocatable;
uintptr_t ms_sm;
} mdb_metaslab_t;

typedef struct mdb_space_map_phys_t {
uint64_t smp_alloc;
int64_t smp_alloc;
uint64_t smp_histogram[SPACE_MAP_HISTOGRAM_SIZE];
} mdb_space_map_phys_t;

typedef struct mdb_space_map {
uint64_t sm_size;
uint8_t sm_shift;
uint64_t sm_alloc;
int64_t sm_alloc;
uintptr_t sm_phys;
} mdb_space_map_t;

Expand Down Expand Up @@ -1937,10 +1938,11 @@ typedef struct mdb_dsl_dir_phys {
} mdb_dsl_dir_phys_t;

typedef struct space_data {
uint64_t ms_alloctree[TXG_SIZE];
uint64_t ms_freeingtree;
uint64_t ms_freedtree;
uint64_t ms_tree;
uint64_t ms_allocating[TXG_SIZE];
uint64_t ms_checkpointing;
uint64_t ms_freeing;
uint64_t ms_freed;
uint64_t ms_allocatable;
int64_t ms_deferspace;
uint64_t avail;
uint64_t nowavail;
Expand All @@ -1963,27 +1965,32 @@ space_cb(uintptr_t addr, const void *unknown, void *arg)

for (i = 0; i < TXG_SIZE; i++) {
if (mdb_ctf_vread(&rt, "range_tree_t",
"mdb_range_tree_t", ms.ms_alloctree[i], 0) == -1)
"mdb_range_tree_t", ms.ms_allocating[i], 0) == -1)
return (WALK_ERR);

sd->ms_alloctree[i] += rt.rt_space;
sd->ms_allocating[i] += rt.rt_space;

}

if (mdb_ctf_vread(&rt, "range_tree_t",
"mdb_range_tree_t", ms.ms_freeingtree, 0) == -1)
"mdb_range_tree_t", ms.ms_checkpointing, 0) == -1)
return (WALK_ERR);
sd->ms_freeingtree += rt.rt_space;
sd->ms_checkpointing += rt.rt_space;

if (mdb_ctf_vread(&rt, "range_tree_t",
"mdb_range_tree_t", ms.ms_freedtree, 0) == -1)
"mdb_range_tree_t", ms.ms_freeing, 0) == -1)
return (WALK_ERR);
sd->ms_freedtree += rt.rt_space;
sd->ms_freeing += rt.rt_space;

if (mdb_ctf_vread(&rt, "range_tree_t",
"mdb_range_tree_t", ms.ms_tree, 0) == -1)
"mdb_range_tree_t", ms.ms_freed, 0) == -1)
return (WALK_ERR);
sd->ms_tree += rt.rt_space;
sd->ms_freed += rt.rt_space;

if (mdb_ctf_vread(&rt, "range_tree_t",
"mdb_range_tree_t", ms.ms_allocatable, 0) == -1)
return (WALK_ERR);
sd->ms_allocatable += rt.rt_space;

if (ms.ms_sm != NULL &&
mdb_ctf_vread(&sm, "space_map_t",
Expand Down Expand Up @@ -2067,16 +2074,18 @@ spa_space(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
}

mdb_printf("ms_allocmap = %llu%s %llu%s %llu%s %llu%s\n",
sd.ms_alloctree[0] >> shift, suffix,
sd.ms_alloctree[1] >> shift, suffix,
sd.ms_alloctree[2] >> shift, suffix,
sd.ms_alloctree[3] >> shift, suffix);
mdb_printf("ms_freeingtree = %llu%s\n",
sd.ms_freeingtree >> shift, suffix);
mdb_printf("ms_freedtree = %llu%s\n",
sd.ms_freedtree >> shift, suffix);
mdb_printf("ms_tree = %llu%s\n",
sd.ms_tree >> shift, suffix);
sd.ms_allocating[0] >> shift, suffix,
sd.ms_allocating[1] >> shift, suffix,
sd.ms_allocating[2] >> shift, suffix,
sd.ms_allocating[3] >> shift, suffix);
mdb_printf("ms_checkpointing = %llu%s\n",
sd.ms_checkpointing >> shift, suffix);
mdb_printf("ms_freeing = %llu%s\n",
sd.ms_freeing >> shift, suffix);
mdb_printf("ms_freed = %llu%s\n",
sd.ms_freed >> shift, suffix);
mdb_printf("ms_allocatable = %llu%s\n",
sd.ms_allocatable >> shift, suffix);
mdb_printf("ms_deferspace = %llu%s\n",
sd.ms_deferspace >> shift, suffix);
mdb_printf("last synced avail = %llu%s\n",
Expand Down
6 changes: 5 additions & 1 deletion usr/src/cmd/truss/codes.c
Expand Up @@ -21,7 +21,7 @@

/*
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2016 by Delphix. All rights reserved.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2015, Joyent, Inc. All rights reserved.
* Copyright (c) 2014, OmniTI Computer Consulting, Inc. All rights reserved.
Expand Down Expand Up @@ -1164,6 +1164,10 @@ const struct ioc {
"zfs_cmd_t" },
{ (uint_t)ZFS_IOC_POOL_GET_HISTORY, "ZFS_IOC_POOL_GET_HISTORY",
"zfs_cmd_t" },
{ (uint_t)ZFS_IOC_POOL_CHECKPOINT, "ZFS_IOC_POOL_CHECKPOINT",
"zfs_cmd_t" },
{ (uint_t)ZFS_IOC_POOL_DISCARD_CHECKPOINT,
"ZFS_IOC_POOL_DISCARD_CHECKPOINT", "zfs_cmd_t" },
{ (uint_t)ZFS_IOC_VDEV_ADD, "ZFS_IOC_VDEV_ADD",
"zfs_cmd_t" },
{ (uint_t)ZFS_IOC_VDEV_REMOVE, "ZFS_IOC_VDEV_REMOVE",
Expand Down

0 comments on commit 7159fdb

Please sign in to comment.