Skip to content

Commit

Permalink
Persistent L2ARC
Browse files Browse the repository at this point in the history
This commit makes the L2ARC persistent across reboots. It is largely
based on issue 3525 in Illumos. This feature implements a light-weight
persistent L2ARC metadata structure that allows L2ARC contents to be
recovered after a reboot. This significantly eases the impact a reboot
has on read performance on systems with large caches.

Co-authored-by: Saso Kiselkov <skiselkov@gmail.com>
Co-authored-by: Jorgen Lundman <lundman@lundman.net>
Co-authored-by: George Amanakis <gamanakis@gmail.com>
Ported-by: Yuxuan Shui <yshuiv7@gmail.com>
Signed-off-by: George Amanakis <gamanakis@gmail.com>
  • Loading branch information
3 people committed Feb 26, 2020
1 parent 610eec4 commit 937bd94
Show file tree
Hide file tree
Showing 28 changed files with 2,660 additions and 96 deletions.
192 changes: 192 additions & 0 deletions cmd/zdb/zdb.c
Expand Up @@ -62,6 +62,7 @@
#include <sys/zio_compress.h>
#include <sys/zfs_fuid.h>
#include <sys/arc.h>
#include <sys/arc_impl.h>
#include <sys/ddt.h>
#include <sys/zfeature.h>
#include <sys/abd.h>
Expand Down Expand Up @@ -3474,6 +3475,185 @@ print_label_header(zdb_label_t *label, int l)
label->header_printed = B_TRUE;
}

/*
 * Emit the three-line banner that introduces the L2ARC device header
 * section of the zdb label dump.
 */
static void
print_l2arc_header(void)
{
	(void) printf(
	    "------------------------------------\n"
	    "L2ARC device header\n"
	    "------------------------------------\n");
}

/*
 * Emit the three-line banner that introduces the L2ARC log block
 * section of the zdb label dump.
 */
static void
print_l2arc_log_blocks(void)
{
	(void) printf(
	    "------------------------------------\n"
	    "L2ARC device log blocks\n"
	    "------------------------------------\n");
}

/*
 * Print the entries of a single L2ARC log block.
 *
 *	log_entries	number of elements of le[] to print
 *	le		array of log entries belonging to the block
 *	i		zero-based index of the log block; printed one-based
 *
 * For every entry the DVA (asize, vdev, offset), the birth value, the
 * fields decoded from le_prop and the le_daddr address are printed.
 * The caller must make sure that le[] really holds log_entries elements;
 * no bound is enforced here.
 */
static void
dump_l2arc_log_entries(uint64_t log_entries,
    l2arc_log_ent_phys_t *le, int i)
{
	/* Unsigned index: log_entries is uint64_t, avoid a signed compare. */
	for (uint64_t j = 0; j < log_entries; j++) {
		dva_t dva = le[j].le_dva;

		/* Note the blank after each comma, including before offset. */
		(void) printf("lb[%d]\t\tle[%llu]\t\tDVA asize: %llu, "
		    "vdev: %llu, offset: %llu\n", i + 1,
		    (u_longlong_t)(j + 1),
		    (u_longlong_t)DVA_GET_ASIZE(&dva),
		    (u_longlong_t)DVA_GET_VDEV(&dva),
		    (u_longlong_t)DVA_GET_OFFSET(&dva));
		(void) printf("|\t\t\t\tbirth: %llu\n",
		    (u_longlong_t)le[j].le_birth);
		(void) printf("|\t\t\t\tlsize: %llu\n",
		    (u_longlong_t)L2BLK_GET_LSIZE(le[j].le_prop));
		(void) printf("|\t\t\t\tpsize: %llu\n",
		    (u_longlong_t)L2BLK_GET_PSIZE(le[j].le_prop));
		(void) printf("|\t\t\t\tcompr: %llu\n",
		    (u_longlong_t)L2BLK_GET_COMPRESS(le[j].le_prop));
		(void) printf("|\t\t\t\ttype: %llu\n",
		    (u_longlong_t)L2BLK_GET_TYPE(le[j].le_prop));
		(void) printf("|\t\t\t\tprotected: %llu\n",
		    (u_longlong_t)L2BLK_GET_PROTECTED(le[j].le_prop));
		(void) printf("|\t\t\t\tprefetch: %llu\n",
		    (u_longlong_t)L2BLK_GET_PREFETCH(le[j].le_prop));
		(void) printf("|\t\t\t\taddress: %llu\n",
		    (u_longlong_t)le[j].le_daddr);
		(void) printf("|\n");
	}
	(void) printf("\n");
}

/*
 * Walk and print the chain of L2ARC log blocks of a cache device.
 *
 *	fd	open file descriptor of the cache device
 *	l2dhdr	device header (native byte order) whose dh_start_lbps[]
 *		point at the two most recent log blocks
 *
 * Starting from dh_start_lbps[], up to dh_log_blk_count blocks are read,
 * checksummed (over the on-disk bytes, i.e. before decompression),
 * decompressed when LZ4 is indicated, byteswapped if needed and printed.
 * With dump_opt['l'] > 2 the entries of each block are printed as well.
 *
 * Everything read here comes from disk and is treated as untrusted: the
 * walk stops at the first block whose size does not fit the buffer,
 * that cannot be read completely, that fails to decompress or that does
 * not carry the log block magic.  A summary line is always printed.
 */
static void
dump_l2arc_log_blocks(int fd, l2arc_dev_hdr_phys_t l2dhdr)
{
	l2arc_log_blk_phys_t this_lb = { 0 };
	l2arc_log_blkptr_t lbps[2];
	zio_cksum_t cksum;
	uint64_t psize, nent, i, failed = 0;
	/* Capacity of the entry array embedded in a log block. */
	const uint64_t max_ent =
	    sizeof (this_lb.lb_entries) / sizeof (this_lb.lb_entries[0]);

	print_l2arc_log_blocks();

	lbps[0] = l2dhdr.dh_start_lbps[0];
	lbps[1] = l2dhdr.dh_start_lbps[1];

	/* Never print more entries than a log block can actually hold. */
	nent = l2dhdr.dh_log_blk_ent;
	if (nent > max_ent)
		nent = max_ent;

	for (i = 0; i < l2dhdr.dh_log_blk_count; i++) {
		psize = L2BLK_GET_PSIZE((&lbps[0])->lbp_prop);

		/* A bogus psize would overflow this_lb in pread64(). */
		if (psize == 0 || psize > sizeof (this_lb)) {
			(void) printf("lb[%llu] invalid psize: %llu\n\n",
			    (u_longlong_t)(i + 1), (u_longlong_t)psize);
			break;
		}

		if (pread64(fd, &this_lb, psize, lbps[0].lbp_daddr) !=
		    (ssize_t)psize) {
			(void) printf("Error while reading next log "
			    "block\n\n");
			break;
		}

		/* The checksum covers the bytes exactly as stored on disk. */
		fletcher_4_native_varsize(&this_lb, psize, &cksum);

		switch (L2BLK_GET_COMPRESS((&lbps[0])->lbp_prop)) {
		case ZIO_COMPRESS_OFF:
			break;
		case ZIO_COMPRESS_LZ4: {
			/*
			 * Decompress in place: stage the compressed bytes
			 * in an abd, then expand them back into this_lb.
			 */
			abd_t *abd = abd_alloc_for_io(psize, B_TRUE);
			int err;

			abd_copy_from_buf_off(abd, &this_lb, 0, psize);
			err = zio_decompress_data(L2BLK_GET_COMPRESS(
			    (&lbps[0])->lbp_prop), abd, &this_lb, psize,
			    sizeof (this_lb));
			abd_free(abd);
			if (err != 0) {
				(void) printf("L2ARC block decompression "
				    "failed\n\n");
				goto out;
			}
			break;
		}
		default:
			break;
		}

		if (this_lb.lb_magic == BSWAP_64(L2ARC_LOG_BLK_MAGIC))
			byteswap_uint64_array(&this_lb, psize);

		if (this_lb.lb_magic != L2ARC_LOG_BLK_MAGIC) {
			(void) printf("Invalid log block magic\n\n");
			break;
		}

		(void) printf("lb[%llu] magic: %llu\n",
		    (u_longlong_t)(i + 1),
		    (u_longlong_t)this_lb.lb_magic);
		(void) printf("| \tdaddr: %llu\n",
		    (u_longlong_t)lbps[0].lbp_daddr);
		(void) printf("| \tlsize: %llu\n",
		    (u_longlong_t)L2BLK_GET_LSIZE((&lbps[0])->lbp_prop));
		(void) printf("| \tpsize: %llu\n",
		    (u_longlong_t)L2BLK_GET_PSIZE((&lbps[0])->lbp_prop));
		(void) printf("| \tcompralgo: %llu\n",
		    (u_longlong_t)L2BLK_GET_COMPRESS((&lbps[0])->lbp_prop));
		(void) printf("| \tcksumalgo: %llu\n",
		    (u_longlong_t)L2BLK_GET_CHECKSUM((&lbps[0])->lbp_prop));

		if (!ZIO_CHECKSUM_EQUAL(cksum, lbps[0].lbp_cksum)) {
			failed++;
			(void) printf("| \t!! invalid cksum\n");
		} else {
			(void) printf("| \tvalid cksum\n");
		}

		(void) printf("|\n");

		if (dump_opt['l'] > 2)
			dump_l2arc_log_entries(nent, this_lb.lb_entries,
			    (int)i);

		/* Advance: the block just read names the one after next. */
		lbps[0] = lbps[1];
		lbps[1] = this_lb.lb_prev_lbp;
	}

out:
	/*
	 * i is the number of blocks fully processed; when the whole chain
	 * was walked it equals dh_log_blk_count.
	 */
	(void) printf("%llu out of %llu log blocks dumped\n",
	    (u_longlong_t)(i - failed),
	    (u_longlong_t)l2dhdr.dh_log_blk_count);
	(void) printf("\n");
}

static boolean_t
dump_l2arc_header(int fd)
{
l2arc_dev_hdr_phys_t l2dhdr;
int error = B_FALSE;

if (pread64(fd, &l2dhdr, sizeof (l2dhdr),
VDEV_LABEL_START_SIZE) == sizeof (l2dhdr)) {

if (l2dhdr.dh_magic == BSWAP_64(L2ARC_DEV_HDR_MAGIC))
byteswap_uint64_array(&l2dhdr, sizeof (l2dhdr));

if (l2dhdr.dh_magic != L2ARC_DEV_HDR_MAGIC) {
error = B_TRUE;
}

if (!dump_opt['q']) {
print_l2arc_header();

(void) printf(" magic: %llu\n",
(u_longlong_t)l2dhdr.dh_magic);
(void) printf(" version: %llu\n",
(u_longlong_t)l2dhdr.dh_version);
(void) printf(" pool_guid: %llu\n",
(u_longlong_t)l2dhdr.dh_spa_guid);
(void) printf(" flags: %llu\n",
(u_longlong_t)l2dhdr.dh_flags);
(void) printf(" start_lbps[0]: %llu\n",
(u_longlong_t)
l2dhdr.dh_start_lbps[0].lbp_daddr);
(void) printf(" start_lbps[1]: %llu\n",
(u_longlong_t)
l2dhdr.dh_start_lbps[1].lbp_daddr);
(void) printf(" log_blk_count: %llu\n",
(u_longlong_t)l2dhdr.dh_log_blk_count);
(void) printf(" log_blk_ent: %llu\n",
(u_longlong_t)l2dhdr.dh_log_blk_ent);
(void) printf(" evict: %llu\n",
(u_longlong_t)l2dhdr.dh_evict);
(void) printf("\n");

if (dump_opt['l'] > 1)
dump_l2arc_log_blocks(fd, l2dhdr);
}
} else {
error = B_TRUE;
}

if (error) {
(void) printf("invalid L2ARC device header\n");
(void) printf("\n");
return (error);
}

return (error);
}

static void
dump_config_from_label(zdb_label_t *label, size_t buflen, int l)
{
Expand Down Expand Up @@ -3643,6 +3823,7 @@ dump_label(const char *dev)
struct stat64 statbuf;
boolean_t config_found = B_FALSE;
boolean_t error = B_FALSE;
boolean_t l2arc_header = B_FALSE;
avl_tree_t config_tree;
avl_tree_t uberblock_tree;
void *node, *cookie;
Expand Down Expand Up @@ -3732,6 +3913,11 @@ dump_label(const char *dev)
ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
ashift = SPA_MINBLOCKSHIFT;

if (!l2arc_header)
(void) (nvlist_lookup_boolean_value(config,
ZPOOL_CONFIG_L2CACHE_PERSISTENT,
&l2arc_header));

if (nvlist_size(config, &size, NV_ENCODE_XDR) != 0)
size = buflen;

Expand Down Expand Up @@ -3785,6 +3971,12 @@ dump_label(const char *dev)
nvlist_free(label->config_nv);
}

/*
* Dump the L2ARC header, if existent.
*/
if (l2arc_header)
error |= dump_l2arc_header(fd);

cookie = NULL;
while ((node = avl_destroy_nodes(&config_tree, &cookie)) != NULL)
umem_free(node, sizeof (cksum_record_t));
Expand Down
4 changes: 4 additions & 0 deletions cmd/ztest/ztest.c
Expand Up @@ -1117,6 +1117,10 @@ make_vdev_root(char *path, char *aux, char *pool, size_t size, uint64_t ashift,
r, m);
VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
log) == 0);
if (aux != NULL && strcmp(aux, ZPOOL_CONFIG_L2CACHE) == 0) {
VERIFY(nvlist_add_boolean_value(child[c],
ZPOOL_CONFIG_L2CACHE_PERSISTENT, B_TRUE) == 0);
}

if (class != NULL && class[0] != '\0') {
ASSERT(m > 1 || log); /* expecting a mirror */
Expand Down
1 change: 1 addition & 0 deletions configure.ac
Expand Up @@ -334,6 +334,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/no_space/Makefile
tests/zfs-tests/tests/functional/nopwrite/Makefile
tests/zfs-tests/tests/functional/online_offline/Makefile
tests/zfs-tests/tests/functional/persist_l2arc/Makefile
tests/zfs-tests/tests/functional/pool_checkpoint/Makefile
tests/zfs-tests/tests/functional/pool_names/Makefile
tests/zfs-tests/tests/functional/poolversion/Makefile
Expand Down
4 changes: 3 additions & 1 deletion include/sys/arc.h
Expand Up @@ -300,13 +300,15 @@ void arc_fini(void);
* Level 2 ARC
*/

void l2arc_add_vdev(spa_t *spa, vdev_t *vd);
void l2arc_add_vdev(spa_t *spa, vdev_t *vd, boolean_t rebuild);
void l2arc_remove_vdev(vdev_t *vd);
boolean_t l2arc_vdev_present(vdev_t *vd);
void l2arc_rebuild_vdev(vdev_t *vd, boolean_t rebuild, boolean_t reopen);
void l2arc_init(void);
void l2arc_fini(void);
void l2arc_start(void);
void l2arc_stop(void);
void l2arc_spa_rebuild_start(spa_t *spa);

#ifndef _KERNEL
extern boolean_t arc_watch;
Expand Down

0 comments on commit 937bd94

Please sign in to comment.