282 changes: 274 additions & 8 deletions cmd/zfs/zfs_main.c

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions cmd/zinject/translate.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ object_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
*/
sync();

err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, FTAG, &os);
err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, B_FALSE, FTAG, &os);
if (err != 0) {
(void) fprintf(stderr, "cannot open dataset '%s': %s\n",
dataset, strerror(err));
Expand All @@ -189,7 +189,7 @@ object_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
record->zi_objset = dmu_objset_id(os);
record->zi_object = statbuf->st_ino;

dmu_objset_disown(os, FTAG);
dmu_objset_disown(os, B_FALSE, FTAG);

return (0);
}
Expand Down Expand Up @@ -267,7 +267,7 @@ calculate_range(const char *dataset, err_type_t type, int level, char *range,
* size.
*/
if ((err = dmu_objset_own(dataset, DMU_OST_ANY,
B_TRUE, FTAG, &os)) != 0) {
B_TRUE, B_FALSE, FTAG, &os)) != 0) {
(void) fprintf(stderr, "cannot open dataset '%s': %s\n",
dataset, strerror(err));
goto out;
Expand Down Expand Up @@ -329,7 +329,7 @@ calculate_range(const char *dataset, err_type_t type, int level, char *range,
dnode_rele(dn, FTAG);
}
if (os)
dmu_objset_disown(os, FTAG);
dmu_objset_disown(os, B_FALSE, FTAG);

return (ret);
}
Expand Down
66 changes: 56 additions & 10 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
#include <sys/fm/util.h>
#include <sys/fm/protocol.h>
#include <sys/zfs_ioctl.h>

#include <sys/mount.h>
#include <math.h>

#include <libzfs.h>
Expand Down Expand Up @@ -313,12 +313,13 @@ get_usage(zpool_help_t idx)
return (gettext("\thistory [-il] [<pool>] ...\n"));
case HELP_IMPORT:
return (gettext("\timport [-d dir] [-D]\n"
"\timport [-d dir | -c cachefile] [-F [-n]] <pool | id>\n"
"\timport [-d dir | -c cachefile] [-F [-n]] [-l] "
"<pool | id>\n"
"\timport [-o mntopts] [-o property=value] ... \n"
"\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] "
"\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m] [-N] "
"[-R root] [-F [-n]] -a\n"
"\timport [-o mntopts] [-o property=value] ... \n"
"\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] "
"\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m] [-N] "
"[-R root] [-F [-n]]\n"
"\t <pool | id> [newpool]\n"));
case HELP_IOSTAT:
Expand Down Expand Up @@ -359,7 +360,7 @@ get_usage(zpool_help_t idx)
case HELP_SET:
return (gettext("\tset <property=value> <pool> \n"));
case HELP_SPLIT:
return (gettext("\tsplit [-gLnP] [-R altroot] [-o mntopts]\n"
return (gettext("\tsplit [-gLnPl] [-R altroot] [-o mntopts]\n"
"\t [-o property=value] <pool> <newpool> "
"[<device> ...]\n"));
case HELP_REGUID:
Expand Down Expand Up @@ -2261,6 +2262,7 @@ static int
do_import(nvlist_t *config, const char *newname, const char *mntopts,
nvlist_t *props, int flags)
{
int ret = 0;
zpool_handle_t *zhp;
char *name;
uint64_t state;
Expand Down Expand Up @@ -2343,6 +2345,16 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
if ((zhp = zpool_open_canfail(g_zfs, name)) == NULL)
return (1);

/*
* Loading keys is best effort. We don't want to return immediately
* if it fails but we do want to give the error to the caller.
*/
if (flags & ZFS_IMPORT_LOAD_KEYS) {
ret = zfs_crypto_attempt_load_keys(g_zfs, name);
if (ret != 0)
ret = 1;
}

if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL &&
!(flags & ZFS_IMPORT_ONLY) &&
zpool_enable_datasets(zhp, mntopts, 0) != 0) {
Expand All @@ -2351,14 +2363,14 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
}

zpool_close(zhp);
return (0);
return (ret);
}

/*
* zpool import [-d dir] [-D]
* import [-o mntopts] [-o prop=value] ... [-R root] [-D]
* import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l]
* [-d dir | -c cachefile] [-f] -a
* import [-o mntopts] [-o prop=value] ... [-R root] [-D]
* import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l]
* [-d dir | -c cachefile] [-f] [-n] [-F] <pool | id> [newpool]
*
* -c Read pool information from a cachefile instead of searching
Expand Down Expand Up @@ -2393,6 +2405,8 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
*
* -a Import all pools found.
*
* -l Load encryption keys while importing.
*
* -o Set property=value and/or temporary mount options (without '=').
*
* -s Scan using the default search path, the libblkid cache will
Expand Down Expand Up @@ -2434,7 +2448,7 @@ zpool_do_import(int argc, char **argv)
char *endptr;

/* check options */
while ((c = getopt(argc, argv, ":aCc:d:DEfFmnNo:R:stT:VX")) != -1) {
while ((c = getopt(argc, argv, ":aCc:d:DEfFlmnNo:R:stT:VX")) != -1) {
switch (c) {
case 'a':
do_all = B_TRUE;
Expand Down Expand Up @@ -2464,6 +2478,9 @@ zpool_do_import(int argc, char **argv)
case 'F':
do_rewind = B_TRUE;
break;
case 'l':
flags |= ZFS_IMPORT_LOAD_KEYS;
break;
case 'm':
flags |= ZFS_IMPORT_MISSING_LOG;
break;
Expand Down Expand Up @@ -2538,6 +2555,17 @@ zpool_do_import(int argc, char **argv)
usage(B_FALSE);
}

if ((flags & ZFS_IMPORT_LOAD_KEYS) && (flags & ZFS_IMPORT_ONLY)) {
(void) fprintf(stderr, gettext("-l is incompatible with -N\n"));
usage(B_FALSE);
}

if ((flags & ZFS_IMPORT_LOAD_KEYS) && !do_all && argc == 0) {
(void) fprintf(stderr, gettext("-l is only meaningful during "
"an import\n"));
usage(B_FALSE);
}

if ((dryrun || xtreme_rewind) && !do_rewind) {
(void) fprintf(stderr,
gettext("-n or -X only meaningful with -F\n"));
Expand Down Expand Up @@ -5370,6 +5398,7 @@ zpool_do_detach(int argc, char **argv)
* -o Set property=value, or set mount options.
* -P Display full path for vdev name.
* -R Mount the split-off pool under an alternate root.
* -l Load encryption keys while importing.
*
* Splits the named pool and gives it the new pool name. Devices to be split
* off may be listed, provided that no more than one device is specified
Expand All @@ -5387,6 +5416,7 @@ zpool_do_split(int argc, char **argv)
char *mntopts = NULL;
splitflags_t flags;
int c, ret = 0;
boolean_t loadkeys = B_FALSE;
zpool_handle_t *zhp;
nvlist_t *config, *props = NULL;

Expand All @@ -5395,7 +5425,7 @@ zpool_do_split(int argc, char **argv)
flags.name_flags = 0;

/* check options */
while ((c = getopt(argc, argv, ":gLR:no:P")) != -1) {
while ((c = getopt(argc, argv, ":gLR:lno:P")) != -1) {
switch (c) {
case 'g':
flags.name_flags |= VDEV_NAME_GUID;
Expand All @@ -5412,6 +5442,9 @@ zpool_do_split(int argc, char **argv)
usage(B_FALSE);
}
break;
case 'l':
loadkeys = B_TRUE;
break;
case 'n':
flags.dryrun = B_TRUE;
break;
Expand Down Expand Up @@ -5450,6 +5483,12 @@ zpool_do_split(int argc, char **argv)
usage(B_FALSE);
}

if (!flags.import && loadkeys) {
(void) fprintf(stderr, gettext("loading keys is only "
"valid when importing the pool\n"));
usage(B_FALSE);
}

argc -= optind;
argv += optind;

Expand Down Expand Up @@ -5502,6 +5541,13 @@ zpool_do_split(int argc, char **argv)
nvlist_free(props);
return (1);
}

if (loadkeys) {
ret = zfs_crypto_attempt_load_keys(g_zfs, newpool);
if (ret != 0)
ret = 1;
}

if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL &&
zpool_enable_datasets(zhp, mntopts, 0) != 0) {
ret = 1;
Expand Down
121 changes: 105 additions & 16 deletions cmd/zstreamdump/zstreamdump.c
Original file line number Diff line number Diff line change
Expand Up @@ -197,12 +197,33 @@ print_block(char *buf, int length)
}
}

/*
 * Format an array of bytes as lowercase hexadecimal characters into str.
 * Nothing is written to stdout; the caller prints the resulting string.
 * Two digits are emitted per input byte followed by a terminating NUL, so
 * str must have buf_len * 2 + 1 bytes of space.
 */
static void
sprintf_bytes(char *str, uint8_t *buf, uint_t buf_len)
{
	static const char hex_digits[] = "0123456789abcdef";
	uint_t i;

	/*
	 * The index is unsigned to match buf_len, so the loop bound is never
	 * compared against a signed value.
	 */
	for (i = 0; i < buf_len; i++) {
		*str++ = hex_digits[buf[i] >> 4];
		*str++ = hex_digits[buf[i] & 0x0f];
	}

	/* Always terminate the string, even when buf_len is 0. */
	*str = '\0';
}

int
main(int argc, char *argv[])
{
char *buf = safe_malloc(SPA_MAXBLOCKSIZE);
uint64_t drr_record_count[DRR_NUMTYPES] = { 0 };
char salt[ZIO_DATA_SALT_LEN * 2 + 1];
char iv[ZIO_DATA_IV_LEN * 2 + 1];
char mac[ZIO_DATA_MAC_LEN * 2 + 1];
uint64_t total_records = 0;
uint64_t payload_size;
dmu_replay_record_t thedrr;
dmu_replay_record_t *drr = &thedrr;
struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
Expand All @@ -214,6 +235,7 @@ main(int argc, char *argv[])
struct drr_free *drrf = &thedrr.drr_u.drr_free;
struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
struct drr_object_range *drror = &thedrr.drr_u.drr_object_range;
struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum;
char c;
boolean_t verbose = B_FALSE;
Expand Down Expand Up @@ -418,26 +440,35 @@ main(int argc, char *argv[])
drro->drr_blksz = BSWAP_32(drro->drr_blksz);
drro->drr_bonuslen =
BSWAP_32(drro->drr_bonuslen);
drro->drr_raw_bonuslen =
BSWAP_32(drro->drr_raw_bonuslen);
drro->drr_toguid = BSWAP_64(drro->drr_toguid);
}

payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);

if (verbose) {
(void) printf("OBJECT object = %llu type = %u "
"bonustype = %u blksz = %u bonuslen = %u "
"dn_slots = %u\n",
"dn_slots = %u raw_bonuslen = %u "
"flags = %u indblkshift = %u nlevels = %u "
"nblkptr = %u\n",
(u_longlong_t)drro->drr_object,
drro->drr_type,
drro->drr_bonustype,
drro->drr_blksz,
drro->drr_bonuslen,
drro->drr_dn_slots);
drro->drr_dn_slots,
drro->drr_raw_bonuslen,
drro->drr_flags,
drro->drr_indblkshift,
drro->drr_nlevels,
drro->drr_nblkptr);
}
if (drro->drr_bonuslen > 0) {
(void) ssread(buf,
P2ROUNDUP(drro->drr_bonuslen, 8), &zc);
if (dump) {
print_block(buf,
P2ROUNDUP(drro->drr_bonuslen, 8));
}
(void) ssread(buf, payload_size, &zc);
if (dump)
print_block(buf, payload_size);
}
break;

Expand Down Expand Up @@ -471,28 +502,40 @@ main(int argc, char *argv[])
BSWAP_64(drrw->drr_compressed_size);
}

uint64_t payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);

/*
* If this is verbose and/or dump output,
* print info on the modified block
*/
if (verbose) {
sprintf_bytes(salt, drrw->drr_salt,
ZIO_DATA_SALT_LEN);
sprintf_bytes(iv, drrw->drr_iv,
ZIO_DATA_IV_LEN);
sprintf_bytes(mac, drrw->drr_mac,
ZIO_DATA_MAC_LEN);

(void) printf("WRITE object = %llu type = %u "
"checksum type = %u compression type = %u\n"
" offset = %llu logical_size = %llu "
" flags = %u offset = %llu "
"logical_size = %llu "
"compressed_size = %llu "
"payload_size = %llu "
"props = %llx\n",
"payload_size = %llu props = %llx "
"salt = %s iv = %s mac = %s\n",
(u_longlong_t)drrw->drr_object,
drrw->drr_type,
drrw->drr_checksumtype,
drrw->drr_compressiontype,
drrw->drr_flags,
(u_longlong_t)drrw->drr_offset,
(u_longlong_t)drrw->drr_logical_size,
(u_longlong_t)drrw->drr_compressed_size,
(u_longlong_t)payload_size,
(u_longlong_t)drrw->drr_key.ddk_prop);
(u_longlong_t)drrw->drr_key.ddk_prop,
salt,
iv,
mac);
}

/*
Expand Down Expand Up @@ -563,12 +606,31 @@ main(int argc, char *argv[])
if (do_byteswap) {
drrs->drr_object = BSWAP_64(drrs->drr_object);
drrs->drr_length = BSWAP_64(drrs->drr_length);
drrs->drr_compressed_size =
BSWAP_64(drrs->drr_compressed_size);
drrs->drr_type = BSWAP_32(drrs->drr_type);
}
if (verbose) {
sprintf_bytes(salt, drrs->drr_salt,
ZIO_DATA_SALT_LEN);
sprintf_bytes(iv, drrs->drr_iv,
ZIO_DATA_IV_LEN);
sprintf_bytes(mac, drrs->drr_mac,
ZIO_DATA_MAC_LEN);

(void) printf("SPILL block for object = %llu "
"length = %llu\n",
(long long unsigned int)drrs->drr_object,
(long long unsigned int)drrs->drr_length);
"length = %llu flags = %u "
"compression type = %u "
"compressed_size = %llu "
"salt = %s iv = %s mac = %s\n",
(u_longlong_t)drrs->drr_object,
(u_longlong_t)drrs->drr_length,
drrs->drr_flags,
drrs->drr_compressiontype,
(u_longlong_t)drrs->drr_compressed_size,
salt,
iv,
mac);
}
(void) ssread(buf, drrs->drr_length, &zc);
if (dump) {
Expand Down Expand Up @@ -607,6 +669,33 @@ main(int argc, char *argv[])
(void) ssread(buf,
P2ROUNDUP(drrwe->drr_psize, 8), &zc);
break;
case DRR_OBJECT_RANGE:
if (do_byteswap) {
drror->drr_firstobj =
BSWAP_64(drror->drr_firstobj);
drror->drr_numslots =
BSWAP_64(drror->drr_numslots);
drror->drr_toguid = BSWAP_64(drror->drr_toguid);
}
if (verbose) {
sprintf_bytes(salt, drror->drr_salt,
ZIO_DATA_SALT_LEN);
sprintf_bytes(iv, drror->drr_iv,
ZIO_DATA_IV_LEN);
sprintf_bytes(mac, drror->drr_mac,
ZIO_DATA_MAC_LEN);

(void) printf("OBJECT_RANGE firstobj = %llu "
"numslots = %llu flags = %u "
"salt = %s iv = %s mac = %s\n",
(u_longlong_t)drror->drr_firstobj,
(u_longlong_t)drror->drr_numslots,
drror->drr_flags,
salt,
iv,
mac);
}
break;
case DRR_NUMTYPES:
/* should never be reached */
exit(1);
Expand Down
49 changes: 27 additions & 22 deletions cmd/ztest/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -2636,15 +2636,15 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
*/
nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
VERIFY3U(ENOENT, ==,
spa_create("ztest_bad_file", nvroot, NULL, NULL));
spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL));
nvlist_free(nvroot);

/*
* Attempt to create using a bad mirror.
*/
nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1);
VERIFY3U(ENOENT, ==,
spa_create("ztest_bad_mirror", nvroot, NULL, NULL));
spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL));
nvlist_free(nvroot);

/*
Expand All @@ -2653,7 +2653,8 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
*/
(void) rw_rdlock(&ztest_name_lock);
nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL));
VERIFY3U(EEXIST, ==,
spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL));
nvlist_free(nvroot);
VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG));
VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool));
Expand Down Expand Up @@ -2755,7 +2756,7 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
props = fnvlist_alloc();
fnvlist_add_uint64(props,
zpool_prop_to_name(ZPOOL_PROP_VERSION), version);
VERIFY3S(spa_create(name, nvroot, props, NULL), ==, 0);
VERIFY3S(spa_create(name, nvroot, props, NULL, NULL), ==, 0);
fnvlist_free(nvroot);
fnvlist_free(props);

Expand Down Expand Up @@ -3530,7 +3531,7 @@ static int
ztest_dataset_create(char *dsname)
{
uint64_t zilset = ztest_random(100);
int err = dmu_objset_create(dsname, DMU_OST_OTHER, 0,
int err = dmu_objset_create(dsname, DMU_OST_OTHER, 0, NULL,
ztest_objset_create_cb, NULL);

if (err || zilset < 80)
Expand All @@ -3553,15 +3554,15 @@ ztest_objset_destroy_cb(const char *name, void *arg)
/*
* Verify that the dataset contains a directory object.
*/
VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, FTAG, &os));
VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, B_TRUE, FTAG, &os));
error = dmu_object_info(os, ZTEST_DIROBJ, &doi);
if (error != ENOENT) {
/* We could have crashed in the middle of destroying it */
ASSERT0(error);
ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER);
ASSERT3S(doi.doi_physical_blocks_512, >=, 0);
}
dmu_objset_disown(os, FTAG);
dmu_objset_disown(os, B_TRUE, FTAG);

/*
* Destroy the dataset.
Expand Down Expand Up @@ -3637,11 +3638,12 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
* (invoked from ztest_objset_destroy_cb()) should just throw it away.
*/
if (ztest_random(2) == 0 &&
dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) {
dmu_objset_own(name, DMU_OST_OTHER, B_FALSE,
B_TRUE, FTAG, &os) == 0) {
ztest_zd_init(zdtmp, NULL, os);
zil_replay(os, zdtmp, ztest_replay_vector);
ztest_zd_fini(zdtmp);
dmu_objset_disown(os, FTAG);
dmu_objset_disown(os, B_TRUE, FTAG);
}

/*
Expand All @@ -3655,7 +3657,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
/*
* Verify that the destroyed dataset is no longer in the namespace.
*/
VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE,
VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, B_TRUE,
FTAG, &os));

/*
Expand All @@ -3670,7 +3672,8 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_objset_create(%s) = %d", name, error);
}

VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os));
VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, B_TRUE,
FTAG, &os));

ztest_zd_init(zdtmp, NULL, os);

Expand All @@ -3694,7 +3697,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
* Verify that we cannot create an existing dataset.
*/
VERIFY3U(EEXIST, ==,
dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL));
dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL, NULL));

/*
* Verify that we can hold an objset that is also owned.
Expand All @@ -3706,10 +3709,10 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
* Verify that we cannot own an objset that is already owned.
*/
VERIFY3U(EBUSY, ==,
dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2));
dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, B_TRUE, FTAG, &os2));

zil_close(zilog);
dmu_objset_disown(os, FTAG);
dmu_objset_disown(os, B_TRUE, FTAG);
ztest_zd_fini(zdtmp);
out:
(void) rw_unlock(&ztest_name_lock);
Expand Down Expand Up @@ -3863,19 +3866,20 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_objset_create(%s) = %d", clone2name, error);
}

error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, FTAG, &os);
error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, B_TRUE,
FTAG, &os);
if (error)
fatal(0, "dmu_objset_own(%s) = %d", snap2name, error);
error = dsl_dataset_promote(clone2name, NULL);
if (error == ENOSPC) {
dmu_objset_disown(os, FTAG);
dmu_objset_disown(os, B_TRUE, FTAG);
ztest_record_enospc(FTAG);
goto out;
}
if (error != EBUSY)
fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name,
error);
dmu_objset_disown(os, FTAG);
dmu_objset_disown(os, B_TRUE, FTAG);

out:
ztest_dsl_dataset_cleanup(osname, id);
Expand Down Expand Up @@ -6253,7 +6257,7 @@ ztest_dataset_open(int d)
}
ASSERT(error == 0 || error == EEXIST);

VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os));
VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, B_TRUE, zd, &os));
(void) rw_unlock(&ztest_name_lock);

ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os);
Expand Down Expand Up @@ -6294,7 +6298,7 @@ ztest_dataset_close(int d)
ztest_ds_t *zd = &ztest_ds[d];

zil_close(zd->zd_zilog);
dmu_objset_disown(zd->zd_os, zd);
dmu_objset_disown(zd->zd_os, B_TRUE, zd);

ztest_zd_fini(zd);
}
Expand Down Expand Up @@ -6347,12 +6351,12 @@ ztest_run(ztest_shared_t *zs)

dmu_objset_stats_t dds;
VERIFY0(dmu_objset_own(ztest_opts.zo_pool,
DMU_OST_ANY, B_TRUE, FTAG, &os));
DMU_OST_ANY, B_TRUE, B_TRUE, FTAG, &os));
dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
dmu_objset_fast_stat(os, &dds);
dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
zs->zs_guid = dds.dds_guid;
dmu_objset_disown(os, FTAG);
dmu_objset_disown(os, B_TRUE, FTAG);

spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN;

Expand Down Expand Up @@ -6705,7 +6709,8 @@ ztest_init(ztest_shared_t *zs)
VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0));
free(buf);
}
VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL));
VERIFY3U(0, ==,
spa_create(ztest_opts.zo_pool, nvroot, props, NULL, NULL));
nvlist_free(nvroot);
nvlist_free(props);

Expand Down
3 changes: 3 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -186,12 +186,14 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/clean_mirror/Makefile
tests/zfs-tests/tests/functional/cli_root/Makefile
tests/zfs-tests/tests/functional/cli_root/zdb/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_clone/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_copies/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_create/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_destroy/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_get/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_inherit/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_promote/Makefile
Expand All @@ -204,6 +206,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/cli_root/zfs_set/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_share/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_unmount/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_unshare/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/Makefile
Expand Down
4 changes: 2 additions & 2 deletions include/libuutil.h
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ void uu_list_pool_destroy(uu_list_pool_t *);
* usage:
*
* foo_t *a;
* a = malloc(sizeof(*a));
* a = malloc(sizeof (*a));
* uu_list_node_init(a, &a->foo_list, pool);
* ...
* uu_list_node_fini(a, &a->foo_list, pool);
Expand Down Expand Up @@ -345,7 +345,7 @@ void uu_avl_pool_destroy(uu_avl_pool_t *);
* usage:
*
* foo_t *a;
* a = malloc(sizeof(*a));
* a = malloc(sizeof (*a));
* uu_avl_node_init(a, &a->foo_avl, pool);
* ...
* uu_avl_node_fini(a, &a->foo_avl, pool);
Expand Down
22 changes: 20 additions & 2 deletions include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ typedef enum zfs_error {
EZFS_POOLREADONLY, /* pool is in read-only mode */
EZFS_SCRUB_PAUSED, /* scrub currently paused */
EZFS_ACTIVE_POOL, /* pool is imported on a different system */
EZFS_CRYPTOFAILED, /* failed to setup encryption */
EZFS_UNKNOWN
} zfs_error_t;

Expand Down Expand Up @@ -474,8 +475,8 @@ extern uint64_t zfs_prop_default_numeric(zfs_prop_t);
extern const char *zfs_prop_column_name(zfs_prop_t);
extern boolean_t zfs_prop_align_right(zfs_prop_t);

extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t,
nvlist_t *, uint64_t, zfs_handle_t *, zpool_handle_t *, const char *);
extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, nvlist_t *,
uint64_t, zfs_handle_t *, zpool_handle_t *, boolean_t, const char *);

extern const char *zfs_prop_to_name(zfs_prop_t);
extern int zfs_prop_set(zfs_handle_t *, const char *, const char *);
Expand Down Expand Up @@ -505,6 +506,19 @@ extern nvlist_t *zfs_get_user_props(zfs_handle_t *);
extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *);
extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *);

/*
* zfs encryption management
*/
extern int zfs_crypto_get_encryption_root(zfs_handle_t *, boolean_t *, char *);
extern int zfs_crypto_create(libzfs_handle_t *, char *, nvlist_t *, nvlist_t *,
uint8_t **, uint_t *);
extern int zfs_crypto_clone_check(libzfs_handle_t *, zfs_handle_t *, char *,
nvlist_t *);
extern int zfs_crypto_attempt_load_keys(libzfs_handle_t *, char *);
extern int zfs_crypto_load_key(zfs_handle_t *, boolean_t, char *);
extern int zfs_crypto_unload_key(zfs_handle_t *);
extern int zfs_crypto_rewrap(zfs_handle_t *, nvlist_t *, boolean_t);

typedef struct zprop_list {
int pl_prop;
char *pl_user_prop;
Expand Down Expand Up @@ -654,6 +668,9 @@ typedef struct sendflags {

/* compressed WRITE records are permitted */
boolean_t compress;

/* raw encrypted records are permitted */
boolean_t raw;
} sendflags_t;

typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *);
Expand Down Expand Up @@ -737,6 +754,7 @@ extern const char *zfs_type_to_name(zfs_type_t);
extern void zfs_refresh_properties(zfs_handle_t *);
extern int zfs_name_valid(const char *, zfs_type_t);
extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t);
extern int zfs_parent_name(zfs_handle_t *, char *, size_t);
extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *,
zfs_type_t);
extern int zfs_spa_version(zfs_handle_t *, int *);
Expand Down
25 changes: 16 additions & 9 deletions include/libzfs_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,17 @@ enum lzc_dataset_type {
};

int lzc_snapshot(nvlist_t *, nvlist_t *, nvlist_t **);
int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *);
int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *, uint8_t *,
uint_t);
int lzc_clone(const char *, const char *, nvlist_t *);
int lzc_promote(const char *, char *, int);
int lzc_destroy_snaps(nvlist_t *, boolean_t, nvlist_t **);
int lzc_bookmark(nvlist_t *, nvlist_t **);
int lzc_get_bookmarks(const char *, nvlist_t *, nvlist_t **);
int lzc_destroy_bookmarks(nvlist_t *, nvlist_t **);
int lzc_load_key(const char *, boolean_t, uint8_t *, uint_t);
int lzc_unload_key(const char *);
int lzc_change_key(const char *, uint64_t, nvlist_t *, uint8_t *, uint_t);

int lzc_snaprange_space(const char *, const char *, uint64_t *);

Expand All @@ -66,7 +70,8 @@ int lzc_get_holds(const char *, nvlist_t **);
enum lzc_send_flags {
LZC_SEND_FLAG_EMBED_DATA = 1 << 0,
LZC_SEND_FLAG_LARGE_BLOCK = 1 << 1,
LZC_SEND_FLAG_COMPRESS = 1 << 2
LZC_SEND_FLAG_COMPRESS = 1 << 2,
LZC_SEND_FLAG_RAW = 1 << 3,
};

int lzc_send(const char *, const char *, int, enum lzc_send_flags);
Expand All @@ -76,17 +81,19 @@ int lzc_send_space(const char *, const char *, enum lzc_send_flags, uint64_t *);

struct dmu_replay_record;

int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, int);
int lzc_receive_resumable(const char *, nvlist_t *, const char *,
int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, boolean_t,
int);
int lzc_receive_resumable(const char *, nvlist_t *, const char *, boolean_t,
boolean_t, int);
int lzc_receive_with_header(const char *, nvlist_t *, const char *, boolean_t,
boolean_t, int, const struct dmu_replay_record *);
boolean_t, boolean_t, int, const struct dmu_replay_record *);
int lzc_receive_one(const char *, nvlist_t *, const char *, boolean_t,
boolean_t, int, const struct dmu_replay_record *, int, uint64_t *,
uint64_t *, uint64_t *, nvlist_t **);
boolean_t, boolean_t, int, const struct dmu_replay_record *, int,
uint64_t *, uint64_t *, uint64_t *, nvlist_t **);
int lzc_receive_with_cmdprops(const char *, nvlist_t *, nvlist_t *,
const char *, boolean_t, boolean_t, int, const struct dmu_replay_record *,
int, uint64_t *, uint64_t *, uint64_t *, nvlist_t **);
const char *, boolean_t, boolean_t, boolean_t, int,
const struct dmu_replay_record *, int, uint64_t *, uint64_t *,
uint64_t *, nvlist_t **);

boolean_t lzc_exists(const char *);

Expand Down
2 changes: 2 additions & 0 deletions include/sys/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/dsl_deleg.h \
$(top_srcdir)/include/sys/dsl_destroy.h \
$(top_srcdir)/include/sys/dsl_dir.h \
$(top_srcdir)/include/sys/dsl_crypt.h \
$(top_srcdir)/include/sys/dsl_pool.h \
$(top_srcdir)/include/sys/dsl_prop.h \
$(top_srcdir)/include/sys/dsl_scan.h \
Expand Down Expand Up @@ -109,6 +110,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/zil_impl.h \
$(top_srcdir)/include/sys/zio_checksum.h \
$(top_srcdir)/include/sys/zio_compress.h \
$(top_srcdir)/include/sys/zio_crypt.h \
$(top_srcdir)/include/sys/zio.h \
$(top_srcdir)/include/sys/zio_impl.h \
$(top_srcdir)/include/sys/zio_priority.h \
Expand Down
69 changes: 55 additions & 14 deletions include/sys/arc.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,26 @@ _NOTE(CONSTCOND) } while (0)
typedef struct arc_buf_hdr arc_buf_hdr_t;
typedef struct arc_buf arc_buf_t;
typedef struct arc_prune arc_prune_t;
typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *private);

/*
* Because the ARC can store encrypted data, errors (not due to bugs) may arise
* while transforming data into its desired format - specifically, when
* decrypting, the key may not be present, or the HMAC may not be correct
* which signifies deliberate tampering with the on-disk state
* (assuming that the checksum was correct). The "error" parameter will be
* nonzero in this case, even if there is no associated zio.
*/
typedef void arc_read_done_func_t(zio_t *zio, int error, arc_buf_t *buf,
void *private);
typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *private);
typedef void arc_prune_func_t(int64_t bytes, void *private);

/* Shared module parameters */
extern int zfs_arc_average_blocksize;

/* generic arc_read_done_func_t's which you can use */
arc_done_func_t arc_bcopy_func;
arc_done_func_t arc_getbuf_func;
arc_read_done_func_t arc_bcopy_func;
arc_read_done_func_t arc_getbuf_func;

/* generic arc_prune_func_t wrapper for callbacks */
struct arc_prune {
Expand Down Expand Up @@ -110,20 +121,29 @@ typedef enum arc_flags
ARC_FLAG_L2_WRITING = 1 << 11, /* write in progress */
ARC_FLAG_L2_EVICTED = 1 << 12, /* evicted during I/O */
ARC_FLAG_L2_WRITE_HEAD = 1 << 13, /* head of write list */
/*
* Encrypted or authenticated on disk (may be plaintext in memory).
* This header has b_crypt_hdr allocated. Does not include indirect
* blocks with checksums of MACs which will also have their X
* (encrypted) bit set in the bp.
*/
ARC_FLAG_PROTECTED = 1 << 14,
/* data has not been authenticated yet */
ARC_FLAG_NOAUTH = 1 << 15,
/* indicates that the buffer contains metadata (otherwise, data) */
ARC_FLAG_BUFC_METADATA = 1 << 14,
ARC_FLAG_BUFC_METADATA = 1 << 16,

/* Flags specifying whether optional hdr struct fields are defined */
ARC_FLAG_HAS_L1HDR = 1 << 15,
ARC_FLAG_HAS_L2HDR = 1 << 16,
ARC_FLAG_HAS_L1HDR = 1 << 17,
ARC_FLAG_HAS_L2HDR = 1 << 18,

/*
* Indicates the arc_buf_hdr_t's b_pdata matches the on-disk data.
* This allows the l2arc to use the blkptr's checksum to verify
* the data without having to store the checksum in the hdr.
*/
ARC_FLAG_COMPRESSED_ARC = 1 << 17,
ARC_FLAG_SHARED_DATA = 1 << 18,
ARC_FLAG_COMPRESSED_ARC = 1 << 19,
ARC_FLAG_SHARED_DATA = 1 << 20,

/*
* The arc buffer's compression mode is stored in the top 7 bits of the
Expand All @@ -142,7 +162,12 @@ typedef enum arc_flags

typedef enum arc_buf_flags {
ARC_BUF_FLAG_SHARED = 1 << 0,
ARC_BUF_FLAG_COMPRESSED = 1 << 1
ARC_BUF_FLAG_COMPRESSED = 1 << 1,
/*
* indicates whether this arc_buf_t is encrypted, regardless of
* state on-disk
*/
ARC_BUF_FLAG_ENCRYPTED = 1 << 2
} arc_buf_flags_t;

struct arc_buf {
Expand Down Expand Up @@ -206,15 +231,31 @@ typedef struct arc_buf_info {
void arc_space_consume(uint64_t space, arc_space_type_t type);
void arc_space_return(uint64_t space, arc_space_type_t type);
boolean_t arc_is_metadata(arc_buf_t *buf);
boolean_t arc_is_encrypted(arc_buf_t *buf);
boolean_t arc_is_unauthenticated(arc_buf_t *buf);
enum zio_compress arc_get_compression(arc_buf_t *buf);
int arc_decompress(arc_buf_t *buf);
void arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt,
uint8_t *iv, uint8_t *mac);
int arc_untransform(arc_buf_t *buf, spa_t *spa, uint64_t dsobj,
boolean_t in_place);
void arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder,
dmu_object_type_t ot, const uint8_t *salt, const uint8_t *iv,
const uint8_t *mac);
arc_buf_t *arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type,
int32_t size);
arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, void *tag,
uint64_t psize, uint64_t lsize, enum zio_compress compression_type);
arc_buf_t *arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj,
boolean_t byteorder, const uint8_t *salt, const uint8_t *iv,
const uint8_t *mac, dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
enum zio_compress compression_type);
arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size);
arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize,
enum zio_compress compression_type);
arc_buf_t *arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder,
const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
enum zio_compress compression_type);
void arc_return_buf(arc_buf_t *buf, void *tag);
void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
void arc_buf_destroy(arc_buf_t *buf, void *tag);
Expand All @@ -231,12 +272,12 @@ int arc_referenced(arc_buf_t *buf);
#endif

int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
arc_done_func_t *done, void *private, zio_priority_t priority, int flags,
arc_flags_t *arc_flags, const zbookmark_phys_t *zb);
arc_read_done_func_t *done, void *private, zio_priority_t priority,
int flags, arc_flags_t *arc_flags, const zbookmark_phys_t *zb);
zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp,
arc_done_func_t *ready, arc_done_func_t *child_ready,
arc_done_func_t *physdone, arc_done_func_t *done,
arc_write_done_func_t *ready, arc_write_done_func_t *child_ready,
arc_write_done_func_t *physdone, arc_write_done_func_t *done,
void *private, zio_priority_t priority, int zio_flags,
const zbookmark_phys_t *zb);

Expand Down
52 changes: 45 additions & 7 deletions include/sys/arc_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#define _SYS_ARC_IMPL_H

#include <sys/arc.h>
#include <sys/zio_crypt.h>

#ifdef __cplusplus
extern "C" {
Expand Down Expand Up @@ -90,22 +91,24 @@ typedef struct arc_callback arc_callback_t;

struct arc_callback {
void *acb_private;
arc_done_func_t *acb_done;
arc_read_done_func_t *acb_done;
arc_buf_t *acb_buf;
boolean_t acb_encrypted;
boolean_t acb_compressed;
boolean_t acb_noauth;
zio_t *acb_zio_dummy;
arc_callback_t *acb_next;
};

typedef struct arc_write_callback arc_write_callback_t;

struct arc_write_callback {
void *awcb_private;
arc_done_func_t *awcb_ready;
arc_done_func_t *awcb_children_ready;
arc_done_func_t *awcb_physdone;
arc_done_func_t *awcb_done;
arc_buf_t *awcb_buf;
void *awcb_private;
arc_write_done_func_t *awcb_ready;
arc_write_done_func_t *awcb_children_ready;
arc_write_done_func_t *awcb_physdone;
arc_write_done_func_t *awcb_done;
arc_buf_t *awcb_buf;
};

/*
Expand Down Expand Up @@ -169,6 +172,36 @@ typedef struct l1arc_buf_hdr {
abd_t *b_pabd;
} l1arc_buf_hdr_t;

/*
* Encrypted blocks will need to be stored encrypted on the L2ARC
* disk as they appear in the main pool. In order for this to work we
* need to pass around the encryption parameters so they can be used
* to write data to the L2ARC. The contents of this struct are only
* valid within an arc_buf_hdr_t when the L1 header exists and the
* ARC_FLAG_ENCRYPTED flag is set.
*/
typedef struct arc_buf_hdr_crypt {
abd_t *b_rabd; /* raw encrypted data */
dmu_object_type_t b_ot; /* object type */
uint32_t b_ebufcnt; /* count of encrypted buffers */

/* dataset object used to look up the key for L2ARC encryption */
uint64_t b_dsobj;

/* encryption parameters: salt and IV, sized by the zio_crypt limits */
uint8_t b_salt[ZIO_DATA_SALT_LEN];
uint8_t b_iv[ZIO_DATA_IV_LEN];

/*
* Technically this could be removed since we will always be able to
* get the MAC from the bp when we need it. However, it is inconvenient
* for callers of arc code to have to pass a bp in all the time. This
* also allows us to assert that L2ARC data is properly encrypted to
* match the data in the main storage pool.
*/
uint8_t b_mac[ZIO_DATA_MAC_LEN];
} arc_buf_hdr_crypt_t;

typedef struct l2arc_dev {
vdev_t *l2ad_vdev; /* vdev */
spa_t *l2ad_spa; /* spa */
Expand Down Expand Up @@ -237,6 +270,11 @@ struct arc_buf_hdr {
l2arc_buf_hdr_t b_l2hdr;
/* L1ARC fields. Undefined when in l2arc_only state */
l1arc_buf_hdr_t b_l1hdr;
/*
* Encryption parameters. Defined only when ARC_FLAG_ENCRYPTED
* is set and the L1 header exists.
*/
arc_buf_hdr_crypt_t b_crypt_hdr;
};
#ifdef __cplusplus
}
Expand Down
2 changes: 2 additions & 0 deletions include/sys/dbuf.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ extern "C" {
#define DB_RF_NOPREFETCH (1 << 3)
#define DB_RF_NEVERWAIT (1 << 4)
#define DB_RF_CACHED (1 << 5)
#define DB_RF_NO_DECRYPT (1 << 6)

/*
* The simplified state transition diagram for dbufs looks like:
Expand Down Expand Up @@ -146,6 +147,7 @@ typedef struct dbuf_dirty_record {
override_states_t dr_override_state;
uint8_t dr_copies;
boolean_t dr_nopwrite;
boolean_t dr_raw;
} dl;
} dt;
} dbuf_dirty_record_t;
Expand Down
15 changes: 11 additions & 4 deletions include/sys/ddt.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,10 @@ enum ddt_class {
typedef struct ddt_key {
zio_cksum_t ddk_cksum; /* 256-bit block checksum */
/*
* Encoded with logical & physical size, and compression, as follows:
* Encoded with logical & physical size, encryption, and compression,
* as follows:
* +-------+-------+-------+-------+-------+-------+-------+-------+
* | 0 | 0 | 0 | comp | PSIZE | LSIZE |
* | 0 | 0 | 0 |X| comp| PSIZE | LSIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
*/
uint64_t ddk_prop;
Expand All @@ -85,11 +86,17 @@ typedef struct ddt_key {
#define DDK_SET_PSIZE(ddk, x) \
BF64_SET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)

#define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 8)
#define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 8, x)
#define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 7)
#define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 7, x)

#define DDK_GET_CRYPT(ddk) BF64_GET((ddk)->ddk_prop, 39, 1)
#define DDK_SET_CRYPT(ddk, x) BF64_SET((ddk)->ddk_prop, 39, 1, x)

#define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t))

/*
 * Number of slots in a DDT entry's DVA array that hold real DVAs.
 * An encrypted block stores its salt and IV in the last DVA slot of the
 * block pointer, so it has one fewer usable DVA than an unencrypted block.
 * The argument is parenthesized so that expressions such as "e + 1" or
 * "&arr[i]" expand correctly.
 */
#define DDE_GET_NDVAS(dde) (DDK_GET_CRYPT(&(dde)->dde_key) \
	? SPA_DVAS_PER_BP - 1 : SPA_DVAS_PER_BP)

typedef struct ddt_phys {
dva_t ddp_dva[SPA_DVAS_PER_BP];
uint64_t ddp_refcnt;
Expand Down
72 changes: 57 additions & 15 deletions include/sys/dmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ struct nvlist;
struct arc_buf;
struct zio_prop;
struct sa_handle;
struct dsl_crypto_params;

typedef struct objset objset_t;
typedef struct dmu_tx dmu_tx_t;
Expand Down Expand Up @@ -100,16 +101,18 @@ typedef enum dmu_object_byteswap {

#define DMU_OT_NEWTYPE 0x80
#define DMU_OT_METADATA 0x40
#define DMU_OT_BYTESWAP_MASK 0x3f
#define DMU_OT_ENCRYPTED 0x20
#define DMU_OT_BYTESWAP_MASK 0x1f

/*
* Defines a uint8_t object type. Object types specify if the data
* in the object is metadata (boolean) and how to byteswap the data
* (dmu_object_byteswap_t).
*/
#define DMU_OT(byteswap, metadata) \
#define DMU_OT(byteswap, metadata, encrypted) \
(DMU_OT_NEWTYPE | \
((metadata) ? DMU_OT_METADATA : 0) | \
((encrypted) ? DMU_OT_ENCRYPTED : 0) | \
((byteswap) & DMU_OT_BYTESWAP_MASK))

#define DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \
Expand All @@ -120,6 +123,10 @@ typedef enum dmu_object_byteswap {
((ot) & DMU_OT_METADATA) : \
dmu_ot[(int)(ot)].ot_metadata)

/*
* Nonzero if object type ot is encrypted. Types carrying DMU_OT_NEWTYPE
* encode this in the DMU_OT_ENCRYPTED bit (so the result is that bit's
* value, not necessarily 1); all other types are looked up in the dmu_ot[]
* table via its ot_encrypt field.
*/
#define DMU_OT_IS_ENCRYPTED(ot) (((ot) & DMU_OT_NEWTYPE) ? \
((ot) & DMU_OT_ENCRYPTED) : \
dmu_ot[(int)(ot)].ot_encrypt)

/*
* These object types use bp_fill != 1 for their L0 bp's. Therefore they can't
* have their data embedded (i.e. use a BP_IS_EMBEDDED() bp), because bp_fill
Expand Down Expand Up @@ -215,16 +222,27 @@ typedef enum dmu_object_type {
/*
* Names for valid types declared with DMU_OT().
*/
DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE),
DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE),
DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE),
DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE),
DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE),
DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE),
DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE),
DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE),
DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE),
DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE),
DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE, B_FALSE),
DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE, B_FALSE),
DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE, B_FALSE),
DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE, B_FALSE),
DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE, B_FALSE),
DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE, B_FALSE),
DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE, B_FALSE),
DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE, B_FALSE),
DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE, B_FALSE),
DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE, B_FALSE),

DMU_OTN_UINT8_ENC_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE, B_TRUE),
DMU_OTN_UINT8_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE, B_TRUE),
DMU_OTN_UINT16_ENC_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE, B_TRUE),
DMU_OTN_UINT16_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE, B_TRUE),
DMU_OTN_UINT32_ENC_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE, B_TRUE),
DMU_OTN_UINT32_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE, B_TRUE),
DMU_OTN_UINT64_ENC_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE, B_TRUE),
DMU_OTN_UINT64_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE, B_TRUE),
DMU_OTN_ZAP_ENC_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE, B_TRUE),
DMU_OTN_ZAP_ENC_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE, B_TRUE),
} dmu_object_type_t;

typedef enum txg_how {
Expand Down Expand Up @@ -267,19 +285,24 @@ void zfs_znode_byteswap(void *buf, size_t size);
*/
#define DMU_BONUS_BLKID (-1ULL)
#define DMU_SPILL_BLKID (-2ULL)

/*
* Public routines to create, destroy, open, and close objsets.
*/
typedef void dmu_objset_create_sync_func_t(objset_t *os, void *arg,
cred_t *cr, dmu_tx_t *tx);

int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
int dmu_objset_own(const char *name, dmu_objset_type_t type,
boolean_t readonly, void *tag, objset_t **osp);
boolean_t readonly, boolean_t key_required, void *tag, objset_t **osp);
void dmu_objset_rele(objset_t *os, void *tag);
void dmu_objset_disown(objset_t *os, void *tag);
void dmu_objset_disown(objset_t *os, boolean_t key_required, void *tag);
int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp);

void dmu_objset_evict_dbufs(objset_t *os);
int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
struct dsl_crypto_params *dcp, dmu_objset_create_sync_func_t func,
void *arg);
int dmu_objset_clone(const char *name, const char *origin);
int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer,
struct nvlist *errlist);
Expand Down Expand Up @@ -390,6 +413,13 @@ int dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx);
int dmu_object_next(objset_t *os, uint64_t *objectp,
boolean_t hole, uint64_t txg);

/*
* Set the number of levels on a dnode. nlevels must be greater than the
* current number of levels; otherwise EINVAL is returned.
*/
int dmu_object_set_nlevels(objset_t *os, uint64_t object, int nlevels,
dmu_tx_t *tx);

/*
* Set the data blocksize for an object.
*
Expand Down Expand Up @@ -432,6 +462,7 @@ dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset,

void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
struct zio_prop *zp);

/*
* The bonus data is accessed more or less like a regular buffer.
* You must dmu_bonus_hold() to get the buffer, which will give you a
Expand All @@ -444,6 +475,8 @@ void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
*
* Returns ENOENT, EIO, or 0.
*/
int dmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag,
uint32_t flags, dmu_buf_t **dbp);
int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **);
int dmu_bonus_max(void);
int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
Expand Down Expand Up @@ -655,6 +688,7 @@ struct blkptr *dmu_buf_get_blkptr(dmu_buf_t *db);
* (ie. you've called dmu_tx_hold_object(tx, db->db_object)).
*/
void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_will_change_crypt_params(dmu_buf_t *db, dmu_tx_t *tx);

/*
* You must create a transaction, then hold the objects which you will
Expand Down Expand Up @@ -737,6 +771,7 @@ int dmu_free_long_object(objset_t *os, uint64_t object);
*/
#define DMU_READ_PREFETCH 0 /* prefetch */
#define DMU_READ_NO_PREFETCH 1 /* don't prefetch */
#define DMU_READ_NO_DECRYPT 2 /* don't decrypt */
int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
void *buf, uint32_t flags);
int dmu_read_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, void *buf,
Expand All @@ -763,6 +798,12 @@ struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
void dmu_return_arcbuf(struct arc_buf *buf);
void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf,
dmu_tx_t *tx);
void dmu_assign_arcbuf_impl(dmu_buf_t *handle, struct arc_buf *buf,
dmu_tx_t *tx);
void dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder,
const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx);
void dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
dmu_buf_t *handle, dmu_tx_t *tx);
#ifdef HAVE_UIO_ZEROCOPY
int dmu_xuio_init(struct xuio *uio, int niov);
void dmu_xuio_fini(struct xuio *uio);
Expand Down Expand Up @@ -807,6 +848,7 @@ typedef void (*const arc_byteswap_func_t)(void *buf, size_t size);
/* Per-object-type properties, as read from dmu_ot[] by the DMU_OT_IS_*() macros. */
typedef struct dmu_object_type_info {
dmu_object_byteswap_t ot_byteswap; /* how to byteswap the object's data */
boolean_t ot_metadata; /* read by DMU_OT_IS_METADATA() */
boolean_t ot_encrypt; /* read by DMU_OT_IS_ENCRYPTED() */
char *ot_name; /* NOTE(review): presumably a printable type name; confirm */
} dmu_object_type_info_t;

Expand Down
29 changes: 24 additions & 5 deletions include/sys/dmu_objset.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,19 @@ struct dmu_tx;
#define OBJSET_FLAG_USERACCOUNTING_COMPLETE (1ULL<<0)
#define OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE (1ULL<<1)

/* all flags are currently non-portable */
#define OBJSET_CRYPT_PORTABLE_FLAGS_MASK (0)

typedef struct objset_phys {
dnode_phys_t os_meta_dnode;
zil_header_t os_zil_header;
uint64_t os_type;
uint64_t os_flags;
uint8_t os_portable_mac[ZIO_OBJSET_MAC_LEN];
uint8_t os_local_mac[ZIO_OBJSET_MAC_LEN];
char os_pad[OBJSET_PHYS_SIZE - sizeof (dnode_phys_t)*3 -
sizeof (zil_header_t) - sizeof (uint64_t)*2];
sizeof (zil_header_t) - sizeof (uint64_t)*2 -
2*ZIO_OBJSET_MAC_LEN];
dnode_phys_t os_userused_dnode;
dnode_phys_t os_groupused_dnode;
} objset_phys_t;
Expand All @@ -77,6 +83,8 @@ struct objset {
spa_t *os_spa;
arc_buf_t *os_phys_buf;
objset_phys_t *os_phys;
boolean_t os_encrypted;

/*
* The following "special" dnodes have no parent, are exempt
* from dnode_move(), and are not recorded in os_dnodes, but they
Expand Down Expand Up @@ -118,6 +126,9 @@ struct objset {
uint64_t os_freed_dnodes;
boolean_t os_rescan_dnodes;

/* os_phys_buf should be written raw next txg */
boolean_t os_next_write_raw;

/* Protected by os_obj_lock */
kmutex_t os_obj_lock;
uint64_t os_obj_next_chunk;
Expand Down Expand Up @@ -161,13 +172,18 @@ struct objset {

/* called from zpl */
int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
int dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag,
objset_t **osp);
int dmu_objset_own(const char *name, dmu_objset_type_t type,
boolean_t readonly, void *tag, objset_t **osp);
boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp);
int dmu_objset_own_obj(struct dsl_pool *dp, uint64_t obj,
dmu_objset_type_t type, boolean_t readonly, void *tag, objset_t **osp);
void dmu_objset_refresh_ownership(objset_t *os, void *tag);
dmu_objset_type_t type, boolean_t readonly, boolean_t decrypt,
void *tag, objset_t **osp);
void dmu_objset_refresh_ownership(objset_t *os, boolean_t key_needed,
void *tag);
void dmu_objset_rele(objset_t *os, void *tag);
void dmu_objset_disown(objset_t *os, void *tag);
void dmu_objset_rele_flags(objset_t *os, boolean_t decrypt, void *tag);
void dmu_objset_disown(objset_t *os, boolean_t decrypt, void *tag);
int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp);

void dmu_objset_stats(objset_t *os, nvlist_t *nv);
Expand All @@ -184,6 +200,9 @@ timestruc_t dmu_objset_snap_cmtime(objset_t *os);
/* called from dsl */
void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx);
boolean_t dmu_objset_is_dirty(objset_t *os, uint64_t txg);
objset_t *dmu_objset_create_impl_dnstats(spa_t *spa, struct dsl_dataset *ds,
blkptr_t *bp, dmu_objset_type_t type, int levels, int blksz, int ibs,
dmu_tx_t *tx);
objset_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx);
int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,
Expand Down
5 changes: 3 additions & 2 deletions include/sys/dmu_send.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,15 @@ struct dmu_replay_record;
extern const char *recv_clone_name;

int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
boolean_t large_block_ok, boolean_t compressok, int outfd,
boolean_t large_block_ok, boolean_t compressok, boolean_t rawok, int outfd,
uint64_t resumeobj, uint64_t resumeoff, struct vnode *vp, offset_t *off);
int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
boolean_t stream_compressed, uint64_t *sizep);
int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg,
boolean_t stream_compressed, uint64_t *sizep);
int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
boolean_t embedok, boolean_t large_block_ok, boolean_t compressok,
int outfd, struct vnode *vp, offset_t *off);
boolean_t rawok, int outfd, struct vnode *vp, offset_t *off);

typedef struct dmu_recv_cookie {
struct dsl_dataset *drc_ds;
Expand All @@ -61,6 +61,7 @@ typedef struct dmu_recv_cookie {
boolean_t drc_byteswap;
boolean_t drc_force;
boolean_t drc_resumable;
boolean_t drc_raw;
struct avl_tree *drc_guid_to_ds_map;
zio_cksum_t drc_cksum;
uint64_t drc_newsnapobj;
Expand Down
9 changes: 9 additions & 0 deletions include/sys/dmu_traverse.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,15 @@ typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
#define TRAVERSE_PREFETCH (TRAVERSE_PREFETCH_METADATA | TRAVERSE_PREFETCH_DATA)
#define TRAVERSE_HARD (1<<4)

/*
* Encrypted dnode blocks have encrypted bonus buffers while the rest
* of the dnode is left unencrypted. Callers can specify the
* TRAVERSE_NO_DECRYPT flag to indicate to the traversal code that
* they wish to receive the raw encrypted dnodes instead of attempting
* to read the logical data.
*/
#define TRAVERSE_NO_DECRYPT (1<<5)

/* Special traverse error return value to indicate skipping of children */
#define TRAVERSE_VISIT_NO_CHILDREN -1

Expand Down
11 changes: 8 additions & 3 deletions include/sys/dnode.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,7 @@ extern "C" {
/*
* dnode id flags
*
* Note: a file will never ever have its
* ids moved from bonus->spill
* and only in a crypto environment would it be on spill
* Note: a file will never ever have its ids moved from bonus->spill
*/
#define DN_ID_CHKED_BONUS 0x1
#define DN_ID_CHKED_SPILL 0x2
Expand Down Expand Up @@ -115,6 +113,10 @@ extern "C" {

#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \
(((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t))))
/*
 * Maximum number of bonus bytes the dnode can hold: the distance from the
 * start of the bonus area to the spill block pointer when
 * DNODE_FLAG_SPILL_BLKPTR is set, otherwise to the end of the last slot
 * occupied by the dnode (dn_extra_slots + 1 slots in total).
 *
 * The argument and the whole expansion are parenthesized so the macro can
 * be used with arbitrary pointer expressions and inside larger expressions.
 * NOTE: dnp is evaluated more than once; pass a side-effect-free expression.
 */
#define DN_MAX_BONUS_LEN(dnp) \
	((((dnp)->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? \
	((uint8_t *)DN_SPILL_BLKPTR(dnp) - (uint8_t *)DN_BONUS(dnp)) : \
	((uint8_t *)((dnp) + ((dnp)->dn_extra_slots + 1)) - \
	(uint8_t *)DN_BONUS(dnp))))

#define DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? \
(dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT)
Expand All @@ -141,6 +143,8 @@ enum dnode_dirtycontext {
/* User/Group dnode accounting */
#define DNODE_FLAG_USEROBJUSED_ACCOUNTED (1 << 3)

#define DNODE_CRYPT_PORTABLE_FLAGS_MASK (DNODE_FLAG_SPILL_BLKPTR)

typedef struct dnode_phys {
uint8_t dn_type; /* dmu_object_type_t */
uint8_t dn_indblkshift; /* ln2(indirect block size) */
Expand Down Expand Up @@ -342,6 +346,7 @@ void dnode_free(dnode_t *dn, dmu_tx_t *tx);
void dnode_byteswap(dnode_phys_t *dnp);
void dnode_buf_byteswap(void *buf, size_t size);
void dnode_verify(dnode_t *dn);
int dnode_set_nlevels(dnode_t *dn, int nlevels, dmu_tx_t *tx);
int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
void dnode_diduse_space(dnode_t *dn, int64_t space);
Expand Down
218 changes: 218 additions & 0 deletions include/sys/dsl_crypt.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
/*
* CDDL HEADER START
*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*
* CDDL HEADER END
*/

/*
* Copyright (c) 2017, Datto, Inc. All rights reserved.
*/

#ifndef _SYS_DSL_CRYPT_H
#define _SYS_DSL_CRYPT_H

#include <sys/dmu_tx.h>
#include <sys/dmu.h>
#include <sys/zio_crypt.h>
#include <sys/spa.h>
#include <sys/dsl_dataset.h>

/*
* ZAP entry keys for DSL Crypto Keys stored on disk. ZFS_PROP_KEYFORMAT,
* ZFS_PROP_PBKDF2_SALT, and ZFS_PROP_PBKDF2_ITERS are also maintained
* alongside these entries, using their respective property names as keys.
*/
#define DSL_CRYPTO_KEY_CRYPTO_SUITE "DSL_CRYPTO_SUITE"
#define DSL_CRYPTO_KEY_GUID "DSL_CRYPTO_GUID"
#define DSL_CRYPTO_KEY_IV "DSL_CRYPTO_IV"
#define DSL_CRYPTO_KEY_MAC "DSL_CRYPTO_MAC"
#define DSL_CRYPTO_KEY_MASTER_KEY "DSL_CRYPTO_MASTER_KEY_1"
#define DSL_CRYPTO_KEY_HMAC_KEY "DSL_CRYPTO_HMAC_KEY_1"
#define DSL_CRYPTO_KEY_ROOT_DDOBJ "DSL_CRYPTO_ROOT_DDOBJ"
#define DSL_CRYPTO_KEY_REFCOUNT "DSL_CRYPTO_REFCOUNT"


/*
* In-memory representation of a wrapping key. One of these structs will exist
* for each encryption root with its key loaded.
*/
typedef struct dsl_wrapping_key {
/* AVL linkage for the spa_keystore_t sk_wkeys tree */
avl_node_t wk_avl_link;

/* keyformat property enum */
zfs_keyformat_t wk_keyformat;

/* the pbkdf2 salt, if the keyformat is of type passphrase */
uint64_t wk_salt;

/* the pbkdf2 iterations, if the keyformat is of type passphrase */
uint64_t wk_iters;

/* actual wrapping key */
crypto_key_t wk_key;

/* number of dsl_crypto_key_t's currently holding this struct */
refcount_t wk_refcnt;

/* dsl directory object that owns this wrapping key (sk_wkeys index) */
uint64_t wk_ddobj;
} dsl_wrapping_key_t;

/*
 * Commands carried in dsl_crypto_params_t that select the special action
 * to run. Values are spelled out explicitly; DCP_CMD_MAX is one past the
 * last real command.
 */
typedef enum dcp_cmd {
	/* key creation commands */
	DCP_CMD_NONE = 0,		/* no specific command */
	DCP_CMD_RAW_RECV = 1,		/* raw receive */

	/* key changing commands */
	DCP_CMD_NEW_KEY = 2,		/* rewrap key as an encryption root */
	DCP_CMD_INHERIT = 3,		/* rewrap key with parent's wrapping key */
	DCP_CMD_FORCE_NEW_KEY = 4,	/* change to encryption root, no rewrap */
	DCP_CMD_FORCE_INHERIT = 5,	/* inherit parent's key, no rewrap */

	DCP_CMD_MAX = 6
} dcp_cmd_t;

/*
* This struct is a simple wrapper around all the parameters that are usually
* required to set up encryption. It exists so that all of the params can be
* passed around the kernel together for convenience.
*/
typedef struct dsl_crypto_params {
/* command indicating intended action (one of dcp_cmd_t) */
dcp_cmd_t cp_cmd;

/* the encryption algorithm */
enum zio_encrypt cp_crypt;

/* keylocation property string */
char *cp_keylocation;

/* the wrapping key */
dsl_wrapping_key_t *cp_wkey;
} dsl_crypto_params_t;

/*
* In-memory representation of a DSL Crypto Key object. One of these structs
* (and corresponding on-disk ZAP object) will exist for each encrypted
* clone family that is mounted or otherwise reading protected data.
*/
typedef struct dsl_crypto_key {
/* AVL linkage for the spa_keystore_t sk_dsl_keys tree */
avl_node_t dck_avl_link;

/* number of dsl_key_mapping_t's currently holding this key */
refcount_t dck_holds;

/* master key used to derive encryption keys */
zio_crypt_key_t dck_key;

/* wrapping key for syncing this structure to disk */
dsl_wrapping_key_t *dck_wkey;

/* on-disk object id */
uint64_t dck_obj;
} dsl_crypto_key_t;

/*
* In-memory mapping of a dataset object id to a DSL Crypto Key. This is used
* to look up the corresponding dsl_crypto_key_t from the zio layer for
* performing data encryption and decryption.
*/
typedef struct dsl_key_mapping {
/* AVL linkage for the spa_keystore_t sk_key_mappings tree */
avl_node_t km_avl_link;

/* number of users currently depending on this mapping */
refcount_t km_refcnt;

/* dataset object id this crypto key belongs to (the lookup index) */
uint64_t km_dsobj;

/* crypto key this mapping resolves to (the lookup value) */
dsl_crypto_key_t *km_key;
} dsl_key_mapping_t;

/*
* In-memory structure holding all wrapping keys, DSL crypto keys, and
* dataset-to-key mappings. Each of the three AVL trees has its own
* reader/writer lock.
*/
typedef struct spa_keystore {
/* lock for protecting sk_dsl_keys */
krwlock_t sk_dk_lock;

/* tree of all dsl_crypto_key_t's */
avl_tree_t sk_dsl_keys;

/* lock for protecting sk_key_mappings */
krwlock_t sk_km_lock;

/* tree of all dsl_key_mapping_t's, indexed by dsobj */
avl_tree_t sk_key_mappings;

/* lock for protecting sk_wkeys */
krwlock_t sk_wkeys_lock;

/* tree of all dsl_wrapping_key_t's, indexed by ddobj */
avl_tree_t sk_wkeys;
} spa_keystore_t;

int dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props,
nvlist_t *crypto_args, dsl_crypto_params_t **dcp_out);
void dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload);
void dsl_dataset_crypt_stats(struct dsl_dataset *ds, nvlist_t *nv);
int dsl_crypto_can_set_keylocation(const char *dsname, const char *keylocation);

void spa_keystore_init(spa_keystore_t *sk);
void spa_keystore_fini(spa_keystore_t *sk);

void spa_keystore_dsl_key_rele(spa_t *spa, dsl_crypto_key_t *dck, void *tag);
int spa_keystore_load_wkey_impl(spa_t *spa, dsl_wrapping_key_t *wkey);
int spa_keystore_load_wkey(const char *dsname, dsl_crypto_params_t *dcp,
boolean_t noop);
int spa_keystore_unload_wkey_impl(spa_t *spa, uint64_t ddobj);
int spa_keystore_unload_wkey(const char *dsname);

int spa_keystore_create_mapping_impl(spa_t *spa, uint64_t dsobj, dsl_dir_t *dd,
void *tag);
int spa_keystore_create_mapping(spa_t *spa, struct dsl_dataset *ds, void *tag);
int spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag);
int spa_keystore_lookup_key(spa_t *spa, uint64_t dsobj, void *tag,
dsl_crypto_key_t **dck_out);

int dsl_crypto_populate_key_nvlist(struct dsl_dataset *ds, nvlist_t **nvl_out);
int dsl_crypto_recv_key(const char *poolname, uint64_t dsobj,
dmu_objset_type_t ostype, nvlist_t *nvl);

int spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp);
int dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent);
int dsl_dataset_promote_crypt_check(dsl_dir_t *target, dsl_dir_t *origin);
void dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin,
dmu_tx_t *tx);
int dmu_objset_create_crypt_check(dsl_dir_t *parentdd,
dsl_crypto_params_t *dcp);
void dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd,
struct dsl_dataset *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx);
uint64_t dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey,
dmu_tx_t *tx);
int dmu_objset_clone_crypt_check(dsl_dir_t *parentdd, dsl_dir_t *origindd);
uint64_t dsl_crypto_key_clone_sync(dsl_dir_t *origindd, dmu_tx_t *tx);
void dsl_crypto_key_destroy_sync(uint64_t dckobj, dmu_tx_t *tx);

int spa_crypt_get_salt(spa_t *spa, uint64_t dsobj, uint8_t *salt);
int spa_do_crypt_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj,
abd_t *abd, uint_t datalen, uint8_t *mac);
int spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj,
abd_t *abd, uint_t datalen, boolean_t byteswap);
int spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, uint64_t dsobj,
const blkptr_t *bp, uint64_t txgid, uint_t datalen, abd_t *pabd,
abd_t *cabd, uint8_t *iv, uint8_t *mac, uint8_t *salt, boolean_t *no_crypt);

#endif
29 changes: 23 additions & 6 deletions include/sys/dsl_dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include <sys/dsl_deadlist.h>
#include <sys/refcount.h>
#include <sys/rrwlock.h>
#include <sys/dsl_crypt.h>
#include <zfeature_common.h>

#ifdef __cplusplus
Expand All @@ -48,6 +49,7 @@ extern "C" {
struct dsl_dataset;
struct dsl_dir;
struct dsl_pool;
struct dsl_crypto_params;

#define DS_FLAG_INCONSISTENT (1ULL<<0)
#define DS_IS_INCONSISTENT(ds) \
Expand Down Expand Up @@ -105,6 +107,7 @@ struct dsl_pool;
#define DS_FIELD_RESUME_LARGEBLOCK "com.delphix:resume_largeblockok"
#define DS_FIELD_RESUME_EMBEDOK "com.delphix:resume_embedok"
#define DS_FIELD_RESUME_COMPRESSOK "com.delphix:resume_compressok"
#define DS_FIELD_RESUME_RAWOK "com.datto:resume_rawok"

/*
* DS_FLAG_CI_DATASET is set if the dataset contains a file system whose
Expand Down Expand Up @@ -245,26 +248,38 @@ dsl_dataset_phys(dsl_dataset_t *ds)
#define DS_UNIQUE_IS_ACCURATE(ds) \
((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)

/*
 * Bit flags passed to the dataset hold/own/rele/disown "_flags" variants.
 */
typedef enum ds_hold_flags {
	/* the holder needs access to encrypted data */
	DS_HOLD_FLAG_DECRYPT = 0x1
} ds_hold_flags_t;

int dsl_dataset_hold(struct dsl_pool *dp, const char *name, void *tag,
dsl_dataset_t **dsp);
int dsl_dataset_hold_flags(struct dsl_pool *dp, const char *name,
ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
boolean_t dsl_dataset_try_add_ref(struct dsl_pool *dp, dsl_dataset_t *ds,
void *tag);
int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag,
dsl_dataset_t **);
int dsl_dataset_hold_obj_flags(struct dsl_pool *dp, uint64_t dsobj,
ds_hold_flags_t flags, void *tag, dsl_dataset_t **);
void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
void dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags,
void *tag);
int dsl_dataset_own(struct dsl_pool *dp, const char *name,
void *tag, dsl_dataset_t **dsp);
ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
void *tag, dsl_dataset_t **dsp);
void dsl_dataset_disown(dsl_dataset_t *ds, void *tag);
ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
void dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag);
void dsl_dataset_name(dsl_dataset_t *ds, char *name);
boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag);
int dsl_dataset_namelen(dsl_dataset_t *ds);
boolean_t dsl_dataset_has_owner(dsl_dataset_t *ds);
boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag);
uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
dsl_dataset_t *origin, uint64_t flags, cred_t *,
struct dsl_crypto_params *, dmu_tx_t *);
uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
uint64_t flags, dmu_tx_t *tx);
struct dsl_crypto_params *dcp, uint64_t flags, dmu_tx_t *tx);
int dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors);
int dsl_dataset_promote(const char *name, char *conflsnap);
int dsl_dataset_rename_snapshot(const char *fsname,
Expand Down Expand Up @@ -343,6 +358,8 @@ boolean_t dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds);
int dsl_dataset_rollback(const char *fsname, const char *tosnap, void *owner,
nvlist_t *result);

void dsl_dataset_activate_feature(uint64_t dsobj,
spa_feature_t f, dmu_tx_t *tx);
void dsl_dataset_deactivate_feature(uint64_t dsobj,
spa_feature_t f, dmu_tx_t *tx);

Expand Down
2 changes: 2 additions & 0 deletions include/sys/dsl_deleg.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ extern "C" {
#define ZFS_DELEG_PERM_RELEASE "release"
#define ZFS_DELEG_PERM_DIFF "diff"
#define ZFS_DELEG_PERM_BOOKMARK "bookmark"
#define ZFS_DELEG_PERM_LOAD_KEY "load-key"
#define ZFS_DELEG_PERM_CHANGE_KEY "change-key"

/*
* Note: the names of properties that are marked delegatable are also
Expand Down
3 changes: 3 additions & 0 deletions include/sys/dsl_dir.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <sys/dsl_synctask.h>
#include <sys/refcount.h>
#include <sys/zfs_context.h>
#include <sys/dsl_crypt.h>

#ifdef __cplusplus
extern "C" {
Expand All @@ -47,6 +48,7 @@ struct dsl_dataset;

#define DD_FIELD_FILESYSTEM_COUNT "com.joyent:filesystem_count"
#define DD_FIELD_SNAPSHOT_COUNT "com.joyent:snapshot_count"
#define DD_FIELD_CRYPTO_KEY_OBJ "com.datto:crypto_key_obj"

typedef enum dd_used {
DD_USED_HEAD,
Expand Down Expand Up @@ -89,6 +91,7 @@ struct dsl_dir {

/* These are immutable; no lock needed: */
uint64_t dd_object;
uint64_t dd_crypto_obj;
dsl_pool_t *dd_pool;

/* Stable until user eviction; no lock needed: */
Expand Down
4 changes: 3 additions & 1 deletion include/sys/dsl_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ struct dsl_dataset;
struct dsl_pool;
struct dmu_tx;
struct dsl_scan;
struct dsl_crypto_params;

extern unsigned long zfs_dirty_data_max;
extern unsigned long zfs_dirty_data_max_max;
Expand Down Expand Up @@ -142,7 +143,8 @@ typedef struct dsl_pool {
int dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
int dsl_pool_open(dsl_pool_t *dp);
void dsl_pool_close(dsl_pool_t *dp);
dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg);
dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops,
struct dsl_crypto_params *dcp, uint64_t txg);
void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg);
void dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg);
int dsl_pool_sync_context(dsl_pool_t *dp);
Expand Down
1 change: 1 addition & 0 deletions include/sys/fm/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ extern "C" {
#define ZFS_ERROR_CLASS "fs.zfs"

#define FM_EREPORT_ZFS_CHECKSUM "checksum"
#define FM_EREPORT_ZFS_AUTHENTICATION "authentication"
#define FM_EREPORT_ZFS_IO "io"
#define FM_EREPORT_ZFS_DATA "data"
#define FM_EREPORT_ZFS_DELAY "delay"
Expand Down
44 changes: 44 additions & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,14 @@ typedef enum {
ZFS_PROP_OVERLAY,
ZFS_PROP_PREV_SNAP,
ZFS_PROP_RECEIVE_RESUME_TOKEN,
ZFS_PROP_ENCRYPTION,
ZFS_PROP_KEYLOCATION,
ZFS_PROP_KEYFORMAT,
ZFS_PROP_PBKDF2_SALT,
ZFS_PROP_PBKDF2_ITERS,
ZFS_PROP_ENCRYPTION_ROOT,
ZFS_PROP_KEY_GUID,
ZFS_PROP_KEYSTATUS,
ZFS_NUM_PROPS
} zfs_prop_t;

Expand Down Expand Up @@ -281,6 +289,8 @@ uint64_t zfs_prop_default_numeric(zfs_prop_t);
boolean_t zfs_prop_readonly(zfs_prop_t);
boolean_t zfs_prop_inheritable(zfs_prop_t);
boolean_t zfs_prop_setonce(zfs_prop_t);
boolean_t zfs_prop_encryption_key_param(zfs_prop_t);
boolean_t zfs_prop_valid_keylocation(const char *, boolean_t);
const char *zfs_prop_to_name(zfs_prop_t);
zfs_prop_t zfs_name_to_prop(const char *);
boolean_t zfs_prop_user(const char *);
Expand Down Expand Up @@ -404,6 +414,30 @@ typedef enum {
ZFS_VOLMODE_NONE = 3
} zfs_volmode_t;

/*
 * Key status values. ZFS_KEYSTATUS_NONE is zero; the remaining values are
 * numbered consecutively. NOTE(review): presumably these are the values
 * reported through ZFS_PROP_KEYSTATUS -- confirm against the property table.
 */
typedef enum zfs_keystatus {
	ZFS_KEYSTATUS_NONE = 0,
	ZFS_KEYSTATUS_UNAVAILABLE = 1,
	ZFS_KEYSTATUS_AVAILABLE = 2
} zfs_keystatus_t;

/*
 * Key format values. ZFS_KEYFORMAT_FORMATS is a sentinel that must stay
 * last: its value equals the number of enumerators that precede it.
 */
typedef enum zfs_keyformat {
	ZFS_KEYFORMAT_NONE = 0,
	ZFS_KEYFORMAT_RAW = 1,
	ZFS_KEYFORMAT_HEX = 2,
	ZFS_KEYFORMAT_PASSPHRASE = 3,
	ZFS_KEYFORMAT_FORMATS = 4
} zfs_keyformat_t;

/*
 * Key location values. ZFS_KEYLOCATION_LOCATIONS is a sentinel that must
 * stay last: its value equals the number of enumerators that precede it.
 */
typedef enum zfs_key_location {
	ZFS_KEYLOCATION_NONE = 0,
	ZFS_KEYLOCATION_PROMPT = 1,
	ZFS_KEYLOCATION_URI = 2,
	ZFS_KEYLOCATION_LOCATIONS = 3
} zfs_keylocation_t;

#define DEFAULT_PBKDF2_ITERATIONS 350000
#define MIN_PBKDF2_ITERATIONS 100000

/*
* On-disk version number.
*/
Expand Down Expand Up @@ -1061,6 +1095,9 @@ typedef enum zfs_ioc {
ZFS_IOC_DESTROY_BOOKMARKS,
ZFS_IOC_RECV_NEW,
ZFS_IOC_POOL_SYNC,
ZFS_IOC_LOAD_KEY,
ZFS_IOC_UNLOAD_KEY,
ZFS_IOC_CHANGE_KEY,

/*
* Linux - 3/64 numbers reserved.
Expand Down Expand Up @@ -1125,6 +1162,12 @@ typedef enum {
#define ZPOOL_HIST_DSNAME "dsname"
#define ZPOOL_HIST_DSID "dsid"

/*
* Special nvlist name that will not have its args recorded in the pool's
* history log.
*/
#define ZPOOL_HIDDEN_ARGS "hidden_args"

/*
* Flags for ZFS_IOC_VDEV_SET_STATE
*/
Expand All @@ -1144,6 +1187,7 @@ typedef enum {
#define ZFS_IMPORT_ONLY 0x8
#define ZFS_IMPORT_TEMP_NAME 0x10
#define ZFS_IMPORT_SKIP_MMP 0x20
#define ZFS_IMPORT_LOAD_KEYS 0x40

/*
* Sysevent payload members. ZFS will generate the following sysevents with the
Expand Down
157 changes: 143 additions & 14 deletions include/sys/spa.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ typedef struct zbookmark_phys zbookmark_phys_t;

struct dsl_pool;
struct dsl_dataset;
struct dsl_crypto_params;

/*
* General-purpose 32-bit and 64-bit bitfield encodings.
Expand Down Expand Up @@ -222,7 +223,7 @@ typedef struct zio_cksum_salt {
* G gang block indicator
* B byteorder (endianness)
* D dedup
* X encryption (on version 30, which is not supported)
* X encryption
* E blkptr_t contains embedded data (see below)
* lvl level of indirection
* type DMU object type
Expand All @@ -232,6 +233,83 @@ typedef struct zio_cksum_salt {
* checksum[4] 256-bit checksum of the data this bp describes
*/

/*
* The blkptr_t's of encrypted blocks also need to store the encryption
* parameters so that the block can be decrypted. This layout is as follows:
*
* 64 56 48 40 32 24 16 8 0
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 0 | vdev1 | GRID | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 1 |G| offset1 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 2 | vdev2 | GRID | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 3 |G| offset2 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 4 | salt |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 5 | IV1 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 7 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 8 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 9 | physical birth txg |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* a | logical birth txg |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* b | IV2 | fill count |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* c | checksum[0] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* d | checksum[1] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* e | MAC[0] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* f | MAC[1] |
* +-------+-------+-------+-------+-------+-------+-------+-------+
*
* Legend:
*
* salt Salt for generating encryption keys
* IV1 First 64 bits of encryption IV
* X Block requires encryption handling (set to 1)
* E blkptr_t contains embedded data (set to 0, see below)
* fill count number of non-zero blocks under this bp (truncated to 32 bits)
* IV2 Last 32 bits of encryption IV
* checksum[2] 128-bit checksum of the data this bp describes
* MAC[2] 128-bit message authentication code for this data
*
* The X bit being set indicates that this block is one of 3 types. If this is
* a level 0 block with an encrypted object type, the block is encrypted
* (see BP_IS_ENCRYPTED()). If this is a level 0 block with an unencrypted
* object type, this block is authenticated with an HMAC (see
* BP_IS_AUTHENTICATED()). Otherwise (if level > 0), this bp will use the MAC
* words to store a checksum-of-MACs from the level below (see
* BP_HAS_INDIRECT_MAC_CKSUM()). For convenience in the code, BP_IS_PROTECTED()
* refers to both encrypted and authenticated blocks and BP_USES_CRYPT()
* refers to any of these 3 kinds of blocks.
*
* The additional encryption parameters are the salt, IV, and MAC which are
* explained in greater detail in the block comment at the top of zio_crypt.c.
* The MAC occupies half of the checksum space since it serves a very similar
* purpose: to prevent data corruption on disk. The only functional difference
* is that the checksum is used to detect on-disk corruption whether or not the
* encryption key is loaded and the MAC provides additional protection against
* malicious disk tampering. We use the 3rd DVA to store the salt and first
* 64 bits of the IV. As a result encrypted blocks can only have 2 copies
* maximum instead of the normal 3. The last 32 bits of the IV are stored in
* the upper bits of what is usually the fill count. Note that only blocks at
* level 0 or -2 are ever encrypted, which allows us to guarantee that these
* 32 bits are not trampled over by other code (see zio_crypt.c for details).
* The salt and IV are not used for authenticated bps or bps with an indirect
* MAC checksum, so these blocks can utilize all 3 DVAs and the full 64 bits
* for the fill count.
*/

/*
* "Embedded" blkptr_t's don't actually point to a block, instead they
* have a data payload embedded in the blkptr_t itself. See the comment
Expand Down Expand Up @@ -268,7 +346,7 @@ typedef struct zio_cksum_salt {
* payload contains the embedded data
* B (byteorder) byteorder (endianness)
* D (dedup) padding (set to zero)
* X encryption (set to zero; see above)
* X encryption (set to zero)
* E (embedded) set to one
* lvl indirection level
* type DMU object type
Expand All @@ -287,7 +365,9 @@ typedef struct zio_cksum_salt {
* BP's so the BP_SET_* macros can be used with them. etype, PSIZE, LSIZE must
* be set with the BPE_SET_* macros. BP_SET_EMBEDDED() should be called before
* other macros, as they assert that they are only used on BP's of the correct
* "embedded-ness".
* "embedded-ness". Encrypted blkptr_t's cannot be embedded because they use
* the payload space for encryption parameters (see the comment above on
* how encryption parameters are stored).
*/

#define BPE_GET_ETYPE(bp) \
Expand Down Expand Up @@ -411,6 +491,26 @@ _NOTE(CONSTCOND) } while (0)
#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5)
#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x)

/*
 * Encrypted, authenticated, and MAC cksum bps use the same bit: bit 61 of
 * blk_prop (the "X" bit). BP_USES_CRYPT() reads that bit and BP_SET_CRYPT()
 * writes it; the predicates below tell the three kinds of bp apart using
 * the bp's level and DMU object type.
 */
#define BP_USES_CRYPT(bp) BF64_GET((bp)->blk_prop, 61, 1)
#define BP_SET_CRYPT(bp, x) BF64_SET((bp)->blk_prop, 61, 1, x)

/* X bit set, level <= 0, and the object type is an encrypted type. */
#define BP_IS_ENCRYPTED(bp) \
(BP_USES_CRYPT(bp) && \
BP_GET_LEVEL(bp) <= 0 && \
DMU_OT_IS_ENCRYPTED(BP_GET_TYPE(bp)))

/* X bit set, level <= 0, and the object type is NOT an encrypted type. */
#define BP_IS_AUTHENTICATED(bp) \
(BP_USES_CRYPT(bp) && \
BP_GET_LEVEL(bp) <= 0 && \
!DMU_OT_IS_ENCRYPTED(BP_GET_TYPE(bp)))

/* X bit set on an indirect (level > 0) bp. */
#define BP_HAS_INDIRECT_MAC_CKSUM(bp) \
(BP_USES_CRYPT(bp) && BP_GET_LEVEL(bp) > 0)

/* True for both encrypted and authenticated bps. */
#define BP_IS_PROTECTED(bp) \
(BP_IS_ENCRYPTED(bp) || BP_IS_AUTHENTICATED(bp))

#define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1)
#define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x)

Expand All @@ -428,7 +528,26 @@ _NOTE(CONSTCOND) } while (0)
(bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \
}

#define BP_GET_FILL(bp) (BP_IS_EMBEDDED(bp) ? 1 : (bp)->blk_fill)
/*
 * Fill count of a bp. For encrypted bps only the low 32 bits of blk_fill
 * are returned (BP_GET_IV2() reads the upper 32 bits of the same word);
 * embedded bps always report 1; every other bp returns the full 64-bit
 * blk_fill.
 */
#define BP_GET_FILL(bp) \
((BP_IS_ENCRYPTED(bp)) ? BF64_GET((bp)->blk_fill, 0, 32) : \
((BP_IS_EMBEDDED(bp)) ? 1 : (bp)->blk_fill))

/*
 * Store the fill count in a bp. For encrypted bps only the low 32 bits of
 * blk_fill are written, leaving the upper 32 bits (read by BP_GET_IV2())
 * untouched; for every other bp the whole 64-bit word is overwritten.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (e.g. as the body of an if with a following else), and 'fill'
 * is parenthesized in the plain assignment.
 */
#define BP_SET_FILL(bp, fill) \
do { \
	if (BP_IS_ENCRYPTED(bp)) \
		BF64_SET((bp)->blk_fill, 0, 32, fill); \
	else \
		(bp)->blk_fill = (fill); \
_NOTE(CONSTCOND) } while (0)

/*
 * Read the upper 32 bits of blk_fill, where encrypted bps keep the last
 * 32 bits of the encryption IV. Asserts that the bp is encrypted first.
 */
#define BP_GET_IV2(bp) \
(ASSERT(BP_IS_ENCRYPTED(bp)), \
BF64_GET((bp)->blk_fill, 32, 32))
/*
 * Write 'iv2' into the upper 32 bits of blk_fill; the low 32 bits are left
 * untouched. Asserts that the bp is encrypted.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (e.g. as the body of an if with a following else).
 */
#define BP_SET_IV2(bp, iv2) \
do { \
	ASSERT(BP_IS_ENCRYPTED(bp)); \
	BF64_SET((bp)->blk_fill, 32, 32, iv2); \
_NOTE(CONSTCOND) } while (0)

#define BP_IS_METADATA(bp) \
(BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
Expand All @@ -437,7 +556,7 @@ _NOTE(CONSTCOND) } while (0)
(BP_IS_EMBEDDED(bp) ? 0 : \
DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
DVA_GET_ASIZE(&(bp)->blk_dva[2]))
(DVA_GET_ASIZE(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp)))

#define BP_GET_UCSIZE(bp) \
(BP_IS_METADATA(bp) ? BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
Expand All @@ -446,13 +565,13 @@ _NOTE(CONSTCOND) } while (0)
(BP_IS_EMBEDDED(bp) ? 0 : \
!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
!!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
!!DVA_GET_ASIZE(&(bp)->blk_dva[2]))
(!!DVA_GET_ASIZE(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp)))

#define BP_COUNT_GANG(bp) \
(BP_IS_EMBEDDED(bp) ? 0 : \
(DVA_GET_GANG(&(bp)->blk_dva[0]) + \
DVA_GET_GANG(&(bp)->blk_dva[1]) + \
DVA_GET_GANG(&(bp)->blk_dva[2])))
(DVA_GET_GANG(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp))))

#define DVA_EQUAL(dva1, dva2) \
((dva1)->dva_word[1] == (dva2)->dva_word[1] && \
Expand Down Expand Up @@ -505,14 +624,15 @@ _NOTE(CONSTCOND) } while (0)

#define BP_SHOULD_BYTESWAP(bp) (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER)

#define BP_SPRINTF_LEN 320
#define BP_SPRINTF_LEN 400

/*
* This macro allows code sharing between zfs, libzpool, and mdb.
* 'func' is either snprintf() or mdb_snprintf().
* 'ws' (whitespace) can be ' ' for single-line format, '\n' for multi-line.
*/
#define SNPRINTF_BLKPTR(func, ws, buf, size, bp, type, checksum, compress) \
#define SNPRINTF_BLKPTR(func, ws, buf, size, bp, type, checksum, crypt_type, \
compress) \
{ \
static const char *copyname[] = \
{ "zero", "single", "double", "triple" }; \
Expand Down Expand Up @@ -553,18 +673,27 @@ _NOTE(CONSTCOND) } while (0)
(u_longlong_t)DVA_GET_ASIZE(dva), \
ws); \
} \
if (BP_IS_ENCRYPTED(bp)) { \
len += func(buf + len, size - len, \
"salt=%llx iv=%llx:%llx%c", \
(u_longlong_t)bp->blk_dva[2].dva_word[0], \
(u_longlong_t)bp->blk_dva[2].dva_word[1], \
(u_longlong_t)BP_GET_IV2(bp), \
ws); \
} \
if (BP_IS_GANG(bp) && \
DVA_GET_ASIZE(&bp->blk_dva[2]) <= \
DVA_GET_ASIZE(&bp->blk_dva[1]) / 2) \
copies--; \
len += func(buf + len, size - len, \
"[L%llu %s] %s %s %s %s %s %s%c" \
"[L%llu %s] %s %s %s %s %s %s %s%c" \
"size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \
"cksum=%llx:%llx:%llx:%llx", \
(u_longlong_t)BP_GET_LEVEL(bp), \
type, \
checksum, \
compress, \
crypt_type, \
BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE", \
BP_IS_GANG(bp) ? "gang" : "contiguous", \
BP_GET_DEDUP(bp) ? "dedup" : "unique", \
Expand Down Expand Up @@ -598,8 +727,8 @@ extern int spa_open_rewind(const char *pool, spa_t **, void *tag,
nvlist_t *policy, nvlist_t **config);
extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot,
size_t buflen);
extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
nvlist_t *zplprops);
extern int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
nvlist_t *zplprops, struct dsl_crypto_params *dcp);
extern int spa_import(char *pool, nvlist_t *config, nvlist_t *props,
uint64_t flags);
extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
Expand Down Expand Up @@ -886,9 +1015,9 @@ extern void spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,

/* error handling */
struct zbookmark_phys;
extern void spa_log_error(spa_t *spa, zio_t *zio);
extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb);
extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd,
zio_t *zio, uint64_t stateoroffset, uint64_t length);
zbookmark_phys_t *zb, zio_t *zio, uint64_t stateoroffset, uint64_t length);
extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type,
const char *name, nvlist_t *aux);
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
Expand Down
2 changes: 2 additions & 0 deletions include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include <sys/refcount.h>
#include <sys/bplist.h>
#include <sys/bpobj.h>
#include <sys/dsl_crypt.h>
#include <sys/zfeature.h>
#include <zfeature_common.h>

Expand Down Expand Up @@ -273,6 +274,7 @@ struct spa {
spa_avz_action_t spa_avz_action; /* destroy/rebuild AVZ? */
uint64_t spa_errata; /* errata issues detected */
spa_stats_t spa_stats; /* assorted spa statistics */
spa_keystore_t spa_keystore; /* loaded crypto keys */
hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */
taskq_t *spa_zvol_taskq; /* Taskq for minor management */
uint64_t spa_multihost; /* multihost aware (mmp) */
Expand Down
58 changes: 49 additions & 9 deletions include/sys/zfs_ioctl.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ typedef enum drr_headertype {
/* flag #21 is reserved for a Delphix feature */
#define DMU_BACKUP_FEATURE_COMPRESSED (1 << 22)
#define DMU_BACKUP_FEATURE_LARGE_DNODE (1 << 23)
#define DMU_BACKUP_FEATURE_RAW (1 << 24)

/*
* Mask of all supported backup features
Expand All @@ -112,7 +113,8 @@ typedef enum drr_headertype {
DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL | \
DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_LZ4 | \
DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_LARGE_BLOCKS | \
DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE)
DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE | \
DMU_BACKUP_FEATURE_RAW)

/* Are all features in the given flag word currently supported? */
#define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK))
Expand Down Expand Up @@ -158,18 +160,28 @@ typedef enum dmu_send_resume_token_version {
#define DRR_FLAG_FREERECORDS (1<<2)

/*
* flags in the drr_checksumflags field in the DRR_WRITE and
* DRR_WRITE_BYREF blocks
* flags in the drr_flags field in the DRR_WRITE, DRR_SPILL, DRR_OBJECT,
* DRR_WRITE_BYREF, and DRR_OBJECT_RANGE blocks
*/
#define DRR_CHECKSUM_DEDUP (1<<0)
#define DRR_CHECKSUM_DEDUP (1<<0) /* not used for DRR_SPILL blocks */
#define DRR_RAW_ENCRYPTED (1<<1)
#define DRR_RAW_BYTESWAP (1<<2)

#define DRR_IS_DEDUP_CAPABLE(flags) ((flags) & DRR_CHECKSUM_DEDUP)
#define DRR_IS_RAW_ENCRYPTED(flags) ((flags) & DRR_RAW_ENCRYPTED)
#define DRR_IS_RAW_BYTESWAPPED(flags) ((flags) & DRR_RAW_BYTESWAP)

/* deal with compressed drr_write replay records */
#define DRR_WRITE_COMPRESSED(drrw) ((drrw)->drr_compressiontype != 0)
#define DRR_WRITE_PAYLOAD_SIZE(drrw) \
(DRR_WRITE_COMPRESSED(drrw) ? (drrw)->drr_compressed_size : \
(drrw)->drr_logical_size)
/*
 * Payload size of a spill record: drr_compressed_size when the record's
 * drr_flags has DRR_RAW_ENCRYPTED set, drr_length otherwise. The argument
 * is parenthesized at every use so any pointer expression may be passed.
 */
#define DRR_SPILL_PAYLOAD_SIZE(drrs) \
	(DRR_IS_RAW_ENCRYPTED((drrs)->drr_flags) ? \
	(drrs)->drr_compressed_size : (drrs)->drr_length)
/*
 * Payload size of an object record: drr_raw_bonuslen when the record's
 * drr_flags has DRR_RAW_ENCRYPTED set, otherwise drr_bonuslen rounded up
 * to a multiple of 8. The argument is parenthesized at every use so any
 * pointer expression may be passed.
 */
#define DRR_OBJECT_PAYLOAD_SIZE(drro) \
	(DRR_IS_RAW_ENCRYPTED((drro)->drr_flags) ? \
	(drro)->drr_raw_bonuslen : P2ROUNDUP((drro)->drr_bonuslen, 8))

/*
* zfs ioctl command structure
Expand All @@ -178,7 +190,8 @@ typedef struct dmu_replay_record {
enum {
DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS,
DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF,
DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_NUMTYPES
DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_OBJECT_RANGE,
DRR_NUMTYPES
} drr_type;
uint32_t drr_payloadlen;
union {
Expand All @@ -205,8 +218,13 @@ typedef struct dmu_replay_record {
uint8_t drr_checksumtype;
uint8_t drr_compress;
uint8_t drr_dn_slots;
uint8_t drr_pad[5];
uint8_t drr_flags;
uint32_t drr_raw_bonuslen;
uint64_t drr_toguid;
/* only nonzero if DRR_RAW_ENCRYPTED flag is set */
uint8_t drr_indblkshift;
uint8_t drr_nlevels;
uint8_t drr_nblkptr;
/* bonus content follows */
} drr_object;
struct drr_freeobjects {
Expand All @@ -222,13 +240,17 @@ typedef struct dmu_replay_record {
uint64_t drr_logical_size;
uint64_t drr_toguid;
uint8_t drr_checksumtype;
uint8_t drr_checksumflags;
uint8_t drr_flags;
uint8_t drr_compressiontype;
uint8_t drr_pad2[5];
/* deduplication key */
ddt_key_t drr_key;
/* only nonzero if drr_compressiontype is not 0 */
uint64_t drr_compressed_size;
/* only nonzero if DRR_RAW_ENCRYPTED flag is set */
uint8_t drr_salt[ZIO_DATA_SALT_LEN];
uint8_t drr_iv[ZIO_DATA_IV_LEN];
uint8_t drr_mac[ZIO_DATA_MAC_LEN];
/* content follows */
} drr_write;
struct drr_free {
Expand All @@ -249,15 +271,23 @@ typedef struct dmu_replay_record {
uint64_t drr_refoffset;
/* properties of the data */
uint8_t drr_checksumtype;
uint8_t drr_checksumflags;
uint8_t drr_flags;
uint8_t drr_pad2[6];
ddt_key_t drr_key; /* deduplication key */
} drr_write_byref;
struct drr_spill {
uint64_t drr_object;
uint64_t drr_length;
uint64_t drr_toguid;
uint64_t drr_pad[4]; /* needed for crypto */
uint8_t drr_flags;
uint8_t drr_compressiontype;
uint8_t drr_pad[6];
/* only nonzero if DRR_RAW_ENCRYPTED flag is set */
uint64_t drr_compressed_size;
uint8_t drr_salt[ZIO_DATA_SALT_LEN];
uint8_t drr_iv[ZIO_DATA_IV_LEN];
uint8_t drr_mac[ZIO_DATA_MAC_LEN];
dmu_object_type_t drr_type;
/* spill data follows */
} drr_spill;
struct drr_write_embedded {
Expand All @@ -273,6 +303,16 @@ typedef struct dmu_replay_record {
uint32_t drr_psize; /* compr. (real) size of payload */
/* (possibly compressed) content follows */
} drr_write_embedded;
struct drr_object_range {
uint64_t drr_firstobj;
uint64_t drr_numslots;
uint64_t drr_toguid;
uint8_t drr_salt[ZIO_DATA_SALT_LEN];
uint8_t drr_iv[ZIO_DATA_IV_LEN];
uint8_t drr_mac[ZIO_DATA_MAC_LEN];
uint8_t drr_flags;
uint8_t drr_pad[3];
} drr_object_range;

/*
* Note: drr_checksum is overlaid with all record types
Expand Down
4 changes: 3 additions & 1 deletion include/sys/zil.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <sys/spa.h>
#include <sys/zio.h>
#include <sys/dmu.h>
#include <sys/zio_crypt.h>

#ifdef __cplusplus
extern "C" {
Expand Down Expand Up @@ -466,7 +467,8 @@ typedef int (*const zil_replay_func_t)(void *, char *, boolean_t);
typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio);

extern int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg);
zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg,
boolean_t decrypt);

extern void zil_init(void);
extern void zil_fini(void);
Expand Down
57 changes: 44 additions & 13 deletions include/sys/zio.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,29 @@ enum zio_checksum {
#define ZIO_DEDUPCHECKSUM ZIO_CHECKSUM_SHA256
#define ZIO_DEDUPDITTO_MIN 100

/*
 * Supported encryption settings and algorithms. The first three values are
 * not ciphers (inherit / on / off); the rest name an AES key size and mode.
 * ZIO_CRYPT_FUNCTIONS is a sentinel that must stay last: its value equals
 * the number of enumerators that precede it.
 */
enum zio_encrypt {
	ZIO_CRYPT_INHERIT = 0,
	ZIO_CRYPT_ON = 1,
	ZIO_CRYPT_OFF = 2,
	ZIO_CRYPT_AES_128_CCM = 3,
	ZIO_CRYPT_AES_192_CCM = 4,
	ZIO_CRYPT_AES_256_CCM = 5,
	ZIO_CRYPT_AES_128_GCM = 6,
	ZIO_CRYPT_AES_192_GCM = 7,
	ZIO_CRYPT_AES_256_GCM = 8,
	ZIO_CRYPT_FUNCTIONS = 9
};

#define ZIO_CRYPT_ON_VALUE ZIO_CRYPT_AES_256_CCM
#define ZIO_CRYPT_DEFAULT ZIO_CRYPT_OFF

/* macros defining encryption lengths */
#define ZIO_OBJSET_MAC_LEN 32
#define ZIO_DATA_IV_LEN 12
#define ZIO_DATA_SALT_LEN 8
#define ZIO_DATA_MAC_LEN 16

/*
* The number of "legacy" compression functions which can be set on individual
* objects.
Expand Down Expand Up @@ -191,17 +214,19 @@ enum zio_flag {
ZIO_FLAG_DONT_PROPAGATE = 1 << 20,
ZIO_FLAG_IO_BYPASS = 1 << 21,
ZIO_FLAG_IO_REWRITE = 1 << 22,
ZIO_FLAG_RAW = 1 << 23,
ZIO_FLAG_GANG_CHILD = 1 << 24,
ZIO_FLAG_DDT_CHILD = 1 << 25,
ZIO_FLAG_GODFATHER = 1 << 26,
ZIO_FLAG_NOPWRITE = 1 << 27,
ZIO_FLAG_REEXECUTED = 1 << 28,
ZIO_FLAG_DELEGATED = 1 << 29,
ZIO_FLAG_FASTWRITE = 1 << 30
ZIO_FLAG_RAW_COMPRESS = 1 << 23,
ZIO_FLAG_RAW_ENCRYPT = 1 << 24,
ZIO_FLAG_GANG_CHILD = 1 << 25,
ZIO_FLAG_DDT_CHILD = 1 << 26,
ZIO_FLAG_GODFATHER = 1 << 27,
ZIO_FLAG_NOPWRITE = 1 << 28,
ZIO_FLAG_REEXECUTED = 1 << 29,
ZIO_FLAG_DELEGATED = 1 << 30,
ZIO_FLAG_FASTWRITE = 1 << 31,
};

#define ZIO_FLAG_MUSTSUCCEED 0
#define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT)

#define ZIO_DDT_CHILD_FLAGS(zio) \
(((zio)->io_flags & ZIO_FLAG_DDT_INHERIT) | \
Expand Down Expand Up @@ -303,6 +328,11 @@ typedef struct zio_prop {
boolean_t zp_dedup;
boolean_t zp_dedup_verify;
boolean_t zp_nopwrite;
boolean_t zp_encrypt;
boolean_t zp_byteorder;
uint8_t zp_salt[ZIO_DATA_SALT_LEN];
uint8_t zp_iv[ZIO_DATA_IV_LEN];
uint8_t zp_mac[ZIO_DATA_MAC_LEN];
} zio_prop_t;

typedef struct zio_cksum_report zio_cksum_report_t;
Expand Down Expand Up @@ -514,8 +544,8 @@ extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,
const blkptr_t *bp, enum zio_flag flags);

extern int zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp,
uint64_t size, boolean_t *slog);
extern int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg,
blkptr_t *new_bp, uint64_t size, boolean_t *slog);
extern void zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp);
extern void zio_flush(zio_t *zio, vdev_t *vd);
extern void zio_shrink(zio_t *zio, uint64_t size);
Expand Down Expand Up @@ -596,16 +626,17 @@ extern hrtime_t zio_handle_io_delay(zio_t *zio);
/*
* Checksum ereport functions
*/
extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, struct zio *zio,
uint64_t offset, uint64_t length, void *arg, struct zio_bad_cksum *info);
extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
zbookmark_phys_t *zb, struct zio *zio, uint64_t offset, uint64_t length,
void *arg, struct zio_bad_cksum *info);
extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report,
const abd_t *good_data, const abd_t *bad_data, boolean_t drop_if_identical);

extern void zfs_ereport_free_checksum(zio_cksum_report_t *report);

/* If we have the good data in hand, this function can be used */
extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
struct zio *zio, uint64_t offset, uint64_t length,
zbookmark_phys_t *zb, struct zio *zio, uint64_t offset, uint64_t length,
const abd_t *good_data, const abd_t *bad_data, struct zio_bad_cksum *info);

/* Called from spa_sync(), but primarily an injection handler */
Expand Down
147 changes: 147 additions & 0 deletions include/sys/zio_crypt.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
/*
* CDDL HEADER START
*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*
* CDDL HEADER END
*/

/*
* Copyright (c) 2017, Datto, Inc. All rights reserved.
*/

#ifndef _SYS_ZIO_CRYPT_H
#define _SYS_ZIO_CRYPT_H

#include <sys/dmu.h>
#include <sys/refcount.h>
#include <sys/crypto/api.h>
#include <sys/nvpair.h>
#include <sys/avl.h>
#include <sys/zio.h>

/* forward declarations */
struct zbookmark_phys;

#define WRAPPING_KEY_LEN 32
#define WRAPPING_IV_LEN ZIO_DATA_IV_LEN
#define WRAPPING_MAC_LEN 16

#define SHA1_DIGEST_LEN 20
#define SHA512_DIGEST_LEN 64
#define SHA512_HMAC_KEYLEN 64

#define MASTER_KEY_MAX_LEN 32
#define L2ARC_DEFAULT_CRYPT ZIO_CRYPT_AES_256_CCM

/*
 * Utility macros for converting between bit counts and byte counts, using
 * NBBY bits per byte. BITS_TO_BYTES() rounds up to a whole number of bytes.
 * The argument is parenthesized so that expressions (e.g. "a + b" or
 * "1 << n") are converted as a unit.
 */
#define BITS_TO_BYTES(x) (((x) + NBBY - 1) / NBBY)
#define BYTES_TO_BITS(x) ((x) * NBBY)

/* Cipher mode of an encryption algorithm: none, CCM, or GCM. */
typedef enum zio_crypt_type {
	ZC_TYPE_NONE = 0,
	ZC_TYPE_CCM = 1,
	ZC_TYPE_GCM = 2
} zio_crypt_type_t;

/*
 * Entry type for the table of supported crypto algorithms, modes and key
 * lengths (see zio_crypt_table).
 */
typedef struct zio_crypt_info {
/* mechanism name, needed by ICP */
crypto_mech_name_t ci_mechname;

/* cipher mode type (GCM, CCM) */
zio_crypt_type_t ci_crypt_type;

/* length of the encryption key */
size_t ci_keylen;

/* human-readable name of the encryption algorithm */
char *ci_name;
} zio_crypt_info_t;

extern zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS];

/*
 * In-memory representation of an unwrapped key that is loaded into memory.
 * It carries the master and HMAC key material, the current encryption key
 * derived from the master key together with the salt it was derived with,
 * and the crypto API key/template handles for both keys.
 */
typedef struct zio_crypt_key {
/* encryption algorithm */
uint64_t zk_crypt;

/* GUID for uniquely identifying this key. Not encrypted on disk. */
uint64_t zk_guid;

/* buffer for master key */
uint8_t zk_master_keydata[MASTER_KEY_MAX_LEN];

/* buffer for hmac key */
uint8_t zk_hmac_keydata[SHA512_HMAC_KEYLEN];

/* buffer for current encryption key derived from master key */
uint8_t zk_current_keydata[MASTER_KEY_MAX_LEN];

/* current 64 bit salt for deriving an encryption key */
uint8_t zk_salt[ZIO_DATA_SALT_LEN];

/* count of how many times the current salt has been used */
uint64_t zk_salt_count;

/* illumos crypto api current encryption key */
crypto_key_t zk_current_key;

/* template of current encryption key for illumos crypto api */
crypto_ctx_template_t zk_current_tmpl;

/* illumos crypto api current hmac key */
crypto_key_t zk_hmac_key;

/* template of hmac key for illumos crypto api */
crypto_ctx_template_t zk_hmac_tmpl;

/* lock for changing the salt and dependent values */
krwlock_t zk_salt_lock;
} zio_crypt_key_t;

void zio_crypt_key_destroy(zio_crypt_key_t *key);
int zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key);
int zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt_out);

int zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv,
uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out);
int zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid,
uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, uint8_t *mac,
zio_crypt_key_t *key);
int zio_crypt_generate_iv(uint8_t *ivbuf);
int zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data,
uint_t datalen, uint8_t *ivbuf, uint8_t *salt);

void zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv);
void zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv);
void zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac);
void zio_crypt_decode_mac_bp(const blkptr_t *bp, uint8_t *mac);
void zio_crypt_encode_mac_zil(void *data, uint8_t *mac);
void zio_crypt_decode_mac_zil(const void *data, uint8_t *mac);
void zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen);

int zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf,
uint_t datalen, boolean_t byteswap, uint8_t *cksum);
int zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd,
uint_t datalen, boolean_t byteswap, uint8_t *cksum);
int zio_crypt_do_hmac(zio_crypt_key_t *key, uint8_t *data, uint_t datalen,
uint8_t *digestbuf);
int zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen,
boolean_t byteswap, uint8_t *portable_mac, uint8_t *local_mac);
int zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt,
dmu_object_type_t ot, uint8_t *iv, uint8_t *mac, uint_t datalen,
boolean_t byteswap, uint8_t *plainbuf, uint8_t *cipherbuf,
boolean_t *no_crypt);
int zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt,
dmu_object_type_t ot, uint8_t *iv, uint8_t *mac, uint_t datalen,
boolean_t byteswap, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt);

#endif
52 changes: 34 additions & 18 deletions include/sys/zio_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,18 @@ extern "C" {
* physical I/O. The nop write feature can handle writes in either
* syncing or open context (i.e. zil writes) and as a result is mutually
* exclusive with dedup.
*
* Encryption:
* Encryption and authentication are handled by the ZIO_STAGE_ENCRYPT stage.
* This stage determines how the encryption metadata is stored in the bp.
* Decryption and MAC verification are performed during zio_decrypt() as a
* transform callback. Encryption is mutually exclusive with nopwrite, because
* blocks with the same plaintext will be encrypted with different salts and
* IV's (if dedup is off), and therefore have different ciphertexts. For dedup
* blocks we deterministically generate the IV and salt by performing an HMAC
* of the plaintext, which is computationally expensive, but allows us to keep
* support for encrypted dedup. See the block comment in zio_crypt.c for
* details.
*/

/*
Expand All @@ -110,32 +122,33 @@ enum zio_stage {
ZIO_STAGE_ISSUE_ASYNC = 1 << 4, /* RWF-- */
ZIO_STAGE_WRITE_COMPRESS = 1 << 5, /* -W--- */

ZIO_STAGE_CHECKSUM_GENERATE = 1 << 6, /* -W--- */
ZIO_STAGE_ENCRYPT = 1 << 6, /* -W--- */
ZIO_STAGE_CHECKSUM_GENERATE = 1 << 7, /* -W--- */

ZIO_STAGE_NOP_WRITE = 1 << 7, /* -W--- */
ZIO_STAGE_NOP_WRITE = 1 << 8, /* -W--- */

ZIO_STAGE_DDT_READ_START = 1 << 8, /* R---- */
ZIO_STAGE_DDT_READ_DONE = 1 << 9, /* R---- */
ZIO_STAGE_DDT_WRITE = 1 << 10, /* -W--- */
ZIO_STAGE_DDT_FREE = 1 << 11, /* --F-- */
ZIO_STAGE_DDT_READ_START = 1 << 9, /* R---- */
ZIO_STAGE_DDT_READ_DONE = 1 << 10, /* R---- */
ZIO_STAGE_DDT_WRITE = 1 << 11, /* -W--- */
ZIO_STAGE_DDT_FREE = 1 << 12, /* --F-- */

ZIO_STAGE_GANG_ASSEMBLE = 1 << 12, /* RWFC- */
ZIO_STAGE_GANG_ISSUE = 1 << 13, /* RWFC- */
ZIO_STAGE_GANG_ASSEMBLE = 1 << 13, /* RWFC- */
ZIO_STAGE_GANG_ISSUE = 1 << 14, /* RWFC- */

ZIO_STAGE_DVA_THROTTLE = 1 << 14, /* -W--- */
ZIO_STAGE_DVA_ALLOCATE = 1 << 15, /* -W--- */
ZIO_STAGE_DVA_FREE = 1 << 16, /* --F-- */
ZIO_STAGE_DVA_CLAIM = 1 << 17, /* ---C- */
ZIO_STAGE_DVA_THROTTLE = 1 << 15, /* -W--- */
ZIO_STAGE_DVA_ALLOCATE = 1 << 16, /* -W--- */
ZIO_STAGE_DVA_FREE = 1 << 17, /* --F-- */
ZIO_STAGE_DVA_CLAIM = 1 << 18, /* ---C- */

ZIO_STAGE_READY = 1 << 18, /* RWFCI */
ZIO_STAGE_READY = 1 << 19, /* RWFCI */

ZIO_STAGE_VDEV_IO_START = 1 << 19, /* RW--I */
ZIO_STAGE_VDEV_IO_DONE = 1 << 20, /* RW--I */
ZIO_STAGE_VDEV_IO_ASSESS = 1 << 21, /* RW--I */
ZIO_STAGE_VDEV_IO_START = 1 << 20, /* RW--I */
ZIO_STAGE_VDEV_IO_DONE = 1 << 21, /* RW--I */
ZIO_STAGE_VDEV_IO_ASSESS = 1 << 22, /* RW--I */

ZIO_STAGE_CHECKSUM_VERIFY = 1 << 22, /* R---- */
ZIO_STAGE_CHECKSUM_VERIFY = 1 << 23, /* R---- */

ZIO_STAGE_DONE = 1 << 23 /* RWFCI */
ZIO_STAGE_DONE = 1 << 24 /* RWFCI */
};

#define ZIO_INTERLOCK_STAGES \
Expand Down Expand Up @@ -187,12 +200,14 @@ enum zio_stage {
/*
 * Stage mask for the rewrite pipeline: the common write stages plus
 * BP initialization, compression and encryption.  The stage flags are
 * combined with bitwise OR, so the order in which they are listed has
 * no effect on the resulting mask.
 */
#define ZIO_REWRITE_PIPELINE \
	(ZIO_STAGE_WRITE_BP_INIT | \
	ZIO_STAGE_WRITE_COMPRESS | \
	ZIO_STAGE_ENCRYPT | \
	ZIO_WRITE_COMMON_STAGES)

/*
 * Stage mask for the write pipeline: the common write stages plus BP
 * initialization, compression, encryption, and the DVA throttle and
 * allocate stages.  The stage flags are combined with bitwise OR, so
 * the order in which they are listed has no effect on the resulting
 * mask.
 */
#define ZIO_WRITE_PIPELINE \
	(ZIO_STAGE_WRITE_BP_INIT | \
	ZIO_STAGE_WRITE_COMPRESS | \
	ZIO_STAGE_ENCRYPT | \
	ZIO_STAGE_DVA_THROTTLE | \
	ZIO_STAGE_DVA_ALLOCATE | \
	ZIO_WRITE_COMMON_STAGES)

Expand All @@ -207,6 +222,7 @@ enum zio_stage {
ZIO_STAGE_WRITE_BP_INIT | \
ZIO_STAGE_ISSUE_ASYNC | \
ZIO_STAGE_WRITE_COMPRESS | \
ZIO_STAGE_ENCRYPT | \
ZIO_STAGE_CHECKSUM_GENERATE | \
ZIO_STAGE_DDT_WRITE)

Expand Down
1 change: 1 addition & 0 deletions include/zfeature_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ typedef enum spa_feature {
SPA_FEATURE_SKEIN,
SPA_FEATURE_EDONR,
SPA_FEATURE_USEROBJ_ACCOUNTING,
SPA_FEATURE_ENCRYPTION,
SPA_FEATURES
} spa_feature_t;

Expand Down
2 changes: 2 additions & 0 deletions include/zfs_deleg.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ typedef enum {
ZFS_DELEG_NOTE_RELEASE,
ZFS_DELEG_NOTE_DIFF,
ZFS_DELEG_NOTE_BOOKMARK,
ZFS_DELEG_NOTE_LOAD_KEY,
ZFS_DELEG_NOTE_CHANGE_KEY,
ZFS_DELEG_NOTE_NONE
} zfs_deleg_note_t;

Expand Down
7 changes: 5 additions & 2 deletions include/zfs_prop.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,12 @@ typedef enum {
* ONETIME properties are a sort of conglomeration of READONLY
* and INHERIT. They can be set only during object creation,
* after that they are READONLY. If not explicitly set during
* creation, they can be inherited.
* creation, they can be inherited. ONETIME_DEFAULT properties
* work the same way, but they will default instead of
* inheriting a value.
*/
PROP_ONETIME
PROP_ONETIME,
PROP_ONETIME_DEFAULT
} zprop_attr_t;

typedef struct zfs_index {
Expand Down
4 changes: 2 additions & 2 deletions lib/libicp/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ if TARGET_ASM_I386
ASM_SOURCES_C =
ASM_SOURCES_AS =
endif

if TARGET_ASM_GENERIC
ASM_SOURCES_C =
ASM_SOURCES_AS =
Expand Down Expand Up @@ -81,5 +81,5 @@ nodist_libicp_la_SOURCES = \
$(USER_ASM) \
$(KERNEL_C) \
$(KERNEL_ASM)

libicp_la_LIBADD = -lrt
7 changes: 7 additions & 0 deletions lib/libspl/include/sys/mount.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,4 +88,11 @@
*/
#define MS_OVERLAY 0x00000004

/*
 * MS_CRYPT indicates that encryption keys should be loaded if they are not
 * already available. This is not defined in glibc, but it is never seen by
 * the kernel so it will not cause any problems.
 *
 * The value is the next bit after MS_OVERLAY (0x00000004) defined above.
 * NOTE(review): 0x00000008 is numerically the same as Linux's MS_NOEXEC, so
 * this is only safe while the flag is filtered out before mount(2) is
 * called -- confirm against the callers.
 */
#define MS_CRYPT 0x00000008

#endif /* _LIBSPL_SYS_MOUNT_H */
4 changes: 3 additions & 1 deletion lib/libzfs/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ lib_LTLIBRARIES = libzfs.la
USER_C = \
libzfs_changelist.c \
libzfs_config.c \
libzfs_crypto.c \
libzfs_dataset.c \
libzfs_diff.c \
libzfs_fru.c \
Expand All @@ -30,7 +31,6 @@ USER_C = \
libzfs_util.c

KERNEL_C = \
algs/sha2/sha2.c \
zfeature_common.c \
zfs_comutil.c \
zfs_deleg.c \
Expand All @@ -53,10 +53,12 @@ nodist_libzfs_la_SOURCES = \

libzfs_la_LIBADD = \
$(top_builddir)/lib/libefi/libefi.la \
$(top_builddir)/lib/libicp/libicp.la \
$(top_builddir)/lib/libnvpair/libnvpair.la \
$(top_builddir)/lib/libshare/libshare.la \
$(top_builddir)/lib/libtpool/libtpool.la \
$(top_builddir)/lib/libuutil/libuutil.la \
$(top_builddir)/lib/libzpool/libzpool.la \
$(top_builddir)/lib/libzfs_core/libzfs_core.la

libzfs_la_LIBADD += -lm $(LIBBLKID) $(LIBUDEV)
Expand Down
6 changes: 5 additions & 1 deletion lib/libzfs/libzfs_changelist.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ changelist_postfix(prop_changelist_t *clp)
boolean_t sharenfs;
boolean_t sharesmb;
boolean_t mounted;
boolean_t needs_key;

/*
* If we are in the global zone, but this dataset is exported
Expand Down Expand Up @@ -229,9 +230,12 @@ changelist_postfix(prop_changelist_t *clp)
shareopts, sizeof (shareopts), NULL, NULL, 0,
B_FALSE) == 0) && (strcmp(shareopts, "off") != 0));

needs_key = (zfs_prop_get_int(cn->cn_handle,
ZFS_PROP_KEYSTATUS) == ZFS_KEYSTATUS_UNAVAILABLE);

mounted = zfs_is_mounted(cn->cn_handle, NULL);

if (!mounted && (cn->cn_mounted ||
if (!mounted && !needs_key && (cn->cn_mounted ||
((sharenfs || sharesmb || clp->cl_waslegacy) &&
(zfs_prop_get_int(cn->cn_handle,
ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_ON)))) {
Expand Down
Loading