Skip to content
This repository has been archived by the owner on Nov 7, 2019. It is now read-only.

Commit

Permalink
7004 dmu_tx_hold_zap() does dnode_hold() 7x on same object
Browse files Browse the repository at this point in the history
Reviewed by: Steve Gonczi <steve.gonczi@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Ned Bass <bass6@llnl.gov>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Approved by: Gordon Ross <gordon.w.ross@gmail.com>
  • Loading branch information
ahrens committed Sep 3, 2016
1 parent f9a78bf commit 79d7283
Show file tree
Hide file tree
Showing 8 changed files with 142 additions and 17 deletions.
15 changes: 15 additions & 0 deletions usr/src/uts/common/fs/zfs/dbuf.c
Expand Up @@ -2910,6 +2910,21 @@ dmu_buf_get_objset(dmu_buf_t *db)
return (dbi->db_objset);
}

/*
 * Enter the dnode handle behind a dbuf and hand back its dnode.
 *
 * The buffer is viewed as its dmu_buf_impl_t form, DB_DNODE_ENTER() is
 * applied to it, and the value of DB_DNODE() is returned.  Callers are
 * expected to pair this with dmu_buf_dnode_exit() on the same buffer
 * once they are done with the returned dnode.
 */
dnode_t *
dmu_buf_dnode_enter(dmu_buf_t *db)
{
	dmu_buf_impl_t *impl = (dmu_buf_impl_t *)db;

	DB_DNODE_ENTER(impl);
	return (DB_DNODE(impl));
}

/*
 * Counterpart of dmu_buf_dnode_enter(): applies DB_DNODE_EXIT() to the
 * dmu_buf_impl_t form of the given buffer.
 */
void
dmu_buf_dnode_exit(dmu_buf_t *db)
{
	dmu_buf_impl_t *impl = (dmu_buf_impl_t *)db;

	DB_DNODE_EXIT(impl);
}

static void
dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db)
{
Expand Down
43 changes: 43 additions & 0 deletions usr/src/uts/common/fs/zfs/dmu.c
Expand Up @@ -126,6 +126,26 @@ const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
{ zfs_acl_byteswap, "acl" }
};

/*
 * Hold the dbuf that covers `offset` within the dnode `dn`, which the
 * caller already has in hand, without reading the buffer's contents.
 *
 *	dn	dnode to look the block up in
 *	offset	byte offset within the object
 *	tag	hold tag passed through to dbuf_hold()
 *	dbp	out: the held buffer on success, NULL on failure
 *
 * Returns 0 on success, or EIO when dbuf_hold() yields no buffer.
 */
int
dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset,
    void *tag, dmu_buf_t **dbp)
{
	uint64_t blkid;
	dmu_buf_impl_t *db;

	/*
	 * Translate the offset into a block id while dn_struct_rwlock is
	 * held, so the id is derived from the same dnode layout that
	 * dbuf_hold() subsequently observes; computing it before taking
	 * the lock leaves a window in which the layout could change.
	 * NOTE(review): presumes dbuf_whichblock() only reads dnode
	 * fields and acquires no locks of its own -- confirm.
	 */
	rw_enter(&dn->dn_struct_rwlock, RW_READER);
	blkid = dbuf_whichblock(dn, 0, offset);
	db = dbuf_hold(dn, blkid, tag);
	rw_exit(&dn->dn_struct_rwlock);

	if (db == NULL) {
		*dbp = NULL;
		return (SET_ERROR(EIO));
	}

	*dbp = &db->db;
	return (0);
}
int
dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
void *tag, dmu_buf_t **dbp)
Expand Down Expand Up @@ -153,6 +173,29 @@ dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
return (err);
}

/*
 * Hold and read the dbuf that covers `offset` within the dnode `dn`.
 *
 * The buffer is first held via dmu_buf_hold_noread_by_dnode() and then
 * read with dbuf_read() using DB_RF_CANFAIL, plus DB_RF_NOPREFETCH when
 * the caller passed DMU_READ_NO_PREFETCH in `flags`.
 *
 * Returns 0 with *dbp set to the held buffer on success.  If the read
 * fails, the hold is dropped, *dbp is set to NULL and the read error is
 * returned; if the hold itself fails, its error is returned as-is.
 */
int
dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
    void *tag, dmu_buf_t **dbp, int flags)
{
	dmu_buf_impl_t *held;
	int read_flags;
	int rc;

	rc = dmu_buf_hold_noread_by_dnode(dn, offset, tag, dbp);
	if (rc != 0)
		return (rc);

	read_flags = (flags & DMU_READ_NO_PREFETCH) ?
	    (DB_RF_CANFAIL | DB_RF_NOPREFETCH) : DB_RF_CANFAIL;

	held = (dmu_buf_impl_t *)(*dbp);
	rc = dbuf_read(held, NULL, read_flags);
	if (rc != 0) {
		/* Don't leave the caller with a hold on an unread buffer. */
		dbuf_rele(held, tag);
		*dbp = NULL;
	}

	return (rc);
}

int
dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
void *tag, dmu_buf_t **dbp, int flags)
Expand Down
7 changes: 3 additions & 4 deletions usr/src/uts/common/fs/zfs/dmu_tx.c
Expand Up @@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
*/

Expand Down Expand Up @@ -807,15 +807,14 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
* access the name in this fat-zap so that we'll check
* for i/o errors to the leaf blocks, etc.
*/
err = zap_lookup(dn->dn_objset, dn->dn_object, name,
8, 0, NULL);
err = zap_lookup_by_dnode(dn, name, 8, 0, NULL);
if (err == EIO) {
tx->tx_err = err;
return;
}
}

err = zap_count_write(dn->dn_objset, dn->dn_object, name, add,
err = zap_count_write_by_dnode(dn, name, add,
&txh->txh_space_towrite, &txh->txh_space_tooverwrite);

/*
Expand Down
11 changes: 8 additions & 3 deletions usr/src/uts/common/fs/zfs/sys/dmu.h
Expand Up @@ -78,6 +78,7 @@ struct sa_handle;
typedef struct objset objset_t;
typedef struct dmu_tx dmu_tx_t;
typedef struct dsl_dir dsl_dir_t;
typedef struct dnode dnode_t;

typedef enum dmu_object_byteswap {
DMU_BSWAP_UINT8,
Expand Down Expand Up @@ -417,7 +418,7 @@ dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset,
#define WP_DMU_SYNC 0x2
#define WP_SPILL 0x4

void dmu_write_policy(objset_t *os, struct dnode *dn, int level, int wp,
void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
struct zio_prop *zp);
/*
* The bonus data is accessed more or less like a regular buffer.
Expand All @@ -443,7 +444,7 @@ int dmu_rm_spill(objset_t *, uint64_t, dmu_tx_t *);
*/

int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
int dmu_spill_hold_by_dnode(struct dnode *dn, uint32_t flags,
int dmu_spill_hold_by_dnode(dnode_t *dn, uint32_t flags,
void *tag, dmu_buf_t **dbp);
int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);

Expand All @@ -463,6 +464,8 @@ int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
*/
int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
void *tag, dmu_buf_t **, int flags);
int dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
void *tag, dmu_buf_t **dbp, int flags);

/*
* Add a reference to a dmu buffer that has already been held via
Expand Down Expand Up @@ -616,6 +619,8 @@ void *dmu_buf_remove_user(dmu_buf_t *db, dmu_buf_user_t *user);
void *dmu_buf_get_user(dmu_buf_t *db);

objset_t *dmu_buf_get_objset(dmu_buf_t *db);
dnode_t *dmu_buf_dnode_enter(dmu_buf_t *db);
void dmu_buf_dnode_exit(dmu_buf_t *db);

/* Block until any in-progress dmu buf user evictions complete. */
void dmu_buf_user_evict_wait(void);
Expand Down Expand Up @@ -792,7 +797,7 @@ extern const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS];
*/
int dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi);
/* Like dmu_object_info, but faster if you have a held dnode in hand. */
void dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi);
void dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi);
/* Like dmu_object_info, but faster if you have a held dbuf in hand. */
void dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi);
/*
Expand Down
6 changes: 3 additions & 3 deletions usr/src/uts/common/fs/zfs/sys/dnode.h
Expand Up @@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/

Expand Down Expand Up @@ -149,7 +149,7 @@ typedef struct dnode_phys {
blkptr_t dn_spill;
} dnode_phys_t;

typedef struct dnode {
struct dnode {
/*
* Protects the structure of the dnode, including the number of levels
* of indirection (dn_nlevels), dn_maxblkid, and dn_next_*
Expand Down Expand Up @@ -247,7 +247,7 @@ typedef struct dnode {

/* holds prefetch structure */
struct zfetch dn_zfetch;
} dnode_t;
};

/*
* Adds a level of indirection between the dbuf and the dnode to avoid
Expand Down
10 changes: 8 additions & 2 deletions usr/src/uts/common/fs/zfs/sys/zap.h
Expand Up @@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
*/

#ifndef _SYS_ZAP_H
Expand Down Expand Up @@ -216,8 +216,14 @@ int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int zap_contains(objset_t *ds, uint64_t zapobj, const char *name);
int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints);
int zap_lookup_by_dnode(dnode_t *dn, const char *name,
uint64_t integer_size, uint64_t num_integers, void *buf);
int zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
uint64_t integer_size, uint64_t num_integers, void *buf,
matchtype_t mt, char *realname, int rn_len,
boolean_t *ncp);

int zap_count_write(objset_t *os, uint64_t zapobj, const char *name,
int zap_count_write_by_dnode(dnode_t *dn, const char *name,
int add, refcount_t *towrite, refcount_t *tooverwrite);

/*
Expand Down
18 changes: 15 additions & 3 deletions usr/src/uts/common/fs/zfs/zap.c
Expand Up @@ -270,15 +270,23 @@ zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp)
uint64_t blk, off;
int err;
dmu_buf_t *db;
dnode_t *dn;
int bs = FZAP_BLOCK_SHIFT(zap);

ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));

blk = idx >> (bs-3);
off = idx & ((1<<(bs-3))-1);

err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
/*
* Note: this is equivalent to dmu_buf_hold(), but going through
* _dnode_enter / _by_dnode is faster because we don't have to
* hold the dnode.
*/
dn = dmu_buf_dnode_enter(zap->zap_dbuf);
err = dmu_buf_hold_by_dnode(dn,
(tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH);
dmu_buf_dnode_exit(zap->zap_dbuf);
if (err)
return (err);
*valp = ((uint64_t *)db->db_data)[off];
Expand All @@ -292,9 +300,11 @@ zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp)
*/
blk = (idx*2) >> (bs-3);

err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
dn = dmu_buf_dnode_enter(zap->zap_dbuf);
err = dmu_buf_hold_by_dnode(dn,
(tbl->zt_nextblk + blk) << bs, FTAG, &db,
DMU_READ_NO_PREFETCH);
dmu_buf_dnode_exit(zap->zap_dbuf);
if (err == 0)
dmu_buf_rele(db, FTAG);
}
Expand Down Expand Up @@ -505,8 +515,10 @@ zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt,

ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));

err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
dnode_t *dn = dmu_buf_dnode_enter(zap->zap_dbuf);
err = dmu_buf_hold_by_dnode(dn,
blkid << bs, NULL, &db, DMU_READ_NO_PREFETCH);
dmu_buf_dnode_exit(zap->zap_dbuf);
if (err)
return (err);

Expand Down
49 changes: 47 additions & 2 deletions usr/src/uts/common/fs/zfs/zap_micro.c
Expand Up @@ -539,6 +539,24 @@ zap_lockdir_impl(dmu_buf_t *db, void *tag, dmu_tx_t *tx,
return (0);
}

/*
 * Variant of zap_lockdir() for callers that already have the dnode.
 *
 * Holds the buffer at offset 0 of `dn` (no prefetch) and passes it to
 * zap_lockdir_impl() together with the remaining arguments.  If
 * zap_lockdir_impl() fails, the buffer hold taken here is released
 * before the error is returned.
 *
 * Returns 0 on success, otherwise the error from the hold or from
 * zap_lockdir_impl().
 */
static int
zap_lockdir_by_dnode(dnode_t *dn, dmu_tx_t *tx,
    krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp)
{
	dmu_buf_t *buf;
	int rc;

	rc = dmu_buf_hold_by_dnode(dn, 0, tag, &buf, DMU_READ_NO_PREFETCH);
	if (rc == 0) {
		rc = zap_lockdir_impl(buf, tag, tx, lti, fatreader, adding,
		    zapp);
		if (rc != 0)
			dmu_buf_rele(buf, tag);
	}
	return (rc);
}

int
zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp)
Expand Down Expand Up @@ -863,6 +881,33 @@ zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,
return (err);
}

/*
 * Look up `name` in the ZAP object backed by the dnode `dn`.
 *
 * Convenience wrapper around zap_lookup_norm_by_dnode() that asks for
 * an MT_EXACT match and passes no real-name buffer and no `ncp`
 * out-parameter.  Returns whatever that function returns.
 */
int
zap_lookup_by_dnode(dnode_t *dn, const char *name,
    uint64_t integer_size, uint64_t num_integers, void *buf)
{
	int rc;

	rc = zap_lookup_norm_by_dnode(dn, name, integer_size, num_integers,
	    buf, MT_EXACT, NULL, 0, NULL);
	return (rc);
}

/*
 * Look up `name` in the ZAP object backed by the dnode `dn`, using the
 * match type `mt`.
 *
 * The ZAP is locked through zap_lockdir_by_dnode() with a NULL tx,
 * RW_READER, fatreader TRUE and adding FALSE; the lookup itself is
 * delegated to zap_lookup_impl(), after which the ZAP is unlocked.
 *
 * Returns the error from locking the ZAP if that fails, otherwise the
 * result of zap_lookup_impl().
 */
int
zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
    uint64_t integer_size, uint64_t num_integers, void *buf,
    matchtype_t mt, char *realname, int rn_len,
    boolean_t *ncp)
{
	zap_t *zap;
	int rc;

	rc = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
	    FTAG, &zap);
	if (rc == 0) {
		rc = zap_lookup_impl(zap, name, integer_size,
		    num_integers, buf, mt, realname, rn_len, ncp);
		zap_unlockdir(zap, FTAG);
	}
	return (rc);
}

int
zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints)
Expand Down Expand Up @@ -1394,7 +1439,7 @@ zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs)
}

int
zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
zap_count_write_by_dnode(dnode_t *dn, const char *name, int add,
refcount_t *towrite, refcount_t *tooverwrite)
{
zap_t *zap;
Expand Down Expand Up @@ -1423,7 +1468,7 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
* At present we are just evaluating the possibility of this operation
* and hence we do not want to trigger an upgrade.
*/
err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE,
err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
FTAG, &zap);
if (err != 0)
return (err);
Expand Down

0 comments on commit 79d7283

Please sign in to comment.