Skip to content
This repository has been archived by the owner on Nov 7, 2019. It is now read-only.

Commit

Permalink
4631 zvol_get_stats triggering too many reads
Browse files Browse the repository at this point in the history
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>
  • Loading branch information
ahrens authored and Christopher Siden committed Jul 17, 2014
1 parent 2a104a5 commit bbfa8ea
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 79 deletions.
102 changes: 38 additions & 64 deletions usr/src/uts/common/fs/zfs/arc.c
Expand Up @@ -105,7 +105,7 @@
* with the buffer may be evicted prior to the callback. The callback
* must be made with *no locks held* (to prevent deadlock). Additionally,
* the users of callbacks must ensure that their private data is
* protected from simultaneous callbacks from arc_buf_evict()
* protected from simultaneous callbacks from arc_clear_callback()
* and arc_do_user_evicts().
*
* Note that the majority of the performance stats are manipulated
Expand Down Expand Up @@ -1495,8 +1495,12 @@ arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t))
}
}

/*
* Free up buf->b_data and if 'remove' is set, then pull the
* arc_buf_t off of the arc_buf_hdr_t's list and free it.
*/
static void
arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all)
arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove)
{
arc_buf_t **bufp;

Expand Down Expand Up @@ -1547,7 +1551,7 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all)
}

/* only remove the buf if requested */
if (!all)
if (!remove)
return;

/* remove the buf from the hdr list */
Expand Down Expand Up @@ -2121,7 +2125,7 @@ arc_do_user_evicts(void)
mutex_exit(&arc_eviction_mtx);

if (buf->b_efunc != NULL)
VERIFY(buf->b_efunc(buf) == 0);
VERIFY0(buf->b_efunc(buf->b_private));

buf->b_efunc = NULL;
buf->b_private = NULL;
Expand Down Expand Up @@ -3240,16 +3244,25 @@ arc_freed(spa_t *spa, const blkptr_t *bp)
}

/*
* This is used by the DMU to let the ARC know that a buffer is
* being evicted, so the ARC should clean up. If this arc buf
* is not yet in the evicted state, it will be put there.
* Clear the user eviction callback set by arc_set_callback(), first calling
* it if it exists. Because the presence of a callback keeps an arc_buf cached,
* clearing the callback may result in the arc_buf being destroyed. However,
* it will not result in the *last* arc_buf being destroyed, hence the data
* will remain cached in the ARC. We make a copy of the arc buffer here so
* that we can process the callback without holding any locks.
*
* It's possible that the callback is already in the process of being cleared
* by another thread. In this case we cannot clear the callback.
*
* Returns B_TRUE if the callback was successfully called and cleared.
*/
int
arc_buf_evict(arc_buf_t *buf)
boolean_t
arc_clear_callback(arc_buf_t *buf)
{
arc_buf_hdr_t *hdr;
kmutex_t *hash_lock;
arc_buf_t **bufp;
arc_evict_func_t *efunc = buf->b_efunc;
void *private = buf->b_private;

mutex_enter(&buf->b_evict_lock);
hdr = buf->b_hdr;
Expand All @@ -3259,17 +3272,16 @@ arc_buf_evict(arc_buf_t *buf)
*/
ASSERT(buf->b_data == NULL);
mutex_exit(&buf->b_evict_lock);
return (0);
return (B_FALSE);
} else if (buf->b_data == NULL) {
arc_buf_t copy = *buf; /* structure assignment */
/*
* We are on the eviction list; process this buffer now
* but let arc_do_user_evicts() do the reaping.
*/
buf->b_efunc = NULL;
mutex_exit(&buf->b_evict_lock);
VERIFY(copy.b_efunc(&copy) == 0);
return (1);
VERIFY0(efunc(private));
return (B_TRUE);
}
hash_lock = HDR_LOCK(hdr);
mutex_enter(hash_lock);
Expand All @@ -3279,48 +3291,21 @@ arc_buf_evict(arc_buf_t *buf)
ASSERT3U(refcount_count(&hdr->b_refcnt), <, hdr->b_datacnt);
ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu);

/*
* Pull this buffer off of the hdr
*/
bufp = &hdr->b_buf;
while (*bufp != buf)
bufp = &(*bufp)->b_next;
*bufp = buf->b_next;

ASSERT(buf->b_data != NULL);
arc_buf_destroy(buf, FALSE, FALSE);

if (hdr->b_datacnt == 0) {
arc_state_t *old_state = hdr->b_state;
arc_state_t *evicted_state;

ASSERT(hdr->b_buf == NULL);
ASSERT(refcount_is_zero(&hdr->b_refcnt));

evicted_state =
(old_state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost;

mutex_enter(&old_state->arcs_mtx);
mutex_enter(&evicted_state->arcs_mtx);

arc_change_state(evicted_state, hdr, hash_lock);
ASSERT(HDR_IN_HASH_TABLE(hdr));
hdr->b_flags |= ARC_IN_HASH_TABLE;
hdr->b_flags &= ~ARC_BUF_AVAILABLE;
buf->b_efunc = NULL;
buf->b_private = NULL;

mutex_exit(&evicted_state->arcs_mtx);
mutex_exit(&old_state->arcs_mtx);
if (hdr->b_datacnt > 1) {
mutex_exit(&buf->b_evict_lock);
arc_buf_destroy(buf, FALSE, TRUE);
} else {
ASSERT(buf == hdr->b_buf);
hdr->b_flags |= ARC_BUF_AVAILABLE;
mutex_exit(&buf->b_evict_lock);
}
mutex_exit(hash_lock);
mutex_exit(&buf->b_evict_lock);

VERIFY(buf->b_efunc(buf) == 0);
buf->b_efunc = NULL;
buf->b_private = NULL;
buf->b_hdr = NULL;
buf->b_next = NULL;
kmem_cache_free(buf_cache, buf);
return (1);
mutex_exit(hash_lock);
VERIFY0(efunc(private));
return (B_TRUE);
}

/*
Expand Down Expand Up @@ -3466,17 +3451,6 @@ arc_released(arc_buf_t *buf)
return (released);
}

/*
 * Report whether an eviction callback is currently registered on 'buf'.
 *
 * b_efunc is sampled while holding buf->b_evict_lock.  The lock is dropped
 * before returning, so the result is only a snapshot of the state at the
 * time of the check.
 *
 * Returns nonzero if b_efunc is set, 0 otherwise.
 */
int
arc_has_callback(arc_buf_t *buf)
{
	int has_efunc;

	mutex_enter(&buf->b_evict_lock);
	has_efunc = (buf->b_efunc != NULL) ? 1 : 0;
	mutex_exit(&buf->b_evict_lock);

	return (has_efunc);
}

#ifdef ZFS_DEBUG
int
arc_referenced(arc_buf_t *buf)
Expand Down
38 changes: 25 additions & 13 deletions usr/src/uts/common/fs/zfs/dbuf.c
Expand Up @@ -181,8 +181,7 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
}

/*
* Remove an entry from the hash table. This operation will
* fail if there are any existing holds on the db.
* Remove an entry from the hash table. It must be in the EVICTING state.
*/
static void
dbuf_hash_remove(dmu_buf_impl_t *db)
Expand All @@ -194,7 +193,7 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
dmu_buf_impl_t *dbf, **dbp;

/*
* We musn't hold db_mtx to maintin lock ordering:
* We mustn't hold db_mtx to maintain lock ordering:
* DBUF_HASH_MUTEX > db_mtx.
*/
ASSERT(refcount_is_zero(&db->db_holds));
Expand Down Expand Up @@ -431,7 +430,6 @@ static void
dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf)
{
ASSERT(MUTEX_HELD(&db->db_mtx));
ASSERT(db->db_buf == NULL || !arc_has_callback(db->db_buf));
db->db_buf = buf;
if (buf != NULL) {
ASSERT(buf->b_data != NULL);
Expand Down Expand Up @@ -1544,20 +1542,23 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
* when we are not holding the dn_dbufs_mtx, we can't clear the
* entry in the dn_dbufs list. We have to wait until dbuf_destroy()
* in this case. For callers from the DMU we will usually see:
* dbuf_clear()->arc_buf_evict()->dbuf_do_evict()->dbuf_destroy()
* dbuf_clear()->arc_clear_callback()->dbuf_do_evict()->dbuf_destroy()
* For the arc callback, we will usually see:
* dbuf_do_evict()->dbuf_clear();dbuf_destroy()
* Sometimes, though, we will get a mix of these two:
* DMU: dbuf_clear()->arc_buf_evict()
* DMU: dbuf_clear()->arc_clear_callback()
* ARC: dbuf_do_evict()->dbuf_destroy()
*
* This routine will dissociate the dbuf from the arc, by calling
* arc_clear_callback(), but will not evict the data from the ARC.
*/
void
dbuf_clear(dmu_buf_impl_t *db)
{
dnode_t *dn;
dmu_buf_impl_t *parent = db->db_parent;
dmu_buf_impl_t *dndb;
int dbuf_gone = FALSE;
boolean_t dbuf_gone = B_FALSE;

ASSERT(MUTEX_HELD(&db->db_mtx));
ASSERT(refcount_is_zero(&db->db_holds));
Expand Down Expand Up @@ -1603,7 +1604,7 @@ dbuf_clear(dmu_buf_impl_t *db)
}

if (db->db_buf)
dbuf_gone = arc_buf_evict(db->db_buf);
dbuf_gone = arc_clear_callback(db->db_buf);

if (!dbuf_gone)
mutex_exit(&db->db_mtx);
Expand Down Expand Up @@ -1771,8 +1772,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
static int
dbuf_do_evict(void *private)
{
arc_buf_t *buf = private;
dmu_buf_impl_t *db = buf->b_private;
dmu_buf_impl_t *db = private;

if (!MUTEX_HELD(&db->db_mtx))
mutex_enter(&db->db_mtx);
Expand Down Expand Up @@ -2135,11 +2135,23 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
* block on-disk. If so, then we simply evict
* ourselves.
*/
if (!DBUF_IS_CACHEABLE(db) ||
arc_buf_eviction_needed(db->db_buf))
if (!DBUF_IS_CACHEABLE(db)) {
if (db->db_blkptr != NULL &&
!BP_IS_HOLE(db->db_blkptr) &&
!BP_IS_EMBEDDED(db->db_blkptr)) {
spa_t *spa =
dmu_objset_spa(db->db_objset);
blkptr_t bp = *db->db_blkptr;
dbuf_clear(db);
arc_freed(spa, &bp);
} else {
dbuf_clear(db);
}
} else if (arc_buf_eviction_needed(db->db_buf)) {
dbuf_clear(db);
else
} else {
mutex_exit(&db->db_mtx);
}
}
} else {
mutex_exit(&db->db_mtx);
Expand Down
3 changes: 1 addition & 2 deletions usr/src/uts/common/fs/zfs/sys/arc.h
Expand Up @@ -95,7 +95,6 @@ boolean_t arc_buf_remove_ref(arc_buf_t *buf, void *tag);
int arc_buf_size(arc_buf_t *buf);
void arc_release(arc_buf_t *buf, void *tag);
int arc_released(arc_buf_t *buf);
int arc_has_callback(arc_buf_t *buf);
void arc_buf_freeze(arc_buf_t *buf);
void arc_buf_thaw(arc_buf_t *buf);
boolean_t arc_buf_eviction_needed(arc_buf_t *buf);
Expand All @@ -114,7 +113,7 @@ zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
void arc_freed(spa_t *spa, const blkptr_t *bp);

void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
int arc_buf_evict(arc_buf_t *buf);
boolean_t arc_clear_callback(arc_buf_t *buf);

void arc_flush(spa_t *spa);
void arc_tempreserve_clear(uint64_t reserve);
Expand Down

0 comments on commit bbfa8ea

Please sign in to comment.