Skip to content

Commit

Permalink
Reset l2ad_hand and l2ad_first in l2arc_evict
Browse files Browse the repository at this point in the history
Increasing l2arc_write_size or l2arc_write_boost can result in
l2arc_write_buffers() not having enough space to perform its writes and
panic zio_write_phys().

Instead of resetting l2ad_hand to l2ad_start at the end of
l2arc_write_buffers() and not taking into account a possible
user-mediated increase of l2arc_write_max, we do this in l2arc_evict(),
right after l2arc_write_size() has run. If there is not enough space to
evict (ie we will exceed l2ad_end) we evict to the end of the device,
and then reset l2ad_hand to l2ad_start, set l2ad_first to 0 and recurse
l2arc_evict(). We avoid infinite recursion of l2arc_evict() by making
sure in l2arc_write_size() that l2ad_start + size does not exceed
l2ad_end.

Signed-off-by: George Amanakis <gamanakis@gmail.com>
  • Loading branch information
gamanakis committed Mar 26, 2020
1 parent 652bdc9 commit c13f0d1
Show file tree
Hide file tree
Showing 4 changed files with 139 additions and 23 deletions.
70 changes: 48 additions & 22 deletions module/zfs/arc.c
Expand Up @@ -7467,9 +7467,9 @@ l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *hdr)
}

static uint64_t
l2arc_write_size(void)
l2arc_write_size(l2arc_dev_t *dev)
{
uint64_t size;
uint64_t size, dev_size;

/*
* Make sure our globals have meaningful values in case the user
Expand All @@ -7486,6 +7486,23 @@ l2arc_write_size(void)
if (arc_warm == B_FALSE)
size += l2arc_write_boost;

/*
* Make sure the write size does not exceed the size of the cache
* device. This is important in l2arc_evict(), otherwise infinite
* recursion can occur.
*/
dev_size = dev->l2ad_end - dev->l2ad_start;
if (size >= dev_size) {
cmn_err(CE_NOTE, "l2arc_write_max or l2arc_write_boost "
"exceeds the size of the cache device (guid %llu), "
"resetting them to the default (%d)",
dev->l2ad_vdev->vdev_guid, L2ARC_WRITE_SIZE);
size = l2arc_write_max = l2arc_write_boost = L2ARC_WRITE_SIZE;

if (arc_warm == B_FALSE)
size += l2arc_write_boost;
}

return (size);

}
Expand Down Expand Up @@ -8008,29 +8025,36 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
arc_buf_hdr_t *hdr, *hdr_prev;
kmutex_t *hash_lock;
uint64_t taddr;
boolean_t rerun = B_FALSE;

buflist = &dev->l2ad_buflist;

if (!all && dev->l2ad_first) {
/*
* This is the first sweep through the device. There is
* nothing to evict.
*/
return;
}

if (dev->l2ad_hand >= (dev->l2ad_end - (2 * distance))) {
if (dev->l2ad_hand >= (dev->l2ad_end - distance)) {
/*
* When nearing the end of the device, evict to the end
* before the device write hand jumps to the start.
* before the device write hand jumps to the start. Then
* schedule l2arc_evict() to recurse after bumping the write
* hand to the start. This recursion does not happen
* indefinitely as we make sure in l2arc_write_size() that
* when l2ad_hand is reset, the write size does not exceed
* the end of the device.
*/
rerun = B_TRUE;
taddr = dev->l2ad_end;
} else {
taddr = dev->l2ad_hand + distance;
}
DTRACE_PROBE4(l2arc__evict, l2arc_dev_t *, dev, list_t *, buflist,
uint64_t, taddr, boolean_t, all);

if (!all && dev->l2ad_first) {
/*
* This is the first sweep through the device. There is
* nothing to evict.
*/
goto out;
}

top:
mutex_enter(&dev->l2ad_mtx);
for (hdr = list_tail(buflist); hdr; hdr = hdr_prev) {
Expand Down Expand Up @@ -8101,6 +8125,17 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
mutex_exit(hash_lock);
}
mutex_exit(&dev->l2ad_mtx);

out:
if (rerun) {
/*
* Bump device hand to the device start if it is approaching the
* end. l2arc_evict() has already evicted ahead for this case.
*/
dev->l2ad_hand = dev->l2ad_start;
dev->l2ad_first = B_FALSE;
l2arc_evict(dev, distance, B_FALSE);
}
}

/*
Expand Down Expand Up @@ -8448,15 +8483,6 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
ARCSTAT_INCR(arcstat_l2_lsize, write_lsize);
ARCSTAT_INCR(arcstat_l2_psize, write_psize);

/*
* Bump device hand to the device start if it is approaching the end.
* l2arc_evict() will already have evicted ahead for this case.
*/
if (dev->l2ad_hand >= (dev->l2ad_end - target_sz)) {
dev->l2ad_hand = dev->l2ad_start;
dev->l2ad_first = B_FALSE;
}

dev->l2ad_writing = B_TRUE;
(void) zio_wait(pio);
dev->l2ad_writing = B_FALSE;
Expand Down Expand Up @@ -8539,7 +8565,7 @@ l2arc_feed_thread(void *unused)

ARCSTAT_BUMP(arcstat_l2_feeds);

size = l2arc_write_size();
size = l2arc_write_size(dev);

/*
* Evict L2ARC buffers that will be overwritten.
Expand Down
2 changes: 2 additions & 0 deletions tests/zfs-tests/include/tunables.cfg
Expand Up @@ -35,6 +35,8 @@ DISABLE_IVSET_GUID_CHECK disable_ivset_guid_check zfs_disable_ivset_guid_check
INITIALIZE_CHUNK_SIZE initialize_chunk_size zfs_initialize_chunk_size
INITIALIZE_VALUE initialize_value zfs_initialize_value
KEEP_LOG_SPACEMAPS_AT_EXPORT keep_log_spacemaps_at_export zfs_keep_log_spacemaps_at_export
L2ARC_NOPREFETCH UNSUPPORTED l2arc_noprefetch
L2ARC_WRITE_MAX UNSUPPORTED l2arc_write_max
LIVELIST_CONDENSE_NEW_ALLOC livelist.condense.new_alloc zfs_livelist_condense_new_alloc
LIVELIST_CONDENSE_SYNC_CANCEL livelist.condense.sync_cancel zfs_livelist_condense_sync_cancel
LIVELIST_CONDENSE_SYNC_PAUSE livelist.condense.sync_pause zfs_livelist_condense_sync_pause
Expand Down
3 changes: 2 additions & 1 deletion tests/zfs-tests/tests/functional/cache/Makefile.am
Expand Up @@ -12,7 +12,8 @@ dist_pkgdata_SCRIPTS = \
cache_008_neg.ksh \
cache_009_pos.ksh \
cache_010_neg.ksh \
cache_011_pos.ksh
cache_011_pos.ksh \
cache_012_pos.ksh

dist_pkgdata_DATA = \
cache.cfg \
Expand Down
87 changes: 87 additions & 0 deletions tests/zfs-tests/tests/functional/cache/cache_012_pos.ksh
@@ -0,0 +1,87 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#

#
# Copyright (c) 2020, George Amanakis. All rights reserved.
#

. $STF_SUITE/tests/functional/cache/cache.cfg
. $STF_SUITE/tests/functional/cache/cache.kshlib

#
# DESCRIPTION:
# Looping around a cache device with l2arc_write_size exceeding
# the device size succeeds.
#
# STRATEGY:
# 1. Create pool with a cache device.
# 2. Create a random file in that pool larger than the cache device
# and random read for 30 sec.
# 3. Read l2_size from arcstats and verify it is at least 70% of the
# size of the cache device.
# 3. Destroy pool.
#

verify_runnable "global"

log_assert "Looping around a cache device succeeds."

function cleanup
{
if poolexists $TESTPOOL ; then
destroy_pool $TESTPOOL
fi

log_must set_tunable32 L2ARC_WRITE_MAX $write_max
log_must set_tunable32 L2ARC_NOPREFETCH $noprefetch
}
log_onexit cleanup

typeset write_max=$(get_tunable L2ARC_WRITE_MAX)
typeset noprefetch=$(get_tunable L2ARC_NOPREFETCH)
log_must set_tunable32 L2ARC_WRITE_MAX $(( $MINVDEVSIZE * 2 ))
log_must set_tunable32 L2ARC_NOPREFETCH 0

typeset fill_mb=$(( floor($MINVDEVSIZE * 3 / 4 ) ))
export DIRECTORY=/$TESTPOOL
export NUMJOBS=4
export RUNTIME=30
export PERF_RANDSEED=1234
export PERF_COMPPERCENT=66
export PERF_COMPCHUNK=0
export BLOCKSIZE=128K
export SYNC_TYPE=0
export DIRECT=1
export FILE_SIZE=$(( floor($fill_mb / $NUMJOBS) ))

typeset VCACHE="$VDIR/vdev.cache"
typeset VCACHE_SZ=$(( $MINVDEVSIZE / 2 ))
log_must truncate -s $VCACHE_SZ $VCACHE

log_must zpool create $TESTPOOL $VDEV \
cache $VCACHE

log_must fio $FIO_SCRIPTS/mkfiles.fio
log_must fio $FIO_SCRIPTS/random_reads.fio

typeset l2_size=$(grep l2_size /proc/spl/kstat/zfs/arcstats \
| awk '{print $3}')

log_must test $l2_size -gt $(( floor($VCACHE_SZ * 7 / 10) ))

log_must zpool destroy $TESTPOOL

log_pass "Looping around a cache device succeeds."

0 comments on commit c13f0d1

Please sign in to comment.