From aeb9ab71c0c45c2784533f5424ce489893260000 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 18 Jul 2023 12:46:42 -0400 Subject: [PATCH] ZIL: Fix config lock deadlock. When we have some LWBs closed and their ZIOs ready to be issued, we can not afford sleeping on config lock if somebody else try to lock it as writer, or it will cause a deadlock. To solve it, move spa_config_enter() from zil_lwb_write_issue() to zil_lwb_write_close() under zl_issuer_lock to enforce lock ordering with other threads. Now if we can't immediately lock config, issue all previously closed LWBs so that they could drop their config locks after completion, and only then allow sleeping on our lock. Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Issue #15078 --- module/zfs/zil.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 00d66a2481d7..c43b55a6e2c8 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -151,6 +151,7 @@ static kmem_cache_t *zil_lwb_cache; static kmem_cache_t *zil_zcw_cache; static void zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx); +static void zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb); static itx_t *zil_itx_clone(itx_t *oitx); static int @@ -1768,7 +1769,7 @@ static uint_t zil_maxblocksize = SPA_OLD_MAXBLOCKSIZE; * Has to be called under zl_issuer_lock to chain more lwbs. */ static lwb_t * -zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb) +zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, list_t *ilwbs) { lwb_t *nlwb = NULL; zil_chain_t *zilc; @@ -1870,6 +1871,25 @@ zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb) dmu_tx_commit(tx); + /* + * Acquire config lock for the lwb to issue it later. If we have a + * queue of already closed parent lwbs already holding config lock, + * we can not block here now, or it will be a deadlock. + */ + if (ilwbs && !list_is_empty(ilwbs)) { + if (!spa_config_tryenter(spa, SCL_STATE, lwb, RW_READER)) { + lwb_t *tlwb; + while ((tlwb = list_remove_head(ilwbs)) != NULL) + zil_lwb_write_issue(zilog, tlwb); + spa_config_enter(spa, SCL_STATE, lwb, RW_READER); + } + } else { + spa_config_enter(spa, SCL_STATE, lwb, RW_READER); + } + + if (ilwbs) + list_insert_tail(ilwbs, lwb); + /* * If there was an allocation failure then nlwb will be null which * forces a txg_wait_synced(). @@ -1933,7 +1953,6 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_alloc, BP_GET_LSIZE(&lwb->lwb_blk)); } - spa_config_enter(zilog->zl_spa, SCL_STATE, lwb, RW_READER); zil_lwb_add_block(lwb, &lwb->lwb_blk); lwb->lwb_issued_timestamp = gethrtime(); zio_nowait(lwb->lwb_root_zio); @@ -2037,8 +2056,7 @@ zil_lwb_assign(zilog_t *zilog, lwb_t *lwb, itx_t *itx, list_t *ilwbs) lwb_sp < zil_max_waste_space(zilog) && (dlen % max_log_data == 0 || lwb_sp < reclen + dlen % max_log_data))) { - list_insert_tail(ilwbs, lwb); - lwb = zil_lwb_write_close(zilog, lwb); + lwb = zil_lwb_write_close(zilog, lwb, ilwbs); if (lwb == NULL) return (NULL); zil_lwb_write_open(zilog, lwb); @@ -2937,8 +2955,7 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs) zfs_commit_timeout_pct / 100; if (sleep < zil_min_commit_timeout || lwb->lwb_sz - lwb->lwb_nused < lwb->lwb_sz / 8) { - list_insert_tail(ilwbs, lwb); - lwb = zil_lwb_write_close(zilog, lwb); + lwb = zil_lwb_write_close(zilog, lwb, ilwbs); zilog->zl_cur_used = 0; if (lwb == NULL) { while ((lwb = list_remove_head(ilwbs)) @@ -3096,7 +3113,7 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw) * since we've reached the commit waiter's timeout and it still * hasn't been issued. */ - lwb_t *nlwb = zil_lwb_write_close(zilog, lwb); + lwb_t *nlwb = zil_lwb_write_close(zilog, lwb, NULL); ASSERT3S(lwb->lwb_state, !=, LWB_STATE_OPENED);