From 6ac34494b6703d60d4d1ba078c26ca20af952451 Mon Sep 17 00:00:00 2001 From: Mark Roper Date: Wed, 12 Feb 2020 12:14:38 +0000 Subject: [PATCH] Prevent deadlock in arc_read in Linux memory reclaim callback Using zfs with Lustre, an arc_read can trigger kernel memory allocation that in turn leads to a memory reclaim callback and a deadlock within a single zfs process. This change uses spl_fstrans_mark and spl_fstrans_unmark to prevent the reclaim attempt and the deadlock (https://zfsonlinux.topicbox.com/groups/zfs-devel/T4db2c705ec1804ba). The stack trace observed is: #0 [ffffc9002b98adc8] __schedule at ffffffff81610f2e #1 [ffffc9002b98ae68] schedule at ffffffff81611558 #2 [ffffc9002b98ae70] schedule_preempt_disabled at ffffffff8161184a #3 [ffffc9002b98ae78] __mutex_lock at ffffffff816131e8 #4 [ffffc9002b98af18] arc_buf_destroy at ffffffffa0bf37d7 [zfs] #5 [ffffc9002b98af48] dbuf_destroy at ffffffffa0bfa6fe [zfs] #6 [ffffc9002b98af88] dbuf_evict_one at ffffffffa0bfaa96 [zfs] #7 [ffffc9002b98afa0] dbuf_rele_and_unlock at ffffffffa0bfa561 [zfs] #8 [ffffc9002b98b050] dbuf_rele_and_unlock at ffffffffa0bfa32b [zfs] #9 [ffffc9002b98b100] osd_object_delete at ffffffffa0b64ecc [osd_zfs] #10 [ffffc9002b98b118] lu_object_free at ffffffffa06d6a74 [obdclass] #11 [ffffc9002b98b178] lu_site_purge_objects at ffffffffa06d7fc1 [obdclass] #12 [ffffc9002b98b220] lu_cache_shrink_scan at ffffffffa06d81b8 [obdclass] #13 [ffffc9002b98b278] shrink_slab at ffffffff811ca9d8 #14 [ffffc9002b98b338] shrink_node at ffffffff811cfd94 #15 [ffffc9002b98b3b8] do_try_to_free_pages at ffffffff811cfe63 #16 [ffffc9002b98b408] try_to_free_pages at ffffffff811d01c4 #17 [ffffc9002b98b488] __alloc_pages_slowpath at ffffffff811be7f2 #18 [ffffc9002b98b580] __alloc_pages_nodemask at ffffffff811bf3ed #19 [ffffc9002b98b5e0] new_slab at ffffffff81226304 #20 [ffffc9002b98b638] ___slab_alloc at ffffffff812272ab #21 [ffffc9002b98b6f8] __slab_alloc at ffffffff8122740c #22 [ffffc9002b98b708] kmem_cache_alloc at 
ffffffff81227578 #23 [ffffc9002b98b740] spl_kmem_cache_alloc at ffffffffa048a1fd [spl] #24 [ffffc9002b98b780] arc_buf_alloc_impl at ffffffffa0befba2 [zfs] #25 [ffffc9002b98b7b0] arc_read at ffffffffa0bf0924 [zfs] #26 [ffffc9002b98b858] dbuf_read at ffffffffa0bf9083 [zfs] #27 [ffffc9002b98b900] dmu_buf_hold_by_dnode at ffffffffa0c04869 [zfs] Signed-off-by: Mark Roper --- module/zfs/arc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 8fa8c91ac49c..c1b065c43718 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -5525,6 +5525,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_buf_hdr_t *hdr = NULL; kmutex_t *hash_lock = NULL; zio_t *rzio; + fstrans_cookie_t cookie = spl_fstrans_mark(); uint64_t guid = spa_load_guid(spa); boolean_t compressed_read = (zio_flags & ZIO_FLAG_RAW_COMPRESS) != 0; boolean_t encrypted_read = BP_IS_ENCRYPTED(bp) && @@ -5997,6 +5998,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, /* embedded bps don't actually go to disk */ if (!embedded_bp) spa_read_history_add(spa, zb, *arc_flags); + spl_fstrans_unmark(cookie); return (rc); }