Skip to content

Commit 3e31d2b

Browse files
Eric Schrockbehlendorf
authored andcommitted
Illumos #883: ZIL reuse during remount corruption
Moving the zil_free() cleanup to zil_close() prevents this problem from occurring in the first place. There is a very good description of the issue and fix in Illumus #883. Reviewed by: Matt Ahrens <Matt.Ahrens@delphix.com> Reviewed by: Adam Leventhal <Adam.Leventhal@delphix.com> Reviewed by: Albert Lee <trisk@nexenta.com> Reviewed by: Gordon Ross <gwr@nexenta.com> Reviewed by: Garrett D'Amore <garrett@nexenta.com> Reivewed by: Dan McDonald <danmcd@nexenta.com> Approved by: Gordon Ross <gwr@nexenta.com> References to Illumos issue and patch: - https://www.illumos.org/issues/883 - illumos/illumos-gate@c9ba2a43cb Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Issue #340
1 parent f5fc4ac commit 3e31d2b

File tree

2 files changed

+60
-16
lines changed

2 files changed

+60
-16
lines changed

cmd/ztest/ztest.c

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ typedef struct ztest_od {
206206
*/
207207
typedef struct ztest_ds {
208208
objset_t *zd_os;
209+
krwlock_t zd_zilog_lock;
209210
zilog_t *zd_zilog;
210211
uint64_t zd_seq;
211212
ztest_od_t *zd_od; /* debugging aid */
@@ -239,6 +240,7 @@ ztest_func_t ztest_dmu_commit_callbacks;
239240
ztest_func_t ztest_zap;
240241
ztest_func_t ztest_zap_parallel;
241242
ztest_func_t ztest_zil_commit;
243+
ztest_func_t ztest_zil_remount;
242244
ztest_func_t ztest_dmu_read_write_zcopy;
243245
ztest_func_t ztest_dmu_objset_create_destroy;
244246
ztest_func_t ztest_dmu_prealloc;
@@ -274,6 +276,7 @@ ztest_info_t ztest_info[] = {
274276
{ ztest_zap_parallel, 100, &zopt_always },
275277
{ ztest_split_pool, 1, &zopt_always },
276278
{ ztest_zil_commit, 1, &zopt_incessant },
279+
{ ztest_zil_remount, 1, &zopt_sometimes },
277280
{ ztest_dmu_read_write_zcopy, 1, &zopt_often },
278281
{ ztest_dmu_objset_create_destroy, 1, &zopt_often },
279282
{ ztest_dsl_prop_get_set, 1, &zopt_often },
@@ -1007,6 +1010,7 @@ ztest_zd_init(ztest_ds_t *zd, objset_t *os)
10071010
dmu_objset_name(os, zd->zd_name);
10081011
int l;
10091012

1013+
rw_init(&zd->zd_zilog_lock, NULL, RW_DEFAULT, NULL);
10101014
mutex_init(&zd->zd_dirobj_lock, NULL, MUTEX_DEFAULT, NULL);
10111015

10121016
for (l = 0; l < ZTEST_OBJECT_LOCKS; l++)
@@ -1022,6 +1026,7 @@ ztest_zd_fini(ztest_ds_t *zd)
10221026
int l;
10231027

10241028
mutex_destroy(&zd->zd_dirobj_lock);
1029+
rw_destroy(&zd->zd_zilog_lock);
10251030

10261031
for (l = 0; l < ZTEST_OBJECT_LOCKS; l++)
10271032
ztest_rll_destroy(&zd->zd_object_lock[l]);
@@ -1993,6 +1998,8 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
19931998
if (ztest_random(2) == 0)
19941999
io_type = ZTEST_IO_WRITE_TAG;
19952000

2001+
(void) rw_enter(&zd->zd_zilog_lock, RW_READER);
2002+
19962003
switch (io_type) {
19972004

19982005
case ZTEST_IO_WRITE_TAG:
@@ -2030,6 +2037,8 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
20302037
break;
20312038
}
20322039

2040+
(void) rw_exit(&zd->zd_zilog_lock);
2041+
20332042
umem_free(data, blocksize);
20342043
}
20352044

@@ -2084,6 +2093,8 @@ ztest_zil_commit(ztest_ds_t *zd, uint64_t id)
20842093
{
20852094
zilog_t *zilog = zd->zd_zilog;
20862095

2096+
(void) rw_enter(&zd->zd_zilog_lock, RW_READER);
2097+
20872098
zil_commit(zilog, ztest_random(ZTEST_OBJECTS));
20882099

20892100
/*
@@ -2095,6 +2106,31 @@ ztest_zil_commit(ztest_ds_t *zd, uint64_t id)
20952106
ASSERT(zd->zd_seq <= zilog->zl_commit_lr_seq);
20962107
zd->zd_seq = zilog->zl_commit_lr_seq;
20972108
mutex_exit(&zilog->zl_lock);
2109+
2110+
(void) rw_exit(&zd->zd_zilog_lock);
2111+
}
2112+
2113+
/*
2114+
* This function is designed to simulate the operations that occur during a
2115+
* mount/unmount operation. We hold the dataset across these operations in an
2116+
* attempt to expose any implicit assumptions about ZIL management.
2117+
*/
2118+
/* ARGSUSED */
2119+
void
2120+
ztest_zil_remount(ztest_ds_t *zd, uint64_t id)
2121+
{
2122+
objset_t *os = zd->zd_os;
2123+
2124+
(void) rw_enter(&zd->zd_zilog_lock, RW_WRITER);
2125+
2126+
/* zfsvfs_teardown() */
2127+
zil_close(zd->zd_zilog);
2128+
2129+
/* zfsvfs_setup() */
2130+
VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog);
2131+
zil_replay(os, zd, ztest_replay_vector);
2132+
2133+
(void) rw_exit(&zd->zd_zilog_lock);
20982134
}
20992135

21002136
/*

module/zfs/zil.c

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
*/
2121
/*
2222
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23+
* Copyright (c) 2011 by Delphix. All rights reserved.
2324
*/
2425

2526
/* Portions Copyright 2010 Robert Milkowski */
@@ -562,7 +563,7 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first)
562563

563564
if (!list_is_empty(&zilog->zl_lwb_list)) {
564565
ASSERT(zh->zh_claim_txg == 0);
565-
ASSERT(!keep_first);
566+
VERIFY(!keep_first);
566567
while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) {
567568
list_remove(&zilog->zl_lwb_list, lwb);
568569
if (lwb->lwb_buf != NULL)
@@ -1665,21 +1666,11 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys)
16651666
void
16661667
zil_free(zilog_t *zilog)
16671668
{
1668-
lwb_t *head_lwb;
16691669
int i;
16701670

16711671
zilog->zl_stop_sync = 1;
16721672

1673-
/*
1674-
* After zil_close() there should only be one lwb with a buffer.
1675-
*/
1676-
head_lwb = list_head(&zilog->zl_lwb_list);
1677-
if (head_lwb) {
1678-
ASSERT(head_lwb == list_tail(&zilog->zl_lwb_list));
1679-
list_remove(&zilog->zl_lwb_list, head_lwb);
1680-
zio_buf_free(head_lwb->lwb_buf, head_lwb->lwb_sz);
1681-
kmem_cache_free(zil_lwb_cache, head_lwb);
1682-
}
1673+
ASSERT(list_is_empty(&zilog->zl_lwb_list));
16831674
list_destroy(&zilog->zl_lwb_list);
16841675

16851676
avl_destroy(&zilog->zl_vdev_tree);
@@ -1719,6 +1710,10 @@ zil_open(objset_t *os, zil_get_data_t *get_data)
17191710
{
17201711
zilog_t *zilog = dmu_objset_zil(os);
17211712

1713+
ASSERT(zilog->zl_clean_taskq == NULL);
1714+
ASSERT(zilog->zl_get_data == NULL);
1715+
ASSERT(list_is_empty(&zilog->zl_lwb_list));
1716+
17221717
zilog->zl_get_data = get_data;
17231718
zilog->zl_clean_taskq = taskq_create("zil_clean", 1, minclsyspri,
17241719
2, 2, TASKQ_PREPOPULATE);
@@ -1732,7 +1727,7 @@ zil_open(objset_t *os, zil_get_data_t *get_data)
17321727
void
17331728
zil_close(zilog_t *zilog)
17341729
{
1735-
lwb_t *tail_lwb;
1730+
lwb_t *lwb;
17361731
uint64_t txg = 0;
17371732

17381733
zil_commit(zilog, 0); /* commit all itx */
@@ -1744,16 +1739,29 @@ zil_close(zilog_t *zilog)
17441739
* destroy the zl_clean_taskq.
17451740
*/
17461741
mutex_enter(&zilog->zl_lock);
1747-
tail_lwb = list_tail(&zilog->zl_lwb_list);
1748-
if (tail_lwb != NULL)
1749-
txg = tail_lwb->lwb_max_txg;
1742+
lwb = list_tail(&zilog->zl_lwb_list);
1743+
if (lwb != NULL)
1744+
txg = lwb->lwb_max_txg;
17501745
mutex_exit(&zilog->zl_lock);
17511746
if (txg)
17521747
txg_wait_synced(zilog->zl_dmu_pool, txg);
17531748

17541749
taskq_destroy(zilog->zl_clean_taskq);
17551750
zilog->zl_clean_taskq = NULL;
17561751
zilog->zl_get_data = NULL;
1752+
1753+
/*
1754+
* We should have only one LWB left on the list; remove it now.
1755+
*/
1756+
mutex_enter(&zilog->zl_lock);
1757+
lwb = list_head(&zilog->zl_lwb_list);
1758+
if (lwb != NULL) {
1759+
ASSERT(lwb == list_tail(&zilog->zl_lwb_list));
1760+
list_remove(&zilog->zl_lwb_list, lwb);
1761+
zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
1762+
kmem_cache_free(zil_lwb_cache, lwb);
1763+
}
1764+
mutex_exit(&zilog->zl_lock);
17571765
}
17581766

17591767
/*

0 commit comments

Comments
 (0)