diff --git a/TEST b/TEST index ebe6ef963fa6..a77d4dbdafe6 100644 --- a/TEST +++ b/TEST @@ -5,21 +5,21 @@ #TEST_PREPARE_SHARES="yes" ### ztest -#TEST_ZTEST_SKIP="yes" +TEST_ZTEST_SKIP="yes" #TEST_ZTEST_TIMEOUT=1800 #TEST_ZTEST_DIR="/var/tmp/" #TEST_ZTEST_OPTIONS="-V" #TEST_ZTEST_CORE_DIR="/mnt/zloop" ### zimport -#TEST_ZIMPORT_SKIP="yes" +TEST_ZIMPORT_SKIP="yes" #TEST_ZIMPORT_DIR="/var/tmp/zimport" #TEST_ZIMPORT_VERSIONS="master installed" #TEST_ZIMPORT_POOLS="zol-0.6.1 zol-0.6.2 master installed" #TEST_ZIMPORT_OPTIONS="-c" ### xfstests -#TEST_XFSTESTS_SKIP="yes" +TEST_XFSTESTS_SKIP="yes" #TEST_XFSTESTS_URL="https://github.com/behlendorf/xfstests/archive/" #TEST_XFSTESTS_VER="zfs.tar.gz" #TEST_XFSTESTS_POOL="tank" @@ -34,11 +34,11 @@ #TEST_ZFSTESTS_DISKSIZE="8G" #TEST_ZFSTESTS_ITERS="1" #TEST_ZFSTESTS_OPTIONS="-vx" -#TEST_ZFSTESTS_RUNFILE="linux.run" +TEST_ZFSTESTS_RUNFILE="pull-7926.run" #TEST_ZFSTESTS_TAGS="functional" ### zfsstress -#TEST_ZFSSTRESS_SKIP="yes" +TEST_ZFSSTRESS_SKIP="yes" #TEST_ZFSSTRESS_URL="https://github.com/nedbass/zfsstress/archive/" #TEST_ZFSSTRESS_VER="master.tar.gz" #TEST_ZFSSTRESS_RUNTIME=300 diff --git a/cmd/zed/agents/zfs_agents.c b/cmd/zed/agents/zfs_agents.c index 47e251a5e760..fcea4af0926b 100644 --- a/cmd/zed/agents/zfs_agents.c +++ b/cmd/zed/agents/zfs_agents.c @@ -57,9 +57,11 @@ typedef struct guid_search { uint64_t gs_pool_guid; uint64_t gs_vdev_guid; char *gs_devid; + boolean_t is_spare; + boolean_t is_l2arc; } guid_search_t; -static void +static boolean_t zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg) { guid_search_t *gsp = arg; @@ -73,8 +75,27 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg) if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { for (c = 0; c < children; c++) - zfs_agent_iter_vdev(zhp, child[c], gsp); - return; + if (zfs_agent_iter_vdev(zhp, child[c], gsp)) + return (B_TRUE); + } + /* + * Iterate over any spares and cache devices + */ + if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES, + &child, &children) == 0) { + for (c = 0; c < children; c++) + if (zfs_agent_iter_vdev(zhp, child[c], gsp)) { + gsp->is_spare = B_TRUE; + return (B_TRUE); + } + } + if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE, + &child, &children) == 0) { + for (c = 0; c < children; c++) + if (zfs_agent_iter_vdev(zhp, child[c], gsp)) { + gsp->is_l2arc = B_TRUE; + return (B_TRUE); + } } /* * On a devid match, grab the vdev guid @@ -84,7 +105,10 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg) (strcmp(gsp->gs_devid, path) == 0)) { (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &gsp->gs_vdev_guid); + return (B_TRUE); } + + return (B_FALSE); } static int @@ -93,13 +117,16 @@ zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg) guid_search_t *gsp = arg; nvlist_t *config, *nvl; + fprintf(stderr, "zfs_agent_iter_pool: search %lu\n", gsp->gs_vdev_guid); /* * For each vdev in this pool, look for a match by devid */ if ((config = zpool_get_config(zhp, NULL)) != NULL) { if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvl) == 0) { - zfs_agent_iter_vdev(zhp, nvl, gsp); + fprintf(stderr, "zfs_agent_iter_pool: vdev_tree\n"); + nvlist_print(stderr, nvl); + (void) zfs_agent_iter_vdev(zhp, nvl, gsp); } } /* @@ -108,6 +135,9 @@ zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg) if (gsp->gs_vdev_guid) { (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &gsp->gs_pool_guid); + fprintf(stderr, "zfs_agent_iter_pool: match found for %lu\n", gsp->gs_vdev_guid); + } else { + fprintf(stderr, "zfs_agent_iter_pool: match not found for %lu\n", gsp->gs_vdev_guid); } zpool_close(zhp); @@ -118,6 +148,7 @@ void zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl) { agent_event_t *event; + boolean_t is_spare = B_FALSE, is_l2arc = B_FALSE; if (subclass == NULL) subclass = ""; @@ -157,32 +188,51 @@ zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl) (void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid); /* - * For multipath, ZFS_EV_VDEV_GUID is missing so find it. + * For multipath, spares and L2ARC devices ZFS_EV_VDEV_GUID is + * missing so find it. */ if (vdev_guid == 0) { guid_search_t search = { 0 }; + int ret = 0; (void) nvlist_lookup_string(nvl, DEV_IDENTIFIER, &search.gs_devid); + fprintf(stderr, "zfs_agent_post_event: search by devid=%s\n", search.gs_devid); - (void) zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, + ret = zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search); + fprintf(stderr, "zpool_iter: %d\n", ret); pool_guid = search.gs_pool_guid; vdev_guid = search.gs_vdev_guid; + is_spare = search.is_spare; + is_l2arc = search.is_l2arc; } (void) nvlist_add_uint64(payload, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, pool_guid); (void) nvlist_add_uint64(payload, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vdev_guid); + if (is_spare) + (void) nvlist_add_string(payload, + FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_SPARE); + if (is_l2arc) + (void) nvlist_add_string(payload, + FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, + VDEV_TYPE_L2CACHE); (void) gettimeofday(&tv, NULL); tod[0] = tv.tv_sec; tod[1] = tv.tv_usec; (void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2); + fprintf(stderr, "--- being raw udev ---\n"); + nvlist_print(stderr, nvl); + fprintf(stderr, "--- end raw udev ---\n"); zed_log_msg(LOG_INFO, "agent post event: mapping '%s' to '%s'", EC_DEV_REMOVE, class); + fprintf(stderr, "--- being payload ---\n"); + nvlist_print(stderr, payload); + fprintf(stderr, "--- end payload ---\n"); } (void) strlcpy(event->ae_class, class, sizeof (event->ae_class)); diff --git a/cmd/zed/agents/zfs_retire.c b/cmd/zed/agents/zfs_retire.c index 5a090e32f699..a16cc4c297da 100644 --- a/cmd/zed/agents/zfs_retire.c +++ b/cmd/zed/agents/zfs_retire.c @@ -126,6 +126,15 @@ find_vdev(libzfs_handle_t *zhdl, nvlist_t *nv, uint64_t search_guid) return (ret); } + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, + &child, &children) != 0) + return (NULL); + + for (c = 0; c < children; c++) { + if ((ret = find_vdev(zhdl, child[c], search_guid)) != NULL) + return (ret); + } + return (NULL); } @@ -308,6 +317,8 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, * check for an available spare and continue. */ if (strcmp(class, "resource.fs.zfs.removed") == 0) { + char *type; + if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, &pool_guid) != 0 || nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, @@ -318,6 +329,32 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, &vdev)) == NULL) return; + /* Can't replace a spare with another one: offline the device */ + if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, + &type) == 0 && strcmp(type, VDEV_TYPE_SPARE) == 0) { + char *dev_name; + + dev_name = zpool_vdev_name(NULL, zhp, vdev, B_FALSE); + fmd_hdl_debug(hdl, "zpool_vdev_offline '%s'", dev_name); + zpool_vdev_offline(zhp, dev_name, B_TRUE); + free(dev_name); + zpool_close(zhp); + return; + } + + /* Can't replace a l2arc with a spare: offline the device */ + if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, + &type) == 0 && strcmp(type, VDEV_TYPE_L2CACHE) == 0) { + char *dev_name; + + dev_name = zpool_vdev_name(NULL, zhp, vdev, B_FALSE); + fmd_hdl_debug(hdl, "zpool_vdev_offline '%s'", dev_name); + zpool_vdev_offline(zhp, dev_name, B_TRUE); + free(dev_name); + zpool_close(zhp); + return; + } + if (fmd_prop_get_int32(hdl, "spare_on_remove")) replace_with_spare(hdl, zhp, vdev); zpool_close(zhp); diff --git a/lib/libzfs/libzfs_config.c b/lib/libzfs/libzfs_config.c index 67379d07211a..fa8ed8c61331 100644 --- a/lib/libzfs/libzfs_config.c +++ b/lib/libzfs/libzfs_config.c @@ -399,7 +399,7 @@ zpool_iter(libzfs_handle_t *hdl, zpool_iter_f func, void *data) * namespace AVL tree. */ if (!hdl->libzfs_pool_iter && namespace_reload(hdl) != 0) - return (-1); + return (-2); hdl->libzfs_pool_iter++; for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL; diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index a4109543dfc3..71d900b9c183 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -803,8 +803,10 @@ vdev_disk_io_done(zio_t *zio) if (zio->io_error == EIO) { vdev_t *v = zio->io_vd; vdev_disk_t *vd = v->vdev_tsd; - - if (check_disk_change(vd->vd_bdev)) { + int ret = check_disk_change(vd->vd_bdev); + zfs_dbgmsg("check_disk_change(%s) = %d", + vd->vd_bdev->bd_disk->disk_name, ret); + if (ret) { vdev_bdev_invalidate(vd->vd_bdev); v->vdev_remove_wanted = B_TRUE; spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE); diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 4b41c3f743ca..19931b2f1e63 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -536,9 +536,10 @@ tests = ['exec_001_pos', 'exec_002_neg'] tags = ['functional', 'exec'] [tests/functional/fault] -tests = ['auto_online_001_pos', 'auto_replace_001_pos', 'auto_spare_001_pos', - 'auto_spare_002_pos', 'auto_spare_ashift', 'auto_spare_multiple', - 'scrub_after_resilver', 'decrypt_fault', 'decompress_fault'] +tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_replace_001_pos', + 'auto_spare_001_pos', 'auto_spare_002_pos', 'auto_spare_ashift', + 'auto_spare_multiple', 'scrub_after_resilver', 'decrypt_fault', + 'decompress_fault'] tags = ['functional', 'fault'] [tests/functional/features/async_destroy] diff --git a/tests/runfiles/pull-7926.run b/tests/runfiles/pull-7926.run new file mode 100644 index 000000000000..9a6e44894fd1 --- /dev/null +++ b/tests/runfiles/pull-7926.run @@ -0,0 +1,16 @@ +[DEFAULT] +pre = setup +quiet = False +pre_user = root +user = root +timeout = 600 +post_user = root +post = cleanup +outputdir = /var/tmp/test_results + +[tests/functional/fault] +tests = ['auto_offline_001_pos', 'auto_offline_001_pos', 'auto_offline_001_pos', + 'auto_offline_001_pos', 'auto_offline_001_pos', 'auto_offline_001_pos', + 'auto_offline_001_pos', 'auto_offline_001_pos', 'auto_offline_001_pos'] +tags = ['functional'] + diff --git a/tests/zfs-tests/tests/functional/fault/Makefile.am b/tests/zfs-tests/tests/functional/fault/Makefile.am index 285e331a1a1d..a4f51181e335 100644 --- a/tests/zfs-tests/tests/functional/fault/Makefile.am +++ b/tests/zfs-tests/tests/functional/fault/Makefile.am @@ -2,6 +2,7 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/fault dist_pkgdata_SCRIPTS = \ setup.ksh \ cleanup.ksh \ + auto_offline_001_pos.ksh \ auto_online_001_pos.ksh \ auto_replace_001_pos.ksh \ auto_spare_001_pos.ksh \ diff --git a/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh new file mode 100755 index 000000000000..372368b2ac36 --- /dev/null +++ b/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh @@ -0,0 +1,77 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Intel Corporation. All rights reserved. +# Copyright 2018, loli10K . All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/fault/fault.cfg + +# +# DESCRIPTION: +# Testing Fault Management Agent ZED Logic - Physically removed L2ARC device +# is offlined +# +# STRATEGY: +# 1. Create a pool with a L2ARC device +# 2. Simulate physical removal of L2ARC device +# 3. Verify the device is offlined +# +verify_runnable "both" + +if is_linux; then + load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS '512b' +else + log_unsupported "scsi debug module unsupported" +fi + +function cleanup +{ + destroy_pool $TESTPOOL + rm -f $FILE_DEVICE + unload_scsi_debug +} + +log_assert "ZED detects physically removed L2ARC device" + +log_onexit cleanup + +FILE_DEVICE="$TEST_BASE_DIR/file-vdev" +L2ARC_DEVICE=$(get_debug_device) + +# 1. Create a pool with a L2ARC device +truncate -s $SPA_MINDEVSIZE $FILE_DEVICE +log_must zpool create $TESTPOOL $FILE_DEVICE cache $L2ARC_DEVICE + +# 2. Simulate physical removal of L2ARC device +remove_disk $L2ARC_DEVICE + +# 3. Verify the device is offlined +#log_must wait_vdev_state $TESTPOOL $L2ARC_DEVICE "OFFLINE" +wait_vdev_state $TESTPOOL $L2ARC_DEVICE "OFFLINE" +ret=$? +if [[ $ret != 0 ]]; then + zpool status + zpool status -g + zpool status -LP + cat $ZED_LOG + truncate -s 0 $ZED_LOG + log_fail "" +fi + +log_pass "ZED detects physically removed L2ARC device"