From e6cfd633be909956c8a6998f478ceb32558c210c Mon Sep 17 00:00:00 2001 From: Will Andrews Date: Thu, 31 Dec 2015 17:38:59 +0100 Subject: [PATCH] Illumos 3749 - zfs event processing should work on R/O root filesystems 3749 zfs event processing should work on R/O root filesystems Reviewed by: Matthew Ahrens Reviewed by: Eric Schrock Approved by: Christopher Siden References: https://www.illumos.org/issues/3749 https://github.com/illumos/illumos-gate/commit/3cb69f7 Porting notes: - [include/sys/spa_impl.h] - ffe9d38 Add generic errata infrastructure - 1421c89 Add visibility in to arc_read - [include/sys/fm/fs/zfs.h] - 2668527 Add linux events - 6283f55 Support custom build directories and move includes - [module/zfs/spa_config.c] - Updated spa_config_sync() to match illumos with the exception of a Linux specific block. Ported-by: kernelOfTruth kerneloftruth@gmail.com Signed-off-by: Brian Behlendorf --- include/sys/fm/fs/zfs.h | 1 + include/sys/spa_impl.h | 1 + module/zfs/spa.c | 31 ++++++++++++++++++-- module/zfs/spa_config.c | 64 +++++++++++++++++++++++++++++------------ 4 files changed, 77 insertions(+), 20 deletions(-) diff --git a/include/sys/fm/fs/zfs.h b/include/sys/fm/fs/zfs.h index 0d7eadd4f445..26f400303d80 100644 --- a/include/sys/fm/fs/zfs.h +++ b/include/sys/fm/fs/zfs.h @@ -56,6 +56,7 @@ extern "C" { #define FM_EREPORT_ZFS_IO_FAILURE "io_failure" #define FM_EREPORT_ZFS_PROBE_FAILURE "probe_failure" #define FM_EREPORT_ZFS_LOG_REPLAY "log_replay" +#define FM_EREPORT_ZFS_CONFIG_CACHE_WRITE "config_cache_write" #define FM_EREPORT_ZFS_RESILVER_START "resilver.start" #define FM_EREPORT_ZFS_RESILVER_FINISH "resilver.finish" #define FM_EREPORT_ZFS_SCRUB_START "scrub.start" diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 0b49c7147b10..0bb6dccdc2f9 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -252,6 +252,7 @@ struct spa { uint64_t spa_deadman_synctime; /* deadman expiration timer */ uint64_t spa_errata; /* errata issues 
detected */ spa_stats_t spa_stats; /* assorted spa statistics */ + hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */ /* * spa_refcount & spa_config_lock must be the last elements diff --git a/module/zfs/spa.c b/module/zfs/spa.c index d7b800adfa61..ffefbd345810 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -82,6 +82,12 @@ #include "zfs_prop.h" #include "zfs_comutil.h" +/* + * The interval, in seconds, at which failed configuration cache file writes + * should be retried. + */ +static int zfs_ccw_retry_interval = 300; + typedef enum zti_modes { ZTI_MODE_FIXED, /* value is # of threads (min 1) */ ZTI_MODE_BATCH, /* cpu-intensive; value is ignored */ @@ -5912,13 +5918,34 @@ spa_async_resume(spa_t *spa) mutex_exit(&spa->spa_async_lock); } +static boolean_t +spa_async_tasks_pending(spa_t *spa) +{ + uint_t non_config_tasks; + uint_t config_task; + boolean_t config_task_suspended; + + non_config_tasks = spa->spa_async_tasks & ~SPA_ASYNC_CONFIG_UPDATE; + config_task = spa->spa_async_tasks & SPA_ASYNC_CONFIG_UPDATE; + if (spa->spa_ccw_fail_time == 0) { + config_task_suspended = B_FALSE; + } else { + config_task_suspended = + (gethrtime() - spa->spa_ccw_fail_time) < + ((hrtime_t)zfs_ccw_retry_interval * NANOSEC); + } + + return (non_config_tasks || (config_task && !config_task_suspended)); +} + static void spa_async_dispatch(spa_t *spa) { mutex_enter(&spa->spa_async_lock); - if (spa->spa_async_tasks && !spa->spa_async_suspended && + if (spa_async_tasks_pending(spa) && + !spa->spa_async_suspended && spa->spa_async_thread == NULL && - rootdir != NULL && !vn_is_readonly(rootdir)) + rootdir != NULL) spa->spa_async_thread = thread_create(NULL, 0, spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); mutex_exit(&spa->spa_async_lock); diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index 19432e0a024e..a62d25bd3aba 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -26,6 +26,7 @@ */ #include +#include #include #include #include @@ -145,22
+146,22 @@ spa_config_load(void) kobj_close_file(file); } -static void +static int spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) { size_t buflen; char *buf; vnode_t *vp; int oflags = FWRITE | FTRUNC | FCREAT | FOFFMAX; - int error; char *temp; + int err; /* * If the nvlist is empty (NULL), then remove the old cachefile. */ if (nvl == NULL) { - (void) vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE); - return; + err = vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE); + return (err == ENOENT ? 0 : err); } /* @@ -181,16 +182,16 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) * and overwritten in place. In the event of an error the file is * unlinked to make sure we always have a consistent view of the data. */ - error = vn_open(dp->scd_path, UIO_SYSSPACE, oflags, 0644, &vp, 0, 0); - if (error == 0) { - error = vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, + err = vn_open(dp->scd_path, UIO_SYSSPACE, oflags, 0644, &vp, 0, 0); + if (err == 0) { + err = vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, NULL); - if (error == 0) - error = VOP_FSYNC(vp, FSYNC, kcred, NULL); + if (err == 0) + err = VOP_FSYNC(vp, FSYNC, kcred, NULL); (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL); - if (error) + if (err) (void) vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE); } #else @@ -201,13 +202,14 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) */ (void) snprintf(temp, MAXPATHLEN, "%s.tmp", dp->scd_path); - error = vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0); - if (error == 0) { - if (vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE, - 0, RLIM64_INFINITY, kcred, NULL) == 0 && - VOP_FSYNC(vp, FSYNC, kcred, NULL) == 0) { - (void) vn_rename(temp, dp->scd_path, UIO_SYSSPACE); - } + err = vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0); + if (err == 0) { + err = vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE, + 0, RLIM64_INFINITY, kcred, NULL); + if (err == 0) + err = VOP_FSYNC(vp, FSYNC, kcred, NULL); + if (err == 0) + err =
vn_rename(temp, dp->scd_path, UIO_SYSSPACE); (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL); } @@ -216,6 +218,7 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) vmem_free(buf, buflen); kmem_free(temp, MAXPATHLEN); + return (err); } /* @@ -233,6 +236,8 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent) spa_config_dirent_t *dp, *tdp; nvlist_t *nvl; char *pool_name; + boolean_t ccw_failure; + int error = 0; ASSERT(MUTEX_HELD(&spa_namespace_lock)); @@ -244,6 +249,7 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent) * cachefile is changed, the new one is pushed onto this list, allowing * us to update previous cachefiles that no longer contain this pool. */ + ccw_failure = B_FALSE; for (dp = list_head(&target->spa_config_list); dp != NULL; dp = list_next(&target->spa_config_list, dp)) { spa_t *spa = NULL; @@ -290,10 +296,32 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent) mutex_exit(&spa->spa_props_lock); } - spa_config_write(dp, nvl); + error = spa_config_write(dp, nvl); + if (error != 0) + ccw_failure = B_TRUE; nvlist_free(nvl); } + if (ccw_failure) { + /* + * Keep trying so that configuration data is + * written if/when any temporary filesystem + * resource issues are resolved. + */ + if (target->spa_ccw_fail_time == 0) { + zfs_ereport_post(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE, + target, NULL, NULL, 0, 0); + } + target->spa_ccw_fail_time = gethrtime(); + spa_async_request(target, SPA_ASYNC_CONFIG_UPDATE); + } else { + /* + * Do not rate limit future attempts to update + * the config cache. + */ + target->spa_ccw_fail_time = 0; + } + /* * Remove any config entries older than the current one. */