|
|
@@ -57,6 +57,7 @@ |
|
|
#include <sys/vdev_indirect_mapping.h> |
|
|
#include <sys/vdev_indirect_births.h> |
|
|
#include <sys/vdev_initialize.h> |
|
|
#include <sys/vdev_trim.h> |
|
|
#include <sys/vdev_disk.h> |
|
|
#include <sys/metaslab.h> |
|
|
#include <sys/metaslab_impl.h> |
|
|
@@ -132,7 +133,7 @@ static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { |
|
|
* number of threads assigned to their taskqs using the ZTI_N(#) or ZTI_ONE |
|
|
* macros. Other operations process a large amount of data; the ZTI_BATCH |
|
|
* macro causes us to create a taskq oriented for throughput. Some operations |
|
|
* are so high frequency and short-lived that the taskq itself can become a a |
|
|
* are so high frequency and short-lived that the taskq itself can become a |
|
|
* point of lock contention. The ZTI_P(#, #) macro indicates that we need an |
|
|
* additional degree of parallelism specified by the number of threads per- |
|
|
* taskq and the number of taskqs; when dispatching an event in this case, the |
|
|
@@ -150,6 +151,7 @@ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { |
|
|
{ ZTI_P(12, 8), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */ |
|
|
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */ |
|
|
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* IOCTL */ |
|
|
{ ZTI_N(4), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* TRIM */ |
|
|
}; |
|
|
|
|
|
static void spa_sync_version(void *arg, dmu_tx_t *tx); |
|
|
@@ -554,6 +556,7 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) |
|
|
case ZPOOL_PROP_AUTOREPLACE: |
|
|
case ZPOOL_PROP_LISTSNAPS: |
|
|
case ZPOOL_PROP_AUTOEXPAND: |
|
|
case ZPOOL_PROP_AUTOTRIM: |
|
|
error = nvpair_value_uint64(elem, &intval); |
|
|
if (!error && intval > 1) |
|
|
error = SET_ERROR(EINVAL); |
|
|
@@ -1442,8 +1445,10 @@ spa_unload(spa_t *spa) |
|
|
spa_async_suspend(spa); |
|
|
|
|
|
if (spa->spa_root_vdev) { |
|
|
vdev_initialize_stop_all(spa->spa_root_vdev, |
|
|
VDEV_INITIALIZE_ACTIVE); |
|
|
vdev_t *root_vdev = spa->spa_root_vdev; |
|
|
vdev_initialize_stop_all(root_vdev, VDEV_INITIALIZE_ACTIVE); |
|
|
vdev_trim_stop_all(root_vdev, VDEV_TRIM_ACTIVE); |
|
|
vdev_autotrim_stop_all(spa); |
|
|
} |
|
|
|
|
|
/* |
|
|
@@ -3585,7 +3590,7 @@ spa_ld_get_props(spa_t *spa) |
|
|
spa_prop_find(spa, ZPOOL_PROP_MULTIHOST, &spa->spa_multihost); |
|
|
spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO, |
|
|
&spa->spa_dedup_ditto); |
|
|
|
|
|
spa_prop_find(spa, ZPOOL_PROP_AUTOTRIM, &spa->spa_autotrim); |
|
|
spa->spa_autoreplace = (autoreplace != 0); |
|
|
} |
|
|
|
|
|
@@ -4336,6 +4341,8 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport) |
|
|
|
|
|
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); |
|
|
vdev_initialize_restart(spa->spa_root_vdev); |
|
|
vdev_trim_restart(spa->spa_root_vdev); |
|
|
vdev_autotrim_restart(spa); |
|
|
spa_config_exit(spa, SCL_CONFIG, FTAG); |
|
|
} |
|
|
|
|
|
@@ -5338,6 +5345,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, |
|
|
spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); |
|
|
spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND); |
|
|
spa->spa_multihost = zpool_prop_default_numeric(ZPOOL_PROP_MULTIHOST); |
|
|
spa->spa_autotrim = zpool_prop_default_numeric(ZPOOL_PROP_AUTOTRIM); |
|
|
|
|
|
if (props != NULL) { |
|
|
spa_configfile_set(spa, props, B_FALSE); |
|
|
@@ -5746,14 +5754,16 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, |
|
|
|
|
|
/* |
|
|
* We're about to export or destroy this pool. Make sure |
|
|
* we stop all initializtion activity here before we |
|
|
* set the spa_final_txg. This will ensure that all |
|
|
* we stop all initialization and trim activity here before |
|
|
* we set the spa_final_txg. This will ensure that all |
|
|
* dirty data resulting from the initialization is |
|
|
* committed to disk before we unload the pool. |
|
|
*/ |
|
|
if (spa->spa_root_vdev != NULL) { |
|
|
vdev_initialize_stop_all(spa->spa_root_vdev, |
|
|
VDEV_INITIALIZE_ACTIVE); |
|
|
vdev_t *rvd = spa->spa_root_vdev; |
|
|
vdev_initialize_stop_all(rvd, VDEV_INITIALIZE_ACTIVE); |
|
|
vdev_trim_stop_all(rvd, VDEV_TRIM_ACTIVE); |
|
|
vdev_autotrim_stop_all(spa); |
|
|
} |
|
|
|
|
|
/* |
|
|
@@ -6376,7 +6386,6 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) |
|
|
vdev_remove_parent(cvd); |
|
|
} |
|
|
|
|
|
|
|
|
/* |
|
|
* We don't set tvd until now because the parent we just removed |
|
|
* may have been the previous top-level vdev. |
|
|
@@ -6490,7 +6499,7 @@ spa_vdev_initialize_impl(spa_t *spa, uint64_t guid, uint64_t cmd_type, |
|
|
* a previous initialization process which has completed but |
|
|
* the thread is not exited. |
|
|
*/ |
|
|
if (cmd_type == POOL_INITIALIZE_DO && |
|
|
if (cmd_type == POOL_INITIALIZE_START && |
|
|
(vd->vdev_initialize_thread != NULL || |
|
|
vd->vdev_top->vdev_removing)) { |
|
|
mutex_exit(&vd->vdev_initialize_lock); |
|
|
@@ -6507,7 +6516,7 @@ spa_vdev_initialize_impl(spa_t *spa, uint64_t guid, uint64_t cmd_type, |
|
|
} |
|
|
|
|
|
switch (cmd_type) { |
|
|
case POOL_INITIALIZE_DO: |
|
|
case POOL_INITIALIZE_START: |
|
|
vdev_initialize(vd); |
|
|
break; |
|
|
case POOL_INITIALIZE_CANCEL: |
|
|
@@ -6571,6 +6580,126 @@ spa_vdev_initialize(spa_t *spa, nvlist_t *nv, uint64_t cmd_type, |
|
|
return (total_errors); |
|
|
} |
|
|
|
|
|
/*
 * Validate a single leaf vdev and apply one manual TRIM command to it.
 *
 * Called with the spa_namespace_lock held (asserted below).  Takes the
 * config/state locks only long enough to look up and vet the vdev, then
 * serializes the actual state change on the per-vdev vdev_trim_lock.
 *
 * cmd_type is one of POOL_TRIM_START / POOL_TRIM_CANCEL /
 * POOL_TRIM_SUSPEND.  rate, partial, and secure are forwarded to
 * vdev_trim() for a START.  Vdevs stopped by CANCEL/SUSPEND are appended
 * to vd_list so the caller can wait for their threads to exit.
 *
 * Returns 0 on success, or a SET_ERROR()-wrapped errno:
 *   ENODEV     - no such guid, or the vdev has been detached
 *   EINVAL     - vdev is not a concrete leaf
 *   EROFS      - vdev is not writeable
 *   EOPNOTSUPP - device lacks TRIM (or secure TRIM, when requested)
 *   EBUSY      - START while a TRIM thread exists or top vdev is removing
 *   ESRCH      - CANCEL/SUSPEND with no matching active/suspended TRIM
 */
static int
spa_vdev_trim_impl(spa_t *spa, uint64_t guid, uint64_t cmd_type,
    uint64_t rate, boolean_t partial, boolean_t secure, list_t *vd_list)
{
	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);

	/* Look up vdev and ensure it's a leaf. */
	vdev_t *vd = spa_lookup_by_guid(spa, guid, B_FALSE);
	if (vd == NULL || vd->vdev_detached) {
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		return (SET_ERROR(ENODEV));
	} else if (!vd->vdev_ops->vdev_op_leaf || !vdev_is_concrete(vd)) {
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		return (SET_ERROR(EINVAL));
	} else if (!vdev_writeable(vd)) {
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		return (SET_ERROR(EROFS));
	} else if (!vd->vdev_has_trim) {
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		return (SET_ERROR(EOPNOTSUPP));
	} else if (secure && !vd->vdev_has_securetrim) {
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		return (SET_ERROR(EOPNOTSUPP));
	}
	/*
	 * Take the per-vdev TRIM lock before dropping the config/state
	 * locks so the vdev cannot change state under us.
	 */
	mutex_enter(&vd->vdev_trim_lock);
	spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);

	/*
	 * When we activate a TRIM action we check to see if the
	 * vdev_trim_thread is NULL. We do this instead of using the
	 * vdev_trim_state since there might be a previous TRIM process
	 * which has completed but the thread is not exited.
	 */
	if (cmd_type == POOL_TRIM_START &&
	    (vd->vdev_trim_thread != NULL || vd->vdev_top->vdev_removing)) {
		mutex_exit(&vd->vdev_trim_lock);
		return (SET_ERROR(EBUSY));
	} else if (cmd_type == POOL_TRIM_CANCEL &&
	    (vd->vdev_trim_state != VDEV_TRIM_ACTIVE &&
	    vd->vdev_trim_state != VDEV_TRIM_SUSPENDED)) {
		mutex_exit(&vd->vdev_trim_lock);
		return (SET_ERROR(ESRCH));
	} else if (cmd_type == POOL_TRIM_SUSPEND &&
	    vd->vdev_trim_state != VDEV_TRIM_ACTIVE) {
		mutex_exit(&vd->vdev_trim_lock);
		return (SET_ERROR(ESRCH));
	}

	switch (cmd_type) {
	case POOL_TRIM_START:
		vdev_trim(vd, rate, partial, secure);
		break;
	case POOL_TRIM_CANCEL:
		vdev_trim_stop(vd, VDEV_TRIM_CANCELED, vd_list);
		break;
	case POOL_TRIM_SUSPEND:
		vdev_trim_stop(vd, VDEV_TRIM_SUSPENDED, vd_list);
		break;
	default:
		panic("invalid cmd_type %llu", (unsigned long long)cmd_type);
	}
	mutex_exit(&vd->vdev_trim_lock);

	return (0);
}
|
|
|
|
|
/*
 * Initiates a manual TRIM for the requested vdevs. This kicks off individual
 * TRIM threads for each child vdev. These threads pass over all of the free
 * space in the vdev's metaslabs and issue TRIM commands for that space.
 *
 * nv maps vdev guids (uint64 nvpairs) to the vdevs to act on; cmd_type,
 * rate, partial, and secure are passed through to spa_vdev_trim_impl() for
 * each one.  Per-vdev failures are recorded in vdev_errlist, keyed by the
 * guid rendered as a decimal string.
 *
 * Returns the number of vdevs for which the command failed (0 on complete
 * success).
 */
int
spa_vdev_trim(spa_t *spa, nvlist_t *nv, uint64_t cmd_type, uint64_t rate,
    boolean_t partial, boolean_t secure, nvlist_t *vdev_errlist)
{
	int total_errors = 0;
	/* Collects vdevs whose TRIM threads must be waited on below. */
	list_t vd_list;

	list_create(&vd_list, sizeof (vdev_t),
	    offsetof(vdev_t, vdev_trim_node));

	/*
	 * We hold the namespace lock through the whole function
	 * to prevent any changes to the pool while we're starting or
	 * stopping TRIM. The config and state locks are held so that
	 * we can properly assess the vdev state before we commit to
	 * the TRIM operation.
	 */
	mutex_enter(&spa_namespace_lock);

	for (nvpair_t *pair = nvlist_next_nvpair(nv, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(nv, pair)) {
		uint64_t vdev_guid = fnvpair_value_uint64(pair);

		int error = spa_vdev_trim_impl(spa, vdev_guid, cmd_type,
		    rate, partial, secure, &vd_list);
		if (error != 0) {
			char guid_as_str[MAXNAMELEN];

			/* Report the failure against this guid's string key. */
			(void) snprintf(guid_as_str, sizeof (guid_as_str),
			    "%llu", (unsigned long long)vdev_guid);
			fnvlist_add_int64(vdev_errlist, guid_as_str, error);
			total_errors++;
		}
	}

	/* Wait for all TRIM threads to stop. */
	vdev_trim_stop_wait(spa, &vd_list);

	/* Sync out the TRIM state */
	txg_wait_synced(spa->spa_dsl_pool, 0);
	mutex_exit(&spa_namespace_lock);

	list_destroy(&vd_list);

	return (total_errors);
}
|
|
|
|
|
/* |
|
|
* Split a set of devices from their mirrors, and create a new pool from them. |
|
|
*/ |
|
|
@@ -6780,24 +6909,36 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, |
|
|
spa_async_suspend(newspa); |
|
|
|
|
|
/* |
|
|
* Temporarily stop the initializing activity. We set the state to |
|
|
* ACTIVE so that we know to resume the initializing once the split |
|
|
* has completed. |
|
|
* Temporarily stop the initializing and TRIM activity. We set the |
|
|
* state to ACTIVE so that we know to resume initializing or TRIM |
|
|
* once the split has completed. |
|
|
*/ |
|
|
list_t vd_list; |
|
|
list_create(&vd_list, sizeof (vdev_t), |
|
|
list_t vd_initialize_list; |
|
|
list_create(&vd_initialize_list, sizeof (vdev_t), |
|
|
offsetof(vdev_t, vdev_initialize_node)); |
|
|
|
|
|
list_t vd_trim_list; |
|
|
list_create(&vd_trim_list, sizeof (vdev_t), |
|
|
offsetof(vdev_t, vdev_trim_node)); |
|
|
|
|
|
for (c = 0; c < children; c++) { |
|
|
if (vml[c] != NULL) { |
|
|
mutex_enter(&vml[c]->vdev_initialize_lock); |
|
|
vdev_initialize_stop(vml[c], VDEV_INITIALIZE_ACTIVE, |
|
|
&vd_list); |
|
|
vdev_initialize_stop(vml[c], |
|
|
VDEV_INITIALIZE_ACTIVE, &vd_initialize_list); |
|
|
mutex_exit(&vml[c]->vdev_initialize_lock); |
|
|
|
|
|
mutex_enter(&vml[c]->vdev_trim_lock); |
|
|
vdev_trim_stop(vml[c], VDEV_TRIM_ACTIVE, &vd_trim_list); |
|
|
mutex_exit(&vml[c]->vdev_trim_lock); |
|
|
} |
|
|
} |
|
|
vdev_initialize_stop_wait(spa, &vd_list); |
|
|
list_destroy(&vd_list); |
|
|
|
|
|
vdev_initialize_stop_wait(spa, &vd_initialize_list); |
|
|
vdev_trim_stop_wait(spa, &vd_trim_list); |
|
|
|
|
|
list_destroy(&vd_initialize_list); |
|
|
list_destroy(&vd_trim_list); |
|
|
|
|
|
newspa->spa_config_source = SPA_CONFIG_SRC_SPLIT; |
|
|
|
|
|
@@ -6899,8 +7040,10 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, |
|
|
vml[c]->vdev_offline = B_FALSE; |
|
|
} |
|
|
|
|
|
/* restart initializing disks as necessary */ |
|
|
/* restart initializing or trimming disks as necessary */ |
|
|
spa_async_request(spa, SPA_ASYNC_INITIALIZE_RESTART); |
|
|
spa_async_request(spa, SPA_ASYNC_TRIM_RESTART); |
|
|
spa_async_request(spa, SPA_ASYNC_AUTOTRIM_RESTART); |
|
|
|
|
|
vdev_reopen(spa->spa_root_vdev); |
|
|
|
|
|
@@ -7283,6 +7426,22 @@ spa_async_thread(void *arg) |
|
|
mutex_exit(&spa_namespace_lock); |
|
|
} |
|
|
|
|
|
if (tasks & SPA_ASYNC_TRIM_RESTART) { |
|
|
mutex_enter(&spa_namespace_lock); |
|
|
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); |
|
|
vdev_trim_restart(spa->spa_root_vdev); |
|
|
spa_config_exit(spa, SCL_CONFIG, FTAG); |
|
|
mutex_exit(&spa_namespace_lock); |
|
|
} |
|
|
|
|
|
if (tasks & SPA_ASYNC_AUTOTRIM_RESTART) { |
|
|
mutex_enter(&spa_namespace_lock); |
|
|
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); |
|
|
vdev_autotrim_restart(spa); |
|
|
spa_config_exit(spa, SCL_CONFIG, FTAG); |
|
|
mutex_exit(&spa_namespace_lock); |
|
|
} |
|
|
|
|
|
/* |
|
|
* Let the world know that we're done. |
|
|
*/ |
|
|
@@ -7782,6 +7941,11 @@ spa_sync_props(void *arg, dmu_tx_t *tx) |
|
|
case ZPOOL_PROP_FAILUREMODE: |
|
|
spa->spa_failmode = intval; |
|
|
break; |
|
|
case ZPOOL_PROP_AUTOTRIM: |
|
|
spa->spa_autotrim = intval; |
|
|
spa_async_request(spa, |
|
|
SPA_ASYNC_AUTOTRIM_RESTART); |
|
|
break; |
|
|
case ZPOOL_PROP_AUTOEXPAND: |
|
|
spa->spa_autoexpand = intval; |
|
|
if (tx->tx_txg != TXG_INITIAL) |
|
|
|