From 0c2fbe1abd5e799cd42fc7f55ee0a7f05df8ac4a Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 18 Jul 2019 13:28:09 -0600 Subject: [PATCH] nvme-core: Fix deadlock when deleting the ctrl while scanning With multipath enabled, nvme_scan_work() can read from the device (through nvme_mpath_add_disk()). However, with fabrics, once ctrl->state is set to NVME_CTRL_DELETING, the reads will hang (see nvmf_check_ready()). After setting the state to deleting, nvme_remove_namespaces() will hang waiting for scan_work to flush and these tasks will hang. To fix this, ensure that IOs are failed if there are no available paths after removing a namespace. INFO: task kworker/u4:3:166 blocked for more than 120 seconds. Not tainted 5.2.0-rc6-vmlocalyes-00005-g808c8c2dc0cf #316 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. kworker/u4:3 D 0 166 2 0x80004000 Workqueue: nvme-wq nvme_scan_work Call Trace: __schedule+0x851/0x1400 schedule+0x99/0x210 io_schedule+0x21/0x70 do_read_cache_page+0xa57/0x1330 read_cache_page+0x4a/0x70 read_dev_sector+0xbf/0x380 amiga_partition+0xc4/0x1230 check_partition+0x30f/0x630 rescan_partitions+0x19a/0x980 __blkdev_get+0x85a/0x12f0 blkdev_get+0x2a5/0x790 __device_add_disk+0xe25/0x1250 device_add_disk+0x13/0x20 nvme_mpath_set_live+0x172/0x2b0 nvme_update_ns_ana_state+0x130/0x180 nvme_set_ns_ana_state+0x9a/0xb0 nvme_parse_ana_log+0x1c3/0x4a0 nvme_mpath_add_disk+0x157/0x290 nvme_validate_ns+0x1017/0x1bd0 nvme_scan_work+0x44d/0x6a0 process_one_work+0x7d7/0x1240 worker_thread+0x8e/0xff0 kthread+0x2c3/0x3b0 ret_from_fork+0x35/0x40 INFO: task kworker/u4:1:1034 blocked for more than 120 seconds. Not tainted 5.2.0-rc6-vmlocalyes-00005-g808c8c2dc0cf #316 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. kworker/u4:1 D 0 1034 2 0x80004000 Workqueue: nvme-delete-wq nvme_delete_ctrl_work Call Trace: __schedule+0x851/0x1400 schedule+0x99/0x210 schedule_timeout+0x390/0x830 wait_for_completion+0x1a7/0x310 __flush_work+0x241/0x5d0 flush_work+0x10/0x20 nvme_remove_namespaces+0x85/0x3d0 nvme_do_delete_ctrl+0xb4/0x1e0 nvme_delete_ctrl_work+0x15/0x20 process_one_work+0x7d7/0x1240 worker_thread+0x8e/0xff0 kthread+0x2c3/0x3b0 ret_from_fork+0x35/0x40 Signed-off-by: Logan Gunthorpe --- drivers/nvme/host/core.c | 2 ++ drivers/nvme/host/multipath.c | 47 ++++++++++++++++++++++++++++++----- drivers/nvme/host/nvme.h | 9 +++++-- 3 files changed, 50 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 76b87b8e72c1e..00399c3536fa9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3571,6 +3571,8 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl) struct nvme_ns *ns, *next; LIST_HEAD(ns_list); + nvme_mpath_clear_ctrl_paths(ctrl); + /* prevent racing with ns scanning */ flush_work(&ctrl->scan_work); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index a9a9276779708..ce72ba6d6ab9b 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -109,18 +109,34 @@ static const char *nvme_ana_state_names[] = { [NVME_ANA_CHANGE] = "change", }; -void nvme_mpath_clear_current_path(struct nvme_ns *ns) +bool nvme_mpath_clear_current_path(struct nvme_ns *ns) { struct nvme_ns_head *head = ns->head; + bool changed = false; int node; if (!head) - return; + return changed; for_each_node(node) { - if (ns == rcu_access_pointer(head->current_path[node])) + if (ns == rcu_access_pointer(head->current_path[node])) { rcu_assign_pointer(head->current_path[node], NULL); + changed = true; + } } + + return changed; +} + +void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) +{ + struct nvme_ns *ns; + + mutex_lock(&ctrl->scan_lock); + list_for_each_entry(ns, &ctrl->namespaces, list) + if (nvme_mpath_clear_current_path(ns)) + kblockd_schedule_work(&ns->head->requeue_work); + mutex_unlock(&ctrl->scan_lock); } static bool nvme_path_is_disabled(struct nvme_ns *ns) @@ -231,6 +247,25 @@ inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head) return ns; } +static bool nvme_available_path(struct nvme_ns_head *head) +{ + struct nvme_ns *ns; + + list_for_each_entry_rcu(ns, &head->list, siblings) { + switch (ns->ctrl->state) { + case NVME_CTRL_LIVE: + case NVME_CTRL_RESETTING: + case NVME_CTRL_CONNECTING: + /* fallthru */ + return true; + default: + break; + } + } + + return false; +} + static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, struct bio *bio) { @@ -257,14 +292,14 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, disk_devt(ns->head->disk), bio->bi_iter.bi_sector); ret = direct_make_request(bio); - } else if (!list_empty_careful(&head->list)) { - dev_warn_ratelimited(dev, "no path available - requeuing I/O\n"); + } else if (nvme_available_path(head)) { + dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n"); spin_lock_irq(&head->requeue_lock); bio_list_add(&head->requeue_list, bio); spin_unlock_irq(&head->requeue_lock); } else { - dev_warn_ratelimited(dev, "no path - failing I/O\n"); + dev_warn_ratelimited(dev, "no available path - failing I/O\n"); bio->bi_status = BLK_STS_IOERR; bio_endio(bio); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 716a876119c83..da3d130773c4d 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -496,7 +496,8 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head); int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); void nvme_mpath_uninit(struct nvme_ctrl *ctrl); void nvme_mpath_stop(struct nvme_ctrl *ctrl); -void nvme_mpath_clear_current_path(struct nvme_ns *ns); +bool nvme_mpath_clear_current_path(struct nvme_ns *ns); +void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl); struct nvme_ns *nvme_find_path(struct nvme_ns_head *head); static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) @@ -544,7 +545,11 @@ static inline void nvme_mpath_add_disk(struct nvme_ns *ns, static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head) { } -static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns) +static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns) +{ + return false; +} +static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) { } static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)