From eac7cc52c6b410e542af431b2ee93f3d7dbfb6af Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 6 Nov 2012 11:47:13 +0100 Subject: [PATCH 01/10] floppy: destroy floppy workqueue before cleaning up the queue We need to first destroy the floppy_wq workqueue before cleaning up the queue. Otherwise we might race with still pending work with the workqueue, but all the block queue already gone. This might lead to various oopses, such as CPU 0 Pid: 6, comm: kworker/u:0 Not tainted 3.7.0-rc4 #1 Bochs Bochs RIP: 0010:[] [] blk_peek_request+0xd5/0x1c0 RSP: 0000:ffff88000dc7dd88 EFLAGS: 00010092 RAX: 0000000000000001 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff88000f602688 RSI: ffffffff81fd95d8 RDI: 6b6b6b6b6b6b6b6b RBP: ffff88000dc7dd98 R08: ffffffff81fd95c8 R09: 0000000000000000 R10: ffffffff81fd9480 R11: 0000000000000001 R12: 6b6b6b6b6b6b6b6b R13: ffff88000dc7dfd8 R14: ffff88000dc7dfd8 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffffffff81e21000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000000 CR3: 0000000001e11000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process kworker/u:0 (pid: 6, threadinfo ffff88000dc7c000, task ffff88000dc5ecc0) Stack: 0000000000000000 0000000000000000 ffff88000dc7ddb8 ffffffff8134efee ffff88000dc7ddb8 0000000000000000 ffff88000dc7dde8 ffffffff814aef3c ffffffff81e75d80 ffff88000dc0c640 ffff88000fbfb000 ffffffff814aed90 Call Trace: [] blk_fetch_request+0xe/0x30 [] redo_fd_request+0x1ac/0x400 [] ? start_motor+0x130/0x130 [] process_one_work+0x136/0x450 [] ? manage_workers+0x205/0x2e0 [] worker_thread+0x14d/0x420 [] ? rescuer_thread+0x1a0/0x1a0 [] kthread+0xba/0xc0 [] ? __kthread_parkme+0x80/0x80 [] ret_from_fork+0x7a/0xb0 [] ? __kthread_parkme+0x80/0x80 Code: 0f 84 c0 00 00 00 83 f8 01 0f 85 e2 00 00 00 81 4b 40 00 00 80 00 48 89 df e8 58 f8 ff ff be fb ff ff ff fe ff ff <49> 8b 1c 24 49 39 dc 0f 85 2e ff ff ff 41 0f b6 84 24 28 04 00 RIP [] blk_peek_request+0xd5/0x1c0 RSP Reported-by: Fengguang Wu Tested-by: Fengguang Wu Signed-off-by: Jiri Kosina Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 1c49d7173966ae..2ddd64a9ffdee4 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4330,6 +4330,7 @@ static int __init do_floppy_init(void) out_unreg_blkdev: unregister_blkdev(FLOPPY_MAJOR, "fd"); out_put_disk: + destroy_workqueue(floppy_wq); for (drive = 0; drive < N_DRIVE; drive++) { if (!disks[drive]) break; @@ -4340,7 +4341,6 @@ static int __init do_floppy_init(void) } put_disk(disks[drive]); } - destroy_workqueue(floppy_wq); return err; } @@ -4555,6 +4555,8 @@ static void __exit floppy_module_exit(void) unregister_blkdev(FLOPPY_MAJOR, "fd"); platform_driver_unregister(&floppy_driver); + destroy_workqueue(floppy_wq); + for (drive = 0; drive < N_DRIVE; drive++) { del_timer_sync(&motor_off_timer[drive]); @@ -4578,7 +4580,6 @@ static void __exit floppy_module_exit(void) cancel_delayed_work_sync(&fd_timeout); cancel_delayed_work_sync(&fd_timer); - destroy_workqueue(floppy_wq); if (atomic_read(&usage_count)) floppy_release_irq_and_dma(); From a8c32a5c98943d370ea606a2e7dc04717eb92206 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 6 Nov 2012 12:24:26 +0100 Subject: [PATCH 02/10] dm: fix deadlock with request based dm and queue request_fn recursion Request based dm attempts to re-run the request queue off the request completion path. If used with a driver that potentially does end_io from its request_fn, we could deadlock trying to recurse back into request dispatch. Fix this by punting the request queue run to kblockd. Tested to fix a quickly reproducible deadlock in such a scenario. Cc: stable@kernel.org Acked-by: Alasdair G Kergon Signed-off-by: Jens Axboe --- drivers/md/dm.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 02db9183ca01e8..77e6eff41cae9b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -740,8 +740,14 @@ static void rq_completed(struct mapped_device *md, int rw, int run_queue) if (!md_in_flight(md)) wake_up(&md->wait); + /* + * Run this off this callpath, as drivers could invoke end_io while + * inside their request_fn (and holding the queue lock). Calling + * back into ->request_fn() could deadlock attempting to grab the + * queue lock again. + */ if (run_queue) - blk_run_queue(md->queue); + blk_run_queue_async(md->queue); /* * dm_put() must be at the end of this function. See the comment above From 3208795e612406df04a32b46eb5ea5fccfa51d69 Mon Sep 17 00:00:00 2001 From: Selvan Mani Date: Wed, 7 Nov 2012 06:03:37 -0700 Subject: [PATCH 03/10] mtip32xx: fix potential crash on SEC_ERASE_UNIT The mtip driver lifted this code from elsewhere and then added a special handling check for SEC_ERASE_UNIT. If the caller tries to do a security erase but passes no output data for the command then outbuf is not allocated and the driver duly explodes. Reported-by: Dan Carpenter Signed-off-by: Alan Cox Signed-off-by: Selvan Mani Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index adc6f36564cf3c..dfb7196bbde955 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -2218,8 +2218,8 @@ static int exec_drive_taskfile(struct driver_data *dd, fis.device); /* check for erase mode support during secure erase.*/ - if ((fis.command == ATA_CMD_SEC_ERASE_UNIT) - && (outbuf[0] & MTIP_SEC_ERASE_MODE)) { + if ((fis.command == ATA_CMD_SEC_ERASE_UNIT) && outbuf && + (outbuf[0] & MTIP_SEC_ERASE_MODE)) { erasemode = 1; } From eda45314922f500f3547d36d317efc05ed823e3e Mon Sep 17 00:00:00 2001 From: Selvan Mani Date: Wed, 7 Nov 2012 06:03:53 -0700 Subject: [PATCH 04/10] mtip32xx: Fix to make lba address correct in big-endian systems Earlier lba address was assigned directly to lba_low and lba_low_ex, which would result in a different number (bytes reversed) in big-endian systems. Now assigning lba address byte-by-byte to fis. Reported-by: Dan Carpenter Signed-off-by: Selvan Mani Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index dfb7196bbde955..df46b5a3f56fdc 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -2465,8 +2465,12 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t start, fis->opts = 1 << 7; fis->command = (dir == READ ? ATA_CMD_FPDMA_READ : ATA_CMD_FPDMA_WRITE); - *((unsigned int *) &fis->lba_low) = (start & 0xFFFFFF); - *((unsigned int *) &fis->lba_low_ex) = ((start >> 24) & 0xFFFFFF); + fis->lba_low = start & 0xFF; + fis->lba_mid = (start >> 8) & 0xFF; + fis->lba_hi = (start >> 16) & 0xFF; + fis->lba_low_ex = (start >> 24) & 0xFF; + fis->lba_mid_ex = (start >> 32) & 0xFF; + fis->lba_hi_ex = (start >> 40) & 0xFF; fis->device = 1 << 6; fis->features = nsect & 0xFF; fis->features_ex = (nsect >> 8) & 0xFF; From 4b9e884523f63f7bfd6fcbcdfe6f9f9545d98c13 Mon Sep 17 00:00:00 2001 From: Selvan Mani Date: Wed, 7 Nov 2012 06:03:56 -0700 Subject: [PATCH 05/10] mtip32xx: Fix incorrect mask used for erase mode Previous commit use value 3 for erasemode mask. Changing the mask to correct value to 2 Signed-off-by: Selvan Mani Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index 5f4a917bd8bbcf..8498e99666b2ed 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -34,7 +34,7 @@ #define PCIE_CONFIG_EXT_DEVICE_CONTROL_OFFSET 0x48 /* check for erase mode support during secure erase */ -#define MTIP_SEC_ERASE_MODE 0x3 +#define MTIP_SEC_ERASE_MODE 0x2 /* # of times to retry timed out/failed IOs */ #define MTIP_MAX_RETRIES 2 From 7c5d62388e88729775b10a1748f2810e413f1e51 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 8 Nov 2012 07:58:53 +0100 Subject: [PATCH 06/10] mtip32xx: fix shift larger than type warning If we're building a 32-bit kernel and CONFIG_LBADF isn't set, sector_t is 32-bits wide. The shifts by 32 and 40 are thus larger than we support. Cast the sector offset to a u64 to avoid these warnings. Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index df46b5a3f56fdc..faa5591b579f9e 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -2439,7 +2439,7 @@ static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd, * return value * None */ -static void mtip_hw_submit_io(struct driver_data *dd, sector_t start, +static void mtip_hw_submit_io(struct driver_data *dd, sector_t sector, int nsect, int nents, int tag, void *callback, void *data, int dir) { @@ -2447,6 +2447,7 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t start, struct mtip_port *port = dd->port; struct mtip_cmd *command = &port->commands[tag]; int dma_dir = (dir == READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + u64 start = sector; /* Map the scatter list for DMA access */ nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir); From 298d80152c895859bd128552db7a5b228e8a23f7 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 8 Nov 2012 17:35:38 +0800 Subject: [PATCH 07/10] mtip32xx: fix potential NULL pointer dereference in mtip_timeout_function() The dereference to port should be moved below the NULL test. dpatch engine is used to auto generate this patch. (https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index faa5591b579f9e..9694dd99bbbc72 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -559,7 +559,7 @@ static void mtip_timeout_function(unsigned long int data) struct mtip_cmd *command; int tag, cmdto_cnt = 0; unsigned int bit, group; - unsigned int num_command_slots = port->dd->slot_groups * 32; + unsigned int num_command_slots; unsigned long to, tagaccum[SLOTBITS_IN_LONGS]; if (unlikely(!port)) @@ -572,6 +572,7 @@ static void mtip_timeout_function(unsigned long int data) } /* clear the tag accumulator */ memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long)); + num_command_slots = port->dd->slot_groups * 32; for (tag = 0; tag < num_command_slots; tag++) { /* From 11cfb6ff736dc89b0447b8902d6912692303f6af Mon Sep 17 00:00:00 2001 From: Ed Cashin Date: Thu, 8 Nov 2012 19:17:15 -0500 Subject: [PATCH 08/10] aoe: avoid running request handler on plugged queue Calling the request handler directly on a plugged queue defeats the performance improvements provided by the plugging mechanism. Use the __blk_run_queue function instead of calling the request handler directly, so that we don't interfere with the block layer's ability to plug the queue. Signed-off-by: Ed Cashin Signed-off-by: Jens Axboe --- drivers/block/aoe/aoecmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 3804a0af3ef192..9fe4f1865558ab 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -935,7 +935,7 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail) /* cf. http://lkml.org/lkml/2006/10/31/28 */ if (!fastfail) - q->request_fn(q); + __blk_run_queue(q); } static void From 836413e8c78ecbc55aa31f3cb600f8ee1aa355a2 Mon Sep 17 00:00:00 2001 From: Selvan Mani Date: Wed, 14 Nov 2012 06:16:35 -0700 Subject: [PATCH 09/10] mtip32xx: Fix padding issue Hi Jens, Another tiny patch. Removed __packed before the struct smart_attr and added __packed at end of the structure to fix padding issue. Signed-off-by: Selvan Mani Signed-off-by: Asai Thambi S P Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index 8498e99666b2ed..b1742640556a78 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -155,14 +155,14 @@ enum { MTIP_DDF_REBUILD_FAILED_BIT = 8, }; -__packed struct smart_attr{ +struct smart_attr { u8 attr_id; u16 flags; u8 cur; u8 worst; u32 data; u8 res[3]; -}; +} __packed; /* Register Frame Information Structure (FIS), host to device. */ struct host_to_dev_fis { From 893d290f1d7496db97c9471bc352ad4a11dc8a25 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 22 Nov 2012 02:00:11 -0800 Subject: [PATCH 10/10] block: Don't access request after it might be freed After we've done __elv_add_request() and __blk_run_queue() in blk_execute_rq_nowait(), the request might finish and be freed immediately. Therefore checking if the type is REQ_TYPE_PM_RESUME isn't safe afterwards, because if it isn't, rq might be gone. Instead, check beforehand and stash the result in a temporary. This fixes crashes in blk_execute_rq_nowait() I get occasionally when running with lots of memory debugging options enabled -- I think this race is usually harmless because the window for rq to be reallocated is so small. Signed-off-by: Roland Dreier Cc: stable@kernel.org Signed-off-by: Jens Axboe --- block/blk-exec.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/block/blk-exec.c b/block/blk-exec.c index 8b6dc5bd4dd061..f71eac35c1b942 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -52,11 +52,17 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, rq_end_io_fn *done) { int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; + bool is_pm_resume; WARN_ON(irqs_disabled()); rq->rq_disk = bd_disk; rq->end_io = done; + /* + * need to check this before __blk_run_queue(), because rq can + * be freed before that returns. + */ + is_pm_resume = rq->cmd_type == REQ_TYPE_PM_RESUME; spin_lock_irq(q->queue_lock); @@ -71,7 +77,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, __elv_add_request(q, rq, where); __blk_run_queue(q); /* the queue is stopped so it won't be run */ - if (rq->cmd_type == REQ_TYPE_PM_RESUME) + if (is_pm_resume) q->request_fn(q); spin_unlock_irq(q->queue_lock); }