Skip to content

Commit ed5cc70

Browse files
jankarabrauner
authored and committed
block: Add config option to not allow writing to mounted devices
Writing to mounted devices is dangerous and can lead to filesystem corruption as well as crashes. Furthermore, syzbot comes up with more and more involved examples of how to corrupt a block device under a mounted filesystem, leading to kernel crashes and reports we can do nothing about. Add tracking of writers to each block device and a kernel cmdline argument which controls whether other writeable opens to block devices opened with the BLK_OPEN_RESTRICT_WRITES flag are allowed. We will make filesystems use this flag for the devices they use. Note that this effectively only prevents modification of the particular block device's page cache by other writers. The actual device content can still be modified by other means - e.g. by issuing direct SCSI commands, by doing writes through devices lower in the storage stack (e.g. in case loop devices, DM, or MD are involved) etc. But blocking direct modifications of the block device page cache is enough to give filesystems a chance to perform data validation when loading data from the underlying storage and thus prevent kernel crashes. Syzbot can use this cmdline option to avoid uninteresting crashes. Also, users whose userspace setup does not need writing to mounted block devices can set this option for hardening. Link: https://lore.kernel.org/all/60788e5d-5c7c-1142-e554-c21d709acfd9@linaro.org Signed-off-by: Jan Kara <jack@suse.cz> Link: https://lore.kernel.org/r/20231101174325.10596-3-jack@suse.cz Reviewed-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Christian Brauner <brauner@kernel.org>
1 parent cd34758 commit ed5cc70

4 files changed

Lines changed: 97 additions & 1 deletion

File tree

block/Kconfig

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,26 @@ config BLK_DEV_INTEGRITY_T10
7878
select CRC_T10DIF
7979
select CRC64_ROCKSOFT
8080

81+
config BLK_DEV_WRITE_MOUNTED
82+
bool "Allow writing to mounted block devices"
83+
default y
84+
help
85+
When a block device is mounted, writing to its buffer cache is very
86+
likely going to cause filesystem corruption. It is also rather easy to
87+
crash the kernel in this way since the filesystem has no practical way
88+
of detecting these writes to buffer cache and verifying its metadata
89+
integrity. However there are some setups that need this capability
90+
like running fsck on read-only mounted root device, modifying some
91+
features on mounted ext4 filesystem, and similar. If you say N, the
92+
kernel will prevent processes from writing to block devices that are
93+
mounted by filesystems, which provides some more protection from runaway
94+
privileged processes and generally makes it much harder to crash
95+
filesystem drivers. Note however that this does not prevent
96+
underlying device(s) from being modified by other means, e.g. by
97+
directly submitting SCSI commands or through access to lower layers of
98+
storage stack. If in doubt, say Y. The configuration can be overridden
99+
with the bdev_allow_write_mounted boot option.
100+
81101
config BLK_DEV_ZONED
82102
bool "Zoned block device support"
83103
select MQ_IOSCHED_DEADLINE

block/bdev.c

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@
3030
#include "../fs/internal.h"
3131
#include "blk.h"
3232

33+
/* Should we allow writing to mounted block devices? */
34+
/*
 * Initial value comes from CONFIG_BLK_DEV_WRITE_MOUNTED; the
 * "bdev_allow_write_mounted=" boot parameter can override it.
 */
static bool bdev_allow_write_mounted = IS_ENABLED(CONFIG_BLK_DEV_WRITE_MOUNTED);
35+
3336
struct bdev_inode {
3437
struct block_device bdev;
3538
struct inode vfs_inode;
@@ -730,7 +733,58 @@ void blkdev_put_no_open(struct block_device *bdev)
730733
{
731734
put_device(&bdev->bd_device);
732735
}
733-
736+
737+
static bool bdev_writes_blocked(struct block_device *bdev)
738+
{
739+
return bdev->bd_writers == -1;
740+
}
741+
742+
static void bdev_block_writes(struct block_device *bdev)
743+
{
744+
bdev->bd_writers = -1;
745+
}
746+
747+
static void bdev_unblock_writes(struct block_device *bdev)
748+
{
749+
bdev->bd_writers = 0;
750+
}
751+
752+
/*
 * Decide whether an open of @bdev with @mode may proceed.
 *
 * Returns true unconditionally when bdev_allow_write_mounted is set.
 * Otherwise returns false if @mode requests BLK_OPEN_WRITE while writes to
 * @bdev are blocked, or if @mode requests BLK_OPEN_RESTRICT_WRITES while
 * bd_writers is positive; returns true in every other case.
 */
static bool bdev_may_open(struct block_device *bdev, blk_mode_t mode)
{
	if (bdev_allow_write_mounted)
		return true;

	/* A writer cannot come in while writes are blocked. */
	if ((mode & BLK_OPEN_WRITE) && bdev_writes_blocked(bdev))
		return false;

	/* A write-restricting open cannot come in while writers exist. */
	return !((mode & BLK_OPEN_RESTRICT_WRITES) && bdev->bd_writers > 0);
}
763+
764+
/*
 * Record the write access taken by an open of @bdev with @mode.
 *
 * Does nothing when bdev_allow_write_mounted is set. Otherwise an open with
 * BLK_OPEN_RESTRICT_WRITES blocks writes on @bdev, and an open with only
 * BLK_OPEN_WRITE increments bd_writers. BLK_OPEN_RESTRICT_WRITES takes
 * precedence when both flags are present.
 */
static void bdev_claim_write_access(struct block_device *bdev, blk_mode_t mode)
{
	if (bdev_allow_write_mounted)
		return;

	if (mode & BLK_OPEN_RESTRICT_WRITES) {
		/* Exclusive write access. */
		bdev_block_writes(bdev);
		return;
	}

	/* Shared write access. */
	if (mode & BLK_OPEN_WRITE)
		bdev->bd_writers += 1;
}
775+
776+
/*
 * Give back the write access that an open of @bdev with @mode had taken.
 *
 * Does nothing when bdev_allow_write_mounted is set. Otherwise an open with
 * BLK_OPEN_RESTRICT_WRITES unblocks writes on @bdev, and an open with only
 * BLK_OPEN_WRITE decrements bd_writers. BLK_OPEN_RESTRICT_WRITES takes
 * precedence when both flags are present.
 */
static void bdev_yield_write_access(struct block_device *bdev, blk_mode_t mode)
{
	if (bdev_allow_write_mounted)
		return;

	if (mode & BLK_OPEN_RESTRICT_WRITES) {
		/* Exclusive write access. */
		bdev_unblock_writes(bdev);
		return;
	}

	/* Shared write access. */
	if (mode & BLK_OPEN_WRITE)
		bdev->bd_writers -= 1;
}
787+
734788
/**
735789
* bdev_open_by_dev - open a block device by device number
736790
* @dev: device number of block device to open
@@ -773,6 +827,10 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
773827
if (ret)
774828
goto free_handle;
775829

830+
/* Blocking writes requires exclusive opener */
831+
if (mode & BLK_OPEN_RESTRICT_WRITES && !holder)
832+
return ERR_PTR(-EINVAL);
833+
776834
bdev = blkdev_get_no_open(dev);
777835
if (!bdev) {
778836
ret = -ENXIO;
@@ -800,12 +858,16 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
800858
goto abort_claiming;
801859
if (!try_module_get(disk->fops->owner))
802860
goto abort_claiming;
861+
ret = -EBUSY;
862+
if (!bdev_may_open(bdev, mode))
863+
goto abort_claiming;
803864
if (bdev_is_partition(bdev))
804865
ret = blkdev_get_part(bdev, mode);
805866
else
806867
ret = blkdev_get_whole(bdev, mode);
807868
if (ret)
808869
goto put_module;
870+
bdev_claim_write_access(bdev, mode);
809871
if (holder) {
810872
bd_finish_claiming(bdev, holder, hops);
811873

@@ -901,6 +963,8 @@ void bdev_release(struct bdev_handle *handle)
901963
sync_blockdev(bdev);
902964

903965
mutex_lock(&disk->open_mutex);
966+
bdev_yield_write_access(bdev, handle->mode);
967+
904968
if (handle->holder)
905969
bd_end_claim(bdev, handle->holder);
906970

@@ -1069,3 +1133,12 @@ void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
10691133

10701134
blkdev_put_no_open(bdev);
10711135
}
1136+
1137+
static int __init setup_bdev_allow_write_mounted(char *str)
1138+
{
1139+
if (kstrtobool(str, &bdev_allow_write_mounted))
1140+
pr_warn("Invalid option string for bdev_allow_write_mounted:"
1141+
" '%s'\n", str);
1142+
return 1;
1143+
}
1144+
__setup("bdev_allow_write_mounted=", setup_bdev_allow_write_mounted);

include/linux/blk_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ struct block_device {
6666
#ifdef CONFIG_FAIL_MAKE_REQUEST
6767
bool bd_make_it_fail;
6868
#endif
69+
int bd_writers;
6970
/*
7071
* keep this out-of-line as it's both big and not needed in the fast
7172
* path

include/linux/blkdev.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ typedef unsigned int __bitwise blk_mode_t;
124124
#define BLK_OPEN_NDELAY ((__force blk_mode_t)(1 << 3))
125125
/* open for "writes" only for ioctls (special hack for floppy.c) */
126126
#define BLK_OPEN_WRITE_IOCTL ((__force blk_mode_t)(1 << 4))
127+
/* open is exclusive wrt all other BLK_OPEN_WRITE opens to the device */
128+
#define BLK_OPEN_RESTRICT_WRITES ((__force blk_mode_t)(1 << 5))
127129

128130
struct gendisk {
129131
/*

0 commit comments

Comments
 (0)