Skip to content

Commit

Permalink
bdev: open block device as files
Browse files Browse the repository at this point in the history
Add two new helpers to allow opening block devices as files.
This is not the final infrastructure. This still opens the block device
before opening a struct file. Until we have removed all references to
struct bdev_handle we can't switch the order:

* Introduce blk_to_file_flags() to translate from block-specific flags to
  flags usable to open a new file.
* Introduce bdev_file_open_by_{dev,path}().
* Introduce temporary sb_bdev_handle() helper to retrieve a struct
  bdev_handle from a block device file and update places that directly
  reference struct bdev_handle to rely on it.
* Don't count block device opens against the number of open files. A
  bdev_file_open_by_{dev,path}() file is never installed into any
  file descriptor table.

One idea that came to mind was to use kernel_tmpfile_open() which
would require us to pass a path and it would then call do_dentry_open()
going through the regular fops->open::blkdev_open() path. But then we're
back to the problem of routing block specific flags such as
BLK_OPEN_RESTRICT_WRITES through the open path and would have to waste
FMODE_* flags every time we add a new one. With this we can avoid using
a flag bit and we have more leeway in how we open block devices from
bdev_open_by_{dev,path}().

Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-1-adbd023e19cc@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
  • Loading branch information
brauner committed Feb 25, 2024
1 parent bac0a9e commit f3a6088
Show file tree
Hide file tree
Showing 9 changed files with 126 additions and 20 deletions.
101 changes: 97 additions & 4 deletions block/bdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ struct block_device *I_BDEV(struct inode *inode)
}
EXPORT_SYMBOL(I_BDEV);

struct block_device *file_bdev(struct file *bdev_file)
{
struct bdev_handle *handle = bdev_file->private_data;
return handle->bdev;
}
EXPORT_SYMBOL(file_bdev);

static void bdev_write_inode(struct block_device *bdev)
{
struct inode *inode = bdev->bd_inode;
Expand Down Expand Up @@ -368,12 +375,12 @@ static struct file_system_type bd_type = {
};

struct super_block *blockdev_superblock __ro_after_init;
struct vfsmount *blockdev_mnt __ro_after_init;
EXPORT_SYMBOL_GPL(blockdev_superblock);

void __init bdev_cache_init(void)
{
int err;
static struct vfsmount *bd_mnt __ro_after_init;

bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
Expand All @@ -382,10 +389,10 @@ void __init bdev_cache_init(void)
err = register_filesystem(&bd_type);
if (err)
panic("Cannot register bdev pseudo-fs");
bd_mnt = kern_mount(&bd_type);
if (IS_ERR(bd_mnt))
blockdev_mnt = kern_mount(&bd_type);
if (IS_ERR(blockdev_mnt))
panic("Cannot create bdev pseudo-fs");
blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */
blockdev_superblock = blockdev_mnt->mnt_sb; /* For writeback */
}

struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
Expand Down Expand Up @@ -911,6 +918,92 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
}
EXPORT_SYMBOL(bdev_open_by_dev);

/*
 * Translate a block layer open mode into the O_* flags used for a new file.
 *
 * BLK_OPEN_WRITE_IOCTL is a historical floppy driver quirk: ioctls are
 * allowed when the file was opened for writing, but reads and writes are
 * not. It is mirrored in the returned flags as O_RDWR | O_WRONLY, which is
 * also what a block device opened as O_RDWR | O_WRONLY ends up with.
 *
 * A mode that requests none of read, write or write-ioctl access triggers a
 * one-time warning and yields no access bits.
 */
static unsigned blk_to_file_flags(blk_mode_t mode)
{
	const blk_mode_t rw_mask = BLK_OPEN_READ | BLK_OPEN_WRITE;
	unsigned int flags;

	if ((mode & rw_mask) == rw_mask) {
		flags = O_RDWR;
	} else if (mode & BLK_OPEN_WRITE_IOCTL) {
		flags = O_RDWR | O_WRONLY;
	} else if (mode & BLK_OPEN_WRITE) {
		flags = O_WRONLY;
	} else if (mode & BLK_OPEN_READ) {
		flags = O_RDONLY; /* homeopathic, because O_RDONLY is 0 */
	} else {
		flags = 0;
		WARN_ON_ONCE(true);
	}

	/* Non-blocking opens carry over independently of the access mode. */
	if (mode & BLK_OPEN_NDELAY)
		flags |= O_NDELAY;

	return flags;
}

/*
 * Open the block device identified by @dev and wrap it in a struct file.
 *
 * The device is opened first through bdev_open_by_dev(); only then is a
 * pseudo file allocated on blockdev_mnt for the device inode. The resulting
 * handle is stored in the file's ->private_data.
 *
 * Returns the new file, or an ERR_PTR() if either the device open or the
 * file allocation fails. When the file allocation fails the freshly opened
 * handle is released again before returning.
 */
struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
				   const struct blk_holder_ops *hops)
{
	struct bdev_handle *handle;
	struct block_device *bdev;
	struct file *filp;
	unsigned int open_flags;

	handle = bdev_open_by_dev(dev, mode, holder, hops);
	if (IS_ERR(handle))
		return ERR_CAST(handle);
	bdev = handle->bdev;

	open_flags = blk_to_file_flags(mode) | O_LARGEFILE;
	filp = alloc_file_pseudo_noaccount(bdev->bd_inode, blockdev_mnt, "",
					   open_flags, &def_blk_fops);
	if (IS_ERR(filp)) {
		bdev_release(handle);
		return filp;
	}
	/* The file now exists: take a reference on the device inode. */
	ihold(bdev->bd_inode);

	filp->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
	if (bdev_nowait(bdev))
		filp->f_mode |= FMODE_NOWAIT;

	filp->f_mapping = bdev->bd_inode->i_mapping;
	filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
	filp->private_data = handle;
	return filp;
}
EXPORT_SYMBOL(bdev_file_open_by_dev);

/*
 * Open the block device named by @path and wrap it in a struct file.
 *
 * @path is resolved to a device number with lookup_bdev() and the open is
 * then delegated to bdev_file_open_by_dev(). If write access was requested
 * (BLK_OPEN_WRITE) but the device turns out to be read-only, the file is
 * dropped again and -EACCES is reported.
 *
 * Returns the new file or an ERR_PTR() on failure.
 */
struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode,
				    void *holder,
				    const struct blk_holder_ops *hops)
{
	struct bdev_handle *handle;
	struct file *filp;
	dev_t devt;
	int ret;

	ret = lookup_bdev(path, &devt);
	if (ret)
		return ERR_PTR(ret);

	filp = bdev_file_open_by_dev(devt, mode, holder, hops);
	if (IS_ERR(filp))
		return filp;

	/* Read-only opens need no further checking. */
	if (!(mode & BLK_OPEN_WRITE))
		return filp;

	handle = filp->private_data;
	if (!bdev_read_only(handle->bdev))
		return filp;

	/* Writable open of a read-only device: undo the open and refuse. */
	fput(filp);
	return ERR_PTR(-EACCES);
}
EXPORT_SYMBOL(bdev_file_open_by_path);

/**
* bdev_open_by_path - open a block device by name
* @path: path to the block device to open
Expand Down
2 changes: 1 addition & 1 deletion fs/cramfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,7 @@ static void cramfs_kill_sb(struct super_block *sb)
sb->s_mtd = NULL;
} else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) {
sync_blockdev(sb->s_bdev);
bdev_release(sb->s_bdev_handle);
fput(sb->s_bdev_file);
}
kfree(sbi);
}
Expand Down
2 changes: 1 addition & 1 deletion fs/f2fs/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -4247,7 +4247,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)

for (i = 0; i < max_devices; i++) {
if (i == 0)
FDEV(0).bdev_handle = sbi->sb->s_bdev_handle;
FDEV(0).bdev_handle = sb_bdev_handle(sbi->sb);
else if (!RDEV(i).path[0])
break;

Expand Down
2 changes: 1 addition & 1 deletion fs/jfs/jfs_logmgr.c
Original file line number Diff line number Diff line change
Expand Up @@ -1162,7 +1162,7 @@ static int open_inline_log(struct super_block *sb)
init_waitqueue_head(&log->syncwait);

set_bit(log_INLINELOG, &log->flag);
log->bdev_handle = sb->s_bdev_handle;
log->bdev_handle = sb_bdev_handle(sb);
log->base = addressPXD(&JFS_SBI(sb)->logpxd);
log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
(L2LOGPSIZE - sb->s_blocksize_bits);
Expand Down
2 changes: 1 addition & 1 deletion fs/romfs/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,7 @@ static void romfs_kill_sb(struct super_block *sb)
#ifdef CONFIG_ROMFS_ON_BLOCK
if (sb->s_bdev) {
sync_blockdev(sb->s_bdev);
bdev_release(sb->s_bdev_handle);
fput(sb->s_bdev_file);
}
#endif
}
Expand Down
18 changes: 9 additions & 9 deletions fs/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -1532,24 +1532,24 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
struct fs_context *fc)
{
blk_mode_t mode = sb_open_mode(sb_flags);
struct bdev_handle *bdev_handle;
struct file *bdev_file;
struct block_device *bdev;

bdev_handle = bdev_open_by_dev(sb->s_dev, mode, sb, &fs_holder_ops);
if (IS_ERR(bdev_handle)) {
bdev_file = bdev_file_open_by_dev(sb->s_dev, mode, sb, &fs_holder_ops);
if (IS_ERR(bdev_file)) {
if (fc)
errorf(fc, "%s: Can't open blockdev", fc->source);
return PTR_ERR(bdev_handle);
return PTR_ERR(bdev_file);
}
bdev = bdev_handle->bdev;
bdev = file_bdev(bdev_file);

/*
* This really should be in blkdev_get_by_dev, but right now can't due
* to legacy issues that require us to allow opening a block device node
* writable from userspace even for a read-only block device.
*/
if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) {
bdev_release(bdev_handle);
fput(bdev_file);
return -EACCES;
}

Expand All @@ -1560,11 +1560,11 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
if (atomic_read(&bdev->bd_fsfreeze_count) > 0) {
if (fc)
warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
bdev_release(bdev_handle);
fput(bdev_file);
return -EBUSY;
}
spin_lock(&sb_lock);
sb->s_bdev_handle = bdev_handle;
sb->s_bdev_file = bdev_file;
sb->s_bdev = bdev;
sb->s_bdi = bdi_get(bdev->bd_disk->bdi);
if (bdev_stable_writes(bdev))
Expand Down Expand Up @@ -1680,7 +1680,7 @@ void kill_block_super(struct super_block *sb)
generic_shutdown_super(sb);
if (bdev) {
sync_blockdev(bdev);
bdev_release(sb->s_bdev_handle);
fput(sb->s_bdev_file);
}
}

Expand Down
2 changes: 1 addition & 1 deletion fs/xfs/xfs_super.c
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ xfs_open_devices(
* Setup xfs_mount buffer target pointers
*/
error = -ENOMEM;
mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb->s_bdev_handle);
mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb_bdev_handle(sb));
if (!mp->m_ddev_targp)
goto out_close_rtdev;

Expand Down
7 changes: 7 additions & 0 deletions include/linux/blkdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <linux/sbitmap.h>
#include <linux/uuid.h>
#include <linux/xarray.h>
#include <linux/file.h>

struct module;
struct request_queue;
Expand Down Expand Up @@ -1474,6 +1475,7 @@ extern const struct blk_holder_ops fs_holder_ops;
(BLK_OPEN_READ | BLK_OPEN_RESTRICT_WRITES | \
(((flags) & SB_RDONLY) ? 0 : BLK_OPEN_WRITE))

/* @bdev_handle will be removed soon. */
struct bdev_handle {
struct block_device *bdev;
void *holder;
Expand All @@ -1484,6 +1486,10 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
const struct blk_holder_ops *hops);
struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode,
void *holder, const struct blk_holder_ops *hops);
struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
const struct blk_holder_ops *hops);
struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode,
void *holder, const struct blk_holder_ops *hops);
int bd_prepare_to_claim(struct block_device *bdev, void *holder,
const struct blk_holder_ops *hops);
void bd_abort_claiming(struct block_device *bdev, void *holder);
Expand All @@ -1494,6 +1500,7 @@ struct block_device *blkdev_get_no_open(dev_t dev);
void blkdev_put_no_open(struct block_device *bdev);

struct block_device *I_BDEV(struct inode *inode);
struct block_device *file_bdev(struct file *bdev_file);

#ifdef CONFIG_BLOCK
void invalidate_bdev(struct block_device *bdev);
Expand Down
10 changes: 8 additions & 2 deletions include/linux/fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -1228,8 +1228,8 @@ struct super_block {
#endif
struct hlist_bl_head s_roots; /* alternate root dentries for NFS */
struct list_head s_mounts; /* list of mounts; _not_ for fs use */
struct block_device *s_bdev;
struct bdev_handle *s_bdev_handle;
struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */
struct file *s_bdev_file;
struct backing_dev_info *s_bdi;
struct mtd_info *s_mtd;
struct hlist_node s_instances;
Expand Down Expand Up @@ -1327,6 +1327,12 @@ struct super_block {
struct list_head s_inodes_wb; /* writeback inodes */
} __randomize_layout;

/* Temporary helper that will go away. */
static inline struct bdev_handle *sb_bdev_handle(struct super_block *sb)
{
return sb->s_bdev_file->private_data;
}

static inline struct user_namespace *i_user_ns(const struct inode *inode)
{
return inode->i_sb->s_user_ns;
Expand Down

0 comments on commit f3a6088

Please sign in to comment.