|
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
| 2 | +/* |
| 3 | + * Copyright IBM Corporation, 2021 |
| 4 | + * |
| 5 | + * Author: Mike Rapoport <rppt@linux.ibm.com> |
| 6 | + */ |
| 7 | + |
| 8 | +#include <linux/mm.h> |
| 9 | +#include <linux/fs.h> |
| 10 | +#include <linux/swap.h> |
| 11 | +#include <linux/mount.h> |
| 12 | +#include <linux/memfd.h> |
| 13 | +#include <linux/bitops.h> |
| 14 | +#include <linux/printk.h> |
| 15 | +#include <linux/pagemap.h> |
| 16 | +#include <linux/syscalls.h> |
| 17 | +#include <linux/pseudo_fs.h> |
| 18 | +#include <linux/secretmem.h> |
| 19 | +#include <linux/set_memory.h> |
| 20 | +#include <linux/sched/signal.h> |
| 21 | + |
| 22 | +#include <uapi/linux/magic.h> |
| 23 | + |
| 24 | +#include <asm/tlbflush.h> |
| 25 | + |
| 26 | +#include "internal.h" |
| 27 | + |
| 28 | +#undef pr_fmt |
| 29 | +#define pr_fmt(fmt) "secretmem: " fmt |
| 30 | + |
| 31 | +/* |
| 32 | + * Define mode and flag masks to allow validation of the system call |
| 33 | + * parameters. |
| 34 | + */ |
| 35 | +#define SECRETMEM_MODE_MASK (0x0) |
| 36 | +#define SECRETMEM_FLAGS_MASK SECRETMEM_MODE_MASK |
| 37 | + |
| 38 | +static bool secretmem_enable __ro_after_init; |
| 39 | +module_param_named(enable, secretmem_enable, bool, 0400); |
| 40 | +MODULE_PARM_DESC(secretmem_enable, |
| 41 | + "Enable secretmem and memfd_secret(2) system call"); |
| 42 | + |
| 43 | +static vm_fault_t secretmem_fault(struct vm_fault *vmf) |
| 44 | +{ |
| 45 | + struct address_space *mapping = vmf->vma->vm_file->f_mapping; |
| 46 | + struct inode *inode = file_inode(vmf->vma->vm_file); |
| 47 | + pgoff_t offset = vmf->pgoff; |
| 48 | + gfp_t gfp = vmf->gfp_mask; |
| 49 | + unsigned long addr; |
| 50 | + struct page *page; |
| 51 | + int err; |
| 52 | + |
| 53 | + if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode)) |
| 54 | + return vmf_error(-EINVAL); |
| 55 | + |
| 56 | +retry: |
| 57 | + page = find_lock_page(mapping, offset); |
| 58 | + if (!page) { |
| 59 | + page = alloc_page(gfp | __GFP_ZERO); |
| 60 | + if (!page) |
| 61 | + return VM_FAULT_OOM; |
| 62 | + |
| 63 | + err = set_direct_map_invalid_noflush(page); |
| 64 | + if (err) { |
| 65 | + put_page(page); |
| 66 | + return vmf_error(err); |
| 67 | + } |
| 68 | + |
| 69 | + __SetPageUptodate(page); |
| 70 | + err = add_to_page_cache_lru(page, mapping, offset, gfp); |
| 71 | + if (unlikely(err)) { |
| 72 | + put_page(page); |
| 73 | + /* |
| 74 | + * If a split of large page was required, it |
| 75 | + * already happened when we marked the page invalid |
| 76 | + * which guarantees that this call won't fail |
| 77 | + */ |
| 78 | + set_direct_map_default_noflush(page); |
| 79 | + if (err == -EEXIST) |
| 80 | + goto retry; |
| 81 | + |
| 82 | + return vmf_error(err); |
| 83 | + } |
| 84 | + |
| 85 | + addr = (unsigned long)page_address(page); |
| 86 | + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); |
| 87 | + } |
| 88 | + |
| 89 | + vmf->page = page; |
| 90 | + return VM_FAULT_LOCKED; |
| 91 | +} |
| 92 | + |
| 93 | +static const struct vm_operations_struct secretmem_vm_ops = { |
| 94 | + .fault = secretmem_fault, |
| 95 | +}; |
| 96 | + |
| 97 | +static int secretmem_mmap(struct file *file, struct vm_area_struct *vma) |
| 98 | +{ |
| 99 | + unsigned long len = vma->vm_end - vma->vm_start; |
| 100 | + |
| 101 | + if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0) |
| 102 | + return -EINVAL; |
| 103 | + |
| 104 | + if (mlock_future_check(vma->vm_mm, vma->vm_flags | VM_LOCKED, len)) |
| 105 | + return -EAGAIN; |
| 106 | + |
| 107 | + vma->vm_flags |= VM_LOCKED | VM_DONTDUMP; |
| 108 | + vma->vm_ops = &secretmem_vm_ops; |
| 109 | + |
| 110 | + return 0; |
| 111 | +} |
| 112 | + |
| 113 | +bool vma_is_secretmem(struct vm_area_struct *vma) |
| 114 | +{ |
| 115 | + return vma->vm_ops == &secretmem_vm_ops; |
| 116 | +} |
| 117 | + |
| 118 | +static const struct file_operations secretmem_fops = { |
| 119 | + .mmap = secretmem_mmap, |
| 120 | +}; |
| 121 | + |
| 122 | +static bool secretmem_isolate_page(struct page *page, isolate_mode_t mode) |
| 123 | +{ |
| 124 | + return false; |
| 125 | +} |
| 126 | + |
| 127 | +static int secretmem_migratepage(struct address_space *mapping, |
| 128 | + struct page *newpage, struct page *page, |
| 129 | + enum migrate_mode mode) |
| 130 | +{ |
| 131 | + return -EBUSY; |
| 132 | +} |
| 133 | + |
/*
 * ->freepage callback: undo what secretmem_fault() did to the page.
 *
 * The direct map entry is restored first and only then is the page
 * zeroed, so the old contents are wiped before the page is released.
 */
static void secretmem_freepage(struct page *page)
{
	/* must come first: the page is invalid in the direct map until now */
	set_direct_map_default_noflush(page);

	/* wipe the old contents */
	clear_highpage(page);
}
| 139 | + |
| 140 | +const struct address_space_operations secretmem_aops = { |
| 141 | + .freepage = secretmem_freepage, |
| 142 | + .migratepage = secretmem_migratepage, |
| 143 | + .isolate_page = secretmem_isolate_page, |
| 144 | +}; |
| 145 | + |
| 146 | +static struct vfsmount *secretmem_mnt; |
| 147 | + |
| 148 | +static struct file *secretmem_file_create(unsigned long flags) |
| 149 | +{ |
| 150 | + struct file *file = ERR_PTR(-ENOMEM); |
| 151 | + struct inode *inode; |
| 152 | + |
| 153 | + inode = alloc_anon_inode(secretmem_mnt->mnt_sb); |
| 154 | + if (IS_ERR(inode)) |
| 155 | + return ERR_CAST(inode); |
| 156 | + |
| 157 | + file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem", |
| 158 | + O_RDWR, &secretmem_fops); |
| 159 | + if (IS_ERR(file)) |
| 160 | + goto err_free_inode; |
| 161 | + |
| 162 | + mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); |
| 163 | + mapping_set_unevictable(inode->i_mapping); |
| 164 | + |
| 165 | + inode->i_mapping->a_ops = &secretmem_aops; |
| 166 | + |
| 167 | + /* pretend we are a normal file with zero size */ |
| 168 | + inode->i_mode |= S_IFREG; |
| 169 | + inode->i_size = 0; |
| 170 | + |
| 171 | + return file; |
| 172 | + |
| 173 | +err_free_inode: |
| 174 | + iput(inode); |
| 175 | + return file; |
| 176 | +} |
| 177 | + |
| 178 | +SYSCALL_DEFINE1(memfd_secret, unsigned int, flags) |
| 179 | +{ |
| 180 | + struct file *file; |
| 181 | + int fd, err; |
| 182 | + |
| 183 | + /* make sure local flags do not confict with global fcntl.h */ |
| 184 | + BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC); |
| 185 | + |
| 186 | + if (!secretmem_enable) |
| 187 | + return -ENOSYS; |
| 188 | + |
| 189 | + if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC)) |
| 190 | + return -EINVAL; |
| 191 | + |
| 192 | + fd = get_unused_fd_flags(flags & O_CLOEXEC); |
| 193 | + if (fd < 0) |
| 194 | + return fd; |
| 195 | + |
| 196 | + file = secretmem_file_create(flags); |
| 197 | + if (IS_ERR(file)) { |
| 198 | + err = PTR_ERR(file); |
| 199 | + goto err_put_fd; |
| 200 | + } |
| 201 | + |
| 202 | + file->f_flags |= O_LARGEFILE; |
| 203 | + |
| 204 | + fd_install(fd, file); |
| 205 | + return fd; |
| 206 | + |
| 207 | +err_put_fd: |
| 208 | + put_unused_fd(fd); |
| 209 | + return err; |
| 210 | +} |
| 211 | + |
| 212 | +static int secretmem_init_fs_context(struct fs_context *fc) |
| 213 | +{ |
| 214 | + return init_pseudo(fc, SECRETMEM_MAGIC) ? 0 : -ENOMEM; |
| 215 | +} |
| 216 | + |
| 217 | +static struct file_system_type secretmem_fs = { |
| 218 | + .name = "secretmem", |
| 219 | + .init_fs_context = secretmem_init_fs_context, |
| 220 | + .kill_sb = kill_anon_super, |
| 221 | +}; |
| 222 | + |
| 223 | +static int secretmem_init(void) |
| 224 | +{ |
| 225 | + int ret = 0; |
| 226 | + |
| 227 | + if (!secretmem_enable) |
| 228 | + return ret; |
| 229 | + |
| 230 | + secretmem_mnt = kern_mount(&secretmem_fs); |
| 231 | + if (IS_ERR(secretmem_mnt)) |
| 232 | + ret = PTR_ERR(secretmem_mnt); |
| 233 | + |
| 234 | + /* prevent secretmem mappings from ever getting PROT_EXEC */ |
| 235 | + secretmem_mnt->mnt_flags |= MNT_NOEXEC; |
| 236 | + |
| 237 | + return ret; |
| 238 | +} |
| 239 | +fs_initcall(secretmem_init); |
0 commit comments