KVM: x86: Add gmem hook for invalidating private memory
TODO: add a CONFIG option that can be used to completely skip the arch
invalidation loop and avoid __weak references for arch/platforms that
don't need an additional invalidation hook.

In some cases, like with SEV-SNP, guest memory needs to be updated in a
platform-specific manner before it can be safely freed back to the host.
Add hooks to wire up handling of this sort when freeing memory in
response to FALLOC_FL_PUNCH_HOLE operations.

Also issue invalidations of all allocated pages when releasing the gmem
file so that the pages are not left in an unusable state when they get
freed back to the host.

Signed-off-by: Michael Roth <michael.roth@amd.com>
mdroth committed Oct 5, 2023
1 parent 53aafb4 commit 4ebcc04
Showing 6 changed files with 78 additions and 2 deletions.
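
Taken together, the changes give the following call flow for a hole punch on a gmem file (a summary of the diff below, not new code):

    fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, ...)
      -> kvm_gmem_fallocate()
         -> kvm_gmem_punch_hole(file, offset, len)
            -> kvm_gmem_invalidate_begin()  (per gmem instance)
            -> kvm_gmem_issue_arch_invalidate(inode, start, end)
               -> kvm_arch_gmem_invalidate(start_pfn, end_pfn)  [if CONFIG_ARCH_HAS_PRIVATE_MEM_INVALIDATE]
                  -> static_call_cond(kvm_x86_gmem_invalidate)  [x86: vendor .gmem_invalidate, if set]
            -> truncate_inode_pages_range()
            -> kvm_gmem_invalidate_end()  (per gmem instance)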
1 change: 1 addition & 0 deletions arch/x86/include/asm/kvm-x86-ops.h
@@ -135,6 +135,7 @@ KVM_X86_OP(complete_emulated_msr)
 KVM_X86_OP(vcpu_deliver_sipi_vector)
 KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
 KVM_X86_OP_OPTIONAL_RET0(gmem_prepare)
+KVM_X86_OP_OPTIONAL(gmem_invalidate)
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_OPTIONAL
1 change: 1 addition & 0 deletions arch/x86/include/asm/kvm_host.h
@@ -1754,6 +1754,7 @@ struct kvm_x86_ops {
 	unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
 
 	int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order);
+	void (*gmem_invalidate)(kvm_pfn_t start, kvm_pfn_t end);
 };
 
 struct kvm_x86_nested_ops {
7 changes: 7 additions & 0 deletions arch/x86/kvm/x86.c
@@ -13308,6 +13308,13 @@ int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order)
 }
 #endif
 
+#ifdef CONFIG_ARCH_HAS_PRIVATE_MEM_INVALIDATE
+void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)
+{
+	static_call_cond(kvm_x86_gmem_invalidate)(start, end);
+}
+#endif
+
 int kvm_spec_ctrl_test_value(u64 value)
 {
 	/*
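
The static_call_cond() dispatch above is a no-op until a vendor module populates .gmem_invalidate; no implementation is part of this commit. As a rough sketch of how an SEV-SNP backend might eventually wire it up (snp_reclaim_page() is a hypothetical helper used for illustration, not an API introduced here):

    /*
     * Illustrative only -- not part of this commit. An SEV-SNP backend
     * would walk the pfn range and transition each page back to a
     * host-owned state before it can be reused; snp_reclaim_page() is
     * a hypothetical stand-in for that firmware interaction.
     */
    static void snp_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)
    {
    	kvm_pfn_t pfn;
    
    	for (pfn = start; pfn < end; pfn++)
    		if (snp_reclaim_page(pfn))
    			pr_err("gmem: failed to reclaim pfn 0x%llx\n", pfn);
    }
    
    static struct kvm_x86_ops svm_x86_ops __initdata = {
    	/* ... other ops ... */
    	.gmem_invalidate = snp_gmem_invalidate,
    };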
4 changes: 4 additions & 0 deletions include/linux/kvm_host.h
@@ -2430,4 +2430,8 @@ static inline void kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn,
 }
 #endif /* CONFIG_KVM_PRIVATE_MEM */
 
+#ifdef CONFIG_ARCH_HAS_PRIVATE_MEM_INVALIDATE
+void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end);
+#endif /* CONFIG_ARCH_HAS_PRIVATE_MEM_INVALIDATE */
+
 #endif
3 changes: 3 additions & 0 deletions virt/kvm/Kconfig
@@ -109,3 +109,6 @@ config KVM_GENERIC_PRIVATE_MEM
 	select KVM_GENERIC_MEMORY_ATTRIBUTES
 	select KVM_PRIVATE_MEM
 	bool
+
+config ARCH_HAS_PRIVATE_MEM_INVALIDATE
+	bool
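
Nothing selects ARCH_HAS_PRIVATE_MEM_INVALIDATE in this commit; it is a hidden bool that a platform opts into from its own Kconfig entry. A sketch of what that might look like, assuming (simplified, for illustration) it lands under the SEV config:

    config KVM_AMD_SEV
    	def_bool y
    	depends on KVM_AMD && X86_64
    	select ARCH_HAS_PRIVATE_MEM_INVALIDATE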
64 changes: 62 additions & 2 deletions virt/kvm/guest_memfd.c
@@ -175,8 +175,47 @@ static void kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start,
 	}
 }
 
-static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
+/* Handle arch-specific hooks needed before releasing gmem pages. */
+static void kvm_gmem_issue_arch_invalidate(struct inode *inode,
+					   pgoff_t start, pgoff_t end)
 {
+#ifdef CONFIG_ARCH_HAS_PRIVATE_MEM_INVALIDATE
+	pgoff_t inode_end = i_size_read(inode) >> PAGE_SHIFT;
+	pgoff_t index = start;
+
+	end = min(end, inode_end);
+
+	while (index < end) {
+		struct folio *folio;
+		unsigned int order;
+		struct page *page;
+		kvm_pfn_t pfn;
+
+		folio = __filemap_get_folio(inode->i_mapping, index,
+					    FGP_LOCK, 0);
+		if (IS_ERR_OR_NULL(folio)) {
+			index++;
+			continue;
+		}
+
+		page = folio_file_page(folio, index);
+		pfn = page_to_pfn(page);
+		order = folio_order(folio);
+
+		kvm_arch_gmem_invalidate(pfn, pfn + min((1ul << order), end - index));
+
+		index = folio_next_index(folio);
+		folio_unlock(folio);
+		folio_put(folio);
+
+		cond_resched();
+	}
+#endif
+}
+
+static long kvm_gmem_punch_hole(struct file *file, loff_t offset, loff_t len)
+{
+	struct inode *inode = file_inode(file);
 	struct list_head *gmem_list = &inode->i_mapping->private_list;
 	pgoff_t start = offset >> PAGE_SHIFT;
 	pgoff_t end = (offset + len) >> PAGE_SHIFT;
@@ -191,6 +230,16 @@ static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 	list_for_each_entry(gmem, gmem_list, entry)
 		kvm_gmem_invalidate_begin(gmem, start, end);
 
+	/*
+	 * On some platforms the memory allocated through gmem needs to be put
+	 * back into a host-accessible state before the host can access or free
+	 * it for general usage, and the truncation path currently involves
+	 * zeroing the truncated range fairly early on, before callbacks like
+	 * .invalidate_folio/.free_folio are available to potentially handle
+	 * transitioning the memory back into a host-accessible state. Issue
+	 * arch-specific callbacks in advance of truncation to handle this.
+	 */
+	kvm_gmem_issue_arch_invalidate(inode, start, end);
 	truncate_inode_pages_range(inode->i_mapping, offset, offset + len - 1);
 
 	list_for_each_entry(gmem, gmem_list, entry)
@@ -263,7 +312,7 @@ static long kvm_gmem_fallocate(struct file *file, int mode, loff_t offset,
 		return -EINVAL;
 
 	if (mode & FALLOC_FL_PUNCH_HOLE)
-		ret = kvm_gmem_punch_hole(file_inode(file), offset, len);
+		ret = kvm_gmem_punch_hole(file, offset, len);
 	else
 		ret = kvm_gmem_allocate(file_inode(file), offset, len);
 
@@ -274,6 +323,7 @@ static long kvm_gmem_fallocate(struct file *file, int mode, loff_t offset,
 
 static int kvm_gmem_release(struct inode *inode, struct file *file)
 {
+	struct list_head *gmem_list = &inode->i_mapping->private_list;
 	struct kvm_gmem *gmem = file->private_data;
 	struct kvm_memory_slot *slot;
 	struct kvm *kvm = gmem->kvm;
@@ -301,6 +351,16 @@ static int kvm_gmem_release(struct inode *inode, struct file *file)
 	 * memory, as its lifetime is associated with the inode, not the file.
 	 */
 	kvm_gmem_invalidate_begin(gmem, 0, -1ul);
+
+	/*
+	 * Only issue arch-specific invalidations if the gmem inode is actually
+	 * going to be released after this. E.g. in the case of live update the
+	 * memory might still be in use on the destination VM, in which case it
+	 * would have another gmem file instance associated with the same inode.
+	 */
+	if (list_is_singular(gmem_list))
+		kvm_gmem_issue_arch_invalidate(inode, 0, -1ul);
+
 	kvm_gmem_invalidate_end(gmem, 0, -1ul);
 
 	list_del(&gmem->entry);
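
Note the range math in kvm_gmem_issue_arch_invalidate(): for a 2MB folio (order 9) lying fully inside the range, min((1ul << order), end - index) invalidates all 512 pfns in a single arch call, while a folio straddling the end of the range is clamped by the end - index term. From userspace, the new path is reached by punching a hole in the gmem file. A minimal sketch, assuming gmem_fd is a guest_memfd descriptor (e.g. obtained via KVM_CREATE_GUEST_MEMFD):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <linux/falloc.h>
    
    /*
     * Illustrative only: discard (and, with this patch, arch-invalidate)
     * a range of gmem-backed guest memory. kvm_gmem_fallocate() rejects
     * PUNCH_HOLE without KEEP_SIZE, hence both flags.
     */
    static int gmem_discard(int gmem_fd, off_t offset, off_t len)
    {
    	return fallocate(gmem_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
    			 offset, len);
    }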
