Commit 3ab3283
KVM: x86/mmu: Add dedicated API to map guest_memfd pfn into TDP MMU
Add and use a new API for mapping a private pfn from guest_memfd into the TDP MMU from TDX's post-populate hook instead of partially open-coding the functionality into the TDX code. Sharing code with the pre-fault path sounded good on paper, but it's fatally flawed as simulating a fault loses the pfn, and calling back into gmem to re-retrieve the pfn creates locking problems, e.g. kvm_gmem_populate() already holds the gmem invalidation lock.

Providing a dedicated API will also remove several MMU exports that ideally would not be exposed outside of the MMU, let alone to vendor code. On that topic, opportunistically drop the kvm_mmu_load() export. Leave kvm_tdp_mmu_gpa_is_mapped() alone for now; the entire commit that added kvm_tdp_mmu_gpa_is_mapped() will be removed in the near future.

Gate the API on CONFIG_KVM_GUEST_MEMFD=y, as private memory _must_ be backed by guest_memfd. Add a lockdep-only assert that the incoming pfn is indeed backed by guest_memfd, and that the gmem instance's invalidate lock is held (which, combined with slots_lock being held, obviates the need to check for a stale "fault").

Cc: Michael Roth <michael.roth@amd.com>
Cc: Yan Zhao <yan.y.zhao@intel.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Vishal Annapurve <vannapurve@google.com>
Cc: Rick Edgecombe <rick.p.edgecombe@intel.com>
Reviewed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Reviewed-by: Kai Huang <kai.huang@intel.com>
Link: https://lore.kernel.org/all/20250709232103.zwmufocd3l7sqk7y@amd.com
Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>
Reviewed-by: Yan Zhao <yan.y.zhao@intel.com>
Tested-by: Yan Zhao <yan.y.zhao@intel.com>
Tested-by: Kai Huang <kai.huang@intel.com>
Link: https://patch.msgid.link/20251030200951.3402865-5-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
1 parent 5294a4b commit 3ab3283
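
For context, a rough sketch of the calling contract the message describes (not the actual TDX code, which is in the tdx.c hunk below): the vendor caller holds slots_lock, kvm_gmem_populate() holds the gmem instance's invalidate lock while invoking its post-populate callback, and the callback hands the already-resolved pfn straight to the new API. The names my_post_populate() and my_populate_region() are hypothetical, and the sketch assumes the kvm_gmem_populate() signature from virt/kvm/guest_memfd.c.

/*
 * Hedged sketch of the expected calling context; my_post_populate() and
 * my_populate_region() are illustrative names, not kernel functions.
 */
static int my_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
                            void __user *src, int order, void *opaque)
{
        struct kvm_vcpu *vcpu = opaque;

        /*
         * kvm_gmem_populate() invokes this callback with the gmem
         * invalidate_lock already held, and the caller below holds
         * slots_lock, which satisfies the asserts in
         * kvm_tdp_mmu_map_private_pfn().
         */
        return kvm_tdp_mmu_map_private_pfn(vcpu, gfn, pfn);
}

static long my_populate_region(struct kvm_vcpu *vcpu, gfn_t start_gfn,
                               void __user *src, long npages)
{
        long ret;

        mutex_lock(&vcpu->kvm->slots_lock);
        ret = kvm_gmem_populate(vcpu->kvm, start_gfn, src, npages,
                                my_post_populate, vcpu);
        mutex_unlock(&vcpu->kvm->slots_lock);

        return ret;
}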

File tree: 3 files changed, 84 insertions(+), 8 deletions(-)

arch/x86/kvm/mmu.h

Lines changed: 1 addition & 0 deletions
@@ -259,6 +259,7 @@ extern bool tdp_mmu_enabled;
 
 bool kvm_tdp_mmu_gpa_is_mapped(struct kvm_vcpu *vcpu, u64 gpa);
 int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code, u8 *level);
+int kvm_tdp_mmu_map_private_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn);
 
 static inline bool kvm_memslots_have_rmaps(struct kvm *kvm)
 {
arch/x86/kvm/mmu/mmu.c

Lines changed: 80 additions & 1 deletion
@@ -5014,6 +5014,86 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
 	return min(range->size, end - range->gpa);
 }
 
+#ifdef CONFIG_KVM_GUEST_MEMFD
+static void kvm_assert_gmem_invalidate_lock_held(struct kvm_memory_slot *slot)
+{
+#ifdef CONFIG_PROVE_LOCKING
+	if (WARN_ON_ONCE(!kvm_slot_has_gmem(slot)) ||
+	    WARN_ON_ONCE(!slot->gmem.file) ||
+	    WARN_ON_ONCE(!file_count(slot->gmem.file)))
+		return;
+
+	lockdep_assert_held(&file_inode(slot->gmem.file)->i_mapping->invalidate_lock);
+#endif
+}
+
+int kvm_tdp_mmu_map_private_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
+{
+	struct kvm_page_fault fault = {
+		.addr = gfn_to_gpa(gfn),
+		.error_code = PFERR_GUEST_FINAL_MASK | PFERR_PRIVATE_ACCESS,
+		.prefetch = true,
+		.is_tdp = true,
+		.nx_huge_page_workaround_enabled = is_nx_huge_page_enabled(vcpu->kvm),
+
+		.max_level = PG_LEVEL_4K,
+		.req_level = PG_LEVEL_4K,
+		.goal_level = PG_LEVEL_4K,
+		.is_private = true,
+
+		.gfn = gfn,
+		.slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn),
+		.pfn = pfn,
+		.map_writable = true,
+	};
+	struct kvm *kvm = vcpu->kvm;
+	int r;
+
+	lockdep_assert_held(&kvm->slots_lock);
+
+	/*
+	 * Mapping a pre-determined private pfn is intended only for use when
+	 * populating a guest_memfd instance.  Assert that the slot is backed
+	 * by guest_memfd and that the gmem instance's invalidate_lock is held.
+	 */
+	kvm_assert_gmem_invalidate_lock_held(fault.slot);
+
+	if (KVM_BUG_ON(!tdp_mmu_enabled, kvm))
+		return -EIO;
+
+	if (kvm_gfn_is_write_tracked(kvm, fault.slot, fault.gfn))
+		return -EPERM;
+
+	r = kvm_mmu_reload(vcpu);
+	if (r)
+		return r;
+
+	r = mmu_topup_memory_caches(vcpu, false);
+	if (r)
+		return r;
+
+	do {
+		if (signal_pending(current))
+			return -EINTR;
+
+		if (kvm_test_request(KVM_REQ_VM_DEAD, vcpu))
+			return -EIO;
+
+		cond_resched();
+
+		guard(read_lock)(&kvm->mmu_lock);
+
+		r = kvm_tdp_mmu_map(vcpu, &fault);
+	} while (r == RET_PF_RETRY);
+
+	if (r != RET_PF_FIXED)
+		return -EIO;
+
+	return 0;
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_tdp_mmu_map_private_pfn);
+#endif
+
 static void nonpaging_init_context(struct kvm_mmu *context)
 {
 	context->page_fault = nonpaging_page_fault;
@@ -5997,7 +6077,6 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
 out:
 	return r;
 }
-EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_mmu_load);
 
 void kvm_mmu_unload(struct kvm_vcpu *vcpu)
 {

arch/x86/kvm/vmx/tdx.c

Lines changed: 3 additions & 7 deletions
@@ -3167,15 +3167,12 @@ struct tdx_gmem_post_populate_arg {
 static int tdx_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
 				  void __user *src, int order, void *_arg)
 {
-	u64 error_code = PFERR_GUEST_FINAL_MASK | PFERR_PRIVATE_ACCESS;
-	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
 	struct tdx_gmem_post_populate_arg *arg = _arg;
-	struct kvm_vcpu *vcpu = arg->vcpu;
+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+	u64 err, entry, level_state;
 	gpa_t gpa = gfn_to_gpa(gfn);
-	u8 level = PG_LEVEL_4K;
 	struct page *src_page;
 	int ret, i;
-	u64 err, entry, level_state;
 
 	/*
 	 * Get the source page if it has been faulted in. Return failure if the
@@ -3187,7 +3184,7 @@ static int tdx_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
 	if (ret != 1)
 		return -ENOMEM;
 
-	ret = kvm_tdp_map_page(vcpu, gpa, error_code, &level);
+	ret = kvm_tdp_mmu_map_private_pfn(arg->vcpu, gfn, pfn);
 	if (ret < 0)
 		goto out;
 
@@ -3250,7 +3247,6 @@ static int tdx_vcpu_init_mem_region(struct kvm_vcpu *vcpu, struct kvm_tdx_cmd *c
 	    !vt_is_tdx_private_gpa(kvm, region.gpa + (region.nr_pages << PAGE_SHIFT) - 1))
 		return -EINVAL;
 
-	kvm_mmu_reload(vcpu);
 	ret = 0;
 	while (region.nr_pages) {
 		if (signal_pending(current)) {
