Commit 59ce4bd

KVM: x86/mmu: Defer allocation of shadow MMU's hashed page list
When the TDP MMU is enabled, i.e. when the shadow MMU isn't used until a nested TDP VM is run, defer allocation of the array of hashed lists used to track shadow MMU pages until the first shadow root is allocated.

Setting the list outside of mmu_lock is safe, as concurrent readers must hold mmu_lock in some capacity, shadow pages can only be added (or removed) from the list when mmu_lock is held for write, and tasks that are creating a shadow root are serialized by slots_arch_lock. I.e. it's impossible for the list to become non-empty until all readers go away, and so readers are guaranteed to see an empty list even if they make multiple calls to kvm_get_mmu_page_hash() in a single mmu_lock critical section.

Use smp_store_release() and smp_load_acquire() to access the hash table pointer to ensure the stores to zero the lists are retired before readers start to walk the list. E.g. if the compiler hoisted the store before the zeroing of memory, for_each_gfn_valid_sp_with_gptes() could consume stale kernel data.

Cc: James Houghton <jthoughton@google.com>
Link: https://lore.kernel.org/r/20250523001138.3182794-5-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
1 parent 97ad7dd commit 59ce4bd
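For context, the release/acquire publication idiom described in the commit message can be sketched outside the kernel with standard C11 atomics. The snippet below is a minimal, illustrative sketch only: page_hash, NUM_BUCKETS, struct bucket, alloc_page_hash() and get_page_hash() are hypothetical stand-ins for kvm->arch.mmu_page_hash, KVM_NUM_MMU_PAGES, struct hlist_head and the functions touched by this commit, and atomic_store_explicit()/atomic_load_explicit() stand in for smp_store_release()/smp_load_acquire().

/*
 * Self-contained sketch (not kernel code) of the lazy-publication pattern:
 * the writer zero-allocates a table and publishes the pointer with a release
 * store; readers acquire-load the pointer and either see NULL (and fall back
 * to a permanently empty bucket) or a fully zeroed table.
 */
#include <stdatomic.h>
#include <stdlib.h>

#define NUM_BUCKETS 4096                        /* stand-in for KVM_NUM_MMU_PAGES */

struct bucket {
        struct bucket *first;
};

static struct bucket empty_bucket;              /* stand-in for empty_page_hash */
static _Atomic(struct bucket *) page_hash;      /* stand-in for kvm->arch.mmu_page_hash */

/* Writer: allocate a zeroed table, then publish it with a release store so the
 * zeroing stores cannot become visible after the pointer does. */
static int alloc_page_hash(void)
{
        struct bucket *h;

        if (atomic_load_explicit(&page_hash, memory_order_relaxed))
                return 0;

        h = calloc(NUM_BUCKETS, sizeof(*h));
        if (!h)
                return -1;

        /* analogue of smp_store_release() */
        atomic_store_explicit(&page_hash, h, memory_order_release);
        return 0;
}

/* Reader: acquire-load the pointer before walking any bucket. */
static struct bucket *get_page_hash(unsigned long gfn)
{
        /* analogue of smp_load_acquire() */
        struct bucket *h = atomic_load_explicit(&page_hash, memory_order_acquire);

        if (!h)
                return &empty_bucket;

        return &h[gfn % NUM_BUCKETS];
}

Without the release/acquire pairing, the pointer store could become visible before calloc()'s zeroing stores, which is the stale-data hazard the commit message calls out for for_each_gfn_valid_sp_with_gptes().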

1 file changed (+52, -10 lines)


arch/x86/kvm/mmu/mmu.c

Lines changed: 52 additions & 10 deletions
@@ -1983,14 +1983,35 @@ static bool sp_has_gptes(struct kvm_mmu_page *sp)
 	return true;
 }
 
+static __ro_after_init HLIST_HEAD(empty_page_hash);
+
+static struct hlist_head *kvm_get_mmu_page_hash(struct kvm *kvm, gfn_t gfn)
+{
+	/*
+	 * Ensure the load of the hash table pointer itself is ordered before
+	 * loads to walk the table.  The pointer is set at runtime outside of
+	 * mmu_lock when the TDP MMU is enabled, i.e. when the hash table of
+	 * shadow pages becomes necessary only when KVM needs to shadow L1's
+	 * TDP for an L2 guest.  Pairs with the smp_store_release() in
+	 * kvm_mmu_alloc_page_hash().
+	 */
+	struct hlist_head *page_hash = smp_load_acquire(&kvm->arch.mmu_page_hash);
+
+	lockdep_assert_held(&kvm->mmu_lock);
+
+	if (!page_hash)
+		return &empty_page_hash;
+
+	return &page_hash[kvm_page_table_hashfn(gfn)];
+}
+
 #define for_each_valid_sp(_kvm, _sp, _list)				\
 	hlist_for_each_entry(_sp, _list, hash_link)			\
 		if (is_obsolete_sp((_kvm), (_sp))) {			\
 		} else
 
 #define for_each_gfn_valid_sp_with_gptes(_kvm, _sp, _gfn)		\
-	for_each_valid_sp(_kvm, _sp,					\
-	  &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)])	\
+	for_each_valid_sp(_kvm, _sp, kvm_get_mmu_page_hash(_kvm, _gfn))	\
 		if ((_sp)->gfn != (_gfn) || !sp_has_gptes(_sp)) {} else
 
 static bool kvm_sync_page_check(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
@@ -2358,6 +2379,12 @@ static struct kvm_mmu_page *__kvm_mmu_get_shadow_page(struct kvm *kvm,
 	struct kvm_mmu_page *sp;
 	bool created = false;
 
+	/*
+	 * No need for memory barriers, unlike in kvm_get_mmu_page_hash(), as
+	 * mmu_page_hash must be set prior to creating the first shadow root,
+	 * i.e. reaching this point is fully serialized by slots_arch_lock.
+	 */
+	BUG_ON(!kvm->arch.mmu_page_hash);
 	sp_list = &kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)];
 
 	sp = kvm_mmu_find_shadow_page(kvm, vcpu, gfn, sp_list, role);
@@ -3886,11 +3913,21 @@ static int kvm_mmu_alloc_page_hash(struct kvm *kvm)
 {
 	typeof(kvm->arch.mmu_page_hash) h;
 
+	if (kvm->arch.mmu_page_hash)
+		return 0;
+
 	h = kvcalloc(KVM_NUM_MMU_PAGES, sizeof(*h), GFP_KERNEL_ACCOUNT);
 	if (!h)
 		return -ENOMEM;
 
-	kvm->arch.mmu_page_hash = h;
+	/*
+	 * Ensure the hash table pointer is set only after all stores to zero
+	 * the memory are retired.  Pairs with the smp_load_acquire() in
+	 * kvm_get_mmu_page_hash().  Note, mmu_lock must be held for write to
+	 * add (or remove) shadow pages, and so readers are guaranteed to see
+	 * an empty list for their current mmu_lock critical section.
+	 */
+	smp_store_release(&kvm->arch.mmu_page_hash, h);
 	return 0;
 }
 
@@ -3913,9 +3950,13 @@ static int mmu_first_shadow_root_alloc(struct kvm *kvm)
 	if (kvm_shadow_root_allocated(kvm))
 		goto out_unlock;
 
+	r = kvm_mmu_alloc_page_hash(kvm);
+	if (r)
+		goto out_unlock;
+
 	/*
-	 * Check if anything actually needs to be allocated, e.g. all metadata
-	 * will be allocated upfront if TDP is disabled.
+	 * Check if memslot metadata actually needs to be allocated, e.g. all
+	 * metadata will be allocated upfront if TDP is disabled.
 	 */
 	if (kvm_memslots_have_rmaps(kvm) &&
 	    kvm_page_track_write_tracking_enabled(kvm))
@@ -6703,12 +6744,13 @@ int kvm_mmu_init_vm(struct kvm *kvm)
 	INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
 	spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
 
-	r = kvm_mmu_alloc_page_hash(kvm);
-	if (r)
-		return r;
-
-	if (tdp_mmu_enabled)
+	if (tdp_mmu_enabled) {
 		kvm_mmu_init_tdp_mmu(kvm);
+	} else {
+		r = kvm_mmu_alloc_page_hash(kvm);
+		if (r)
+			return r;
+	}
 
 	kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache;
 	kvm->arch.split_page_header_cache.gfp_zero = __GFP_ZERO;
