
Commit 3d626ce

KVM: TDX: Add macro to retry SEAMCALLs when forcing vCPUs out of guest
Add a macro to handle kicking vCPUs out of the guest and retrying
SEAMCALLs on TDX_OPERAND_BUSY, instead of providing small helpers to be
used by each SEAMCALL. Wrapping the SEAMCALLs in a macro makes it a
little harder to tease out which SEAMCALL is being made, but it
significantly reduces the amount of copy+paste code and makes it all but
impossible to leave an elevated wait_for_sept_zap.

Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>
Reviewed-by: Kai Huang <kai.huang@intel.com>
Reviewed-by: Yan Zhao <yan.y.zhao@intel.com>
Tested-by: Yan Zhao <yan.y.zhao@intel.com>
Tested-by: Kai Huang <kai.huang@intel.com>
Link: https://patch.msgid.link/20251030200951.3402865-22-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
1 parent 2ff1411 commit 3d626ce
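
For readers unfamiliar with the construct, the new macro is built on GCC/Clang statement expressions: a `({ ... })` block executes its statements and evaluates to its final expression, which is what lets tdh_do_no_vcpus() hand the SEAMCALL error code back to the caller. Below is a minimal, standalone sketch of that retry-and-return pattern; do_op(), do_with_retry(), and the BUSY code are hypothetical stand-ins for illustration, not KVM code:

```c
/*
 * Standalone sketch of the statement-expression retry pattern
 * (GNU C extension; compile with gcc or clang).  All names here
 * are hypothetical stand-ins, not kernel identifiers.
 */
#include <stdio.h>

#define BUSY 1

static int do_op(int *attempts)
{
	/* Hypothetical operation: busy on the first attempt, then succeeds. */
	return (*attempts)++ ? 0 : BUSY;
}

/*
 * The ({ ... }) block runs its statements and "returns" __ret, the
 * value of its final expression, to the assignment at the call site.
 */
#define do_with_retry(op, args...)		\
({						\
	int __ret = op(args);			\
	if (__ret == BUSY)			\
		__ret = op(args);		\
	__ret;					\
})

int main(void)
{
	int attempts = 0;
	int ret = do_with_retry(do_op, &attempts);

	/* Prints "ret = 0 after 2 attempt(s)": one busy try, one retry. */
	printf("ret = %d after %d attempt(s)\n", ret, attempts);
	return 0;
}
```

Note that because the macro expands op(args) textually at both call points, arguments are evaluated twice on the retry path; the SEAMCALL call sites in this patch only pass side-effect-free arguments, so this is safe here.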

1 file changed: 33 additions, 49 deletions

arch/x86/kvm/vmx/tdx.c

```diff
@@ -294,25 +294,34 @@ static inline void tdx_disassociate_vp(struct kvm_vcpu *vcpu)
 	vcpu->cpu = -1;
 }
 
-static void tdx_no_vcpus_enter_start(struct kvm *kvm)
-{
-	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
-
-	lockdep_assert_held_write(&kvm->mmu_lock);
-
-	WRITE_ONCE(kvm_tdx->wait_for_sept_zap, true);
-
-	kvm_make_all_cpus_request(kvm, KVM_REQ_OUTSIDE_GUEST_MODE);
-}
-
-static void tdx_no_vcpus_enter_stop(struct kvm *kvm)
-{
-	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
-
-	lockdep_assert_held_write(&kvm->mmu_lock);
-
-	WRITE_ONCE(kvm_tdx->wait_for_sept_zap, false);
-}
+/*
+ * Execute a SEAMCALL related to removing/blocking S-EPT entries, with a single
+ * retry (if necessary) after forcing vCPUs to exit and wait for the operation
+ * to complete.  All flows that remove/block S-EPT entries run with mmu_lock
+ * held for write, i.e. are mutually exclusive with each other, but they aren't
+ * mutually exclusive with running vCPUs, and so can fail with "operand busy"
+ * if a vCPU acquires a relevant lock in the TDX-Module, e.g. when doing TDCALL.
+ *
+ * Note, the retry is guaranteed to succeed, absent KVM and/or TDX-Module bugs.
+ */
+#define tdh_do_no_vcpus(tdh_func, kvm, args...)				\
+({									\
+	struct kvm_tdx *__kvm_tdx = to_kvm_tdx(kvm);			\
+	u64 __err;							\
+									\
+	lockdep_assert_held_write(&kvm->mmu_lock);			\
+									\
+	__err = tdh_func(args);						\
+	if (unlikely(tdx_operand_busy(__err))) {			\
+		WRITE_ONCE(__kvm_tdx->wait_for_sept_zap, true);		\
+		kvm_make_all_cpus_request(kvm, KVM_REQ_OUTSIDE_GUEST_MODE); \
+									\
+		__err = tdh_func(args);					\
+									\
+		WRITE_ONCE(__kvm_tdx->wait_for_sept_zap, false);	\
+	}								\
+	__err;								\
+})
 
 /* TDH.PHYMEM.PAGE.RECLAIM is allowed only when destroying the TD. */
 static int __tdx_reclaim_page(struct page *page)
```
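
The hunk above shows only the producer side of wait_for_sept_zap. For context, here is a hedged sketch of how the consumer side is assumed to behave: the vCPU entry path reads the flag and refuses to re-enter the guest while a zap is in flight, so the KVM_REQ_OUTSIDE_GUEST_MODE kick keeps vCPUs out of TDH.VP.ENTER for the duration of the retry. The function name below is hypothetical; the actual check lives elsewhere in tdx.c and is not part of this diff:

```c
/*
 * Hypothetical sketch of the consumer side (not part of this diff).
 * Pairs with the WRITE_ONCE()s in tdh_do_no_vcpus(): once the flag is
 * set and all vCPUs have been kicked outside guest mode, no vCPU can
 * re-enter the guest and contend for TDX-Module locks until the retry
 * completes and the flag is cleared.
 */
static fastpath_t tdx_check_sept_zap(struct kvm_vcpu *vcpu)
{
	if (unlikely(READ_ONCE(to_kvm_tdx(vcpu->kvm)->wait_for_sept_zap)))
		return EXIT_FASTPATH_EXIT_HANDLED;

	return EXIT_FASTPATH_NONE;
}
```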
```diff
@@ -1722,14 +1731,7 @@ static void tdx_track(struct kvm *kvm)
 	 */
 	lockdep_assert_held_write(&kvm->mmu_lock);
 
-	err = tdh_mem_track(&kvm_tdx->td);
-	if (unlikely(tdx_operand_busy(err))) {
-		/* After no vCPUs enter, the second retry is expected to succeed */
-		tdx_no_vcpus_enter_start(kvm);
-		err = tdh_mem_track(&kvm_tdx->td);
-		tdx_no_vcpus_enter_stop(kvm);
-	}
-
+	err = tdh_do_no_vcpus(tdh_mem_track, kvm, &kvm_tdx->td);
 	TDX_BUG_ON(err, TDH_MEM_TRACK, kvm);
 
 	kvm_make_all_cpus_request(kvm, KVM_REQ_OUTSIDE_GUEST_MODE);
```
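
To make the conversion concrete, per the macro definition above, the new tdh_mem_track() call site expands to roughly the following, i.e. the same logic the deleted helpers provided, but with the set/kick/clear sequence confined to the busy path:

```c
/*
 * Approximate expansion of
 * err = tdh_do_no_vcpus(tdh_mem_track, kvm, &kvm_tdx->td);
 */
err = ({
	struct kvm_tdx *__kvm_tdx = to_kvm_tdx(kvm);
	u64 __err;

	lockdep_assert_held_write(&kvm->mmu_lock);

	__err = tdh_mem_track(&kvm_tdx->td);
	if (unlikely(tdx_operand_busy(__err))) {
		/* Kick vCPUs out and keep them out, then retry once. */
		WRITE_ONCE(__kvm_tdx->wait_for_sept_zap, true);
		kvm_make_all_cpus_request(kvm, KVM_REQ_OUTSIDE_GUEST_MODE);

		__err = tdh_mem_track(&kvm_tdx->td);

		WRITE_ONCE(__kvm_tdx->wait_for_sept_zap, false);
	}
	__err;
});
```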
```diff
@@ -1781,14 +1783,8 @@ static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
 	if (KVM_BUG_ON(level != PG_LEVEL_4K, kvm))
 		return;
 
-	err = tdh_mem_range_block(&kvm_tdx->td, gpa, tdx_level, &entry, &level_state);
-	if (unlikely(tdx_operand_busy(err))) {
-		/* After no vCPUs enter, the second retry is expected to succeed */
-		tdx_no_vcpus_enter_start(kvm);
-		err = tdh_mem_range_block(&kvm_tdx->td, gpa, tdx_level, &entry, &level_state);
-		tdx_no_vcpus_enter_stop(kvm);
-	}
-
+	err = tdh_do_no_vcpus(tdh_mem_range_block, kvm, &kvm_tdx->td, gpa,
+			      tdx_level, &entry, &level_state);
 	if (TDX_BUG_ON_2(err, TDH_MEM_RANGE_BLOCK, entry, level_state, kvm))
 		return;
 
```
```diff
@@ -1803,20 +1799,8 @@ static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
 	 * with other vcpu sept operation.
 	 * Race with TDH.VP.ENTER due to (0-step mitigation) and Guest TDCALLs.
 	 */
-	err = tdh_mem_page_remove(&kvm_tdx->td, gpa, tdx_level, &entry,
-				  &level_state);
-
-	if (unlikely(tdx_operand_busy(err))) {
-		/*
-		 * The second retry is expected to succeed after kicking off all
-		 * other vCPUs and prevent them from invoking TDH.VP.ENTER.
-		 */
-		tdx_no_vcpus_enter_start(kvm);
-		err = tdh_mem_page_remove(&kvm_tdx->td, gpa, tdx_level, &entry,
-					  &level_state);
-		tdx_no_vcpus_enter_stop(kvm);
-	}
-
+	err = tdh_do_no_vcpus(tdh_mem_page_remove, kvm, &kvm_tdx->td, gpa,
+			      tdx_level, &entry, &level_state);
 	if (TDX_BUG_ON_2(err, TDH_MEM_PAGE_REMOVE, entry, level_state, kvm))
 		return;
 
```