Skip to content

Commit 15945e9

Browse files
committed
KVM: TDX: Guard VM state transitions with "all" the locks
Acquire kvm->lock, kvm->slots_lock, and all vcpu->mutex locks when servicing ioctls that (a) transition the TD to a new state, i.e. when doing INIT or FINALIZE or (b) are only valid if the TD is in a specific state, i.e. when initializing a vCPU or memory region. Acquiring "all" the locks fixes several KVM_BUG_ON() situations where a SEAMCALL can fail due to racing actions, e.g. if tdh_vp_create() contends with either tdh_mr_extend() or tdh_mr_finalize(). For all intents and purposes, the paths in question are fully serialized, i.e. there's no reason to try and allow anything remotely interesting to happen. Smack 'em with a big hammer instead of trying to be "nice". Acquire kvm->lock to prevent VM-wide things from happening, slots_lock to prevent kvm_mmu_zap_all_fast(), and _all_ vCPU mutexes to prevent vCPUs from interefering. Use the recently-renamed kvm_arch_vcpu_unlocked_ioctl() to service the vCPU-scoped ioctls to avoid a lock inversion problem, e.g. due to taking vcpu->mutex outside kvm->lock. See also commit ecf371f ("KVM: SVM: Reject SEV{-ES} intra host migration if vCPU creation is in-flight"), which fixed a similar bug with SEV intra-host migration where an in-flight vCPU creation could race with a VM-wide state transition. Define a fancy new CLASS to handle the lock+check => unlock logic with guard()-like syntax: CLASS(tdx_vm_state_guard, guard)(kvm); if (IS_ERR(guard)) return PTR_ERR(guard); to simplify juggling the many locks. Note! Take kvm->slots_lock *after* all vcpu->mutex locks, as per KVM's soon-to-be-documented lock ordering rules[1]. 
Link: https://lore.kernel.org/all/20251016235538.171962-1-seanjc@google.com [1] Reported-by: Yan Zhao <yan.y.zhao@intel.com> Closes: https://lore.kernel.org/all/aLFiPq1smdzN3Ary@yzhao56-desk.sh.intel.com Reviewed-by: Kai Huang <kai.huang@intel.com> Reviewed-by: Yan Zhao <yan.y.zhao@intel.com> Tested-by: Yan Zhao <yan.y.zhao@intel.com> Tested-by: Kai Huang <kai.huang@intel.com> Link: https://patch.msgid.link/20251030200951.3402865-27-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc@google.com>
1 parent f26061f commit 15945e9

File tree

1 file changed

+49
-10
lines changed

1 file changed

+49
-10
lines changed

arch/x86/kvm/vmx/tdx.c

Lines changed: 49 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2653,6 +2653,46 @@ static int tdx_read_cpuid(struct kvm_vcpu *vcpu, u32 leaf, u32 sub_leaf,
26532653
return -EIO;
26542654
}
26552655

2656+
typedef void *tdx_vm_state_guard_t;
2657+
2658+
static tdx_vm_state_guard_t tdx_acquire_vm_state_locks(struct kvm *kvm)
2659+
{
2660+
int r;
2661+
2662+
mutex_lock(&kvm->lock);
2663+
2664+
if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus)) {
2665+
r = -EBUSY;
2666+
goto out_err;
2667+
}
2668+
2669+
r = kvm_lock_all_vcpus(kvm);
2670+
if (r)
2671+
goto out_err;
2672+
2673+
/*
2674+
* Note the unintuitive ordering! vcpu->mutex must be taken outside
2675+
* kvm->slots_lock!
2676+
*/
2677+
mutex_lock(&kvm->slots_lock);
2678+
return kvm;
2679+
2680+
out_err:
2681+
mutex_unlock(&kvm->lock);
2682+
return ERR_PTR(r);
2683+
}
2684+
2685+
/*
 * Drop the locks taken by tdx_acquire_vm_state_locks(), in the reverse
 * order of acquisition.  Must only be called after a successful acquire.
 */
static void tdx_release_vm_state_locks(struct kvm *kvm)
{
	mutex_unlock(&kvm->slots_lock);
	kvm_unlock_all_vcpus(kvm);
	mutex_unlock(&kvm->lock);
}
2691+
2692+
/*
 * Scoped-guard CLASS for TD state transitions.  The constructor returns
 * either @kvm or an ERR_PTR() that the caller must check with IS_ERR();
 * the destructor releases the locks only if acquisition succeeded, so an
 * error path needs no explicit unlock.  Usage:
 *
 *	CLASS(tdx_vm_state_guard, guard)(kvm);
 *	if (IS_ERR(guard))
 *		return PTR_ERR(guard);
 */
DEFINE_CLASS(tdx_vm_state_guard, tdx_vm_state_guard_t,
	     if (!IS_ERR(_T)) tdx_release_vm_state_locks(_T),
	     tdx_acquire_vm_state_locks(kvm), struct kvm *kvm);
2695+
26562696
static int tdx_td_init(struct kvm *kvm, struct kvm_tdx_cmd *cmd)
26572697
{
26582698
struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
@@ -2774,8 +2814,6 @@ static int tdx_td_finalize(struct kvm *kvm, struct kvm_tdx_cmd *cmd)
27742814
{
27752815
struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
27762816

2777-
guard(mutex)(&kvm->slots_lock);
2778-
27792817
if (!is_hkid_assigned(kvm_tdx) || kvm_tdx->state == TD_STATE_RUNNABLE)
27802818
return -EINVAL;
27812819

@@ -2819,7 +2857,9 @@ int tdx_vm_ioctl(struct kvm *kvm, void __user *argp)
28192857
if (tdx_cmd.id == KVM_TDX_CAPABILITIES)
28202858
return tdx_get_capabilities(&tdx_cmd);
28212859

2822-
guard(mutex)(&kvm->lock);
2860+
CLASS(tdx_vm_state_guard, guard)(kvm);
2861+
if (IS_ERR(guard))
2862+
return PTR_ERR(guard);
28232863

28242864
switch (tdx_cmd.id) {
28252865
case KVM_TDX_INIT_VM:
@@ -3123,8 +3163,6 @@ static int tdx_vcpu_init_mem_region(struct kvm_vcpu *vcpu, struct kvm_tdx_cmd *c
31233163
if (tdx->state != VCPU_TD_STATE_INITIALIZED)
31243164
return -EINVAL;
31253165

3126-
guard(mutex)(&kvm->slots_lock);
3127-
31283166
/* Once TD is finalized, the initial guest memory is fixed. */
31293167
if (kvm_tdx->state == TD_STATE_RUNNABLE)
31303168
return -EINVAL;
@@ -3180,20 +3218,22 @@ static int tdx_vcpu_init_mem_region(struct kvm_vcpu *vcpu, struct kvm_tdx_cmd *c
31803218

31813219
int tdx_vcpu_unlocked_ioctl(struct kvm_vcpu *vcpu, void __user *argp)
31823220
{
3183-
struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
3221+
struct kvm *kvm = vcpu->kvm;
3222+
struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
31843223
struct kvm_tdx_cmd cmd;
31853224
int r;
31863225

31873226
r = tdx_get_cmd(argp, &cmd);
31883227
if (r)
31893228
return r;
31903229

3230+
CLASS(tdx_vm_state_guard, guard)(kvm);
3231+
if (IS_ERR(guard))
3232+
return PTR_ERR(guard);
3233+
31913234
if (!is_hkid_assigned(kvm_tdx) || kvm_tdx->state == TD_STATE_RUNNABLE)
31923235
return -EINVAL;
31933236

3194-
if (mutex_lock_killable(&vcpu->mutex))
3195-
return -EINTR;
3196-
31973237
vcpu_load(vcpu);
31983238

31993239
switch (cmd.id) {
@@ -3210,7 +3250,6 @@ int tdx_vcpu_unlocked_ioctl(struct kvm_vcpu *vcpu, void __user *argp)
32103250

32113251
vcpu_put(vcpu);
32123252

3213-
mutex_unlock(&vcpu->mutex);
32143253
return r;
32153254
}
32163255

0 commit comments

Comments
 (0)