Skip to content

Commit fa4c027

Browse files
sashukla1 authored and sean-jc committed
KVM: x86: Add support for SVM's Virtual NMI
Add support for SVM's Virtual NMIs implementation, which adds proper tracking of virtual NMI blocking, and an intr_ctrl flag that software can set to mark a virtual NMI as pending. Pending virtual NMIs are serviced by hardware if/when virtual NMIs become unblocked, i.e. act more or less like real NMIs. Introduce two new kvm_x86_ops callbacks so to support SVM's vNMI, as KVM needs to treat a pending vNMI as partially injected. Specifically, if two NMIs (for L1) arrive concurrently in KVM's software model, KVM's ABI is to inject one and pend the other. Without vNMI, KVM manually tracks the pending NMI and uses NMI windows to detect when the NMI should be injected. With vNMI, the pending NMI is simply stuffed into the VMCB and handed off to hardware. This means that KVM needs to be able to set a vNMI pending on-demand, and also query if a vNMI is pending, e.g. to honor the "at most one NMI pending" rule and to preserve all NMIs across save and restore. Warn if KVM attempts to open an NMI window when vNMI is fully enabled, as the above logic should prevent KVM from ever getting to kvm_check_and_inject_events() with two NMIs pending _in software_, and the "at most one NMI pending" logic should prevent having an NMI pending in hardware and an NMI pending in software if NMIs are also blocked, i.e. if KVM can't immediately inject the second NMI. Signed-off-by: Santosh Shukla <Santosh.Shukla@amd.com> Co-developed-by: Maxim Levitsky <mlevitsk@redhat.com> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com> Link: https://lore.kernel.org/r/20230227084016.3368-11-santosh.shukla@amd.com [sean: rewrite shortlog and changelog, massage code comments] Signed-off-by: Sean Christopherson <seanjc@google.com>
1 parent bdedff2 commit fa4c027

File tree

5 files changed

+146
-23
lines changed

5 files changed

+146
-23
lines changed

arch/x86/include/asm/kvm-x86-ops.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ KVM_X86_OP(get_interrupt_shadow)
6868
KVM_X86_OP(patch_hypercall)
6969
KVM_X86_OP(inject_irq)
7070
KVM_X86_OP(inject_nmi)
71+
KVM_X86_OP_OPTIONAL_RET0(is_vnmi_pending)
72+
KVM_X86_OP_OPTIONAL_RET0(set_vnmi_pending)
7173
KVM_X86_OP(inject_exception)
7274
KVM_X86_OP(cancel_injection)
7375
KVM_X86_OP(interrupt_allowed)

arch/x86/include/asm/kvm_host.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -876,7 +876,8 @@ struct kvm_vcpu_arch {
876876
u64 tsc_scaling_ratio; /* current scaling ratio */
877877

878878
atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
879-
unsigned nmi_pending; /* NMI queued after currently running handler */
879+
/* Number of NMIs pending injection, not including hardware vNMIs. */
880+
unsigned int nmi_pending;
880881
bool nmi_injected; /* Trying to inject an NMI this entry */
881882
bool smi_pending; /* SMI queued after currently running handler */
882883
u8 handling_intr_from_guest;
@@ -1621,6 +1622,13 @@ struct kvm_x86_ops {
16211622
int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
16221623
bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
16231624
void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
1625+
/* Whether or not a virtual NMI is pending in hardware. */
1626+
bool (*is_vnmi_pending)(struct kvm_vcpu *vcpu);
1627+
/*
1628+
* Attempt to pend a virtual NMI in hardware. Returns %true on success
1629+
* to allow using static_call_ret0 as the fallback.
1630+
*/
1631+
bool (*set_vnmi_pending)(struct kvm_vcpu *vcpu);
16241632
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
16251633
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
16261634
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
@@ -2005,6 +2013,7 @@ int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
20052013
void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
20062014

20072015
void kvm_inject_nmi(struct kvm_vcpu *vcpu);
2016+
int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu);
20082017

20092018
void kvm_update_dr7(struct kvm_vcpu *vcpu);
20102019

arch/x86/kvm/svm/svm.c

Lines changed: 93 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,8 @@ module_param(dump_invalid_vmcb, bool, 0644);
230230
bool intercept_smi = true;
231231
module_param(intercept_smi, bool, 0444);
232232

233+
bool vnmi = true;
234+
module_param(vnmi, bool, 0444);
233235

234236
static bool svm_gp_erratum_intercept = true;
235237

@@ -1311,6 +1313,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
13111313
if (kvm_vcpu_apicv_active(vcpu))
13121314
avic_init_vmcb(svm, vmcb);
13131315

1316+
if (vnmi)
1317+
svm->vmcb->control.int_ctl |= V_NMI_ENABLE_MASK;
1318+
13141319
if (vgif) {
13151320
svm_clr_intercept(svm, INTERCEPT_STGI);
13161321
svm_clr_intercept(svm, INTERCEPT_CLGI);
@@ -3525,6 +3530,39 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu)
35253530
++vcpu->stat.nmi_injections;
35263531
}
35273532

3533+
static bool svm_is_vnmi_pending(struct kvm_vcpu *vcpu)
3534+
{
3535+
struct vcpu_svm *svm = to_svm(vcpu);
3536+
3537+
if (!is_vnmi_enabled(svm))
3538+
return false;
3539+
3540+
return !!(svm->vmcb->control.int_ctl & V_NMI_BLOCKING_MASK);
3541+
}
3542+
3543+
static bool svm_set_vnmi_pending(struct kvm_vcpu *vcpu)
3544+
{
3545+
struct vcpu_svm *svm = to_svm(vcpu);
3546+
3547+
if (!is_vnmi_enabled(svm))
3548+
return false;
3549+
3550+
if (svm->vmcb->control.int_ctl & V_NMI_PENDING_MASK)
3551+
return false;
3552+
3553+
svm->vmcb->control.int_ctl |= V_NMI_PENDING_MASK;
3554+
vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
3555+
3556+
/*
3557+
* Because the pending NMI is serviced by hardware, KVM can't know when
3558+
* the NMI is "injected", but for all intents and purposes, passing the
3559+
* NMI off to hardware counts as injection.
3560+
*/
3561+
++vcpu->stat.nmi_injections;
3562+
3563+
return true;
3564+
}
3565+
35283566
static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
35293567
{
35303568
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3620,6 +3658,35 @@ static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
36203658
svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
36213659
}
36223660

3661+
static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
3662+
{
3663+
struct vcpu_svm *svm = to_svm(vcpu);
3664+
3665+
if (is_vnmi_enabled(svm))
3666+
return svm->vmcb->control.int_ctl & V_NMI_BLOCKING_MASK;
3667+
else
3668+
return svm->nmi_masked;
3669+
}
3670+
3671+
/*
 * Set/clear NMI blocking.  With vNMI, blocking lives in the VMCB's
 * V_NMI_BLOCKING bit; without it, KVM tracks masking in software and
 * (un)intercepts IRET to detect when the NMI handler completes.
 */
static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (is_vnmi_enabled(svm)) {
		if (masked)
			svm->vmcb->control.int_ctl |= V_NMI_BLOCKING_MASK;
		else
			svm->vmcb->control.int_ctl &= ~V_NMI_BLOCKING_MASK;
		return;
	}

	svm->nmi_masked = masked;
	if (masked)
		svm_set_iret_intercept(svm);
	else
		svm_clr_iret_intercept(svm);
}
3689+
36233690
bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
36243691
{
36253692
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3631,8 +3698,10 @@ bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
36313698
if (is_guest_mode(vcpu) && nested_exit_on_nmi(svm))
36323699
return false;
36333700

3634-
return (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
3635-
svm->nmi_masked;
3701+
if (svm_get_nmi_mask(vcpu))
3702+
return true;
3703+
3704+
return vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK;
36363705
}
36373706

36383707
static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
@@ -3650,24 +3719,6 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
36503719
return 1;
36513720
}
36523721

3653-
static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
3654-
{
3655-
return to_svm(vcpu)->nmi_masked;
3656-
}
3657-
3658-
static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
3659-
{
3660-
struct vcpu_svm *svm = to_svm(vcpu);
3661-
3662-
if (masked) {
3663-
svm->nmi_masked = true;
3664-
svm_set_iret_intercept(svm);
3665-
} else {
3666-
svm->nmi_masked = false;
3667-
svm_clr_iret_intercept(svm);
3668-
}
3669-
}
3670-
36713722
bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
36723723
{
36733724
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3748,7 +3799,16 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
37483799
{
37493800
struct vcpu_svm *svm = to_svm(vcpu);
37503801

3751-
if (svm->nmi_masked && !svm->awaiting_iret_completion)
3802+
/*
3803+
* KVM should never request an NMI window when vNMI is enabled, as KVM
3804+
* allows at most one to-be-injected NMI and one pending NMI, i.e. if
3805+
* two NMIs arrive simultaneously, KVM will inject one and set
3806+
* V_NMI_PENDING for the other. WARN, but continue with the standard
3807+
* single-step approach to try and salvage the pending NMI.
3808+
*/
3809+
WARN_ON_ONCE(is_vnmi_enabled(svm));
3810+
3811+
if (svm_get_nmi_mask(vcpu) && !svm->awaiting_iret_completion)
37523812
return; /* IRET will cause a vm exit */
37533813

37543814
if (!gif_set(svm)) {
@@ -4797,6 +4857,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
47974857
.patch_hypercall = svm_patch_hypercall,
47984858
.inject_irq = svm_inject_irq,
47994859
.inject_nmi = svm_inject_nmi,
4860+
.is_vnmi_pending = svm_is_vnmi_pending,
4861+
.set_vnmi_pending = svm_set_vnmi_pending,
48004862
.inject_exception = svm_inject_exception,
48014863
.cancel_injection = svm_cancel_injection,
48024864
.interrupt_allowed = svm_interrupt_allowed,
@@ -5090,6 +5152,16 @@ static __init int svm_hardware_setup(void)
50905152
pr_info("Virtual GIF supported\n");
50915153
}
50925154

5155+
vnmi = vgif && vnmi && boot_cpu_has(X86_FEATURE_VNMI);
5156+
if (vnmi)
5157+
pr_info("Virtual NMI enabled\n");
5158+
5159+
if (!vnmi) {
5160+
svm_x86_ops.is_vnmi_pending = NULL;
5161+
svm_x86_ops.set_vnmi_pending = NULL;
5162+
}
5163+
5164+
50935165
if (lbrv) {
50945166
if (!boot_cpu_has(X86_FEATURE_LBRV))
50955167
lbrv = false;

arch/x86/kvm/svm/svm.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ extern bool npt_enabled;
3636
extern int vgif;
3737
extern bool intercept_smi;
3838
extern bool x2avic_enabled;
39+
extern bool vnmi;
3940

4041
/*
4142
* Clean bits in VMCB.
@@ -548,6 +549,27 @@ static inline bool is_x2apic_msrpm_offset(u32 offset)
548549
(msr < (APIC_BASE_MSR + 0x100));
549550
}
550551

552+
static inline struct vmcb *get_vnmi_vmcb_l1(struct vcpu_svm *svm)
553+
{
554+
if (!vnmi)
555+
return NULL;
556+
557+
if (is_guest_mode(&svm->vcpu))
558+
return NULL;
559+
else
560+
return svm->vmcb01.ptr;
561+
}
562+
563+
static inline bool is_vnmi_enabled(struct vcpu_svm *svm)
564+
{
565+
struct vmcb *vmcb = get_vnmi_vmcb_l1(svm);
566+
567+
if (vmcb)
568+
return !!(vmcb->control.int_ctl & V_NMI_ENABLE_MASK);
569+
else
570+
return false;
571+
}
572+
551573
/* svm.c */
552574
#define MSR_INVALID 0xffffffffU
553575

arch/x86/kvm/x86.c

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5125,7 +5125,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
51255125
events->interrupt.shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
51265126

51275127
events->nmi.injected = vcpu->arch.nmi_injected;
5128-
events->nmi.pending = vcpu->arch.nmi_pending;
5128+
events->nmi.pending = kvm_get_nr_pending_nmis(vcpu);
51295129
events->nmi.masked = static_call(kvm_x86_get_nmi_mask)(vcpu);
51305130

51315131
/* events->sipi_vector is never valid when reporting to user space */
@@ -10158,13 +10158,31 @@ static void process_nmi(struct kvm_vcpu *vcpu)
1015810158
else
1015910159
limit = 2;
1016010160

10161+
/*
10162+
* Adjust the limit to account for pending virtual NMIs, which aren't
10163+
* tracked in vcpu->arch.nmi_pending.
10164+
*/
10165+
if (static_call(kvm_x86_is_vnmi_pending)(vcpu))
10166+
limit--;
10167+
1016110168
vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
1016210169
vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
1016310170

10171+
if (vcpu->arch.nmi_pending &&
10172+
(static_call(kvm_x86_set_vnmi_pending)(vcpu)))
10173+
vcpu->arch.nmi_pending--;
10174+
1016410175
if (vcpu->arch.nmi_pending)
1016510176
kvm_make_request(KVM_REQ_EVENT, vcpu);
1016610177
}
1016710178

10179+
/* Return total number of NMIs pending injection to the VM */
10180+
int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu)
10181+
{
10182+
return vcpu->arch.nmi_pending +
10183+
static_call(kvm_x86_is_vnmi_pending)(vcpu);
10184+
}
10185+
1016810186
void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
1016910187
unsigned long *vcpu_bitmap)
1017010188
{

0 commit comments

Comments
 (0)