Skip to content

Commit a7cec20

Browse files
jsmattsonjr and sean-jc
authored and committed
KVM: x86: Provide a capability to disable APERF/MPERF read intercepts
Allow a guest to read the physical IA32_APERF and IA32_MPERF MSRs without interception.

The IA32_APERF and IA32_MPERF MSRs are not virtualized. Writes are not handled at all. The MSR values are not zeroed on vCPU creation, saved on suspend, or restored on resume. No accommodation is made for processor migration or for sharing a logical processor with other tasks. No adjustments are made for non-unit TSC multipliers. The MSRs do not account for time the same way as the comparable PMU events, whether the PMU is virtualized by the traditional emulation method or the new mediated pass-through approach.

Nonetheless, in a properly constrained environment, this capability can be combined with a guest CPUID table that advertises support for CPUID.6:ECX.APERFMPERF[bit 0] to induce a Linux guest to report the effective physical CPU frequency in /proc/cpuinfo. Moreover, there is no performance cost for this capability.

Signed-off-by: Jim Mattson <jmattson@google.com>
Link: https://lore.kernel.org/r/20250530185239.2335185-3-jmattson@google.com
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20250626001225.744268-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
1 parent 6fbef86 commit a7cec20

File tree

9 files changed

+53
-2
lines changed

9 files changed

+53
-2
lines changed

Documentation/virt/kvm/api.rst

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7844,6 +7844,7 @@ Valid bits in args[0] are::
78447844
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
78457845
#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
78467846
#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
7847+
#define KVM_X86_DISABLE_EXITS_APERFMPERF (1 << 4)
78477848

78487849
Enabling this capability on a VM provides userspace with a way to no
78497850
longer intercept some instructions for improved latency in some
@@ -7854,6 +7855,28 @@ all such vmexits.
78547855

78557856
Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.
78567857

7858+
Virtualizing the ``IA32_APERF`` and ``IA32_MPERF`` MSRs requires more
7859+
than just disabling APERF/MPERF exits. While both Intel and AMD
7860+
document strict usage conditions for these MSRs--emphasizing that only
7861+
the ratio of their deltas over a time interval (T0 to T1) is
7862+
architecturally defined--simply passing through the MSRs can still
7863+
produce an incorrect ratio.
7864+
7865+
This erroneous ratio can occur if, between T0 and T1:
7866+
7867+
1. The vCPU thread migrates between logical processors.
7868+
2. Live migration or suspend/resume operations take place.
7869+
3. Another task shares the vCPU's logical processor.
7870+
4. C-states lower than C0 are emulated (e.g., via HLT interception).
7871+
5. The guest TSC frequency doesn't match the host TSC frequency.
7872+
7873+
Due to these complexities, KVM does not automatically associate this
7874+
passthrough capability with the guest CPUID bit,
7875+
``CPUID.6:ECX.APERFMPERF[bit 0]``. Userspace VMMs that deem this
7876+
mechanism adequate for virtualizing the ``IA32_APERF`` and
7877+
``IA32_MPERF`` MSRs must set the guest CPUID bit explicitly.
7878+
7879+
78577880
7.14 KVM_CAP_S390_HPAGE_1M
78587881
--------------------------
78597882

arch/x86/kvm/svm/nested.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ void recalc_intercepts(struct vcpu_svm *svm)
194194
* Hardcode the capacity of the array based on the maximum number of _offsets_.
195195
* MSRs are batched together, so there are fewer offsets than MSRs.
196196
*/
197-
static int nested_svm_msrpm_merge_offsets[6] __ro_after_init;
197+
static int nested_svm_msrpm_merge_offsets[7] __ro_after_init;
198198
static int nested_svm_nr_msrpm_merge_offsets __ro_after_init;
199199
typedef unsigned long nsvm_msrpm_merge_t;
200200

@@ -216,6 +216,8 @@ int __init nested_svm_init_msrpm_merge_offsets(void)
216216
MSR_IA32_SPEC_CTRL,
217217
MSR_IA32_PRED_CMD,
218218
MSR_IA32_FLUSH_CMD,
219+
MSR_IA32_APERF,
220+
MSR_IA32_MPERF,
219221
MSR_IA32_LASTBRANCHFROMIP,
220222
MSR_IA32_LASTBRANCHTOIP,
221223
MSR_IA32_LASTINTFROMIP,

arch/x86/kvm/svm/svm.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -838,6 +838,11 @@ static void svm_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
838838
svm_set_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW,
839839
guest_cpuid_is_intel_compatible(vcpu));
840840

841+
if (kvm_aperfmperf_in_guest(vcpu->kvm)) {
842+
svm_disable_intercept_for_msr(vcpu, MSR_IA32_APERF, MSR_TYPE_R);
843+
svm_disable_intercept_for_msr(vcpu, MSR_IA32_MPERF, MSR_TYPE_R);
844+
}
845+
841846
if (sev_es_guest(vcpu->kvm))
842847
sev_es_recalc_msr_intercepts(vcpu);
843848

arch/x86/kvm/vmx/nested.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,12 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
715715
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
716716
MSR_IA32_FLUSH_CMD, MSR_TYPE_W);
717717

718+
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
719+
MSR_IA32_APERF, MSR_TYPE_R);
720+
721+
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
722+
MSR_IA32_MPERF, MSR_TYPE_R);
723+
718724
kvm_vcpu_unmap(vcpu, &map);
719725

720726
vmx->nested.force_msr_bitmap_recalc = false;

arch/x86/kvm/vmx/vmx.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4084,6 +4084,10 @@ void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
40844084
vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
40854085
vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
40864086
}
4087+
if (kvm_aperfmperf_in_guest(vcpu->kvm)) {
4088+
vmx_disable_intercept_for_msr(vcpu, MSR_IA32_APERF, MSR_TYPE_R);
4089+
vmx_disable_intercept_for_msr(vcpu, MSR_IA32_MPERF, MSR_TYPE_R);
4090+
}
40874091

40884092
/* PT MSRs can be passed through iff PT is exposed to the guest. */
40894093
if (vmx_pt_mode_is_host_guest())

arch/x86/kvm/x86.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4577,6 +4577,9 @@ static u64 kvm_get_allowed_disable_exits(void)
45774577
{
45784578
u64 r = KVM_X86_DISABLE_EXITS_PAUSE;
45794579

4580+
if (boot_cpu_has(X86_FEATURE_APERFMPERF))
4581+
r |= KVM_X86_DISABLE_EXITS_APERFMPERF;
4582+
45804583
if (!mitigate_smt_rsb) {
45814584
r |= KVM_X86_DISABLE_EXITS_HLT |
45824585
KVM_X86_DISABLE_EXITS_CSTATE;
@@ -6613,7 +6616,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
66136616

66146617
if (!mitigate_smt_rsb && boot_cpu_has_bug(X86_BUG_SMT_RSB) &&
66156618
cpu_smt_possible() &&
6616-
(cap->args[0] & ~KVM_X86_DISABLE_EXITS_PAUSE))
6619+
(cap->args[0] & ~(KVM_X86_DISABLE_EXITS_PAUSE |
6620+
KVM_X86_DISABLE_EXITS_APERFMPERF)))
66176621
pr_warn_once(SMT_RSB_MSG);
66186622

66196623
kvm_disable_exits(kvm, cap->args[0]);

arch/x86/kvm/x86.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,6 +524,11 @@ static inline bool kvm_cstate_in_guest(struct kvm *kvm)
524524
return kvm->arch.disabled_exits & KVM_X86_DISABLE_EXITS_CSTATE;
525525
}
526526

527+
static inline bool kvm_aperfmperf_in_guest(struct kvm *kvm)
528+
{
529+
return kvm->arch.disabled_exits & KVM_X86_DISABLE_EXITS_APERFMPERF;
530+
}
531+
527532
static inline bool kvm_notify_vmexit_enabled(struct kvm *kvm)
528533
{
529534
return kvm->arch.notify_vmexit_flags & KVM_X86_NOTIFY_VMEXIT_ENABLED;

include/uapi/linux/kvm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -644,6 +644,7 @@ struct kvm_ioeventfd {
644644
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
645645
#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
646646
#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
647+
#define KVM_X86_DISABLE_EXITS_APERFMPERF (1 << 4)
647648

648649
/* for KVM_ENABLE_CAP */
649650
struct kvm_enable_cap {

tools/include/uapi/linux/kvm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,7 @@ struct kvm_ioeventfd {
617617
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
618618
#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
619619
#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
620+
#define KVM_X86_DISABLE_EXITS_APERFMPERF (1 << 4)
620621

621622
/* for KVM_ENABLE_CAP */
622623
struct kvm_enable_cap {

0 commit comments

Comments
 (0)