Skip to content

Commit 5dcbe0d

Browse files
Dapeng Mi and sean-jc
authored and committed
x86: pmu: Optimize emulated instruction validation
For CPUs supporting the PERF_GLOBAL_CTRL MSR, the validation of emulated instructions can be improved to check against precise counts for instruction and branch events instead of a rough range. Move enabling and disabling of the PERF_GLOBAL_CTRL MSR into the kvm_fep_asm blob, so that instruction and branch events can be verified against precise counts. Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com> Link: https://lore.kernel.org/r/20250215013636.1214612-19-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc@google.com>
1 parent 28437cd commit 5dcbe0d

File tree

1 file changed

+65
-43
lines changed

1 file changed

+65
-43
lines changed

x86/pmu.c

Lines changed: 65 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,6 @@
1414

1515
#define N 1000000
1616

17-
// These values match the number of instructions and branches in the
18-
// assembly block in check_emulated_instr().
19-
#define EXPECTED_INSTR 17
20-
#define EXPECTED_BRNCH 5
21-
2217
#define IBPB_JMP_INSNS 9
2318
#define IBPB_JMP_BRANCHES 2
2419

@@ -71,6 +66,40 @@ do { \
7166
: "edi"); \
7267
} while (0)
7368

69+
/* the number of instructions and branches of the kvm_fep_asm() blob */
70+
#define KVM_FEP_INSNS 22
71+
#define KVM_FEP_BRANCHES 5
72+
73+
/*
74+
* KVM_FEP is a magic prefix that forces emulation so
75+
* 'KVM_FEP "jne label\n"' just counts as a single instruction.
76+
*/
77+
#define kvm_fep_asm(_wrmsr) \
78+
do { \
79+
asm volatile( \
80+
_wrmsr "\n\t" \
81+
"mov %%ecx, %%edi;\n\t" \
82+
"mov $0x0, %%eax;\n\t" \
83+
"cmp $0x0, %%eax;\n\t" \
84+
KVM_FEP "jne 1f\n\t" \
85+
KVM_FEP "jne 1f\n\t" \
86+
KVM_FEP "jne 1f\n\t" \
87+
KVM_FEP "jne 1f\n\t" \
88+
KVM_FEP "jne 1f\n\t" \
89+
"mov $0xa, %%eax; cpuid;\n\t" \
90+
"mov $0xa, %%eax; cpuid;\n\t" \
91+
"mov $0xa, %%eax; cpuid;\n\t" \
92+
"mov $0xa, %%eax; cpuid;\n\t" \
93+
"mov $0xa, %%eax; cpuid;\n\t" \
94+
"1: mov %%edi, %%ecx; \n\t" \
95+
"xor %%eax, %%eax; \n\t" \
96+
"xor %%edx, %%edx;\n\t" \
97+
_wrmsr "\n\t" \
98+
: \
99+
: "a"(eax), "d"(edx), "c"(ecx) \
100+
: "ebx", "edi"); \
101+
} while (0)
102+
74103
typedef struct {
75104
uint32_t ctr;
76105
uint32_t idx;
@@ -668,13 +697,15 @@ static void check_running_counter_wrmsr(void)
668697

669698
static void check_emulated_instr(void)
670699
{
700+
u32 eax, edx, ecx;
671701
uint64_t status, instr_start, brnch_start;
672702
uint64_t gp_counter_width = (1ull << pmu.gp_counter_width) - 1;
673703
unsigned int branch_idx = pmu.is_intel ?
674704
INTEL_BRANCHES_IDX : AMD_BRANCHES_IDX;
675705
unsigned int instruction_idx = pmu.is_intel ?
676706
INTEL_INSTRUCTIONS_IDX :
677707
AMD_INSTRUCTIONS_IDX;
708+
678709
pmu_counter_t brnch_cnt = {
679710
.ctr = MSR_GP_COUNTERx(0),
680711
/* branch instructions */
@@ -690,55 +721,46 @@ static void check_emulated_instr(void)
690721
if (this_cpu_has_perf_global_status())
691722
pmu_clear_global_status();
692723

693-
start_event(&brnch_cnt);
694-
start_event(&instr_cnt);
724+
__start_event(&brnch_cnt, 0);
725+
__start_event(&instr_cnt, 0);
695726

696-
brnch_start = -EXPECTED_BRNCH;
697-
instr_start = -EXPECTED_INSTR;
727+
brnch_start = -KVM_FEP_BRANCHES;
728+
instr_start = -KVM_FEP_INSNS;
698729
wrmsr(MSR_GP_COUNTERx(0), brnch_start & gp_counter_width);
699730
wrmsr(MSR_GP_COUNTERx(1), instr_start & gp_counter_width);
700-
// KVM_FEP is a magic prefix that forces emulation so
701-
// 'KVM_FEP "jne label\n"' just counts as a single instruction.
702-
asm volatile(
703-
"mov $0x0, %%eax\n"
704-
"cmp $0x0, %%eax\n"
705-
KVM_FEP "jne label\n"
706-
KVM_FEP "jne label\n"
707-
KVM_FEP "jne label\n"
708-
KVM_FEP "jne label\n"
709-
KVM_FEP "jne label\n"
710-
"mov $0xa, %%eax\n"
711-
"cpuid\n"
712-
"mov $0xa, %%eax\n"
713-
"cpuid\n"
714-
"mov $0xa, %%eax\n"
715-
"cpuid\n"
716-
"mov $0xa, %%eax\n"
717-
"cpuid\n"
718-
"mov $0xa, %%eax\n"
719-
"cpuid\n"
720-
"label:\n"
721-
:
722-
:
723-
: "eax", "ebx", "ecx", "edx");
724731

725-
if (this_cpu_has_perf_global_ctrl())
726-
wrmsr(pmu.msr_global_ctl, 0);
732+
if (this_cpu_has_perf_global_ctrl()) {
733+
eax = BIT(0) | BIT(1);
734+
ecx = pmu.msr_global_ctl;
735+
edx = 0;
736+
kvm_fep_asm("wrmsr");
737+
} else {
738+
eax = ecx = edx = 0;
739+
kvm_fep_asm("nop");
740+
}
727741

728-
stop_event(&brnch_cnt);
729-
stop_event(&instr_cnt);
742+
__stop_event(&brnch_cnt);
743+
__stop_event(&instr_cnt);
730744

731745
// Check that the end count - start count is at least the expected
732746
// number of instructions and branches.
733-
report(instr_cnt.count - instr_start >= EXPECTED_INSTR,
734-
"instruction count");
735-
report(brnch_cnt.count - brnch_start >= EXPECTED_BRNCH,
736-
"branch count");
747+
if (this_cpu_has_perf_global_ctrl()) {
748+
report(instr_cnt.count - instr_start == KVM_FEP_INSNS,
749+
"instruction count");
750+
report(brnch_cnt.count - brnch_start == KVM_FEP_BRANCHES,
751+
"branch count");
752+
} else {
753+
report(instr_cnt.count - instr_start >= KVM_FEP_INSNS,
754+
"instruction count");
755+
report(brnch_cnt.count - brnch_start >= KVM_FEP_BRANCHES,
756+
"branch count");
757+
}
758+
737759
if (this_cpu_has_perf_global_status()) {
738760
// Additionally check that those counters overflowed properly.
739761
status = rdmsr(pmu.msr_global_status);
740-
report(status & 1, "branch counter overflow");
741-
report(status & 2, "instruction counter overflow");
762+
report(status & BIT_ULL(0), "branch counter overflow");
763+
report(status & BIT_ULL(1), "instruction counter overflow");
742764
}
743765

744766
report_prefix_pop();

0 commit comments

Comments (0)