
Commit 50f8e27

Dapeng Mi authored and seanjc committed
x86: pmu: Enable and disable PMCs in loop() asm blob
Currently, enabling the PMCs, executing loop(), and disabling the PMCs are split across three separate functions, so other instructions can execute between enabling the PMCs and running loop(), or between running loop() and disabling the PMCs. For example, if multiple counters are enabled in measure_many(), the instructions that enable the 2nd and subsequent counters are counted by the 1st counter. As a result, the current implementation can only verify that a count falls within a rough range rather than matching a precise value, even for the instructions and branches events. Strictly speaking, this verification is meaningless, as the test could still pass even if KVM's vPMU is broken and reports an incorrect instructions or branches count that happens to land in the rough range. Thus, move the PMC enabling and disabling into the loop() asm blob and ensure only the loop asm instructions are counted; the instructions and branches events can then be verified against a precise count instead of a rough range.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Link: https://lore.kernel.org/r/20250215013636.1214612-13-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
1 parent 85c7557 commit 50f8e27
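For readers skimming the diff below, here is a minimal, self-contained sketch of the approach the patch takes (illustrative only, not code from the test): the WRMSR that enables the counters, the measured loop, and the WRMSR that disables them are all emitted from a single asm statement, so no compiler-generated instructions can land inside the counting window. The counted_loop() name and the 0x38f literal (IA32_PERF_GLOBAL_CTRL) are assumptions made for the sketch.

/*
 * Minimal sketch of the idea (not the test's code).  Assumes an Intel PMU
 * with IA32_PERF_GLOBAL_CTRL at MSR 0x38f.  Enabling the counters, running
 * the measured loop, and disabling the counters all happen inside one asm
 * statement, so nothing outside the loop body is counted.
 */
static inline void counted_loop(unsigned long long enable_mask, unsigned long iters)
{
        unsigned int lo = enable_mask, hi = enable_mask >> 32;

        asm volatile("wrmsr\n\t"                   /* GLOBAL_CTRL = enable_mask: counting starts */
                     "1: dec %[n]; jnz 1b\n\t"     /* the measured work */
                     "xor %%eax, %%eax\n\t"
                     "xor %%edx, %%edx\n\t"
                     "wrmsr"                       /* GLOBAL_CTRL = 0: counting stops */
                     : [n] "+r" (iters), "+a" (lo), "+d" (hi)
                     : "c" (0x38f)                 /* IA32_PERF_GLOBAL_CTRL */
                     : "memory");
}

The actual patch does the same thing via the LOOP_ASM(_wrmsr) macro, parameterized so the non-global-ctrl path can substitute a nop for the wrmsr.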


x86/pmu.c

Lines changed: 65 additions & 15 deletions
@@ -19,6 +19,15 @@
 #define EXPECTED_INSTR 17
 #define EXPECTED_BRNCH 5
 
+#define LOOP_ASM(_wrmsr)						\
+	_wrmsr "\n\t"							\
+	"mov %%ecx, %%edi; mov %%ebx, %%ecx;\n\t"			\
+	"1: mov (%1), %2; add $64, %1;\n\t"				\
+	"nop; nop; nop; nop; nop; nop; nop;\n\t"			\
+	"loop 1b;\n\t"							\
+	"mov %%edi, %%ecx; xor %%eax, %%eax; xor %%edx, %%edx;\n\t"	\
+	_wrmsr "\n\t"
+
 typedef struct {
 	uint32_t ctr;
 	uint32_t idx;
@@ -75,13 +84,43 @@ static struct pmu_event *gp_events;
 static unsigned int gp_events_size;
 static unsigned int fixed_counters_num;
 
-static inline void loop(void)
+
+static inline void __loop(void)
+{
+	unsigned long tmp, tmp2, tmp3;
+
+	asm volatile(LOOP_ASM("nop")
+		     : "=c"(tmp), "=r"(tmp2), "=r"(tmp3)
+		     : "0"(N), "1"(buf));
+}
+
+/*
+ * Enable and disable counters in a whole asm blob to ensure
+ * no other instructions are counted in the window between
+ * counters enabling and really LOOP_ASM code executing.
+ * Thus counters can verify instructions and branches events
+ * against precise counts instead of a rough valid count range.
+ */
+static inline void __precise_loop(u64 cntrs)
 {
 	unsigned long tmp, tmp2, tmp3;
+	unsigned int global_ctl = pmu.msr_global_ctl;
+	u32 eax = cntrs & (BIT_ULL(32) - 1);
+	u32 edx = cntrs >> 32;
 
-	asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b"
-			: "=c"(tmp), "=r"(tmp2), "=r"(tmp3): "0"(N), "1"(buf));
+	asm volatile(LOOP_ASM("wrmsr")
+		     : "=b"(tmp), "=r"(tmp2), "=r"(tmp3)
+		     : "a"(eax), "d"(edx), "c"(global_ctl),
+		       "0"(N), "1"(buf)
+		     : "edi");
+}
 
+static inline void loop(u64 cntrs)
+{
+	if (!this_cpu_has_perf_global_ctrl())
+		__loop();
+	else
+		__precise_loop(cntrs);
 }
 
 volatile uint64_t irq_received;
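A note on the constraints in __precise_loop() above: WRMSR takes the MSR index in ECX and the 64-bit value in EDX:EAX, which is why cntrs is split into the eax/edx operands and pmu.msr_global_ctl is passed in ecx. Because ecx now carries the MSR index, LOOP_ASM("wrmsr") stashes it in edi (hence the "edi" clobber), moves the trip count from ebx (the "=b"/"0"(N) operand) into ecx for the loop instruction, then restores ecx and zeroes eax/edx so the trailing wrmsr clears GLOBAL_CTRL and stops the counters.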
@@ -181,18 +220,17 @@ static void __start_event(pmu_counter_t *evt, uint64_t count)
 		ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
 		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
 	}
-	global_enable(evt);
 	apic_write(APIC_LVTPC, PMI_VECTOR);
 }
 
 static void start_event(pmu_counter_t *evt)
 {
 	__start_event(evt, 0);
+	global_enable(evt);
 }
 
-static void stop_event(pmu_counter_t *evt)
+static void __stop_event(pmu_counter_t *evt)
 {
-	global_disable(evt);
 	if (is_gp(evt)) {
 		wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
 		      evt->config & ~EVNTSEL_EN);
@@ -204,14 +242,24 @@ static void stop_event(pmu_counter_t *evt)
 	evt->count = rdmsr(evt->ctr);
 }
 
+static void stop_event(pmu_counter_t *evt)
+{
+	global_disable(evt);
+	__stop_event(evt);
+}
+
 static noinline void measure_many(pmu_counter_t *evt, int count)
 {
 	int i;
+	u64 cntrs = 0;
+
+	for (i = 0; i < count; i++) {
+		__start_event(&evt[i], 0);
+		cntrs |= BIT_ULL(event_to_global_idx(&evt[i]));
+	}
+	loop(cntrs);
 	for (i = 0; i < count; i++)
-		start_event(&evt[i]);
-	loop();
-	for (i = 0; i < count; i++)
-		stop_event(&evt[i]);
+		__stop_event(&evt[i]);
 }
 
 static void measure_one(pmu_counter_t *evt)
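The start/stop refactor supports this scheme: global_enable() moves out of __start_event() into start_event(), and global_disable() moves out of the old stop_event() into a new stop_event() wrapper around __stop_event(). Precise callers, measure_many() above and __measure() in the next hunk, use the __ variants and gather the enable bits into cntrs so that the asm blob itself toggles GLOBAL_CTRL, while callers that do not need precise counts keep the original start_event()/stop_event() behavior.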
@@ -221,9 +269,11 @@ static void measure_one(pmu_counter_t *evt)
 
 static noinline void __measure(pmu_counter_t *evt, uint64_t count)
 {
+	u64 cntrs = BIT_ULL(event_to_global_idx(evt));
+
 	__start_event(evt, count);
-	loop();
-	stop_event(evt);
+	loop(cntrs);
+	__stop_event(evt);
 }
 
 static bool verify_event(uint64_t count, struct pmu_event *e)
@@ -495,7 +545,7 @@ static void check_running_counter_wrmsr(void)
 	report_prefix_push("running counter wrmsr");
 
 	start_event(&evt);
-	loop();
+	__loop();
 	wrmsr(MSR_GP_COUNTERx(0), 0);
 	stop_event(&evt);
 	report(evt.count < gp_events[instruction_idx].min, "cntr");
@@ -512,7 +562,7 @@ static void check_running_counter_wrmsr(void)
 
 	wrmsr(MSR_GP_COUNTERx(0), count);
 
-	loop();
+	__loop();
 	stop_event(&evt);
 
 	if (this_cpu_has_perf_global_status()) {
@@ -653,7 +703,7 @@ static void warm_up(void)
 	 * the real verification.
 	 */
	for (i = 0; i < 10; i++)
-		loop();
+		loop(0);
 }
 
 static void check_counters(void)
