Commit 38b5b42

Dapeng Mi authored and sean-jc committed
x86: pmu: Improve LLC misses event verification
When running the pmu test on SPR, sometimes the following failure is reported:

1 <= 0 <= 1000000
FAIL: Intel: llc misses-4

Currently, whether an LLC miss occurs depends only on probability. It is possible that no LLC miss happens in the whole loop(), especially as processors ship with larger and larger caches, which is exactly what we observed on SPR. Thus, add a clflush instruction into the loop() asm blob to ensure at least one LLC miss is triggered.

Suggested-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Link: https://lore.kernel.org/r/20250215013636.1214612-15-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
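The idea behind the fix can be illustrated outside the test harness: flush the target cache line, fence so the flush completes before the reload, then touch the line again so the load is guaranteed to miss in the LLC. The sketch below is a minimal, illustrative example assuming a GCC/Clang toolchain on x86-64; force_llc_miss() and its argument are hypothetical names, not part of x86/pmu.c.

/*
 * Illustrative sketch only: evict one cache line, then reload it so the
 * access misses in the cache hierarchy (including the LLC) at least once.
 */
static inline void force_llc_miss(const void *line)
{
        asm volatile("clflush (%0)\n\t"    /* evict the line from every cache level */
                     "mfence\n\t"          /* order the flush before the reload */
                     "mov (%0), %%rax\n\t" /* reload -> guaranteed cache miss */
                     :
                     : "r"(line)
                     : "rax", "memory");
}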
1 parent 89126fa commit 38b5b42

File tree

1 file changed (+26, -13 lines)

x86/pmu.c

Lines changed: 26 additions & 13 deletions
@@ -19,19 +19,30 @@
 #define EXPECTED_INSTR 17
 #define EXPECTED_BRNCH 5
 
-/* Enable GLOBAL_CTRL + disable GLOBAL_CTRL instructions */
-#define EXTRA_INSNS (3 + 3)
+/* Enable GLOBAL_CTRL + disable GLOBAL_CTRL + clflush/mfence instructions */
+#define EXTRA_INSNS (3 + 3 + 2)
 #define LOOP_INSNS (N * 10 + EXTRA_INSNS)
 #define LOOP_BRANCHES (N)
-#define LOOP_ASM(_wrmsr)                                                \
+#define LOOP_ASM(_wrmsr, _clflush)                                      \
         _wrmsr "\n\t"                                                   \
         "mov %%ecx, %%edi; mov %%ebx, %%ecx;\n\t"                       \
+        _clflush "\n\t"                                                 \
+        "mfence;\n\t"                                                   \
         "1: mov (%1), %2; add $64, %1;\n\t"                             \
         "nop; nop; nop; nop; nop; nop; nop;\n\t"                        \
         "loop 1b;\n\t"                                                  \
         "mov %%edi, %%ecx; xor %%eax, %%eax; xor %%edx, %%edx;\n\t"     \
         _wrmsr "\n\t"
 
+#define _loop_asm(_wrmsr, _clflush)                             \
+do {                                                            \
+        asm volatile(LOOP_ASM(_wrmsr, _clflush)                 \
+                     : "=b"(tmp), "=r"(tmp2), "=r"(tmp3)        \
+                     : "a"(eax), "d"(edx), "c"(global_ctl),     \
+                       "0"(N), "1"(buf)                         \
+                     : "edi");                                  \
+} while (0)
+
 typedef struct {
         uint32_t ctr;
         uint32_t idx;
@@ -88,14 +99,17 @@ static struct pmu_event *gp_events;
 static unsigned int gp_events_size;
 static unsigned int fixed_counters_num;
 
-
 static inline void __loop(void)
 {
         unsigned long tmp, tmp2, tmp3;
+        u32 global_ctl = 0;
+        u32 eax = 0;
+        u32 edx = 0;
 
-        asm volatile(LOOP_ASM("nop")
-                     : "=c"(tmp), "=r"(tmp2), "=r"(tmp3)
-                     : "0"(N), "1"(buf));
+        if (this_cpu_has(X86_FEATURE_CLFLUSH))
+                _loop_asm("nop", "clflush (%1)");
+        else
+                _loop_asm("nop", "nop");
 }
 
 /*
@@ -108,15 +122,14 @@ static inline void __loop(void)
 static inline void __precise_loop(u64 cntrs)
 {
         unsigned long tmp, tmp2, tmp3;
-        unsigned int global_ctl = pmu.msr_global_ctl;
+        u32 global_ctl = pmu.msr_global_ctl;
         u32 eax = cntrs & (BIT_ULL(32) - 1);
         u32 edx = cntrs >> 32;
 
-        asm volatile(LOOP_ASM("wrmsr")
-                     : "=b"(tmp), "=r"(tmp2), "=r"(tmp3)
-                     : "a"(eax), "d"(edx), "c"(global_ctl),
-                       "0"(N), "1"(buf)
-                     : "edi");
+        if (this_cpu_has(X86_FEATURE_CLFLUSH))
+                _loop_asm("wrmsr", "clflush (%1)");
+        else
+                _loop_asm("wrmsr", "nop");
 }
 
 static inline void loop(u64 cntrs)