Skip to content

Commit 26dcdfa

Browse files
Maxim Levitskysean-jc
authored andcommitted
KVM: selftests: access_tracking_perf_test: Add option to skip the sanity check
Add an option to skip sanity check of number of still idle pages, and set it by default to skip, in case hypervisor or NUMA balancing is detected. Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com> Co-developed-by: James Houghton <jthoughton@google.com> Signed-off-by: James Houghton <jthoughton@google.com> Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com> Link: https://lore.kernel.org/r/20250508184649.2576210-3-jthoughton@google.com Signed-off-by: Sean Christopherson <seanjc@google.com>
1 parent d761c14 commit 26dcdfa

File tree

3 files changed

+61
-9
lines changed

3 files changed

+61
-9
lines changed

tools/testing/selftests/kvm/access_tracking_perf_test.c

Lines changed: 53 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,16 @@ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
6565
/* Whether to overlap the regions of memory vCPUs access. */
6666
static bool overlap_memory_access;
6767

68+
/*
69+
* If the test should only warn if there are too many idle pages (i.e., it is
70+
* expected).
71+
* -1: Not yet set.
72+
* 0: We do not expect too many idle pages, so FAIL if too many idle pages.
73+
* 1: Having too many idle pages is expected, so merely print a warning if
74+
* too many idle pages are found.
75+
*/
76+
static int idle_pages_warn_only = -1;
77+
6878
struct test_params {
6979
/* The backing source for the region of memory. */
7080
enum vm_mem_backing_src_type backing_src;
@@ -177,18 +187,12 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm,
177187
* arbitrary; high enough that we ensure most memory access went through
178188
* access tracking but low enough as to not make the test too brittle
179189
* over time and across architectures.
180-
*
181-
* When running the guest as a nested VM, "warn" instead of asserting
182-
* as the TLB size is effectively unlimited and the KVM doesn't
183-
* explicitly flush the TLB when aging SPTEs. As a result, more pages
184-
* are cached and the guest won't see the "idle" bit cleared.
185190
*/
186191
if (still_idle >= pages / 10) {
187-
#ifdef __x86_64__
188-
TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR),
192+
TEST_ASSERT(idle_pages_warn_only,
189193
"vCPU%d: Too many pages still idle (%lu out of %lu)",
190194
vcpu_idx, still_idle, pages);
191-
#endif
195+
192196
printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), "
193197
"this will affect performance results.\n",
194198
vcpu_idx, still_idle, pages);
@@ -328,6 +332,32 @@ static void run_test(enum vm_guest_mode mode, void *arg)
328332
memstress_destroy_vm(vm);
329333
}
330334

335+
static int access_tracking_unreliable(void)
336+
{
337+
#ifdef __x86_64__
338+
/*
339+
* When running nested, the TLB size may be effectively unlimited (for
340+
* example, this is the case when running on KVM L0), and KVM doesn't
341+
* explicitly flush the TLB when aging SPTEs. As a result, more pages
342+
* are cached and the guest won't see the "idle" bit cleared.
343+
*/
344+
if (this_cpu_has(X86_FEATURE_HYPERVISOR)) {
345+
puts("Skipping idle page count sanity check, because the test is run nested");
346+
return 1;
347+
}
348+
#endif
349+
/*
350+
* When NUMA balancing is enabled, guest memory will be unmapped to get
351+
* NUMA faults, dropping the Accessed bits.
352+
*/
353+
if (is_numa_balancing_enabled()) {
354+
puts("Skipping idle page count sanity check, because NUMA balancing is enabled");
355+
return 1;
356+
}
357+
358+
return 0;
359+
}
360+
331361
static void help(char *name)
332362
{
333363
puts("");
@@ -342,6 +372,12 @@ static void help(char *name)
342372
printf(" -v: specify the number of vCPUs to run.\n");
343373
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
344374
" them into a separate region of memory for each vCPU.\n");
375+
printf(" -w: Control whether the test warns or fails if more than 10%%\n"
376+
" of pages are still seen as idle/old after accessing guest\n"
377+
" memory. >0 == warn only, 0 == fail, <0 == auto. For auto\n"
378+
" mode, the test fails by default, but switches to warn only\n"
379+
" if NUMA balancing is enabled or the test detects it's running\n"
380+
" in a VM.\n");
345381
backing_src_help("-s");
346382
puts("");
347383
exit(0);
@@ -359,7 +395,7 @@ int main(int argc, char *argv[])
359395

360396
guest_modes_append_default();
361397

362-
while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) {
398+
while ((opt = getopt(argc, argv, "hm:b:v:os:w:")) != -1) {
363399
switch (opt) {
364400
case 'm':
365401
guest_modes_cmdline(optarg);
@@ -376,6 +412,11 @@ int main(int argc, char *argv[])
376412
case 's':
377413
params.backing_src = parse_backing_src_type(optarg);
378414
break;
415+
case 'w':
416+
idle_pages_warn_only =
417+
atoi_non_negative("Idle pages warning",
418+
optarg);
419+
break;
379420
case 'h':
380421
default:
381422
help(argv[0]);
@@ -388,6 +429,9 @@ int main(int argc, char *argv[])
388429
"CONFIG_IDLE_PAGE_TRACKING is not enabled");
389430
close(page_idle_fd);
390431

432+
if (idle_pages_warn_only == -1)
433+
idle_pages_warn_only = access_tracking_unreliable();
434+
391435
for_each_guest_mode(run_test, &params);
392436

393437
return 0;

tools/testing/selftests/kvm/include/test_util.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ bool is_backing_src_hugetlb(uint32_t i);
153153
void backing_src_help(const char *flag);
154154
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
155155
long get_run_delay(void);
156+
bool is_numa_balancing_enabled(void);
156157

157158
/*
158159
* Whether or not the given source type is shared memory (as opposed to

tools/testing/selftests/kvm/lib/test_util.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,13 @@ size_t get_trans_hugepagesz(void)
176176
return get_sysfs_val("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size");
177177
}
178178

179+
bool is_numa_balancing_enabled(void)
180+
{
181+
if (!test_sysfs_path("/proc/sys/kernel/numa_balancing"))
182+
return false;
183+
return get_sysfs_val("/proc/sys/kernel/numa_balancing") == 1;
184+
}
185+
179186
size_t get_def_hugetlb_pagesz(void)
180187
{
181188
char buf[64];

0 commit comments

Comments
 (0)