@@ -65,6 +65,16 @@ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
65
65
/* Whether to overlap the regions of memory vCPUs access. */
66
66
static bool overlap_memory_access ;
67
67
68
+ /*
69
+ * Whether finding too many idle pages should only produce a warning instead
70
+ * of failing the test (i.e., whether too many idle pages are expected).
71
+ * -1: Not yet set; main() resolves it via access_tracking_unreliable().
72
+ * 0: We do not expect too many idle pages, so FAIL if too many idle pages.
73
+ * 1: Having too many idle pages is expected, so merely print a warning if
74
+ * too many idle pages are found.
75
+ */
76
+ static int idle_pages_warn_only = -1 ;
77
+
68
78
struct test_params {
69
79
/* The backing source for the region of memory. */
70
80
enum vm_mem_backing_src_type backing_src ;
@@ -177,18 +187,12 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm,
177
187
* arbitrary; high enough that we ensure most memory access went through
178
188
* access tracking but low enough as to not make the test too brittle
179
189
* over time and across architectures.
180
- *
181
- * When running the guest as a nested VM, "warn" instead of asserting
182
- * as the TLB size is effectively unlimited and the KVM doesn't
183
- * explicitly flush the TLB when aging SPTEs. As a result, more pages
184
- * are cached and the guest won't see the "idle" bit cleared.
185
190
*/
186
191
if (still_idle >= pages / 10 ) {
187
- #ifdef __x86_64__
188
- TEST_ASSERT (this_cpu_has (X86_FEATURE_HYPERVISOR ),
192
+ TEST_ASSERT (idle_pages_warn_only ,
189
193
"vCPU%d: Too many pages still idle (%lu out of %lu)" ,
190
194
vcpu_idx , still_idle , pages );
191
- #endif
195
+
192
196
printf ("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), "
193
197
"this will affect performance results.\n" ,
194
198
vcpu_idx , still_idle , pages );
@@ -328,6 +332,32 @@ static void run_test(enum vm_guest_mode mode, void *arg)
328
332
memstress_destroy_vm (vm );
329
333
}
330
334
335
+ static int access_tracking_unreliable (void )
336
+ {
337
+ #ifdef __x86_64__
338
+ /*
339
+ * When running nested, the TLB size may be effectively unlimited (for
340
+ * example, this is the case when running on KVM L0), and KVM doesn't
341
+ * explicitly flush the TLB when aging SPTEs. As a result, more pages
342
+ * are cached and the guest won't see the "idle" bit cleared.
343
+ */
344
+ if (this_cpu_has (X86_FEATURE_HYPERVISOR )) {
345
+ puts ("Skipping idle page count sanity check, because the test is run nested" );
346
+ return 1 ;
347
+ }
348
+ #endif
349
+ /*
350
+ * When NUMA balancing is enabled, guest memory will be unmapped to get
351
+ * NUMA faults, dropping the Accessed bits.
352
+ */
353
+ if (is_numa_balancing_enabled ()) {
354
+ puts ("Skipping idle page count sanity check, because NUMA balancing is enabled" );
355
+ return 1 ;
356
+ }
357
+
358
+ return 0 ;
359
+ }
360
+
331
361
static void help (char * name )
332
362
{
333
363
puts ("" );
@@ -342,6 +372,12 @@ static void help(char *name)
342
372
printf (" -v: specify the number of vCPUs to run.\n" );
343
373
printf (" -o: Overlap guest memory accesses instead of partitioning\n"
344
374
" them into a separate region of memory for each vCPU.\n" );
375
+ printf (" -w: Control whether the test warns or fails if more than 10%%\n"
376
+ " of pages are still seen as idle/old after accessing guest\n"
377
+ " memory. >0 == warn only, 0 == fail, <0 == auto. For auto\n"
378
+ " mode, the test fails by default, but switches to warn only\n"
379
+ " if NUMA balancing is enabled or the test detects it's running\n"
380
+ " in a VM.\n" );
345
381
backing_src_help ("-s" );
346
382
puts ("" );
347
383
exit (0 );
@@ -359,7 +395,7 @@ int main(int argc, char *argv[])
359
395
360
396
guest_modes_append_default ();
361
397
362
- while ((opt = getopt (argc , argv , "hm:b:v:os:" )) != -1 ) {
398
+ while ((opt = getopt (argc , argv , "hm:b:v:os:w: " )) != -1 ) {
363
399
switch (opt ) {
364
400
case 'm' :
365
401
guest_modes_cmdline (optarg );
@@ -376,6 +412,11 @@ int main(int argc, char *argv[])
376
412
case 's' :
377
413
params .backing_src = parse_backing_src_type (optarg );
378
414
break ;
415
+ case 'w' :
416
+ idle_pages_warn_only =
417
+ atoi_non_negative ("Idle pages warning" ,
418
+ optarg );
419
+ break ;
379
420
case 'h' :
380
421
default :
381
422
help (argv [0 ]);
@@ -388,6 +429,9 @@ int main(int argc, char *argv[])
388
429
"CONFIG_IDLE_PAGE_TRACKING is not enabled" );
389
430
close (page_idle_fd );
390
431
432
+ if (idle_pages_warn_only == -1 )
433
+ idle_pages_warn_only = access_tracking_unreliable ();
434
+
391
435
for_each_guest_mode (run_test , & params );
392
436
393
437
return 0 ;
0 commit comments