/*
 * Synthetic micro-benchmarking of slab bulk
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/time.h>
#include <linux/time_bench.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/skbuff.h>

static int verbose = 1;

/* If SLAB debugging is enabled, the per-object cost is roughly a
 * factor of 500-1000 slower.  Thus, adjust the default number of
 * loops in case CONFIG_SLUB_DEBUG_ON=y.
 */
#if defined(CONFIG_SLUB_DEBUG_ON) || defined(CONFIG_DEBUG_SLAB)
# define DEFAULT_LOOPS 10000
#else
# define DEFAULT_LOOPS 10000000
#endif
static uint32_t loops = DEFAULT_LOOPS;
module_param(loops, uint, 0);
MODULE_PARM_DESC(loops, "Parameter for loops in bench");
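
/* Example usage (the module name slab_bulk_test01 is an assumption,
 * derived from the init function name below):
 *
 *   insmod slab_bulk_test01.ko loops=100000
 */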
struct my_elem {
	/* element used for benchmark testing */
	struct sk_buff skb;
};

/* When timing at the nanosecond level, we need to know the overhead
 * introduced by the for loop itself.
 */
static int time_bench_for_loop(
	struct time_bench_record *rec, void *data)
{
	int i;
	uint64_t loops_cnt = 0;

	time_bench_start(rec);
	/** Loop to measure **/
	for (i = 0; i < rec->loops; i++) {
		loops_cnt++;
		barrier(); /* prevent the compiler from optimizing out the loop */
	}
	time_bench_stop(rec, loops_cnt);
	return loops_cnt;
}
/* For comparison, benchmark against the fastpath of the slab/kmem_cache
 * allocator: each iteration allocates and immediately frees the same object.
 */
static int benchmark_kmem_cache_fastpath_reuse(
	struct time_bench_record *rec, void *data)
{
	uint64_t loops_cnt = 0;
	int i;
	struct my_elem *elem;
	struct kmem_cache *slab;

	slab = kmem_cache_create("slab_bench_test1", sizeof(struct my_elem),
				 0, SLAB_HWCACHE_ALIGN, NULL);

	time_bench_start(rec);
	/** Loop to measure **/
	for (i = 0; i < rec->loops; i++) {
		/* request new elem */
		elem = kmem_cache_alloc(slab, GFP_ATOMIC);
		if (elem == NULL)
			goto out;
		barrier(); /* compiler barrier */
		/* return elem */
		kmem_cache_free(slab, elem);
		/* NOTICE: THIS COUNTS alloc+free together */
		loops_cnt++;
	}
out:
	time_bench_stop(rec, loops_cnt);
	/* cleanup */
	kmem_cache_destroy(slab);
	return loops_cnt;
}
/* Fallback versions are copy-pasted here, as they are defined in
 * slab_common, which we cannot link with.
 *
 * Force them to be noinline, as the current patch for slab_common
 * causes them to be a function call, to keep the comparison the same.
 */
noinline
void my__kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
{
	size_t i;

	for (i = 0; i < nr; i++)
		kmem_cache_free(s, p[i]);
}

noinline
bool my__kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
			       void **p)
{
	size_t i;

	for (i = 0; i < nr; i++) {
		void *x = p[i] = kmem_cache_alloc(s, flags);

		if (!x) {
			my__kmem_cache_free_bulk(s, i, p);
			return false;
		}
	}
	return true;
}
static int benchmark_slab_fallback_bulk(
	struct time_bench_record *rec, void *data)
{
#define MAX_BULK 250
	void *objs[MAX_BULK];
	uint64_t loops_cnt = 0;
	int i;
	bool success;
	struct kmem_cache *slab;
	size_t bulk = rec->step;

	if (bulk > MAX_BULK) {
		pr_warn("%s() bulk(%zu) request too big, capping at %d\n",
			__func__, bulk, MAX_BULK);
		bulk = MAX_BULK;
	}
	/* loop count is limited to 32-bit due to div_u64_rem() use */
	if (((uint64_t)rec->loops * bulk * 2) >= ((1ULL << 32) - 1)) {
		pr_err("Loop cnt too big, will overflow 32-bit\n");
		return 0;
	}

	slab = kmem_cache_create("slab_bench_test2", sizeof(struct my_elem),
				 0, SLAB_HWCACHE_ALIGN, NULL);

	time_bench_start(rec);
	/** Loop to measure **/
	for (i = 0; i < rec->loops; i++) {
		/* request bulk elems */
		success = my__kmem_cache_alloc_bulk(slab, GFP_ATOMIC, bulk,
						    objs);
		if (!success)
			goto out;
		barrier(); /* compiler barrier */
		/* bulk return elems */
		my__kmem_cache_free_bulk(slab, bulk, objs);
		/* NOTICE: THIS COUNTS (bulk) alloc+free together */
		loops_cnt += bulk;
	}
out:
	time_bench_stop(rec, loops_cnt);
	/* cleanup */
	kmem_cache_destroy(slab);
	return loops_cnt;
#undef MAX_BULK
}
static int benchmark_slab_bulk01(
	struct time_bench_record *rec, void *data)
{
#define MAX_BULK 250
	void *objs[MAX_BULK];
	uint64_t loops_cnt = 0;
	int i;
	bool success;
	struct kmem_cache *slab;
	size_t bulk = rec->step;

	if (bulk > MAX_BULK) {
		pr_warn("%s() bulk(%zu) request too big, capping at %d\n",
			__func__, bulk, MAX_BULK);
		bulk = MAX_BULK;
	}
	/* loop count is limited to 32-bit due to div_u64_rem() use */
	if (((uint64_t)rec->loops * bulk * 2) >= ((1ULL << 32) - 1)) {
		pr_err("Loop cnt too big, will overflow 32-bit\n");
		return 0;
	}

	slab = kmem_cache_create("slab_bench_test3", sizeof(struct my_elem),
				 0, SLAB_HWCACHE_ALIGN, NULL);

	time_bench_start(rec);
	/** Loop to measure **/
	for (i = 0; i < rec->loops; i++) {
		/* request bulk elems */
		success = kmem_cache_alloc_bulk(slab, GFP_ATOMIC, bulk, objs);
		if (!success)
			goto out;
		barrier(); /* compiler barrier */
		/* bulk return elems */
		kmem_cache_free_bulk(slab, bulk, objs);
		/* NOTICE: THIS COUNTS (bulk) alloc+free together */
		loops_cnt += bulk;
	}
out:
	time_bench_stop(rec, loops_cnt);
	/* cleanup */
	kmem_cache_destroy(slab);
	return loops_cnt;
#undef MAX_BULK
}
void bulk_test(int bulk)
{
	time_bench_loop(loops/bulk, bulk, "kmem bulk_fallback", NULL,
			benchmark_slab_fallback_bulk);
	cond_resched();
	time_bench_loop(loops/bulk, bulk, "kmem bulk_quick_reuse", NULL,
			benchmark_slab_bulk01);
	cond_resched();
}
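
/* Note: each bulk size runs loops/bulk iterations, so the total number
 * of objects allocated and freed stays roughly constant across bulk
 * sizes, keeping the per-object cost comparable between runs.
 */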
int run_timing_tests(void)
{
	time_bench_loop(loops*10, 0, "for_loop",
			NULL, time_bench_for_loop);
	time_bench_loop(loops, 0, "kmem fastpath reuse", NULL,
			benchmark_kmem_cache_fastpath_reuse);
	bulk_test(1);
	bulk_test(2);
	bulk_test(3);
	bulk_test(4);
	bulk_test(8);
	bulk_test(16);
	bulk_test(30);
	bulk_test(32);
	bulk_test(34);
	bulk_test(48);
	bulk_test(64);
	bulk_test(128);
	bulk_test(128+30);
	bulk_test(250);
	return 0;
}
static int __init slab_bulk_test01_module_init(void)
{
	if (verbose)
		pr_info("Loaded\n");

	preempt_disable();
	pr_info("DEBUG: cpu:%d\n", smp_processor_id());
	preempt_enable();

#ifdef CONFIG_DEBUG_PREEMPT
	pr_warn("WARN: CONFIG_DEBUG_PREEMPT is enabled: this affects results\n");
#endif
#ifdef CONFIG_PREEMPT
	pr_warn("INFO: CONFIG_PREEMPT is enabled\n");
#endif
#ifdef CONFIG_PREEMPT_COUNT
	pr_warn("INFO: CONFIG_PREEMPT_COUNT is enabled\n");
#endif

	if (run_timing_tests() < 0)
		return -ECANCELED;

	return 0;
}
module_init(slab_bulk_test01_module_init);

static void __exit slab_bulk_test01_module_exit(void)
{
	if (verbose)
		pr_info("Unloaded\n");
}
module_exit(slab_bulk_test01_module_exit);

MODULE_DESCRIPTION("Synthetic micro-benchmarking of slab bulk");
MODULE_AUTHOR("Jesper Dangaard Brouer <netoptimizer@brouer.com>");
MODULE_LICENSE("GPL");