|
| 1 | +/* |
| 2 | + * Concurrency/parallel benchmark module for linux/skb_array.h |
| 3 | + */ |
| 4 | +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
| 5 | + |
#include <linux/module.h>
#include <linux/cpumask.h>
#include <linux/slab.h>
#include <linux/time_bench.h>
#include <linux/skb_array.h>
| 9 | + |
| 10 | +static int verbose=1; |
| 11 | + |
| 12 | +//static int parallel_cpus = 0; /* disable many CPUs test default */ |
| 13 | +static int parallel_cpus = 4; |
| 14 | +module_param(parallel_cpus, uint, 0); |
| 15 | +MODULE_PARM_DESC(parallel_cpus, "Number of parallel CPUs (default 4)"); |
| 16 | + |
| 17 | +/* This is the main benchmark function. |
| 18 | + * |
| 19 | + * lib/time_bench.c:time_bench_run_concurrent() sync concurrent execution |
| 20 | + * |
| 21 | + * Notice this function is called by different CPUs, and the enq/deq |
| 22 | + * behavior is dependend on CPU id number. |
| 23 | + */ |
| 24 | +static int time_bench_CPU_enq_or_deq( |
| 25 | + struct time_bench_record *rec, void *data) |
| 26 | +{ |
| 27 | + struct skb_array *queue = (struct skb_array*)data; |
| 28 | + struct sk_buff *skb, *nskb; |
| 29 | + uint64_t loops_cnt = 0; |
| 30 | + int i; |
| 31 | + |
| 32 | + bool enq_CPU = false; |
| 33 | + |
| 34 | + /* Split CPU between enq/deq based on even/odd */ |
| 35 | + if ((smp_processor_id() % 2)== 0) |
| 36 | + enq_CPU = true; |
| 37 | + |
| 38 | + /* Hack: use "step" to mark enq/deq, as "step" gets printed */ |
| 39 | + rec->step = enq_CPU; |
| 40 | + |
| 41 | + /* Fake pointer value to enqueue */ |
| 42 | + skb = (struct sk_buff *)(unsigned long)42; |
| 43 | + |
| 44 | + if (queue == NULL) { |
| 45 | + pr_err("Need queue ptr as input\n"); |
| 46 | + return 0; |
| 47 | + } |
| 48 | + /* loop count is limited to 32-bit due to div_u64_rem() use */ |
| 49 | + if (((uint64_t)rec->loops * 2) >= ((1ULL<<32)-1)) { |
| 50 | + pr_err("Loop cnt too big will overflow 32-bit\n"); |
| 51 | + return 0; |
| 52 | + } |
| 53 | + |
| 54 | + time_bench_start(rec); |
| 55 | + /** Loop to measure **/ |
| 56 | + for (i = 0; i < rec->loops; i++) { |
| 57 | + |
| 58 | + if (enq_CPU) { |
| 59 | + /* enqueue side */ |
| 60 | + if (skb_array_produce_bh(queue, skb) < 0) { |
| 61 | + pr_err("%s() WARN: enq fullq(CPU:%d) i:%d\n", |
| 62 | + __func__, smp_processor_id(), i); |
| 63 | + goto finish_early; |
| 64 | + } |
| 65 | + } else { |
| 66 | + /* dequeue side */ |
| 67 | + nskb = skb_array_consume_bh(queue); |
| 68 | + if (nskb == NULL) { |
| 69 | + pr_err("%s() WARN: deq emptyq (CPU:%d) i:%d\n", |
| 70 | + __func__, smp_processor_id(), i); |
| 71 | + goto finish_early; |
| 72 | + } |
| 73 | + } |
| 74 | + loops_cnt++; |
| 75 | + barrier(); /* compiler barrier */ |
| 76 | + } |
| 77 | +finish_early: |
| 78 | + time_bench_stop(rec, loops_cnt); |
| 79 | + |
| 80 | + return loops_cnt; |
| 81 | +} |
| 82 | + |
| 83 | + |
| 84 | +int run_parallel(const char *desc, uint32_t loops, const cpumask_t *cpumask, |
| 85 | + int step, void *data, |
| 86 | + int (*func)(struct time_bench_record *record, void *data) |
| 87 | + ) |
| 88 | +{ |
| 89 | + struct time_bench_sync sync; |
| 90 | + struct time_bench_cpu *cpu_tasks; |
| 91 | + size_t size; |
| 92 | + |
| 93 | + /* Allocate records for every CPU */ |
| 94 | + size = sizeof(*cpu_tasks) * num_possible_cpus(); |
| 95 | + cpu_tasks = kzalloc(size, GFP_KERNEL); |
| 96 | + |
| 97 | + time_bench_run_concurrent(loops, step, data, |
| 98 | + cpumask, &sync, cpu_tasks, func); |
| 99 | + time_bench_print_stats_cpumask(desc, cpu_tasks, cpumask); |
| 100 | + |
| 101 | + kfree(cpu_tasks); |
| 102 | + return 1; |
| 103 | +} |
| 104 | + |
| 105 | +bool init_queue(struct skb_array *queue, int q_size, int prefill) |
| 106 | +{ |
| 107 | + struct sk_buff *skb; |
| 108 | + int result, i; |
| 109 | + |
| 110 | + /* Allocate and prefill skb_array queue |
| 111 | + */ |
| 112 | + result = skb_array_init(queue, q_size, GFP_KERNEL); |
| 113 | + if (result < 0) { |
| 114 | + pr_err("%s() err creating skb_array queue size:%d\n", |
| 115 | + __func__, q_size); |
| 116 | + return false; |
| 117 | + } |
| 118 | + /* Fake pointer value to enqueue */ |
| 119 | + skb = (struct sk_buff *)(unsigned long)42; |
| 120 | + /* IMPORTANT: |
| 121 | + * Prefill with objects, in-order to keep enough distance |
| 122 | + * between producer and consumer, so the benchmark does not |
| 123 | + * run dry of objects to dequeue. |
| 124 | + */ |
| 125 | + for (i = 0; i < prefill; i++) { |
| 126 | + if (skb_array_produce_bh(queue, skb) < 0) { |
| 127 | + pr_err("%s() err cannot prefill:%d sz:%d\n", |
| 128 | + __func__, prefill, q_size); |
| 129 | + skb_array_cleanup(queue); |
| 130 | + return false; |
| 131 | + } |
| 132 | + } |
| 133 | + |
| 134 | + return true; |
| 135 | +} |
| 136 | + |
| 137 | +void noinline run_parallel_two_CPUs(uint32_t loops, int q_size, int prefill) |
| 138 | +{ |
| 139 | + struct skb_array *queue; |
| 140 | + cpumask_t cpumask; |
| 141 | + |
| 142 | + queue = kzalloc(sizeof(*queue), GFP_KERNEL); |
| 143 | + |
| 144 | + /* Restrict the CPUs to run on |
| 145 | + */ |
| 146 | + cpumask_clear(&cpumask); |
| 147 | + cpumask_set_cpu(0, &cpumask); |
| 148 | + cpumask_set_cpu(1, &cpumask); |
| 149 | + |
| 150 | + if (!init_queue(queue, q_size, prefill)) |
| 151 | + goto fail; |
| 152 | + |
| 153 | + run_parallel("skb_array_parallel_two_CPUs", |
| 154 | + loops, &cpumask, 0, queue, |
| 155 | + time_bench_CPU_enq_or_deq); |
| 156 | + |
| 157 | + skb_array_cleanup(queue); |
| 158 | +fail: |
| 159 | + kfree(queue); |
| 160 | +} |
| 161 | + |
| 162 | +void noinline run_parallel_many_CPUs(uint32_t loops, int q_size, int prefill) |
| 163 | +{ |
| 164 | + struct skb_array *queue; |
| 165 | + cpumask_t cpumask; |
| 166 | + int i; |
| 167 | + |
| 168 | + /* This test is dependend on module parm */ |
| 169 | + if (parallel_cpus == 0) |
| 170 | + return; |
| 171 | + |
| 172 | + queue = kzalloc(sizeof(*queue), GFP_KERNEL); |
| 173 | + |
| 174 | + /* Restrict the CPUs to run on, depending on |
| 175 | + * global module parameter: parallel_cpus |
| 176 | + */ |
| 177 | + if (verbose) |
| 178 | + pr_info("Limit to %d parallel CPUs\n", parallel_cpus); |
| 179 | + cpumask_clear(&cpumask); |
| 180 | + for (i = 0; i < parallel_cpus ; i++) { |
| 181 | + cpumask_set_cpu(i, &cpumask); |
| 182 | + } |
| 183 | + |
| 184 | + if (!init_queue(queue, q_size, prefill)) |
| 185 | + goto fail; |
| 186 | + |
| 187 | + run_parallel("skb_array_parallel_many_CPUs", |
| 188 | + loops, &cpumask, 0, queue, |
| 189 | + time_bench_CPU_enq_or_deq); |
| 190 | + |
| 191 | + skb_array_cleanup(queue); |
| 192 | +fail: |
| 193 | + kfree(queue); |
| 194 | +} |
| 195 | + |
| 196 | + |
| 197 | +int run_benchmark_tests(void) |
| 198 | +{ |
| 199 | + /* ADJUST: These likely need some adjustments on different |
| 200 | + * systems, else the tests likely cannot "complete", because |
| 201 | + * the CPUs catchup to each-other. |
| 202 | + * |
| 203 | + * The benchmark will stop as soon as the CPUs catchup, either |
| 204 | + * when the queue is full, or the queue is empty. |
| 205 | + * |
| 206 | + * If the test does not complete the number of "loops", then |
| 207 | + * the results are still showed, but a WARNing is printed |
| 208 | + * indicating how many interations were completed. Thus, you |
| 209 | + * can judge if the results are valid. |
| 210 | + */ |
| 211 | + uint32_t loops = 1000000; |
| 212 | + int prefill = 32000; |
| 213 | + int q_size = 64000; |
| 214 | + |
| 215 | + if (verbose) |
| 216 | + pr_info("For 'skb_array_parallel_two_CPUs'" |
| 217 | + " step = enq(1)/deq(0)" |
| 218 | + ", cost is either enqueue or dequeue\n"); |
| 219 | + |
| 220 | + run_parallel_two_CPUs(loops, q_size, prefill); |
| 221 | + |
| 222 | + run_parallel_many_CPUs(loops, q_size, prefill); |
| 223 | + |
| 224 | + return 0; |
| 225 | +} |
| 226 | + |
| 227 | +static int __init skb_array_parallel01_module_init(void) |
| 228 | +{ |
| 229 | + if (verbose) |
| 230 | + pr_info("Loaded\n"); |
| 231 | + |
| 232 | + if (run_benchmark_tests() < 0) { |
| 233 | + return -ECANCELED; |
| 234 | + } |
| 235 | + |
| 236 | + return 0; |
| 237 | +} |
| 238 | +module_init(skb_array_parallel01_module_init); |
| 239 | + |
| 240 | +static void __exit skb_array_parallel01_module_exit(void) |
| 241 | +{ |
| 242 | + if (verbose) |
| 243 | + pr_info("Unloaded\n"); |
| 244 | +} |
| 245 | +module_exit(skb_array_parallel01_module_exit); |
| 246 | + |
| 247 | +MODULE_DESCRIPTION("Concurrency/parallel benchmarking of skb_array"); |
| 248 | +MODULE_AUTHOR("Jesper Dangaard Brouer <netoptimizer@brouer.com>"); |
| 249 | +MODULE_LICENSE("GPL"); |
0 commit comments