Skip to content

Commit 75fe31e

Browse files
committed
skb_array_parallel01: parallel benchmark module for linux/skb_array.h
This benchmark tries to keep two (or more) CPUs busy enqueuing or dequeuing on the same skb_array queue. It prefills the queue, and stops the test as soon as the queue is empty or full, or once it completes a number of "loops"/cycles. The objects used in the queue are "fake" objects; thus, we can keep up an eternal enq+deq cycle without needing to handle objects. This also keeps the benchmark focused on the queue itself. The skb_array queue should behave well in the case of two CPUs with a single producer (enq) and a single consumer (deq), as the producer/consumer parts of struct skb_array are kept on separate cachelines. Plus, as soon as the queue size is above 8 (on 64-bit), the array memory usage (8*8 = 64 bytes) is above one cache-line, thus the producer/consumer no longer bounce the array cacheline. Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
1 parent b16a333 commit 75fe31e

File tree

2 files changed

+251
-1
lines changed

2 files changed

+251
-1
lines changed

kernel/lib/Kbuild

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,5 @@ obj-$(CONFIG_RING_QUEUE) += ring_queue.o
2525
obj-$(CONFIG_RING_QUEUE_TESTS) += ring_queue_test.o
2626

2727
obj-$(CONFIG_SKB_ARRAY_TESTS) += skb_array_test01.o
28-
obj-$(CONFIG_SKB_ARRAY_TESTS) += skb_array_bench01.o
28+
obj-$(CONFIG_SKB_ARRAY_TESTS) += skb_array_bench01.o
29+
obj-$(CONFIG_SKB_ARRAY_TESTS) += skb_array_parallel01.o

kernel/lib/skb_array_parallel01.c

Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
/*
 * Concurrency/parallel benchmark module for linux/skb_array.h
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/time_bench.h>
#include <linux/skb_array.h>

static int verbose = 1;

/* Number of CPUs participating in the "many CPUs" test (0 disables it) */
static int parallel_cpus = 4;
/* BUG fix: the type given to module_param() was "uint", but the variable
 * is a signed int; module_param()'s type-check macro requires them to match.
 */
module_param(parallel_cpus, int, 0);
MODULE_PARM_DESC(parallel_cpus, "Number of parallel CPUs (default 4)");
16+
17+
/* This is the main benchmark function.
18+
*
19+
* lib/time_bench.c:time_bench_run_concurrent() sync concurrent execution
20+
*
21+
* Notice this function is called by different CPUs, and the enq/deq
22+
* behavior is dependend on CPU id number.
23+
*/
24+
static int time_bench_CPU_enq_or_deq(
25+
struct time_bench_record *rec, void *data)
26+
{
27+
struct skb_array *queue = (struct skb_array*)data;
28+
struct sk_buff *skb, *nskb;
29+
uint64_t loops_cnt = 0;
30+
int i;
31+
32+
bool enq_CPU = false;
33+
34+
/* Split CPU between enq/deq based on even/odd */
35+
if ((smp_processor_id() % 2)== 0)
36+
enq_CPU = true;
37+
38+
/* Hack: use "step" to mark enq/deq, as "step" gets printed */
39+
rec->step = enq_CPU;
40+
41+
/* Fake pointer value to enqueue */
42+
skb = (struct sk_buff *)(unsigned long)42;
43+
44+
if (queue == NULL) {
45+
pr_err("Need queue ptr as input\n");
46+
return 0;
47+
}
48+
/* loop count is limited to 32-bit due to div_u64_rem() use */
49+
if (((uint64_t)rec->loops * 2) >= ((1ULL<<32)-1)) {
50+
pr_err("Loop cnt too big will overflow 32-bit\n");
51+
return 0;
52+
}
53+
54+
time_bench_start(rec);
55+
/** Loop to measure **/
56+
for (i = 0; i < rec->loops; i++) {
57+
58+
if (enq_CPU) {
59+
/* enqueue side */
60+
if (skb_array_produce_bh(queue, skb) < 0) {
61+
pr_err("%s() WARN: enq fullq(CPU:%d) i:%d\n",
62+
__func__, smp_processor_id(), i);
63+
goto finish_early;
64+
}
65+
} else {
66+
/* dequeue side */
67+
nskb = skb_array_consume_bh(queue);
68+
if (nskb == NULL) {
69+
pr_err("%s() WARN: deq emptyq (CPU:%d) i:%d\n",
70+
__func__, smp_processor_id(), i);
71+
goto finish_early;
72+
}
73+
}
74+
loops_cnt++;
75+
barrier(); /* compiler barrier */
76+
}
77+
finish_early:
78+
time_bench_stop(rec, loops_cnt);
79+
80+
return loops_cnt;
81+
}
82+
83+
84+
/* Run "func" concurrently on all CPUs in "cpumask" via the time_bench
 * framework, then print per-CPU statistics.
 *
 * @desc:    label used when printing stats
 * @loops:   requested iterations per CPU
 * @cpumask: set of CPUs to schedule benchmark threads on
 * @step:    opaque value stored in each record (printed with stats)
 * @data:    passed through to @func (here: the shared skb_array)
 * @func:    per-CPU benchmark function
 *
 * Returns 1 on success, 0 on allocation failure.
 */
static int run_parallel(const char *desc, uint32_t loops,
			const cpumask_t *cpumask,
			int step, void *data,
			int (*func)(struct time_bench_record *record,
				    void *data))
{
	struct time_bench_sync sync;
	struct time_bench_cpu *cpu_tasks;
	size_t size;

	/* Allocate records for every possible CPU */
	size = sizeof(*cpu_tasks) * num_possible_cpus();
	cpu_tasks = kzalloc(size, GFP_KERNEL);
	/* BUG fix: allocation result was used unchecked (NULL deref on OOM) */
	if (!cpu_tasks)
		return 0;

	time_bench_run_concurrent(loops, step, data,
				  cpumask, &sync, cpu_tasks, func);
	time_bench_print_stats_cpumask(desc, cpu_tasks, cpumask);

	kfree(cpu_tasks);
	return 1;
}
104+
105+
bool init_queue(struct skb_array *queue, int q_size, int prefill)
106+
{
107+
struct sk_buff *skb;
108+
int result, i;
109+
110+
/* Allocate and prefill skb_array queue
111+
*/
112+
result = skb_array_init(queue, q_size, GFP_KERNEL);
113+
if (result < 0) {
114+
pr_err("%s() err creating skb_array queue size:%d\n",
115+
__func__, q_size);
116+
return false;
117+
}
118+
/* Fake pointer value to enqueue */
119+
skb = (struct sk_buff *)(unsigned long)42;
120+
/* IMPORTANT:
121+
* Prefill with objects, in-order to keep enough distance
122+
* between producer and consumer, so the benchmark does not
123+
* run dry of objects to dequeue.
124+
*/
125+
for (i = 0; i < prefill; i++) {
126+
if (skb_array_produce_bh(queue, skb) < 0) {
127+
pr_err("%s() err cannot prefill:%d sz:%d\n",
128+
__func__, prefill, q_size);
129+
skb_array_cleanup(queue);
130+
return false;
131+
}
132+
}
133+
134+
return true;
135+
}
136+
137+
/* Benchmark with exactly two CPUs: CPU0 enqueues, CPU1 dequeues
 * (the even/odd split happens inside time_bench_CPU_enq_or_deq).
 */
static void noinline run_parallel_two_CPUs(uint32_t loops, int q_size,
					   int prefill)
{
	struct skb_array *queue;
	cpumask_t cpumask;

	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
	/* BUG fix: allocation result was used unchecked (NULL deref on OOM) */
	if (!queue)
		return;

	/* Restrict the CPUs to run on */
	cpumask_clear(&cpumask);
	cpumask_set_cpu(0, &cpumask);
	cpumask_set_cpu(1, &cpumask);

	if (!init_queue(queue, q_size, prefill))
		goto fail;

	run_parallel("skb_array_parallel_two_CPUs",
		     loops, &cpumask, 0, queue,
		     time_bench_CPU_enq_or_deq);

	skb_array_cleanup(queue);
fail:
	kfree(queue);
}
161+
162+
/* Benchmark with "parallel_cpus" CPUs (module parameter); even CPU ids
 * enqueue and odd CPU ids dequeue against the same queue.
 */
static void noinline run_parallel_many_CPUs(uint32_t loops, int q_size,
					    int prefill)
{
	struct skb_array *queue;
	cpumask_t cpumask;
	int i;

	/* This test is dependent on the module parameter */
	if (parallel_cpus == 0)
		return;

	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
	/* BUG fix: allocation result was used unchecked (NULL deref on OOM) */
	if (!queue)
		return;

	/* Restrict the CPUs to run on, depending on
	 * global module parameter: parallel_cpus
	 */
	if (verbose)
		pr_info("Limit to %d parallel CPUs\n", parallel_cpus);
	cpumask_clear(&cpumask);
	for (i = 0; i < parallel_cpus; i++) {
		/* BUG fix: guard against parallel_cpus exceeding the number
		 * of valid CPU ids; setting bits >= nr_cpu_ids corrupts the
		 * cpumask.
		 */
		if (i >= nr_cpu_ids)
			break;
		cpumask_set_cpu(i, &cpumask);
	}

	if (!init_queue(queue, q_size, prefill))
		goto fail;

	run_parallel("skb_array_parallel_many_CPUs",
		     loops, &cpumask, 0, queue,
		     time_bench_CPU_enq_or_deq);

	skb_array_cleanup(queue);
fail:
	kfree(queue);
}
195+
196+
197+
int run_benchmark_tests(void)
198+
{
199+
/* ADJUST: These likely need some adjustments on different
200+
* systems, else the tests likely cannot "complete", because
201+
* the CPUs catchup to each-other.
202+
*
203+
* The benchmark will stop as soon as the CPUs catchup, either
204+
* when the queue is full, or the queue is empty.
205+
*
206+
* If the test does not complete the number of "loops", then
207+
* the results are still showed, but a WARNing is printed
208+
* indicating how many interations were completed. Thus, you
209+
* can judge if the results are valid.
210+
*/
211+
uint32_t loops = 1000000;
212+
int prefill = 32000;
213+
int q_size = 64000;
214+
215+
if (verbose)
216+
pr_info("For 'skb_array_parallel_two_CPUs'"
217+
" step = enq(1)/deq(0)"
218+
", cost is either enqueue or dequeue\n");
219+
220+
run_parallel_two_CPUs(loops, q_size, prefill);
221+
222+
run_parallel_many_CPUs(loops, q_size, prefill);
223+
224+
return 0;
225+
}
226+
227+
/* Module entry point: the benchmarks run once, at load time.
 * A negative test result cancels the module load.
 */
static int __init skb_array_parallel01_module_init(void)
{
	if (verbose)
		pr_info("Loaded\n");

	return (run_benchmark_tests() < 0) ? -ECANCELED : 0;
}
module_init(skb_array_parallel01_module_init);
239+
240+
/* Module exit: nothing to tear down — all resources are freed after the
 * benchmarks finish at load time; only log the unload (when verbose).
 */
static void __exit skb_array_parallel01_module_exit(void)
{
	if (!verbose)
		return;
	pr_info("Unloaded\n");
}
module_exit(skb_array_parallel01_module_exit);
246+
247+
MODULE_DESCRIPTION("Concurrency/parallel benchmarking of skb_array");
248+
MODULE_AUTHOR("Jesper Dangaard Brouer <netoptimizer@brouer.com>");
249+
MODULE_LICENSE("GPL");

0 commit comments

Comments
 (0)