Skip to content

Conversation

yonghong-song
Copy link
Contributor

The following is an example:

  typedef int (*op_t)(int, int);

  __attribute__((section("_add"))) static int add(int a, int b) { return a + b; }
  __attribute__((section("_mul"))) static int mul(int a, int b) { return a * b; }

  __attribute__((noinline)) static int apply(op_t *ops, int index, int a, int b) {
    // indirect call via function pointer
    return ops[index](a, b);
  }

  #ifdef STATIC
  static op_t ops[] = { add, mul, add, add, mul, mul };
  #endif
  #ifdef GLOBAL
  op_t ops[] = { add, mul, add, add, mul, mul };
  #endif
  int result(int i, int j) {
  #ifdef PRIVATE
    op_t ops[] = { add, mul, add, add, mul, mul };
  #endif
    int x = 2, y = 3;

    int r1 = apply(ops, 0, x, y);
    int r2 = apply(ops, 4, x, y);

    return r1 + r2;
  }

Compilation for three different modes:

  clang --target=bpf -DPRIVATE -O2 -S t.c -o t.s.private
  clang --target=bpf -DSTATIC -O2 -S t.c -o t.s.static
  clang --target=bpf -DGLOBAL -O2 -S t.c -o t.s.global

The assembly for each of the three modes follows. For example, for PRIVATE mode:

        .text
        .globl  result                          # -- Begin function result
        .p2align        3
        .type   result,@function
    result:                                 # @result
    # %bb.0:
        r1 = BPF.__const.result.ops ll
        w2 = 0
        call apply
        w6 = w0
        r1 = BPF.__const.result.ops ll
        w2 = 4
        call apply
        w0 += w6
        exit
    .Lfunc_end0:
        .size   result, .Lfunc_end0-result
    ...
        .text
        .p2align        3                               # -- Begin function apply
        .type   apply,@function
    apply:                                  # @apply
    # %bb.0:
        r2 = w2
        r2 <<= 3
        r1 += r2
        r3 = *(u64 *)(r1 + 0)
        w1 = 2
        w2 = 3
        callx r3
        exit
    .Lfunc_end3:
        .size   apply, .Lfunc_end3-apply
    ...
        .type   BPF.__const.result.ops,@object  # @BPF.__const.result.ops
        .section        .calltables,"a",@progbits
        .p2align        3, 0x0
    BPF.__const.result.ops:
        .quad   add
        .quad   mul
        .quad   add
        .quad   add
        .quad   mul
        .quad   mul
        .size   BPF.__const.result.ops, 48

STATIC and GLOBAL modes are similar except for the callx table. For GLOBAL:

        .type   BPF.ops,@object                 # @BPF.ops
        .section        .calltables,"aw",@progbits
        .globl  BPF.ops
        .p2align        3, 0x0
    BPF.ops:
        .quad   add
        .quad   mul
        .quad   add
        .quad   add
        .quad   mul
        .quad   mul
        .size   BPF.ops, 48

For STATIC:

        .type   BPF.ops,@object                 # @BPF.ops
        .section        .calltables,"a",@progbits
        .p2align        3, 0x0
    BPF.ops:
        .quad   add
        .quad   mul
        .quad   add
        .quad   add
        .quad   mul
        .quad   mul
        .size   BPF.ops, 48

Will add selftests after the implementation is validated in the kernel.

@yonghong-song
Copy link
Contributor Author

cc @aspsk
This pull request is for the next callx (jump-table style) support.

The following is an example:

```
  typedef int (*op_t)(int, int);

  __attribute__((section("_add"))) static int add(int a, int b) { return a + b; }
  __attribute__((section("_mul"))) static int mul(int a, int b) { return a * b; }

  __attribute__((noinline)) static int apply(op_t *ops, int index, int a, int b) {
    // indirect call via function pointer
    return ops[index](a, b);
  }

  #ifdef STATIC
  static op_t ops[] = { add, mul, add, add, mul, mul };
  #endif
  #ifdef GLOBAL
  op_t ops[] = { add, mul, add, add, mul, mul };
  #endif
  int result(int i, int j) {
  #ifdef PRIVATE
    op_t ops[] = { add, mul, add, add, mul, mul };
  #endif
    int x = 2, y = 3;

    int r1 = apply(ops, 0, x, y);
    int r2 = apply(ops, 4, x, y);

    return r1 + r2;
  }
```

Compilation for three different modes:
```
  clang --target=bpf -DPRIVATE -O2 -S t.c -o t.s.private
  clang --target=bpf -DSTATIC -O2 -S t.c -o t.s.static
  clang --target=bpf -DGLOBAL -O2 -S t.c -o t.s.global
```

The assembly for each of the three modes follows. For example,
for PRIVATE mode:
```
        .text
        .globl  result                          # -- Begin function result
        .p2align        3
        .type   result,@function
    result:                                 # @result
    # %bb.0:
        r1 = BPF.__const.result.ops ll
        w2 = 0
        call apply
        w6 = w0
        r1 = BPF.__const.result.ops ll
        w2 = 4
        call apply
        w0 += w6
        exit
    .Lfunc_end0:
        .size   result, .Lfunc_end0-result
    ...
        .text
        .p2align        3                               # -- Begin function apply
        .type   apply,@function
    apply:                                  # @apply
    # %bb.0:
        r2 = w2
        r2 <<= 3
        r1 += r2
        r3 = *(u64 *)(r1 + 0)
        w1 = 2
        w2 = 3
        callx r3
        exit
    .Lfunc_end3:
        .size   apply, .Lfunc_end3-apply
    ...
        .type   BPF.__const.result.ops,@object  # @BPF.__const.result.ops
        .section        .calltables,"a",@progbits
        .p2align        3, 0x0
    BPF.__const.result.ops:
        .quad   add
        .quad   mul
        .quad   add
        .quad   add
        .quad   mul
        .quad   mul
        .size   BPF.__const.result.ops, 48
```

STATIC and GLOBAL modes are similar except for the callx table.
For GLOBAL:
```
        .type   BPF.ops,@object                 # @BPF.ops
        .section        .calltables,"aw",@progbits
        .globl  BPF.ops
        .p2align        3, 0x0
    BPF.ops:
        .quad   add
        .quad   mul
        .quad   add
        .quad   add
        .quad   mul
        .quad   mul
        .size   BPF.ops, 48
```
For STATIC:
```
        .type   BPF.ops,@object                 # @BPF.ops
        .section        .calltables,"a",@progbits
        .p2align        3, 0x0
    BPF.ops:
        .quad   add
        .quad   mul
        .quad   add
        .quad   add
        .quad   mul
        .quad   mul
        .size   BPF.ops, 48
```

Will add selftests after the implementation is validated in the kernel.
@aspsk
Copy link

aspsk commented Sep 19, 2025

This pull request is for the next callx (jump-table style) support.

Thanks @yonghong-song! I will try it out the next week.

@aspsk
Copy link

aspsk commented Oct 12, 2025

I will try it out the next week
said I, but not yet. Will try asap

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants