Skip to content

Commit

Permalink
elf_reader,btf: support multiple programs per ELF section
Browse files Browse the repository at this point in the history
This patch adds support for emitting multiple eBPF functions/programs
to the same ELF section.

Significant changes were necessary, both to the BPF linker and the BTF
ext_info parser, as previously most of the code (rightfully) made the
assumption that one ELF section equaled one BPF function. In order to
remove this limitation, all 'offset' logic that used to track positions
within an ELF section had to be modified to track positions within a
function body instead.

Secondly, in order to reassemble instruction streams and BTF func_infos,
line_infos and also CO-RE relocations back into a flat format the kernel
expects, the linking logic had to be broken up into smaller pieces.
This allows the instruction linker and the BTF extinfo linkers to
operate on the same source of truth, so they can generate their outputs
using the same program layout. Additionally, by computing references
(which requires scanning all progs' instruction streams for references
to other programs) only once during ELF loading, any marshaling logic
can simply request the flattened program layout, saving cycles.

To simplify the reference discovery process, the internal distinction
between 'libs' and 'progs' has been eliminated. This allows any function
to be called from anywhere, regardless of section name. For backwards
compatibility reasons, programs of type UnspecifiedProgram are not yet
emitted to the CollectionSpec. How we treat functions from .text and
other unknown sections is still to be debated.

From a high level, the linker is now split into multiple stages:

- Finding references - a program's instruction stream is checked for
  references to any other program in the ELF. If a direct dependency is
  found (a jump to another function), a pointer to that function is
  stored in the program's reference list.
- Flattening - when the program is about to be marshaled for hand-off to
  the kernel, a unique, flat list of dependent programs is generated by
  stepping through each program's reference list in a depth-first
  manner. This list must be used by both the instruction and BTF extinfo
  linkers, so they generate their outputs using the same layout.
- Marshaling - the flat list of programs is simply iterated over, and each
  program's instructions and BTF extinfos are marshaled to their respective
  output buffers.

Signed-off-by: Timo Beckers <timo@isovalent.com>
  • Loading branch information
ti-mo committed Dec 16, 2021
1 parent 9937f1f commit 9d739bd
Show file tree
Hide file tree
Showing 14 changed files with 786 additions and 393 deletions.
48 changes: 42 additions & 6 deletions asm/instruction.go
Expand Up @@ -26,13 +26,17 @@ func (rio RawInstructionOffset) Bytes() uint64 {

// Instruction is a single eBPF instruction.
type Instruction struct {
OpCode OpCode
Dst Register
Src Register
Offset int16
Constant int64
OpCode OpCode
Dst Register
Src Register
Offset int16
Constant int64

// Reference denotes a reference (e.g. a jump) to another symbol.
Reference string
Symbol string

// Symbol denotes an instruction at the start of a function body.
Symbol string
}

// Sym creates a symbol.
Expand Down Expand Up @@ -186,6 +190,13 @@ func (ins *Instruction) IsLoadOfFunctionPointer() bool {
return ins.OpCode.IsDWordLoad() && ins.Src == PseudoFunc
}

// IsFunctionReference reports whether the instruction refers to another BPF
// function. This is the case when it is a function call, or when it loads a
// function pointer.
func (ins *Instruction) IsFunctionReference() bool {
	if ins.IsFunctionCall() {
		return true
	}

	return ins.IsLoadOfFunctionPointer()
}

// IsBuiltinCall returns true if the instruction is a built-in call, i.e. BPF helper call.
func (ins *Instruction) IsBuiltinCall() bool {
return ins.OpCode.JumpOp() == Call && ins.Src == R0 && ins.Dst == R0
Expand Down Expand Up @@ -350,6 +361,31 @@ func (insns Instructions) SymbolOffsets() (map[string]int, error) {
return offsets, nil
}

// FunctionReferences gathers the names of all symbols that these Instructions
// reference through bpf-to-bpf calls or function pointer loads.
//
// The result is a set: every referenced symbol name maps to true.
func (insns Instructions) FunctionReferences() map[string]bool {
	refs := make(map[string]bool)

	for _, ins := range insns {
		// An instruction only counts when all of the following hold:
		//  - its constant is -1, as is the case for BPF-to-BPF calls,
		//  - it carries a non-empty reference to a symbol,
		//  - it is a function reference.
		if ins.Constant == -1 && ins.Reference != "" && ins.IsFunctionReference() {
			refs[ins.Reference] = true
		}
	}

	return refs
}

// ReferenceOffsets returns the set of references and their offset in
// the instructions.
func (insns Instructions) ReferenceOffsets() map[string][]int {
Expand Down
3 changes: 2 additions & 1 deletion collection.go
Expand Up @@ -426,7 +426,8 @@ func (cl *collectionLoader) loadProgram(progName string) (*Program, error) {

progSpec = progSpec.Copy()

// Rewrite any reference to a valid map.
// Rewrite any reference to a valid map in the program's instructions,
// which includes all of its dependencies.
for i := range progSpec.Instructions {
ins := &progSpec.Instructions[i]

Expand Down
160 changes: 108 additions & 52 deletions elf_reader.go
Expand Up @@ -159,7 +159,7 @@ func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) {
}

// Finally, collect programs and link them.
progs, err := ec.loadPrograms()
progs, err := ec.loadProgramSections()
if err != nil {
return nil, fmt.Errorf("load programs: %w", err)
}
Expand Down Expand Up @@ -265,12 +265,15 @@ func (ec *elfCode) assignSymbols(symbols []elf.Symbol) {
}
}

func (ec *elfCode) loadPrograms() (map[string]*ProgramSpec, error) {
var (
progs []*ProgramSpec
libs []*ProgramSpec
)
// loadProgramSections iterates ec's sections and emits a ProgramSpec
// for each function it finds.
//
// The resulting map is indexed by function name.
func (ec *elfCode) loadProgramSections() (map[string]*ProgramSpec, error) {

progs := make(map[string]*ProgramSpec)

// Generate a ProgramSpec for each function found in each program section.
for _, sec := range ec.sections {
if sec.kind != programSection {
continue
Expand All @@ -280,86 +283,139 @@ func (ec *elfCode) loadPrograms() (map[string]*ProgramSpec, error) {
return nil, fmt.Errorf("section %v: missing symbols", sec.Name)
}

funcSym, ok := sec.symbols[0]
if !ok {
return nil, fmt.Errorf("section %v: no label at start", sec.Name)
}

insns, length, err := ec.loadInstructions(sec)
funcs, err := ec.loadFunctions(sec)
if err != nil {
return nil, fmt.Errorf("program %s: %w", funcSym.Name, err)
return nil, fmt.Errorf("section %v: %w", sec.Name, err)
}

progType, attachType, progFlags, attachTo := getProgType(sec.Name)

spec := &ProgramSpec{
Name: funcSym.Name,
Type: progType,
Flags: progFlags,
AttachType: attachType,
AttachTo: attachTo,
License: ec.license,
KernelVersion: ec.version,
Instructions: insns,
ByteOrder: ec.ByteOrder,
}
for name, insns := range funcs {
spec := &ProgramSpec{
Name: name,
Type: progType,
Flags: progFlags,
AttachType: attachType,
AttachTo: attachTo,
License: ec.license,
KernelVersion: ec.version,
Instructions: insns,
ByteOrder: ec.ByteOrder,
}

if ec.btf != nil {
spec.BTF, err = ec.btf.Program(sec.Name, length)
if err != nil && !errors.Is(err, btf.ErrNoExtendedInfo) {
return nil, fmt.Errorf("program %s: %w", funcSym.Name, err)
if ec.btf != nil {
spec.BTF, err = ec.btf.Program(name)
if err != nil && !errors.Is(err, btf.ErrNoExtendedInfo) {
return nil, fmt.Errorf("program %s: %w", name, err)
}
}

// Function names must be unique within a single ELF blob.
if progs[name] != nil {
return nil, fmt.Errorf("duplicate program name %s", name)
}
progs[name] = spec
}
}

if spec.Type == UnspecifiedProgram {
// There is no single name we can use for "library" sections,
// since they may contain multiple functions. We'll decode the
// labels they contain later on, and then link sections that way.
libs = append(libs, spec)
} else {
progs = append(progs, spec)
// Populate each prog's references with pointers to all of its callees.
if err := populateReferences(progs); err != nil {
return nil, fmt.Errorf("populating references: %w", err)
}

// Don't emit programs of unknown type to preserve backwards compatibility.
for n, p := range progs {
if p.Type == UnspecifiedProgram {
delete(progs, n)
}
}

res := make(map[string]*ProgramSpec, len(progs))
for _, prog := range progs {
err := link(prog, libs)
return progs, nil
}

// loadFunctions extracts instruction streams from the given program section
// starting at each symbol in the section. The section's symbols must already
// be narrowed down to STT_NOTYPE (emitted by clang <8) or STT_FUNC.
//
// The resulting map is indexed by function name.
func (ec *elfCode) loadFunctions(section *elfSection) (map[string]asm.Instructions, error) {
funcs := make(map[string]asm.Instructions)
for _, sym := range section.symbols {
// Load the function's instructions given the ELF section and the
// function's symbol containing the offset and length of the function.
si, err := ec.loadInstructions(section, sym)
if err != nil {
return nil, fmt.Errorf("program %s: %w", prog.Name, err)
return nil, fmt.Errorf("loading instructions: %w", err)
}
res[prog.Name] = prog

funcs[sym.Name] = si
}

return res, nil
return funcs, nil
}

func (ec *elfCode) loadInstructions(section *elfSection) (asm.Instructions, uint64, error) {
var (
r = bufio.NewReader(section.Open())
insns asm.Instructions
offset uint64
)
// loadInstructions extracts the instruction stream of the given symbol
// from the given ELF section.
func (ec *elfCode) loadInstructions(section *elfSection, sym elf.Symbol) (asm.Instructions, error) {
// clang up until at least version 7 does not set the symbol's size field.
// In this case, buffer the remainder of the section, since we only read up
// to the first Exit instruction.
if sym.Size == 0 {
// Don't use Max(U)Int64, the offset is added to this number
// during SectionReader creation and will overflow.
sym.Size = section.Size
}

r := internal.NewBufferedSectionReader(section, int64(sym.Value), int64(sym.Size))

// Starting point of the function within the section.
offset := sym.Value

var insns asm.Instructions
for {
var ins asm.Instruction
n, err := ins.Unmarshal(r, ec.ByteOrder)
if err == io.EOF {
return insns, offset, nil
}
if err != nil {
return nil, 0, fmt.Errorf("offset %d: %w", offset, err)
// EOF is an error, a valid instruction stream ends with an Exit instruction.
return nil, fmt.Errorf("offset %d: %w", offset, err)
}

ins.Symbol = section.symbols[offset].Name

if rel, ok := section.relocations[offset]; ok {
// A relocation was found for the current offset. Apply it to the insn.
if err = ec.relocateInstruction(&ins, rel); err != nil {
return nil, 0, fmt.Errorf("offset %d: relocate instruction: %w", offset, err)
return nil, fmt.Errorf("offset %d: relocate instruction: %w", offset, err)
}
} else {
// Up to LLVM 9, calls to subprograms within the same ELF section are
// sometimes encoded using relative jumps without relocation entries.
// If, after all relocation entries have been processed, there are
// still relative pseudocalls left, they must point to an existing
// symbol within the section.
// When splitting sections into subprograms, the targets of these calls
// are no longer in scope, so they must be resolved here.
if ins.IsFunctionReference() && ins.Constant != -1 {
jmp := uint64(int64(offset) + (ins.Constant+1)*asm.InstructionSize)
ins.Reference = section.symbols[jmp].Name
ins.Constant = -1
}
}

insns = append(insns, ins)
offset += n

// Read up until the first exit instruction.
if ins.OpCode.JumpOp() == asm.Exit {
break
}
}

if len(insns) == 0 {
return nil, errors.New("no instructions")
}

return insns, nil
}

func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) error {
Expand Down
1 change: 1 addition & 0 deletions elf_reader_test.go
Expand Up @@ -129,6 +129,7 @@ func TestLoadCollectionSpec(t *testing.T) {
cmpopts.IgnoreTypes(new(btf.Map), new(btf.Program)),
cmpopts.IgnoreFields(CollectionSpec{}, "ByteOrder"),
cmpopts.IgnoreFields(ProgramSpec{}, "Instructions", "ByteOrder"),
cmpopts.IgnoreUnexported(ProgramSpec{}),
cmpopts.IgnoreMapEntries(func(key string, _ *MapSpec) bool {
switch key {
case ".bss", ".data", ".rodata":
Expand Down

0 comments on commit 9d739bd

Please sign in to comment.