diff --git a/demo/bcc.py b/demo/bcc.py deleted file mode 100644 index f9f4d28..0000000 --- a/demo/bcc.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import print_function -from bcc import BPF -from bcc.utils import printb - -# load BPF program -b = BPF(text=""" -#include - -BPF_HASH(last); - -int do_trace(struct pt_regs *ctx) { - u64 ts, *tsp, delta, key = 0; - - // attempt to read stored timestamp - tsp = last.lookup(&key); - if (tsp != NULL) { - delta = bpf_ktime_get_ns() - *tsp; - if (delta < 1000000000) { - // output if time is less than 1 second - bpf_trace_printk("%d\\n", delta / 1000000); - } - last.delete(&key); - } - - // update stored timestamp - ts = bpf_ktime_get_ns(); - last.update(&key, &ts); - return 0; -} -""") - -b.attach_kprobe(event=b.get_syscall_fnname("sync"), fn_name="do_trace") -print("Tracing for quick sync's... Ctrl-C to end") - -# TODO -# format output -start = 0 -while 1: - try: - (task, pid, cpu, flags, ts, ms) = b.trace_fields() - if start == 0: - start = ts - ts = ts - start - printb(b"At time %.2f s: multiple syncs detected, last %s ms ago" % (ts, ms)) - except KeyboardInterrupt: - exit() diff --git a/demo/clone-matplotlib.ipynb b/examples/clone-matplotlib.ipynb similarity index 100% rename from demo/clone-matplotlib.ipynb rename to examples/clone-matplotlib.ipynb diff --git a/examples/execve3.py b/examples/execve3.py index f636a34..510d6fa 100644 --- a/examples/execve3.py +++ b/examples/execve3.py @@ -10,18 +10,10 @@ def last() -> HashMap: return HashMap(key=c_uint64, value=c_uint64, max_entries=3) - -@bpf -@section("tracepoint/syscalls/sys_enter_execve") -def hello(ctx: c_void_p) -> c_int32: - print("entered") - print("multi constant support") - return c_int32(0) - - @bpf @section("tracepoint/syscalls/sys_exit_execve") def hello_again(ctx: c_void_p) -> c_int64: + print("multi constant support") print("exited") key = 0 delta = 0 @@ -45,11 +37,9 @@ def hello_again(ctx: c_void_p) -> c_int64: return c_int64(0) - @bpf @bpfglobal def 
LICENSE() -> str: return "GPL" - compile() diff --git a/examples/execve4.py b/examples/execve4.py index 9c44b1f..be900e4 100644 --- a/examples/execve4.py +++ b/examples/execve4.py @@ -10,7 +10,6 @@ def last() -> HashMap: return HashMap(key=c_uint64, value=c_uint64, max_entries=3) - @bpf @section("blk_start_request") def trace_start(ctx: c_void_p) -> c_int32: diff --git a/examples/hello_world.py b/examples/hello_world.py deleted file mode 100644 index 742da67..0000000 --- a/examples/hello_world.py +++ /dev/null @@ -1,15 +0,0 @@ -# This is what it is going to look like -# pylint: disable-all# type: ignore -from pythonbpf.decorators import tracepoint, syscalls, bpfglobal, bpf -from ctypes import c_void_p, c_int32 - -@bpf -@tracepoint(syscalls.sys_clone) -def trace_clone(ctx: c_void_p) -> c_int32: - print("Hello, World!") - return c_int32(0) - -@bpf -@bpfglobal -def LICENSE() -> str: - return "GPL" diff --git a/demo/pybpf0.py b/examples/pybpf0.py similarity index 100% rename from demo/pybpf0.py rename to examples/pybpf0.py diff --git a/demo/pybpf1.py b/examples/pybpf1.py similarity index 100% rename from demo/pybpf1.py rename to examples/pybpf1.py diff --git a/demo/pybpf2.py b/examples/pybpf2.py similarity index 100% rename from demo/pybpf2.py rename to examples/pybpf2.py diff --git a/demo/pybpf3.py b/examples/pybpf3.py similarity index 100% rename from demo/pybpf3.py rename to examples/pybpf3.py diff --git a/demo/pybpf4.py b/examples/pybpf4.py similarity index 100% rename from demo/pybpf4.py rename to examples/pybpf4.py diff --git a/pythonbpf/codegen.py b/pythonbpf/codegen.py index b749a46..b244130 100644 --- a/pythonbpf/codegen.py +++ b/pythonbpf/codegen.py @@ -5,6 +5,7 @@ from pythonbpf.maps import maps_proc from .structs.structs_pass import structs_proc from .globals_pass import globals_processing +from .debuginfo import DW_LANG_C11, DwarfBehaviorEnum import os import subprocess import inspect @@ -12,6 +13,8 @@ from pylibbpf import BpfProgram import tempfile 
+VERSION = "v0.1.3" + def find_bpf_chunks(tree): """Find all functions decorated with @bpf in the AST.""" @@ -50,16 +53,18 @@ def compile_to_ir(filename: str, output: str): module.triple = "bpf" if not hasattr(module, '_debug_compile_unit'): - module._file_metadata = module.add_debug_info("DIFile", { # type: ignore + module._file_metadata = module.add_debug_info("DIFile", { # type: ignore "filename": filename, "directory": os.path.dirname(filename) }) - module._debug_compile_unit = module.add_debug_info("DICompileUnit", { # type: ignore - "language": 29, # DW_LANG_C11 - "file": module._file_metadata, # type: ignore - "producer": "PythonBPF DSL Compiler", - "isOptimized": True, + module._debug_compile_unit = module.add_debug_info("DICompileUnit", { # type: ignore + "language": DW_LANG_C11, + "file": module._file_metadata, # type: ignore + "producer": f"PythonBPF {VERSION}", + "isOptimized": True, # TODO: This is probably not true + # TODO: populate the DICompileUnit "globals" field with the debug info of every global variable. Emission works + # without it, but it may be required for kprobes. 
"runtimeVersion": 0, "emissionKind": 1, "splitDebugInlining": False, @@ -67,32 +72,32 @@ def compile_to_ir(filename: str, output: str): }, is_distinct=True) module.add_named_metadata( - "llvm.dbg.cu", module._debug_compile_unit) # type: ignore + "llvm.dbg.cu", module._debug_compile_unit) # type: ignore processor(source, filename, module) - wchar_size = module.add_metadata([ir.Constant(ir.IntType(32), 1), + wchar_size = module.add_metadata([DwarfBehaviorEnum.ERROR_IF_MISMATCH, "wchar_size", ir.Constant(ir.IntType(32), 4)]) - frame_pointer = module.add_metadata([ir.Constant(ir.IntType(32), 7), + frame_pointer = module.add_metadata([DwarfBehaviorEnum.OVERRIDE_USE_LARGEST, "frame-pointer", ir.Constant(ir.IntType(32), 2)]) # Add Debug Info Version (3 = DWARF v3, which LLVM expects) - debug_info_version = module.add_metadata([ir.Constant(ir.IntType(32), 2), + debug_info_version = module.add_metadata([DwarfBehaviorEnum.WARNING_IF_MISMATCH, "Debug Info Version", ir.Constant(ir.IntType(32), 3)]) - # Add explicit DWARF version (4 is common, works with LLVM BPF backend) - dwarf_version = module.add_metadata([ir.Constant(ir.IntType(32), 2), + # Add explicit DWARF version 5 + dwarf_version = module.add_metadata([DwarfBehaviorEnum.OVERRIDE_USE_LARGEST, "Dwarf Version", - ir.Constant(ir.IntType(32), 4)]) + ir.Constant(ir.IntType(32), 5)]) module.add_named_metadata("llvm.module.flags", wchar_size) module.add_named_metadata("llvm.module.flags", frame_pointer) module.add_named_metadata("llvm.module.flags", debug_info_version) module.add_named_metadata("llvm.module.flags", dwarf_version) - module.add_named_metadata("llvm.ident", ["llvmlite PythonBPF v0.0.1"]) + module.add_named_metadata("llvm.ident", [f"PythonBPF {VERSION}"]) print(f"IR written to {output}") with open(output, "w") as f: diff --git a/pythonbpf/debuginfo/__init__.py b/pythonbpf/debuginfo/__init__.py new file mode 100644 index 0000000..faeabe4 --- /dev/null +++ b/pythonbpf/debuginfo/__init__.py @@ -0,0 +1,3 @@ +from 
.dwarf_constants import * +from .dtypes import * +from .debug_info_generator import DebugInfoGenerator diff --git a/pythonbpf/debuginfo/debug_info_generator.py b/pythonbpf/debuginfo/debug_info_generator.py new file mode 100644 index 0000000..f4972c8 --- /dev/null +++ b/pythonbpf/debuginfo/debug_info_generator.py @@ -0,0 +1,92 @@ +""" +Debug information generation module for Python-BPF +Provides utilities for generating DWARF/BTF debug information +""" + +from . import dwarf_constants as dc +from typing import Dict, Any, List, Optional, Union + + +class DebugInfoGenerator: + def __init__(self, module): + self.module = module + self._type_cache = {} # Cache for common debug types + + def get_basic_type(self, name: str, size: int, encoding: int) -> Any: + """Get or create a basic type with caching""" + key = (name, size, encoding) + if key not in self._type_cache: + self._type_cache[key] = self.module.add_debug_info("DIBasicType", { + "name": name, + "size": size, + "encoding": encoding + }) + return self._type_cache[key] + + def get_uint32_type(self) -> Any: + """Get debug info for unsigned 32-bit integer""" + return self.get_basic_type("unsigned int", 32, dc.DW_ATE_unsigned) + + def get_uint64_type(self) -> Any: + """Get debug info for unsigned 64-bit integer""" + return self.get_basic_type("unsigned long long", 64, dc.DW_ATE_unsigned) + + def create_pointer_type(self, base_type: Any, size: int = 64) -> Any: + """Create a pointer type to the given base type""" + return self.module.add_debug_info("DIDerivedType", { + "tag": dc.DW_TAG_pointer_type, + "baseType": base_type, + "size": size + }) + + def create_array_type(self, base_type: Any, count: int) -> Any: + """Create an array type of the given base type with specified count""" + subrange = self.module.add_debug_info("DISubrange", {"count": count}) + return self.module.add_debug_info("DICompositeType", { + "tag": dc.DW_TAG_array_type, + "baseType": base_type, + "size": self._compute_array_size(base_type, count), + 
"elements": [subrange] + }) + + @staticmethod + def _compute_array_size(base_type: Any, count: int) -> int: + # Extract size from base_type if possible + # For simplicity, assuming base_type has a size attribute + return getattr(base_type, "size", 32) * count + + def create_struct_member(self, name: str, base_type: Any, offset: int) -> Any: + """Create a struct member with the given name, type, and offset""" + return self.module.add_debug_info("DIDerivedType", { + "tag": dc.DW_TAG_member, + "name": name, + "file": self.module._file_metadata, + "baseType": base_type, + "size": getattr(base_type, "size", 64), + "offset": offset + }) + + def create_struct_type(self, members: List[Any], size: int, is_distinct: bool) -> Any: + """Create a struct type with the given members and size""" + return self.module.add_debug_info("DICompositeType", { + "tag": dc.DW_TAG_structure_type, + "file": self.module._file_metadata, + "size": size, + "elements": members, + }, is_distinct=is_distinct) + + def create_global_var_debug_info(self, name: str, var_type: Any, is_local: bool = False) -> Any: + """Create debug info for a global variable""" + global_var = self.module.add_debug_info("DIGlobalVariable", { + "name": name, + "scope": self.module._debug_compile_unit, + "file": self.module._file_metadata, + "type": var_type, + "isLocal": is_local, + "isDefinition": True + }, is_distinct=True) + + return self.module.add_debug_info("DIGlobalVariableExpression", { + "var": global_var, + "expr": self.module.add_debug_info("DIExpression", {}) + }) diff --git a/pythonbpf/debuginfo/dtypes.py b/pythonbpf/debuginfo/dtypes.py new file mode 100644 index 0000000..640b3ae --- /dev/null +++ b/pythonbpf/debuginfo/dtypes.py @@ -0,0 +1,6 @@ +import llvmlite.ir as ir + +class DwarfBehaviorEnum: + ERROR_IF_MISMATCH = ir.Constant(ir.IntType(32), 1) + WARNING_IF_MISMATCH = ir.Constant(ir.IntType(32), 2) + OVERRIDE_USE_LARGEST = ir.Constant(ir.IntType(32), 7) diff --git a/pythonbpf/dwarf_constants.py 
b/pythonbpf/debuginfo/dwarf_constants.py similarity index 100% rename from pythonbpf/dwarf_constants.py rename to pythonbpf/debuginfo/dwarf_constants.py diff --git a/pythonbpf/maps/maps_pass.py b/pythonbpf/maps/maps_pass.py index a20a874..1b232e6 100644 --- a/pythonbpf/maps/maps_pass.py +++ b/pythonbpf/maps/maps_pass.py @@ -1,8 +1,8 @@ import ast from llvmlite import ir -from pythonbpf import dwarf_constants as dc from enum import Enum from .maps_utils import MapProcessorRegistry +from ..debuginfo import dwarf_constants as dc, DebugInfoGenerator import logging logger = logging.getLogger(__name__) @@ -55,53 +55,15 @@ def create_bpf_map(module, map_name, map_params): def create_map_debug_info(module, map_global, map_name, map_params): """Generate debug information metadata for BPF map""" - file_metadata = module._file_metadata - compile_unit = module._debug_compile_unit - - # Create basic type for unsigned int (32-bit) - uint_type = module.add_debug_info("DIBasicType", { - "name": "unsigned int", - "size": 32, - "encoding": dc.DW_ATE_unsigned - }) - - # Create basic type for unsigned long long (64-bit) - ulong_type = module.add_debug_info("DIBasicType", { - "name": "unsigned long long", - "size": 64, - "encoding": dc.DW_ATE_unsigned - }) - - # Create array type for map type field (array of 1 unsigned int) - array_subrange = module.add_debug_info( - "DISubrange", {"count": map_params.get("type", BPFMapType.HASH).value}) - array_type = module.add_debug_info("DICompositeType", { - "tag": dc.DW_TAG_array_type, - "baseType": uint_type, - "size": 32, - "elements": [array_subrange] - }) - - # Create pointer types - type_ptr = module.add_debug_info("DIDerivedType", { - "tag": dc.DW_TAG_pointer_type, - "baseType": array_type, - "size": 64 - }) - - key_ptr = module.add_debug_info("DIDerivedType", { - "tag": dc.DW_TAG_pointer_type, - # Adjust based on actual key type - "baseType": array_type if "key_size" in map_params else uint_type, - "size": 64 - }) - - value_ptr = 
module.add_debug_info("DIDerivedType", { - "tag": dc.DW_TAG_pointer_type, - # Adjust based on actual value type - "baseType": array_type if "value_size" in map_params else ulong_type, - "size": 64 - }) + generator = DebugInfoGenerator(module) + + uint_type = generator.get_uint32_type() + ulong_type = generator.get_uint64_type() + array_type = generator.create_array_type(uint_type, map_params.get("type", BPFMapType.HASH).value) + type_ptr = generator.create_pointer_type(array_type, 64) + key_ptr = generator.create_pointer_type(array_type if "key_size" in map_params else ulong_type, 64) + value_ptr = generator.create_pointer_type(array_type if "value_size" in map_params else ulong_type, 64) + elements_arr = [] @@ -117,69 +79,27 @@ def create_map_debug_info(module, map_global, map_name, map_params): ptr = key_ptr else: ptr = value_ptr - member = module.add_debug_info("DIDerivedType", { - "tag": dc.DW_TAG_member, - "name": elem, - "file": file_metadata, - "baseType": ptr, - "size": 64, - "offset": cnt * 64 - }) + # TODO: compute each member's offset from the actual member sizes instead of assuming a fixed 64-bit stride; the current fixed stride will not work for structs. 
+ member = generator.create_struct_member(elem, ptr, cnt * 64) elements_arr.append(member) cnt += 1 if "max_entries" in map_params: - array_subrange_max_entries = module.add_debug_info( - "DISubrange", {"count": map_params["max_entries"]}) - array_type_max_entries = module.add_debug_info("DICompositeType", { - "tag": dc.DW_TAG_array_type, - "baseType": uint_type, - "size": 32, - "elements": [array_subrange_max_entries] - }) - max_entries_ptr = module.add_debug_info("DIDerivedType", { - "tag": dc.DW_TAG_pointer_type, - "baseType": array_type_max_entries, - "size": 64 - }) - max_entries_member = module.add_debug_info("DIDerivedType", { - "tag": dc.DW_TAG_member, - "name": "max_entries", - "file": file_metadata, - "baseType": max_entries_ptr, - "size": 64, - "offset": cnt * 64 - }) + max_entries_array = generator.create_array_type(uint_type, map_params["max_entries"]) + max_entries_ptr = generator.create_pointer_type(max_entries_array, 64) + max_entries_member = generator.create_struct_member("max_entries", max_entries_ptr, cnt * 64) elements_arr.append(max_entries_member) # Create the struct type - struct_type = module.add_debug_info("DICompositeType", { - "tag": dc.DW_TAG_structure_type, - "file": file_metadata, - "size": 64 * len(elements_arr), # 4 * 64-bit pointers - "elements": elements_arr, - }, is_distinct=True) + struct_type = generator.create_struct_type(elements_arr, 64 * len(elements_arr), is_distinct=True) # Create global variable debug info - global_var = module.add_debug_info("DIGlobalVariable", { - "name": map_name, - "scope": compile_unit, - "file": file_metadata, - "type": struct_type, - "isLocal": False, - "isDefinition": True - }, is_distinct=True) - - # Create global variable expression - global_var_expr = module.add_debug_info("DIGlobalVariableExpression", { - "var": global_var, - "expr": module.add_debug_info("DIExpression", {}) - }) + global_var = generator.create_global_var_debug_info(map_name, struct_type, is_local=False) # Attach debug info 
to the global variable - map_global.set_metadata("dbg", global_var_expr) + map_global.set_metadata("dbg", global_var) - return global_var_expr + return global_var @MapProcessorRegistry.register("HashMap")