diff --git a/pythonbpf/debuginfo/debug_info_generator.py b/pythonbpf/debuginfo/debug_info_generator.py
index ab9fed4e..62f0cc36 100644
--- a/pythonbpf/debuginfo/debug_info_generator.py
+++ b/pythonbpf/debuginfo/debug_info_generator.py
@@ -81,6 +81,20 @@ def create_array_type(self, base_type: Any, count: int) -> Any:
             },
         )
 
+    def create_array_type_vmlinux(self, type_info: Any, count: int) -> Any:
+        """Create an array type from a (base_type, total_size) pair with specified count"""
+        base_type, type_sizing = type_info
+        subrange = self.module.add_debug_info("DISubrange", {"count": count})
+        return self.module.add_debug_info(
+            "DICompositeType",
+            {
+                "tag": dc.DW_TAG_array_type,
+                "baseType": base_type,
+                "size": type_sizing,
+                "elements": [subrange],
+            },
+        )
+
     @staticmethod
     def _compute_array_size(base_type: Any, count: int) -> int:
         # Extract size from base_type if possible
@@ -101,6 +115,23 @@ def create_struct_member(self, name: str, base_type: Any, offset: int) -> Any:
             },
         )
 
+    def create_struct_member_vmlinux(
+        self, name: str, base_type_with_size: Any, offset: int
+    ) -> Any:
+        """Create a struct member from a name, a (base_type, size) pair, and an offset"""
+        base_type, type_size = base_type_with_size
+        return self.module.add_debug_info(
+            "DIDerivedType",
+            {
+                "tag": dc.DW_TAG_member,
+                "name": name,
+                "file": self.module._file_metadata,
+                "baseType": base_type,
+                "size": type_size,
+                "offset": offset,
+            },
+        )
+
     def create_struct_type(
         self, members: List[Any], size: int, is_distinct: bool
     ) -> Any:
@@ -116,6 +147,22 @@ def create_struct_type(
             is_distinct=is_distinct,
         )
 
+    def create_struct_type_with_name(
+        self, name: str, members: List[Any], size: int, is_distinct: bool
+    ) -> Any:
+        """Create a named struct type with the given members and size"""
+        return self.module.add_debug_info(
+            "DICompositeType",
+            {
+                "name": name,
+                "tag": dc.DW_TAG_structure_type,
+                "file": self.module._file_metadata,
+                "size": size,
+                "elements": members,
+            },
+            is_distinct=is_distinct,
+        )
+
     def create_global_var_debug_info(
         self, name: str, var_type: Any, is_local: bool = False
     ) -> Any:
diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py
index c9407116..108fa9fc 100644
--- a/pythonbpf/vmlinux_parser/class_handler.py
+++ b/pythonbpf/vmlinux_parser/class_handler.py
@@ -99,11 +99,45 @@ def process_vmlinux_post_ast(
                 local_module_name = getattr(elem_type, "__module__", None)
                 new_dep_node.add_field(elem_name, elem_type, ready=False)
                 if local_module_name == ctypes.__name__:
+                    # TODO: need to process pointer to ctype and also CFUNCTYPES here recursively. Current processing is a single dereference
                     new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size)
-                    new_dep_node.set_field_ready(elem_name, is_ready=True)
-                    logger.debug(
-                        f"Field {elem_name} is direct ctypes type: {elem_type}"
-                    )
+
+                    # Process pointer to ctype
+                    if isinstance(elem_type, type) and issubclass(
+                        elem_type, ctypes._Pointer
+                    ):
+                        # Get the pointed-to type
+                        pointed_type = elem_type._type_
+                        logger.debug(f"Found pointer to type: {pointed_type}")
+                        new_dep_node.set_field_containing_type(elem_name, pointed_type)
+                        new_dep_node.set_field_ctype_complex_type(
+                            elem_name, ctypes._Pointer
+                        )
+                        new_dep_node.set_field_ready(elem_name, is_ready=True)
+
+                    # Process function pointers (CFUNCTYPE)
+                    elif hasattr(elem_type, "_restype_") and hasattr(
+                        elem_type, "_argtypes_"
+                    ):
+                        # This is a CFUNCTYPE or similar
+                        logger.info(
+                            f"Function pointer detected for {elem_name} with return type {elem_type._restype_} and arguments {elem_type._argtypes_}"
+                        )
+                        # Set the field as ready but mark it with special handling
+                        new_dep_node.set_field_ctype_complex_type(
+                            elem_name, ctypes.CFUNCTYPE
+                        )
+                        new_dep_node.set_field_ready(elem_name, is_ready=True)
+                        logger.warning(
+                            "Blindly processing CFUNCTYPE ctypes to ensure compilation. Unsupported"
+                        )
+
+                    else:
+                        # Regular ctype
+                        new_dep_node.set_field_ready(elem_name, is_ready=True)
+                        logger.debug(
+                            f"Field {elem_name} is direct ctypes type: {elem_type}"
+                        )
                 elif local_module_name == "vmlinux":
                     new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size)
                     logger.debug(
@@ -127,6 +161,10 @@ def process_vmlinux_post_ast(
                                     ctype_complex_type = ctypes.Array
                                 elif issubclass(elem_type, ctypes._Pointer):
                                     ctype_complex_type = ctypes._Pointer
+                                else:
+                                    raise ImportError(
+                                        "Non Array and Pointer type ctype imports not supported in current version"
+                                    )
                             else:
                                 raise TypeError("Unsupported ctypes subclass")
                         else:
diff --git a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py
index 0b38cd67..232cf10a 100644
--- a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py
+++ b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py
@@ -1,15 +1,161 @@
-from pythonbpf.debuginfo import DebugInfoGenerator
+from pythonbpf.debuginfo import DebugInfoGenerator, dwarf_constants as dc
+from ..dependency_node import DependencyNode
+import ctypes
+import logging
+from typing import List, Any, Tuple
 
+logger = logging.getLogger(__name__)
 
-def debug_info_generation(struct, llvm_module):
+
+def debug_info_generation(
+    struct: DependencyNode,
+    llvm_module,
+    generated_debug_info: List[Tuple[DependencyNode, Any]],
+) -> Any:
+    """
+    Generate DWARF debug information for a struct defined in a DependencyNode.
+
+    Args:
+        struct: The dependency node containing struct information
+        llvm_module: The LLVM module to add debug info to
+        generated_debug_info: List of tuples (struct, debug_info) to track generated debug info
+
+    Returns:
+        The struct's composite debug type (cached entry if one was already generated)
+    """
+    # Set up debug info generator
     generator = DebugInfoGenerator(llvm_module)
-    # this is sample debug info generation
-    # i64type = generator.get_uint64_type()
 
-    struct_type = generator.create_struct_type([], 64 * 4, is_distinct=True)
+    # Check if debug info for this struct has already been generated
+    for existing_struct, debug_info in generated_debug_info:
+        if existing_struct.name == struct.name:
+            return debug_info
+
+    # Process all fields and create members for the struct
+    members = []
+    for field_name, field in struct.fields.items():
+        # Get appropriate (debug type, size in bits) pair for this field
+        field_type = _get_field_debug_type(
+            field_name, field, generator, struct, generated_debug_info
+        )
+        # Create struct member; field.offset is scaled by 8 to express it in bits
+        member = generator.create_struct_member_vmlinux(
+            field_name, field_type, field.offset * 8
+        )
+        members.append(member)
 
-    global_var = generator.create_global_var_debug_info(
-        struct.name, struct_type, is_local=False
+    if struct.name.startswith("struct_"):
+        struct_name = struct.name.removeprefix("struct_")
+    else:
+        raise ValueError("Unions are not supported in the current version")
+    # Create struct type with all members
+    struct_type = generator.create_struct_type_with_name(
+        struct_name, members, struct.__sizeof__() * 8, is_distinct=True
     )
 
-    return global_var
+    return struct_type
+
+
+def _get_field_debug_type(
+    field_name: str,
+    field,
+    generator: DebugInfoGenerator,
+    parent_struct: DependencyNode,
+    generated_debug_info: List[Tuple[DependencyNode, Any]],
+) -> tuple[Any, int]:
+    """
+    Determine the appropriate debug type for a field based on its Python/ctypes type.
+
+    Args:
+        field_name: Name of the field
+        field: Field object containing type information
+        generator: DebugInfoGenerator instance
+        parent_struct: The parent struct containing this field
+        generated_debug_info: List of already generated debug info
+
+    Returns:
+        A (debug info type, size in bits) tuple for this field
+    """
+    # Handle complex types (arrays, pointers)
+    if field.ctype_complex_type is not None:
+        if issubclass(field.ctype_complex_type, ctypes.Array):
+            # Handle array types
+            element_type, base_type_size = _get_basic_debug_type(
+                field.containing_type, generator
+            )
+            return generator.create_array_type_vmlinux(
+                (element_type, base_type_size * field.type_size), field.type_size
+            ), field.type_size * base_type_size
+        elif issubclass(field.ctype_complex_type, ctypes._Pointer):
+            # Handle pointer types
+            pointee_type, _ = _get_basic_debug_type(field.containing_type, generator)
+            return generator.create_pointer_type(pointee_type), 64
+
+    # Handle other vmlinux types (nested structs)
+    if field.type.__module__ == "vmlinux":
+        # If it's a struct from vmlinux, check if we've already generated debug info for it
+        struct_name = field.type.__name__
+
+        # Look for existing debug info in the list
+        for existing_struct, debug_info in generated_debug_info:
+            if existing_struct.name == struct_name:
+                # Reuse it; scale by 8 so the size is in bits like every other branch
+                return debug_info, existing_struct.__sizeof__() * 8
+
+        # If not found, create a forward declaration
+        # This will be completed when the actual struct is processed
+        logger.warning("Forward declaration in struct created")
+        forward_type = generator.create_struct_type([], 0, is_distinct=True)
+        return forward_type, 0
+
+    # Handle basic C types
+    return _get_basic_debug_type(field.type, generator)
+
+
+def _get_basic_debug_type(ctype, generator: DebugInfoGenerator) -> tuple[Any, int]:
+    """
+    Map a ctypes type to a DWARF debug type.
+
+    Args:
+        ctype: A ctypes type or Python type
+        generator: DebugInfoGenerator instance
+
+    Returns:
+        A (debug type, size in bits) tuple; unknown types fall back to uint64
+    """
+    # Map ctypes to debug info types
+    if ctype == ctypes.c_char or ctype == ctypes.c_byte:
+        return generator.get_basic_type("char", 8, dc.DW_ATE_signed_char), 8
+    elif ctype == ctypes.c_ubyte or ctype == ctypes.c_uint8:
+        return generator.get_basic_type("unsigned char", 8, dc.DW_ATE_unsigned_char), 8
+    elif ctype == ctypes.c_short or ctype == ctypes.c_int16:
+        return generator.get_basic_type("short", 16, dc.DW_ATE_signed), 16
+    elif ctype == ctypes.c_ushort or ctype == ctypes.c_uint16:
+        return generator.get_basic_type("unsigned short", 16, dc.DW_ATE_unsigned), 16
+    elif ctype == ctypes.c_int or ctype == ctypes.c_int32:
+        return generator.get_basic_type("int", 32, dc.DW_ATE_signed), 32
+    elif ctype == ctypes.c_uint or ctype == ctypes.c_uint32:
+        return generator.get_basic_type("unsigned int", 32, dc.DW_ATE_unsigned), 32
+    elif ctype == ctypes.c_long:
+        return generator.get_basic_type("long", 64, dc.DW_ATE_signed), 64
+    elif ctype == ctypes.c_ulong:
+        return generator.get_basic_type("unsigned long", 64, dc.DW_ATE_unsigned), 64
+    elif ctype == ctypes.c_longlong or ctype == ctypes.c_int64:
+        return generator.get_basic_type("long long", 64, dc.DW_ATE_signed), 64
+    elif ctype == ctypes.c_ulonglong or ctype == ctypes.c_uint64:
+        return generator.get_basic_type(
+            "unsigned long long", 64, dc.DW_ATE_unsigned
+        ), 64
+    elif ctype == ctypes.c_float:
+        return generator.get_basic_type("float", 32, dc.DW_ATE_float), 32
+    elif ctype == ctypes.c_double:
+        return generator.get_basic_type("double", 64, dc.DW_ATE_float), 64
+    elif ctype == ctypes.c_bool:
+        return generator.get_basic_type("bool", 8, dc.DW_ATE_boolean), 8
+    elif ctype == ctypes.c_char_p:
+        char_type = generator.get_basic_type("char", 8, dc.DW_ATE_signed_char)
+        return generator.create_pointer_type(char_type), 64
+    elif ctype == ctypes.c_void_p:
+        return generator.create_pointer_type(None), 64
+    else:
+        return generator.get_uint64_type(), 64
diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py
index 1cf3794c..5d1df927 100644
--- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py
+++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py
@@ -14,6 +14,7 @@ def __init__(self, llvm_module, handler: DependencyHandler, assignment=None):
         self.llvm_module = llvm_module
         self.handler: DependencyHandler = handler
         self.generated: list[str] = []
+        self.generated_debug_info: list = []
         if not handler.is_ready:
             raise ImportError(
                 "Semantic analysis of vmlinux imports failed. Cannot generate IR"
@@ -67,18 +68,22 @@ def struct_processor(self, struct, processing_stack=None):
                     )
 
             # Actual processor logic here after dependencies are resolved
-            self.gen_ir(struct)
+            self.generated_debug_info.append(
+                (struct, self.gen_ir(struct, self.generated_debug_info))
+            )
             self.generated.append(struct.name)
 
         finally:
             # Remove from processing stack after we're done
             processing_stack.discard(struct.name)
 
-    def gen_ir(self, struct):
+    def gen_ir(self, struct, generated_debug_info):
         # TODO: we add the btf_ama attribute by monkey patching in the end of compilation, but once llvmlite
         # accepts our issue, we will resort to normal accessed attribute based attribute addition
         # currently we generate all possible field accesses for CO-RE and put into the assignment table
-        debug_info = debug_info_generation(struct, self.llvm_module)
+        debug_info = debug_info_generation(
+            struct, self.llvm_module, generated_debug_info
+        )
         field_index = 0
         for field_name, field in struct.fields.items():
             # does not take arrays and similar types into consideration yet.
@@ -126,6 +131,7 @@ def gen_ir(self, struct):
                 )
                 globvar.linkage = "external"
                 globvar.set_metadata("llvm.preserve.access.index", debug_info)
+        return debug_info
 
     def _struct_name_generator(
         self,
diff --git a/tests/c-form/ex7.bpf.c b/tests/c-form/ex7.bpf.c
index 80a60d1a..33ed6a5e 100644
--- a/tests/c-form/ex7.bpf.c
+++ b/tests/c-form/ex7.bpf.c
@@ -19,7 +19,7 @@ struct {
 SEC("tp/syscalls/sys_enter_setuid")
 int handle_setuid_entry(struct trace_event_raw_sys_enter *ctx) {
   struct event data = {};
-
+  struct blk_integrity_iter it = {};
   // Extract UID from the syscall arguments
   data.uid = (unsigned int)ctx->args[0];
   data.ts = bpf_ktime_get_ns();
diff --git a/tests/passing_tests/vmlinux/simple_struct_test.py b/tests/passing_tests/vmlinux/simple_struct_test.py
new file mode 100644
index 00000000..f47076f2
--- /dev/null
+++ b/tests/passing_tests/vmlinux/simple_struct_test.py
@@ -0,0 +1,30 @@
+from pythonbpf import bpf, section, bpfglobal, compile_to_ir, compile
+from vmlinux import TASK_COMM_LEN  # noqa: F401
+from vmlinux import struct_trace_event_raw_sys_enter  # noqa: F401
+
+# from vmlinux import struct_uinput_device
+# from vmlinux import struct_blk_integrity_iter
+from ctypes import c_int64
+
+
+# Instructions to how to run this program
+# 1. Install PythonBPF: pip install pythonbpf
+# 2. Run the program: python examples/simple_struct_test.py
+# 3. Run the program with sudo: sudo tools/check.sh run examples/simple_struct_test.o
+# 4. Attach object file to any network device with something like ./check.sh run examples/simple_struct_test.o tailscale0
+# 5. send traffic through the device and observe effects
+@bpf
+@section("tracepoint/syscalls/sys_enter_execve")
+def hello_world(ctx: struct_trace_event_raw_sys_enter) -> c_int64:
+    print("Hello, World!")
+    return c_int64(0)
+
+
+@bpf
+@bpfglobal
+def LICENSE() -> str:
+    return "GPL"
+
+
+compile_to_ir("simple_struct_test.py", "simple_struct_test.ll")
+compile()