diff --git a/pythonbpf/codegen.py b/pythonbpf/codegen.py index 5db9f880..8d25644b 100644 --- a/pythonbpf/codegen.py +++ b/pythonbpf/codegen.py @@ -19,12 +19,22 @@ import tempfile from logging import Logger import logging +import re logger: Logger = logging.getLogger(__name__) VERSION = "v0.1.4" +def finalize_module(original_str): + """After all IR generation is complete, we monkey patch btf_ama attribute""" + + # Create a string with applied transformation of btf_ama attribute addition to BTF struct field accesses. + pattern = r'(@"llvm\.[^"]+:[^"]*" = external global i64, !llvm\.preserve\.access\.index ![0-9]+)' + replacement = r'\1 "btf_ama"' + return re.sub(pattern, replacement, original_str) + + def find_bpf_chunks(tree): """Find all functions decorated with @bpf in the AST.""" bpf_functions = [] @@ -121,10 +131,12 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO): module.add_named_metadata("llvm.ident", [f"PythonBPF {VERSION}"]) + module_string = finalize_module(str(module)) + logger.info(f"IR written to {output}") with open(output, "w") as f: f.write(f'source_filename = "{filename}"\n') - f.write(str(module)) + f.write(module_string) f.write("\n") return output diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index 50f2fd65..c9407116 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -60,6 +60,10 @@ def process_vmlinux_post_ast( pass else: new_dep_node = DependencyNode(name=current_symbol_name) + + # elem_type_class is the actual vmlinux struct/class + new_dep_node.set_ctype_struct(elem_type_class) + handler.add_node(new_dep_node) class_obj = getattr(imported_module, current_symbol_name) # Inspect the class fields @@ -71,9 +75,6 @@ def process_vmlinux_post_ast( if len(field_elem) == 2: field_name, field_type = field_elem elif len(field_elem) == 3: - raise NotImplementedError( - "Bitfields are not supported in the current version" - ) field_name, field_type, bitfield_size = field_elem field_table[field_name] = [field_type, bitfield_size] elif hasattr(class_obj, "__annotations__"): @@ -144,15 +145,35 @@ def process_vmlinux_post_ast( ) new_dep_node.set_field_type(elem_name, elem_type) if containing_type.__module__ == "vmlinux": - process_vmlinux_post_ast( - containing_type, llvm_handler, handler, processing_stack - ) - size_of_containing_type = ( - handler[containing_type.__name__] - ).__sizeof__() - new_dep_node.set_field_ready( - elem_name, True, size_of_containing_type + containing_type_name = ( + containing_type.__name__ + if hasattr(containing_type, "__name__") + else str(containing_type) ) + + # Check for self-reference or already processed + if containing_type_name == current_symbol_name: + # Self-referential pointer + logger.debug( + f"Self-referential pointer in {current_symbol_name}.{elem_name}" + ) + new_dep_node.set_field_ready(elem_name, True) + elif handler.has_node(containing_type_name): + # Already processed + logger.debug( + f"Reusing already processed {containing_type_name}" + ) + new_dep_node.set_field_ready(elem_name, True) + else: + # Process recursively - THIS WAS MISSING + new_dep_node.add_dependent(containing_type_name) + process_vmlinux_post_ast( + containing_type, + llvm_handler, + handler, + processing_stack, + ) + new_dep_node.set_field_ready(elem_name, True) elif containing_type.__module__ == ctypes.__name__: logger.debug(f"Processing ctype internal{containing_type}") new_dep_node.set_field_ready(elem_name, True) @@ -169,12 +190,7 @@ def process_vmlinux_post_ast( process_vmlinux_post_ast( elem_type, llvm_handler, handler, processing_stack ) - size_of_containing_type = ( - handler[elem_type.__name__] - ).__sizeof__() - new_dep_node.set_field_ready( - elem_name, True, size_of_containing_type - ) + new_dep_node.set_field_ready(elem_name, True) else: raise ValueError( f"{elem_name} with type {elem_type} from module {module_name} not supported in recursive resolver" diff --git a/pythonbpf/vmlinux_parser/dependency_handler.py b/pythonbpf/vmlinux_parser/dependency_handler.py index b960ab3e..b34d27f7 100644 --- a/pythonbpf/vmlinux_parser/dependency_handler.py +++ b/pythonbpf/vmlinux_parser/dependency_handler.py @@ -167,3 +167,7 @@ def __getitem__(self, name: str) -> DependencyNode: if name not in self._nodes: raise KeyError(f"No node with name '{name}' found") return self._nodes[name] + + @property + def nodes(self): + return self._nodes diff --git a/pythonbpf/vmlinux_parser/dependency_node.py b/pythonbpf/vmlinux_parser/dependency_node.py index feebec35..e266761b 100644 --- a/pythonbpf/vmlinux_parser/dependency_node.py +++ b/pythonbpf/vmlinux_parser/dependency_node.py @@ -116,6 +116,7 @@ class DependencyNode: fields: Dict[str, Field] = field(default_factory=dict) _ready_cache: Optional[bool] = field(default=None, repr=False) current_offset: int = 0 + ctype_struct: Optional[Any] = field(default=None, repr=False) def add_field( self, @@ -146,7 +147,14 @@ def add_field( # Invalidate readiness cache self._ready_cache = None + def set_ctype_struct(self, ctype_struct: Any) -> None: + """Set the ctypes structure for automatic offset calculation.""" + self.ctype_struct = ctype_struct + def __sizeof__(self): + # If we have a ctype_struct, use its size + if self.ctype_struct is not None: + return ctypes.sizeof(self.ctype_struct) return self.current_offset def get_field(self, name: str) -> Field: @@ -226,8 +234,22 @@ def set_field_ready( raise KeyError(f"Field '{name}' does not exist in node '{self.name}'") self.fields[name].set_ready(is_ready) - self.fields[name].set_offset(self.current_offset) - self.current_offset += self._calculate_size(name, size_of_containing_type) + + # Use ctypes built-in offset if available + if self.ctype_struct is not None: + try: + self.fields[name].set_offset(getattr(self.ctype_struct, name).offset) + except AttributeError: + # Fallback to manual calculation if field not found in ctype_struct + self.fields[name].set_offset(self.current_offset) + self.current_offset += self._calculate_size( + name, size_of_containing_type + ) + else: + # Manual offset calculation when no ctype_struct is available + self.fields[name].set_offset(self.current_offset) + self.current_offset += self._calculate_size(name, size_of_containing_type) + # Invalidate readiness cache self._ready_cache = None @@ -275,8 +297,28 @@ def _calculate_size( raise NotImplementedError( "This subclass of ctype not supported yet" ) + elif processing_field.type_size is not None: + # Handle vmlinux types with type_size but no ctype_complex_type + # This means it's a direct vmlinux struct field (not array/pointer wrapped) + # The type_size should already contain the full size of the struct + # But if there's a containing_type from vmlinux, we need that size + if processing_field.containing_type is not None: + if processing_field.containing_type.__module__ == "vmlinux": + # For vmlinux containing types, we need the pre-calculated size + if size_of_containing_type is not None: + return size_of_containing_type * processing_field.type_size + else: + raise RuntimeError( + f"Field {name}: vmlinux containing_type requires size_of_containing_type" + ) + else: + raise ModuleNotFoundError( + f"Containing type module {processing_field.containing_type.__module__} not supported" + ) + else: + raise RuntimeError("Wrong type found with no containing type") else: - # search up pre-created stuff and get size + # No ctype_complex_type and no type_size, must rely on size_of_containing_type if size_of_containing_type is None: raise RuntimeError( f"Size of containing type {size_of_containing_type} is None" diff --git a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py new file mode 100644 index 00000000..0b38cd67 --- /dev/null +++ b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py @@ -0,0 +1,15 @@ +from pythonbpf.debuginfo import DebugInfoGenerator + + +def debug_info_generation(struct, llvm_module): + generator = DebugInfoGenerator(llvm_module) + # this is sample debug info generation + # i64type = generator.get_uint64_type() + + struct_type = generator.create_struct_type([], 64 * 4, is_distinct=True) + + global_var = generator.create_global_var_debug_info( + struct.name, struct_type, is_local=False + ) + + return global_var diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index d500cf06..1cf3794c 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -1,12 +1,17 @@ +import ctypes import logging -from pythonbpf.vmlinux_parser.dependency_handler import DependencyHandler +from ..dependency_handler import DependencyHandler +from .debug_info_gen import debug_info_generation +from ..dependency_node import DependencyNode +import llvmlite.ir as ir logger = logging.getLogger(__name__) class IRGenerator: - def __init__(self, module, handler: DependencyHandler): - self.module = module + # get the assignments dict and add this stuff to it. + def __init__(self, llvm_module, handler: DependencyHandler, assignment=None): + self.llvm_module = llvm_module self.handler: DependencyHandler = handler self.generated: list[str] = [] if not handler.is_ready: @@ -15,22 +20,142 @@ def __init__(self, module, handler: DependencyHandler): ) for struct in handler: self.struct_processor(struct) - print() - - def struct_processor(self, struct): - if struct.name not in self.generated: - print(f"IR generating for {struct.name}") - print(f"Struct is {struct}") - for dependency in struct.depends_on: - if dependency not in self.generated: - dep_node_from_dependency = self.handler[dependency] - self.struct_processor(dep_node_from_dependency) - self.generated.append(dependency) - # write actual processor logic here after assuming all dependencies are resolved - # this part cannot yet resolve circular dependencies. Gets stuck on an infinite loop during that. + + def struct_processor(self, struct, processing_stack=None): + # Initialize processing stack on first call + if processing_stack is None: + processing_stack = set() + + # If already generated, skip + if struct.name in self.generated: + return + + # Detect circular dependency + if struct.name in processing_stack: + logger.info( + f"Circular dependency detected for {struct.name}, skipping recursive processing" + ) + # For circular dependencies, we can either: + # 1. Use forward declarations (opaque pointers) + # 2. Mark as incomplete and process later + # 3. Generate a placeholder type + # Here we'll just skip and let it be processed in its own call + return + + logger.info(f"IR generating for {struct.name}") + + # Add to processing stack before processing dependencies + processing_stack.add(struct.name) + + try: + # Process all dependencies first + if struct.depends_on is None: + pass + else: + for dependency in struct.depends_on: + if dependency not in self.generated: + # Check if dependency exists in handler + if dependency in self.handler.nodes: + dep_node_from_dependency = self.handler[dependency] + # Pass the processing_stack down to track circular refs + self.struct_processor( + dep_node_from_dependency, processing_stack + ) + else: + raise RuntimeError( + f"Warning: Dependency {dependency} not found in handler" + ) + + # Actual processor logic here after dependencies are resolved + self.gen_ir(struct) self.generated.append(struct.name) - def struct_name_generator( + finally: + # Remove from processing stack after we're done + processing_stack.discard(struct.name) + + def gen_ir(self, struct): + # TODO: we add the btf_ama attribute by monkey patching in the end of compilation, but once llvmlite + # accepts our issue, we will resort to normal accessed attribute based attribute addition + # currently we generate all possible field accesses for CO-RE and put into the assignment table + debug_info = debug_info_generation(struct, self.llvm_module) + field_index = 0 + for field_name, field in struct.fields.items(): + # does not take arrays and similar types into consideration yet. + if field.ctype_complex_type is not None and issubclass( + field.ctype_complex_type, ctypes.Array + ): + array_size = field.type_size + containing_type = field.containing_type + if containing_type.__module__ == ctypes.__name__: + containing_type_size = ctypes.sizeof(containing_type) + for i in range(0, array_size): + field_co_re_name = self._struct_name_generator( + struct, field, field_index, True, i, containing_type_size + ) + globvar = ir.GlobalVariable( + self.llvm_module, ir.IntType(64), name=field_co_re_name + ) + globvar.linkage = "external" + globvar.set_metadata("llvm.preserve.access.index", debug_info) + field_index += 1 + elif field.type_size is not None: + array_size = field.type_size + containing_type = field.containing_type + if containing_type.__module__ == "vmlinux": + containing_type_size = self.handler[ + containing_type.__name__ + ].current_offset + for i in range(0, array_size): + field_co_re_name = self._struct_name_generator( + struct, field, field_index, True, i, containing_type_size + ) + globvar = ir.GlobalVariable( + self.llvm_module, ir.IntType(64), name=field_co_re_name + ) + globvar.linkage = "external" + globvar.set_metadata("llvm.preserve.access.index", debug_info) + field_index += 1 + else: + field_co_re_name = self._struct_name_generator( + struct, field, field_index + ) + field_index += 1 + globvar = ir.GlobalVariable( + self.llvm_module, ir.IntType(64), name=field_co_re_name + ) + globvar.linkage = "external" + globvar.set_metadata("llvm.preserve.access.index", debug_info) + + def _struct_name_generator( self, - ) -> None: - pass + struct: DependencyNode, + field, + field_index: int, + is_indexed: bool = False, + index: int = 0, + containing_type_size: int = 0, + ) -> str: + if is_indexed: + name = ( + "llvm." + + struct.name.removeprefix("struct_") + + f":0:{field.offset + index * containing_type_size}" + + "$" + + f"0:{field_index}:{index}" + ) + return name + elif struct.name.startswith("struct_"): + name = ( + "llvm." + + struct.name.removeprefix("struct_") + + f":0:{field.offset}" + + "$" + + f"0:{field_index}" + ) + return name + else: + print(self.handler[struct.name]) + raise TypeError( + "Name generation cannot occur due to type name not starting with struct" + ) diff --git a/tests/failing_tests/xdp_pass.py b/tests/failing_tests/xdp_pass.py index 99006955..c8510dcd 100644 --- a/tests/failing_tests/xdp_pass.py +++ b/tests/failing_tests/xdp_pass.py @@ -2,13 +2,16 @@ from pythonbpf.maps import HashMap from pythonbpf.helper import XDP_PASS from vmlinux import TASK_COMM_LEN # noqa: F401 -from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 -# from vmlinux import struct_request +from vmlinux import struct_qspinlock # noqa: F401 + +# from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 +# from vmlinux import struct_posix_cputimers # noqa: F401 from vmlinux import struct_xdp_md + # from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 # from vmlinux import struct_ring_buffer_per_cpu # noqa: F401 - +# from vmlinux import struct_request # noqa: F401 from ctypes import c_int64 # Instructions to how to run this program