From c5bef26b88ecd5559816c37833f80f92b74a2998 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Sat, 8 Nov 2025 18:08:04 +0530 Subject: [PATCH 01/24] add multi imports to single import line. --- pythonbpf/vmlinux_parser/import_detector.py | 70 +++++++++------------ tests/failing_tests/vmlinux/requests.py | 21 +++++++ 2 files changed, 50 insertions(+), 41 deletions(-) create mode 100644 tests/failing_tests/vmlinux/requests.py diff --git a/pythonbpf/vmlinux_parser/import_detector.py b/pythonbpf/vmlinux_parser/import_detector.py index d90c4789..48fe403c 100644 --- a/pythonbpf/vmlinux_parser/import_detector.py +++ b/pythonbpf/vmlinux_parser/import_detector.py @@ -25,7 +25,7 @@ def detect_import_statement(tree: ast.AST) -> list[tuple[str, ast.ImportFrom]]: List of tuples containing (module_name, imported_item) for each vmlinux import Raises: - SyntaxError: If multiple imports from vmlinux are attempted or import * is used + SyntaxError: If import * is used """ vmlinux_imports = [] @@ -40,28 +40,19 @@ def detect_import_statement(tree: ast.AST) -> list[tuple[str, ast.ImportFrom]]: "Please import specific types explicitly." ) - # Check for multiple imports: from vmlinux import A, B, C - if len(node.names) > 1: - imported_names = [alias.name for alias in node.names] - raise SyntaxError( - f"Multiple imports from vmlinux are not supported. " - f"Found: {', '.join(imported_names)}. " - f"Please use separate import statements for each type." - ) - # Check if no specific import is specified (should not happen with valid Python) if len(node.names) == 0: raise SyntaxError( "Import from vmlinux must specify at least one type." ) - # Valid single import + # Support multiple imports: from vmlinux import A, B, C for alias in node.names: import_name = alias.name - # Use alias if provided, otherwise use the original name (commented) - # as_name = alias.asname if alias.asname else alias.name - vmlinux_imports.append(("vmlinux", node)) - logger.info(f"Found vmlinux import: {import_name}") + # Use alias if provided, otherwise use the original name + as_name = alias.asname if alias.asname else alias.name + vmlinux_imports.append(("vmlinux", node, import_name, as_name)) + logger.info(f"Found vmlinux import: {import_name} as {as_name}") # Handle "import vmlinux" statements (not typical but should be rejected) elif isinstance(node, ast.Import): @@ -103,40 +94,37 @@ def vmlinux_proc(tree: ast.AST, module): with open(source_file, "r") as f: mod_ast = ast.parse(f.read(), filename=source_file) - for import_mod, import_node in import_statements: - for alias in import_node.names: - imported_name = alias.name - found = False - for mod_node in mod_ast.body: - if ( - isinstance(mod_node, ast.ClassDef) - and mod_node.name == imported_name - ): - process_vmlinux_class(mod_node, module, handler) - found = True - break - if isinstance(mod_node, ast.Assign): - for target in mod_node.targets: - if isinstance(target, ast.Name) and target.id == imported_name: - process_vmlinux_assign(mod_node, module, assignments) - found = True - break - if found: - break - if not found: - logger.info( - f"{imported_name} not found as ClassDef or Assign in vmlinux" - ) + for import_mod, import_node, imported_name, as_name in import_statements: + found = False + for mod_node in mod_ast.body: + if isinstance(mod_node, ast.ClassDef) and mod_node.name == imported_name: + process_vmlinux_class(mod_node, module, handler) + found = True + break + if isinstance(mod_node, ast.Assign): + for target in mod_node.targets: + if isinstance(target, ast.Name) and target.id == imported_name: + process_vmlinux_assign(mod_node, module, assignments, as_name) + found = True + break + if found: + break + if not found: + logger.info(f"{imported_name} not found as ClassDef or Assign in vmlinux") IRGenerator(module, handler, assignments) return assignments -def process_vmlinux_assign(node, module, assignments: dict[str, AssignmentInfo]): +def process_vmlinux_assign( + node, module, assignments: dict[str, AssignmentInfo], target_name=None +): """Process assignments from vmlinux module.""" # Only handle single-target assignments if len(node.targets) == 1 and isinstance(node.targets[0], ast.Name): - target_name = node.targets[0].id + # Use provided target_name (for aliased imports) or fall back to original name + if target_name is None: + target_name = node.targets[0].id # Handle constant value assignments if isinstance(node.value, ast.Constant): diff --git a/tests/failing_tests/vmlinux/requests.py b/tests/failing_tests/vmlinux/requests.py new file mode 100644 index 00000000..f19256bb --- /dev/null +++ b/tests/failing_tests/vmlinux/requests.py @@ -0,0 +1,21 @@ +from vmlinux import struct_request, struct_pt_regs, XDP_PASS +from pythonbpf import bpf, section, bpfglobal, compile_to_ir +import logging + + +@bpf +@section("kprobe/blk_mq_start_request") +def example(ctx: struct_pt_regs): + req = struct_request(ctx.di) + c = req.__data_len + d = XDP_PASS + print(f"data length {c} and test {d}") + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +compile_to_ir("requests.py", "requests.ll", loglevel=logging.INFO) From 95a624044a191e544e87830887b1d71a7d65b14e Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Sat, 8 Nov 2025 20:28:56 +0530 Subject: [PATCH 02/24] fix type error --- pythonbpf/vmlinux_parser/import_detector.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pythonbpf/vmlinux_parser/import_detector.py b/pythonbpf/vmlinux_parser/import_detector.py index 48fe403c..9c86c8c7 100644 --- a/pythonbpf/vmlinux_parser/import_detector.py +++ b/pythonbpf/vmlinux_parser/import_detector.py @@ -11,7 +11,9 @@ logger = logging.getLogger(__name__) -def detect_import_statement(tree: ast.AST) -> list[tuple[str, ast.ImportFrom]]: +def detect_import_statement( + tree: ast.AST, +) -> list[tuple[str, ast.ImportFrom, str, str]]: """ Parse AST and detect import statements from vmlinux. @@ -64,6 +66,7 @@ def detect_import_statement(tree: ast.AST) -> list[tuple[str, ast.ImportFrom]]: ) logger.info(f"Total vmlinux imports detected: {len(vmlinux_imports)}") + # print(f"\n**************\n{vmlinux_imports}\n**************\n") return vmlinux_imports From 5031f90377bfc3c57359010abf8b229e3168c350 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Mon, 10 Nov 2025 20:06:04 +0530 Subject: [PATCH 03/24] fix stacked vmlinux struct parsing issue --- pythonbpf/vmlinux_parser/class_handler.py | 93 +++++++++++++------ .../vmlinux_parser/ir_gen/debug_info_gen.py | 21 +++-- .../vmlinux_parser/ir_gen/ir_generation.py | 49 +++++++--- tests/failing_tests/vmlinux/requests2.py | 19 ++++ 4 files changed, 137 insertions(+), 45 deletions(-) create mode 100644 tests/failing_tests/vmlinux/requests2.py diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index a508ff75..ba51687a 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -16,10 +16,37 @@ def get_module_symbols(module_name: str): return [name for name in dir(imported_module)], imported_module +def unwrap_pointer_type(type_obj: Any) -> Any: + """ + Recursively unwrap all pointer layers to get the base type. + + This handles multiply nested pointers like LP_LP_struct_attribute_group + and returns the base type (struct_attribute_group). + + Stops unwrapping when reaching a non-pointer type (one without _type_ attribute). + + Args: + type_obj: The type object to unwrap + + Returns: + The base type after unwrapping all pointer layers + """ + current_type = type_obj + # Keep unwrapping while it's a pointer/array type (has _type_) + # But stop if _type_ is just a string or basic type marker + while hasattr(current_type, "_type_"): + next_type = current_type._type_ + # Stop if _type_ is a string (like 'c' for c_char) + if isinstance(next_type, str): + break + current_type = next_type + return current_type + + def process_vmlinux_class( - node, - llvm_module, - handler: DependencyHandler, + node, + llvm_module, + handler: DependencyHandler, ): symbols_in_module, imported_module = get_module_symbols("vmlinux") if node.name in symbols_in_module: @@ -30,10 +57,10 @@ def process_vmlinux_class( def process_vmlinux_post_ast( - elem_type_class, - llvm_handler, - handler: DependencyHandler, - processing_stack=None, + elem_type_class, + llvm_handler, + handler: DependencyHandler, + processing_stack=None, ): # Initialize processing stack on first call if processing_stack is None: @@ -113,7 +140,7 @@ def process_vmlinux_post_ast( # Process pointer to ctype if isinstance(elem_type, type) and issubclass( - elem_type, ctypes._Pointer + elem_type, ctypes._Pointer ): # Get the pointed-to type pointed_type = elem_type._type_ @@ -126,7 +153,7 @@ def process_vmlinux_post_ast( # Process function pointers (CFUNCTYPE) elif hasattr(elem_type, "_restype_") and hasattr( - elem_type, "_argtypes_" + elem_type, "_argtypes_" ): # This is a CFUNCTYPE or similar logger.info( @@ -158,13 +185,19 @@ def process_vmlinux_post_ast( if hasattr(elem_type, "_length_") and is_complex_type: type_length = elem_type._length_ - if containing_type.__module__ == "vmlinux": - new_dep_node.add_dependent( - elem_type._type_.__name__ - if hasattr(elem_type._type_, "__name__") - else str(elem_type._type_) + # Unwrap all pointer layers to get the base type for dependency tracking + base_type = unwrap_pointer_type(elem_type) + base_type_module = getattr(base_type, "__module__", None) + + if base_type_module == "vmlinux": + base_type_name = ( + base_type.__name__ + if hasattr(base_type, "__name__") + else str(base_type) ) - elif containing_type.__module__ == ctypes.__name__: + new_dep_node.add_dependent(base_type_name) + elif base_type_module == ctypes.__name__ or base_type_module is None: + # Handle ctypes or types with no module (like some internal ctypes types) if isinstance(elem_type, type): if issubclass(elem_type, ctypes.Array): ctype_complex_type = ctypes.Array @@ -178,7 +211,7 @@ def process_vmlinux_post_ast( raise TypeError("Unsupported ctypes subclass") else: raise ImportError( - f"Unsupported module of {containing_type}" + f"Unsupported module of {base_type}: {base_type_module}" ) logger.debug( f"{containing_type} containing type of parent {elem_name} with {elem_type} and ctype {ctype_complex_type} and length {type_length}" @@ -191,11 +224,16 @@ def process_vmlinux_post_ast( elem_name, ctype_complex_type ) new_dep_node.set_field_type(elem_name, elem_type) - if containing_type.__module__ == "vmlinux": + + # Check the containing_type module to decide whether to recurse + containing_type_module = getattr(containing_type, "__module__", None) + if containing_type_module == "vmlinux": + # Also unwrap containing_type to get base type name + base_containing_type = unwrap_pointer_type(containing_type) containing_type_name = ( - containing_type.__name__ - if hasattr(containing_type, "__name__") - else str(containing_type) + base_containing_type.__name__ + if hasattr(base_containing_type, "__name__") + else str(base_containing_type) ) # Check for self-reference or already processed @@ -212,21 +250,21 @@ def process_vmlinux_post_ast( ) new_dep_node.set_field_ready(elem_name, True) else: - # Process recursively - THIS WAS MISSING + # Process recursively - use base containing type, not the pointer wrapper new_dep_node.add_dependent(containing_type_name) process_vmlinux_post_ast( - containing_type, + base_containing_type, llvm_handler, handler, processing_stack, ) new_dep_node.set_field_ready(elem_name, True) - elif containing_type.__module__ == ctypes.__name__: + elif containing_type_module == ctypes.__name__ or containing_type_module is None: logger.debug(f"Processing ctype internal{containing_type}") new_dep_node.set_field_ready(elem_name, True) else: raise TypeError( - "Module not supported in recursive resolution" + f"Module not supported in recursive resolution: {containing_type_module}" ) else: new_dep_node.add_dependent( @@ -245,9 +283,12 @@ def process_vmlinux_post_ast( raise ValueError( f"{elem_name} with type {elem_type} from module {module_name} not supported in recursive resolver" ) - + elif module_name == ctypes.__name__ or module_name is None: + # Handle ctypes types - these don't need processing, just return + logger.debug(f"Skipping ctypes type {current_symbol_name}") + return True else: - raise ImportError("UNSUPPORTED Module") + raise ImportError(f"UNSUPPORTED Module {module_name}") logger.info( f"{current_symbol_name} processed and handler readiness {handler.is_ready}" diff --git a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py index 232cf10a..d83e3c05 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py +++ b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py @@ -46,13 +46,14 @@ def debug_info_generation( if struct.name.startswith("struct_"): struct_name = struct.name.removeprefix("struct_") + # Create struct type with all members + struct_type = generator.create_struct_type_with_name( + struct_name, members, struct.__sizeof__() * 8, is_distinct=True + ) else: - raise ValueError("Unions are not supported in the current version") - # Create struct type with all members - struct_type = generator.create_struct_type_with_name( - struct_name, members, struct.__sizeof__() * 8, is_distinct=True - ) - + logger.warning("Blindly handling Unions present in vmlinux dependencies") + struct_type = None + # raise ValueError("Unions are not supported in the current version") return struct_type @@ -62,7 +63,7 @@ def _get_field_debug_type( generator: DebugInfoGenerator, parent_struct: DependencyNode, generated_debug_info: List[Tuple[DependencyNode, Any]], -) -> tuple[Any, int]: +) -> tuple[Any, int] | None: """ Determine the appropriate debug type for a field based on its Python/ctypes type. @@ -78,7 +79,11 @@ def _get_field_debug_type( """ # Handle complex types (arrays, pointers) if field.ctype_complex_type is not None: - if issubclass(field.ctype_complex_type, ctypes.Array): + #TODO: Check if this is a CFUNCTYPE (function pointer), but sadly it just checks callable for now + if callable(field.ctype_complex_type): + # Handle function pointer types, create a void pointer as a placeholder + return generator.create_pointer_type(None), 64 + elif issubclass(field.ctype_complex_type, ctypes.Array): # Handle array types element_type, base_type_size = _get_basic_debug_type( field.containing_type, generator diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index 14a74ad0..e248d4c3 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -11,6 +11,9 @@ class IRGenerator: + # This field keeps track of the non_struct names to avoid duplicate name errors. + type_number = 0 + unprocessed_store = [] # get the assignments dict and add this stuff to it. def __init__(self, llvm_module, handler: DependencyHandler, assignments): self.llvm_module = llvm_module @@ -68,6 +71,7 @@ def struct_processor(self, struct, processing_stack=None): dep_node_from_dependency, processing_stack ) else: + print(struct) raise RuntimeError( f"Warning: Dependency {dependency} not found in handler" ) @@ -129,7 +133,20 @@ def gen_ir(self, struct, generated_debug_info): for field_name, field in struct.fields.items(): # does not take arrays and similar types into consideration yet. - if field.ctype_complex_type is not None and issubclass( + if callable(field.ctype_complex_type): + # Function pointer case - generate a simple field accessor + field_co_re_name, returned = self._struct_name_generator( + struct, field, field_index + ) + print(field_co_re_name) + field_index += 1 + globvar = ir.GlobalVariable( + self.llvm_module, ir.IntType(64), name=field_co_re_name + ) + globvar.linkage = "external" + globvar.set_metadata("llvm.preserve.access.index", debug_info) + self.generated_field_names[struct.name][field_name] = globvar + elif field.ctype_complex_type is not None and issubclass( field.ctype_complex_type, ctypes.Array ): array_size = field.type_size @@ -137,7 +154,7 @@ def gen_ir(self, struct, generated_debug_info): if containing_type.__module__ == ctypes.__name__: containing_type_size = ctypes.sizeof(containing_type) if array_size == 0: - field_co_re_name = self._struct_name_generator( + field_co_re_name, returned = self._struct_name_generator( struct, field, field_index, True, 0, containing_type_size ) globvar = ir.GlobalVariable( @@ -149,7 +166,7 @@ def gen_ir(self, struct, generated_debug_info): field_index += 1 continue for i in range(0, array_size): - field_co_re_name = self._struct_name_generator( + field_co_re_name, returned = self._struct_name_generator( struct, field, field_index, True, i, containing_type_size ) globvar = ir.GlobalVariable( @@ -163,11 +180,12 @@ def gen_ir(self, struct, generated_debug_info): array_size = field.type_size containing_type = field.containing_type if containing_type.__module__ == "vmlinux": + print(struct) containing_type_size = self.handler[ containing_type.__name__ ].current_offset for i in range(0, array_size): - field_co_re_name = self._struct_name_generator( + field_co_re_name, returned = self._struct_name_generator( struct, field, field_index, True, i, containing_type_size ) globvar = ir.GlobalVariable( @@ -178,7 +196,7 @@ def gen_ir(self, struct, generated_debug_info): self.generated_field_names[struct.name][field_name] = globvar field_index += 1 else: - field_co_re_name = self._struct_name_generator( + field_co_re_name, returned = self._struct_name_generator( struct, field, field_index ) field_index += 1 @@ -198,7 +216,7 @@ def _struct_name_generator( is_indexed: bool = False, index: int = 0, containing_type_size: int = 0, - ) -> str: + ) -> tuple[str, bool]: # TODO: Does not support Unions as well as recursive pointer and array type naming if is_indexed: name = ( @@ -208,7 +226,7 @@ def _struct_name_generator( + "$" + f"0:{field_index}:{index}" ) - return name + return name, True elif struct.name.startswith("struct_"): name = ( "llvm." @@ -217,9 +235,18 @@ def _struct_name_generator( + "$" + f"0:{field_index}" ) - return name + return name, True else: - print(self.handler[struct.name]) - raise TypeError( - "Name generation cannot occur due to type name not starting with struct" + logger.warning( + "Blindly handling non-struct type to avoid type errors in vmlinux IR generation. Possibly a union." ) + self.type_number += 1 + unprocessed_type = "unprocessed_type_" + str(self.handler[struct.name].name) + if self.unprocessed_store.__contains__(unprocessed_type): + return unprocessed_type + "_" + str(self.type_number), False + else: + self.unprocessed_store.append(unprocessed_type) + return unprocessed_type, False + # raise TypeError( + # "Name generation cannot occur due to type name not starting with struct" + # ) diff --git a/tests/failing_tests/vmlinux/requests2.py b/tests/failing_tests/vmlinux/requests2.py new file mode 100644 index 00000000..0f17e304 --- /dev/null +++ b/tests/failing_tests/vmlinux/requests2.py @@ -0,0 +1,19 @@ +from vmlinux import struct_kobj_type +from pythonbpf import bpf, section, bpfglobal, compile_to_ir +import logging +from ctypes import c_void_p + + +@bpf +@section("kprobe/blk_mq_start_request") +def example(ctx: c_void_p): + print(f"data lengt") + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +compile_to_ir("requests.py", "requests.ll", loglevel=logging.INFO) From f7dee329cbc8761146cef719deb07c72d4f4f6c8 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Mon, 10 Nov 2025 20:29:28 +0530 Subject: [PATCH 04/24] fix nested pointers issue in array generation and also fix zero length array IR generation --- .../vmlinux_parser/ir_gen/ir_generation.py | 38 ++++++++++++++++--- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index e248d4c3..855548c5 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -86,6 +86,7 @@ def struct_processor(self, struct, processing_stack=None): members_dict = {} for field_name, field in struct.fields.items(): # Get the generated field name from our dictionary, or use field_name if not found + print(f"DEBUG: {struct.name}, {field_name}") if ( struct.name in self.generated_field_names and field_name in self.generated_field_names[struct.name] @@ -181,12 +182,25 @@ def gen_ir(self, struct, generated_debug_info): containing_type = field.containing_type if containing_type.__module__ == "vmlinux": print(struct) - containing_type_size = self.handler[ - containing_type.__name__ - ].current_offset - for i in range(0, array_size): + # Unwrap all pointer layers to get the base struct type + base_containing_type = containing_type + while hasattr(base_containing_type, "_type_"): + next_type = base_containing_type._type_ + # Stop if _type_ is a string (like 'c' for c_char) + #TODO: stacked pointers not handl;ing ctypes check here as well + if isinstance(next_type, str): + break + base_containing_type = next_type + + # Get the base struct name + base_struct_name = base_containing_type.__name__ if hasattr(base_containing_type, "__name__") else str(base_containing_type) + + # Look up the size using the base struct name + containing_type_size = self.handler[base_struct_name].current_offset + print(f"GAY: {array_size}, {struct.name}, {field_name}") + if array_size == 0: field_co_re_name, returned = self._struct_name_generator( - struct, field, field_index, True, i, containing_type_size + struct, field, field_index, True, 0, containing_type_size ) globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name @@ -194,7 +208,19 @@ def gen_ir(self, struct, generated_debug_info): globvar.linkage = "external" globvar.set_metadata("llvm.preserve.access.index", debug_info) self.generated_field_names[struct.name][field_name] = globvar - field_index += 1 + field_index += 1 + else: + for i in range(0, array_size): + field_co_re_name, returned = self._struct_name_generator( + struct, field, field_index, True, i, containing_type_size + ) + globvar = ir.GlobalVariable( + self.llvm_module, ir.IntType(64), name=field_co_re_name + ) + globvar.linkage = "external" + globvar.set_metadata("llvm.preserve.access.index", debug_info) + self.generated_field_names[struct.name][field_name] = globvar + field_index += 1 else: field_co_re_name, returned = self._struct_name_generator( struct, field, field_index From 73bbf00e7cd865826704381d2c6b7865c94b5f1e Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Thu, 13 Nov 2025 09:29:53 +0530 Subject: [PATCH 05/24] add tests --- .../vmlinux/assignment_handling.py | 22 +++++++++++++++++++ tests/failing_tests/vmlinux/requests.py | 9 ++++---- tests/failing_tests/vmlinux/requests2.py | 12 +++++----- 3 files changed, 34 insertions(+), 9 deletions(-) create mode 100644 tests/failing_tests/vmlinux/assignment_handling.py diff --git a/tests/failing_tests/vmlinux/assignment_handling.py b/tests/failing_tests/vmlinux/assignment_handling.py new file mode 100644 index 00000000..5ba1a6ec --- /dev/null +++ b/tests/failing_tests/vmlinux/assignment_handling.py @@ -0,0 +1,22 @@ +from vmlinux import XDP_PASS +from pythonbpf import bpf, section, bpfglobal, compile_to_ir +import logging +from ctypes import c_int64, c_void_p + + +@bpf +@section("kprobe/blk_mq_start_request") +def example(ctx: c_void_p) -> c_int64: + d = XDP_PASS # This gives an error, but + e = XDP_PASS + 0 # this does not + print(f"test1 {e} test2 {d}") + return c_int64(0) + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +compile_to_ir("assignment_handling.py", "assignment_handling.ll", loglevel=logging.INFO) diff --git a/tests/failing_tests/vmlinux/requests.py b/tests/failing_tests/vmlinux/requests.py index f19256bb..3c5907e4 100644 --- a/tests/failing_tests/vmlinux/requests.py +++ b/tests/failing_tests/vmlinux/requests.py @@ -1,15 +1,16 @@ -from vmlinux import struct_request, struct_pt_regs, XDP_PASS +from vmlinux import struct_request, struct_pt_regs from pythonbpf import bpf, section, bpfglobal, compile_to_ir import logging +from ctypes import c_int64, c_void_p @bpf @section("kprobe/blk_mq_start_request") -def example(ctx: struct_pt_regs): +def example(ctx: struct_pt_regs) -> c_int64: req = struct_request(ctx.di) c = req.__data_len - d = XDP_PASS - print(f"data length {c} and test {d}") + print(f"data length {c}") + return c_int64(0) @bpf diff --git a/tests/failing_tests/vmlinux/requests2.py b/tests/failing_tests/vmlinux/requests2.py index 0f17e304..46ccdcbd 100644 --- a/tests/failing_tests/vmlinux/requests2.py +++ b/tests/failing_tests/vmlinux/requests2.py @@ -1,13 +1,15 @@ -from vmlinux import struct_kobj_type +from vmlinux import struct_request, struct_pt_regs, XDP_PASS from pythonbpf import bpf, section, bpfglobal, compile_to_ir import logging -from ctypes import c_void_p +from ctypes import c_int64 @bpf @section("kprobe/blk_mq_start_request") -def example(ctx: c_void_p): - print(f"data lengt") +def example(ctx: struct_pt_regs) -> c_int64: + req = ctx.di + print(f"data length {req}") + return c_int64(0) @bpf @@ -16,4 +18,4 @@ def LICENSE() -> str: return "GPL" -compile_to_ir("requests.py", "requests.ll", loglevel=logging.INFO) +compile_to_ir("requests2.py", "requests2.ll", loglevel=logging.INFO) From 49740598ea900960443e4d9daeb76dcd380aea59 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Thu, 13 Nov 2025 09:31:10 +0530 Subject: [PATCH 06/24] format chore --- .../vmlinux_parser/ir_gen/debug_info_gen.py | 2 +- .../vmlinux_parser/ir_gen/ir_generation.py | 26 ++++++++++++++----- .../vmlinux/assignment_handling.py | 4 +-- tests/failing_tests/vmlinux/requests.py | 2 +- tests/failing_tests/vmlinux/requests2.py | 2 +- 5 files changed, 25 insertions(+), 11 deletions(-) diff --git a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py index d83e3c05..57306702 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py +++ b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py @@ -79,7 +79,7 @@ def _get_field_debug_type( """ # Handle complex types (arrays, pointers) if field.ctype_complex_type is not None: - #TODO: Check if this is a CFUNCTYPE (function pointer), but sadly it just checks callable for now + # TODO: Check if this is a CFUNCTYPE (function pointer), but sadly it just checks callable for now if callable(field.ctype_complex_type): # Handle function pointer types, create a void pointer as a placeholder return generator.create_pointer_type(None), 64 diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index 855548c5..fef51a5f 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -14,6 +14,7 @@ class IRGenerator: # This field keeps track of the non_struct names to avoid duplicate name errors. type_number = 0 unprocessed_store = [] + # get the assignments dict and add this stuff to it. def __init__(self, llvm_module, handler: DependencyHandler, assignments): self.llvm_module = llvm_module @@ -187,13 +188,17 @@ def gen_ir(self, struct, generated_debug_info): while hasattr(base_containing_type, "_type_"): next_type = base_containing_type._type_ # Stop if _type_ is a string (like 'c' for c_char) - #TODO: stacked pointers not handl;ing ctypes check here as well + # TODO: stacked pointers not handl;ing ctypes check here as well if isinstance(next_type, str): break base_containing_type = next_type # Get the base struct name - base_struct_name = base_containing_type.__name__ if hasattr(base_containing_type, "__name__") else str(base_containing_type) + base_struct_name = ( + base_containing_type.__name__ + if hasattr(base_containing_type, "__name__") + else str(base_containing_type) + ) # Look up the size using the base struct name containing_type_size = self.handler[base_struct_name].current_offset @@ -212,14 +217,23 @@ def gen_ir(self, struct, generated_debug_info): else: for i in range(0, array_size): field_co_re_name, returned = self._struct_name_generator( - struct, field, field_index, True, i, containing_type_size + struct, + field, + field_index, + True, + i, + containing_type_size, ) globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name ) globvar.linkage = "external" - globvar.set_metadata("llvm.preserve.access.index", debug_info) - self.generated_field_names[struct.name][field_name] = globvar + globvar.set_metadata( + "llvm.preserve.access.index", debug_info + ) + self.generated_field_names[struct.name][field_name] = ( + globvar + ) field_index += 1 else: field_co_re_name, returned = self._struct_name_generator( @@ -272,7 +286,7 @@ def _struct_name_generator( return unprocessed_type + "_" + str(self.type_number), False else: self.unprocessed_store.append(unprocessed_type) - return unprocessed_type, False + return unprocessed_type, False # raise TypeError( # "Name generation cannot occur due to type name not starting with struct" # ) diff --git a/tests/failing_tests/vmlinux/assignment_handling.py b/tests/failing_tests/vmlinux/assignment_handling.py index 5ba1a6ec..b8fe43ee 100644 --- a/tests/failing_tests/vmlinux/assignment_handling.py +++ b/tests/failing_tests/vmlinux/assignment_handling.py @@ -7,8 +7,8 @@ @bpf @section("kprobe/blk_mq_start_request") def example(ctx: c_void_p) -> c_int64: - d = XDP_PASS # This gives an error, but - e = XDP_PASS + 0 # this does not + d = XDP_PASS # This gives an error, but + e = XDP_PASS + 0 # this does not print(f"test1 {e} test2 {d}") return c_int64(0) diff --git a/tests/failing_tests/vmlinux/requests.py b/tests/failing_tests/vmlinux/requests.py index 3c5907e4..bab809f6 100644 --- a/tests/failing_tests/vmlinux/requests.py +++ b/tests/failing_tests/vmlinux/requests.py @@ -1,7 +1,7 @@ from vmlinux import struct_request, struct_pt_regs from pythonbpf import bpf, section, bpfglobal, compile_to_ir import logging -from ctypes import c_int64, c_void_p +from ctypes import c_int64 @bpf diff --git a/tests/failing_tests/vmlinux/requests2.py b/tests/failing_tests/vmlinux/requests2.py index 46ccdcbd..63e90c71 100644 --- a/tests/failing_tests/vmlinux/requests2.py +++ b/tests/failing_tests/vmlinux/requests2.py @@ -1,4 +1,4 @@ -from vmlinux import struct_request, struct_pt_regs, XDP_PASS +from vmlinux import struct_pt_regs from pythonbpf import bpf, section, bpfglobal, compile_to_ir import logging from ctypes import c_int64 From c8801f4c3e7dc3c6674e8840a9841a07514c56fb Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Wed, 19 Nov 2025 23:35:10 +0530 Subject: [PATCH 07/24] nonetype not parsed --- pythonbpf/vmlinux_parser/class_handler.py | 135 +++++++++++++--------- tests/failing_tests/vmlinux/requests2.py | 2 +- 2 files changed, 83 insertions(+), 54 deletions(-) diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index ba51687a..39704590 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -195,9 +195,80 @@ def process_vmlinux_post_ast( if hasattr(base_type, "__name__") else str(base_type) ) + # ONLY add vmlinux types as dependencies new_dep_node.add_dependent(base_type_name) - elif base_type_module == ctypes.__name__ or base_type_module is None: + + logger.debug( + f"{containing_type} containing type of parent {elem_name} with {elem_type} and ctype {ctype_complex_type} and length {type_length}" + ) + new_dep_node.set_field_containing_type( + elem_name, containing_type + ) + new_dep_node.set_field_type_size(elem_name, type_length) + new_dep_node.set_field_ctype_complex_type( + elem_name, ctype_complex_type + ) + new_dep_node.set_field_type(elem_name, elem_type) + + # Check the containing_type module to decide whether to recurse + containing_type_module = getattr( + containing_type, "__module__", None + ) + if containing_type_module == "vmlinux": + # Also unwrap containing_type to get base type name + base_containing_type = unwrap_pointer_type( + containing_type + ) + containing_type_name = ( + base_containing_type.__name__ + if hasattr(base_containing_type, "__name__") + else str(base_containing_type) + ) + + # Check for self-reference or already processed + if containing_type_name == current_symbol_name: + # Self-referential pointer + logger.debug( + f"Self-referential pointer in {current_symbol_name}.{elem_name}" + ) + new_dep_node.set_field_ready(elem_name, True) + elif handler.has_node(containing_type_name): + # Already processed + logger.debug( + f"Reusing already processed {containing_type_name}" + ) + new_dep_node.set_field_ready(elem_name, True) + else: + # Process recursively - use base containing type, not the pointer wrapper + new_dep_node.add_dependent(containing_type_name) + process_vmlinux_post_ast( + base_containing_type, + llvm_handler, + handler, + processing_stack, + ) + new_dep_node.set_field_ready(elem_name, True) + elif ( + containing_type_module == ctypes.__name__ + or containing_type_module is None + ): + logger.debug( + f"Processing ctype internal{containing_type}" + ) + new_dep_node.set_field_ready(elem_name, True) + else: + raise TypeError( + f"Module not supported in recursive resolution: {containing_type_module}" + ) + elif ( + base_type_module == ctypes.__name__ + or base_type_module is None + ): # Handle ctypes or types with no module (like some internal ctypes types) + # DO NOT add ctypes as dependencies - just set field metadata and mark ready + logger.debug( + f"Base type {base_type} is ctypes - NOT adding as dependency, just processing field" + ) if isinstance(elem_type, type): if issubclass(elem_type, ctypes.Array): ctype_complex_type = ctypes.Array @@ -209,62 +280,20 @@ def process_vmlinux_post_ast( ) else: raise TypeError("Unsupported ctypes subclass") - else: - raise ImportError( - f"Unsupported module of {base_type}: {base_type_module}" + + # Set field metadata but DO NOT add dependency or recurse + new_dep_node.set_field_containing_type( + elem_name, containing_type ) - logger.debug( - f"{containing_type} containing type of parent {elem_name} with {elem_type} and ctype {ctype_complex_type} and length {type_length}" - ) - new_dep_node.set_field_containing_type( - elem_name, containing_type - ) - new_dep_node.set_field_type_size(elem_name, type_length) - new_dep_node.set_field_ctype_complex_type( - elem_name, ctype_complex_type - ) - new_dep_node.set_field_type(elem_name, elem_type) - - # Check the containing_type module to decide whether to recurse - containing_type_module = getattr(containing_type, "__module__", None) - if containing_type_module == "vmlinux": - # Also unwrap containing_type to get base type name - base_containing_type = unwrap_pointer_type(containing_type) - containing_type_name = ( - base_containing_type.__name__ - if hasattr(base_containing_type, "__name__") - else str(base_containing_type) + new_dep_node.set_field_type_size(elem_name, type_length) + new_dep_node.set_field_ctype_complex_type( + elem_name, ctype_complex_type ) - - # Check for self-reference or already processed - if containing_type_name == current_symbol_name: - # Self-referential pointer - logger.debug( - f"Self-referential pointer in {current_symbol_name}.{elem_name}" - ) - new_dep_node.set_field_ready(elem_name, True) - elif handler.has_node(containing_type_name): - # Already processed - logger.debug( - f"Reusing already processed {containing_type_name}" - ) - new_dep_node.set_field_ready(elem_name, True) - else: - # Process recursively - use base containing type, not the pointer wrapper - new_dep_node.add_dependent(containing_type_name) - process_vmlinux_post_ast( - base_containing_type, - llvm_handler, - handler, - processing_stack, - ) - new_dep_node.set_field_ready(elem_name, True) - elif containing_type_module == ctypes.__name__ or containing_type_module is None: - logger.debug(f"Processing ctype internal{containing_type}") + new_dep_node.set_field_type(elem_name, elem_type) new_dep_node.set_field_ready(elem_name, True) else: - raise TypeError( - f"Module not supported in recursive resolution: {containing_type_module}" + raise ImportError( + f"Unsupported module of {base_type}: {base_type_module}" ) else: new_dep_node.add_dependent( diff --git a/tests/failing_tests/vmlinux/requests2.py b/tests/failing_tests/vmlinux/requests2.py index 63e90c71..3ced648b 100644 --- a/tests/failing_tests/vmlinux/requests2.py +++ b/tests/failing_tests/vmlinux/requests2.py @@ -1,4 +1,4 @@ -from vmlinux import struct_pt_regs +from vmlinux import struct_pt_regs, struct_request from pythonbpf import bpf, section, bpfglobal, compile_to_ir import logging from ctypes import c_int64 From 740eed45e1431c8e0cce165daccd42a703a54984 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Thu, 20 Nov 2025 14:17:57 +0530 Subject: [PATCH 08/24] add placeholder debug info to shut llvmlite up about NoneType --- pythonbpf/vmlinux_parser/class_handler.py | 18 ++--- .../vmlinux_parser/ir_gen/debug_info_gen.py | 75 ++++++++++++------- tests/failing_tests/vmlinux/requests2.py | 2 +- 3 files changed, 57 insertions(+), 38 deletions(-) diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index 39704590..0c66ba21 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -44,9 +44,9 @@ def unwrap_pointer_type(type_obj: Any) -> Any: def process_vmlinux_class( - node, - llvm_module, - handler: DependencyHandler, + node, + llvm_module, + handler: DependencyHandler, ): symbols_in_module, imported_module = get_module_symbols("vmlinux") if node.name in symbols_in_module: @@ -57,10 +57,10 @@ def process_vmlinux_class( def process_vmlinux_post_ast( - elem_type_class, - llvm_handler, - handler: DependencyHandler, - processing_stack=None, + elem_type_class, + llvm_handler, + handler: DependencyHandler, + processing_stack=None, ): # Initialize processing stack on first call if processing_stack is None: @@ -140,7 +140,7 @@ def process_vmlinux_post_ast( # Process pointer to ctype if isinstance(elem_type, type) and issubclass( - elem_type, ctypes._Pointer + elem_type, ctypes._Pointer ): # Get the pointed-to type pointed_type = elem_type._type_ @@ -153,7 +153,7 @@ def process_vmlinux_post_ast( # Process function pointers (CFUNCTYPE) elif hasattr(elem_type, "_restype_") and hasattr( - elem_type, "_argtypes_" + elem_type, "_argtypes_" ): # This is a CFUNCTYPE or similar logger.info( diff --git a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py index 57306702..eb7636f5 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py +++ b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py @@ -21,7 +21,7 @@ def debug_info_generation( generated_debug_info: List of tuples (struct, debug_info) to track generated debug info Returns: - The generated global variable debug info + The generated global variable debug info, or None for unsupported types """ # Set up debug info generator generator = DebugInfoGenerator(llvm_module) @@ -31,29 +31,44 @@ def debug_info_generation( if existing_struct.name == struct.name: return debug_info + # Check if this is a union (not supported yet) + if not struct.name.startswith("struct_"): + logger.warning(f"Skipping debug info generation for union: {struct.name}") + # Create a minimal forward declaration for unions + union_type = generator.create_struct_type( + [], struct.__sizeof__() * 8, is_distinct=True + ) + return union_type + # Process all fields and create members for the struct members = [] for field_name, field in struct.fields.items(): - # Get appropriate debug type for this field - field_type = _get_field_debug_type( - field_name, field, generator, struct, generated_debug_info - ) - # Create struct member with proper offset - member = generator.create_struct_member_vmlinux( - field_name, field_type, field.offset * 8 - ) - members.append(member) + try: + # Get appropriate debug type for this field + field_type = _get_field_debug_type( + field_name, field, generator, struct, generated_debug_info + ) + + # Ensure field_type is a tuple + if not isinstance(field_type, tuple) or len(field_type) != 2: + logger.error(f"Invalid field_type for {field_name}: {field_type}") + continue + + # Create struct member with proper offset + member = generator.create_struct_member_vmlinux( + field_name, field_type, field.offset * 8 + ) + members.append(member) + except Exception as e: + logger.error(f"Failed to process field {field_name} in {struct.name}: {e}") + continue + + struct_name = struct.name.removeprefix("struct_") + # Create struct type with all members + struct_type = generator.create_struct_type_with_name( + struct_name, members, struct.__sizeof__() * 8, is_distinct=True + ) - if struct.name.startswith("struct_"): - struct_name = struct.name.removeprefix("struct_") - # Create struct type with all members - struct_type = generator.create_struct_type_with_name( - struct_name, members, struct.__sizeof__() * 8, is_distinct=True - ) - else: - logger.warning("Blindly handling Unions present in vmlinux dependencies") - struct_type = None - # raise ValueError("Unions are not supported in the current version") return struct_type @@ -63,7 +78,7 @@ def _get_field_debug_type( generator: DebugInfoGenerator, parent_struct: DependencyNode, generated_debug_info: List[Tuple[DependencyNode, Any]], -) -> tuple[Any, int] | None: +) -> tuple[Any, int]: """ Determine the appropriate debug type for a field based on its Python/ctypes type. @@ -75,14 +90,16 @@ def _get_field_debug_type( generated_debug_info: List of already generated debug info Returns: - The debug info type for this field + A tuple of (debug_type, size_in_bits) """ - # Handle complex types (arrays, pointers) + # Handle complex types (arrays, pointers, function pointers) if field.ctype_complex_type is not None: - # TODO: Check if this is a CFUNCTYPE (function pointer), but sadly it just checks callable for now + # Handle function pointer types (CFUNCTYPE) if callable(field.ctype_complex_type): - # Handle function pointer types, create a void pointer as a placeholder - return generator.create_pointer_type(None), 64 + # Function pointers are represented as void pointers + logger.info(f"Field {field_name} is a function pointer, using void pointer") + void_ptr = generator.create_pointer_type(None, 64) + return void_ptr, 64 elif issubclass(field.ctype_complex_type, ctypes.Array): # Handle array types element_type, base_type_size = _get_basic_debug_type( @@ -105,11 +122,13 @@ def _get_field_debug_type( for existing_struct, debug_info in generated_debug_info: if existing_struct.name == struct_name: # Use existing debug info - return debug_info, existing_struct.__sizeof__() + return debug_info, existing_struct.__sizeof__() * 8 # If not found, create a forward declaration # This will be completed when the actual struct is processed - logger.warning("Forward declaration in struct created") + logger.warning( + f"Forward declaration created for {struct_name} in {parent_struct.name}" + ) forward_type = generator.create_struct_type([], 0, is_distinct=True) return forward_type, 0 diff --git a/tests/failing_tests/vmlinux/requests2.py b/tests/failing_tests/vmlinux/requests2.py index 3ced648b..63e90c71 100644 --- a/tests/failing_tests/vmlinux/requests2.py +++ b/tests/failing_tests/vmlinux/requests2.py @@ -1,4 +1,4 @@ -from vmlinux import struct_pt_regs, struct_request +from vmlinux import struct_pt_regs from pythonbpf import bpf, section, bpfglobal, compile_to_ir import logging from ctypes import c_int64 From 306570953b2280b5a79849841dc56d16f5735753 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Thu, 20 Nov 2025 14:18:45 +0530 Subject: [PATCH 09/24] format chore --- pythonbpf/vmlinux_parser/ir_gen/ir_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index fef51a5f..fca17859 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -13,7 +13,7 @@ class IRGenerator: # This field keeps track of the non_struct names to avoid duplicate name errors. type_number = 0 - unprocessed_store = [] + unprocessed_store: list[str] = [] # get the assignments dict and add this stuff to it. def __init__(self, llvm_module, handler: DependencyHandler, assignments): From 902a52a07db7486f31dbd619873f7ffb2d94bb9b Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Thu, 20 Nov 2025 14:39:13 +0530 Subject: [PATCH 10/24] remove debug print statements --- pythonbpf/vmlinux_parser/import_detector.py | 1 - pythonbpf/vmlinux_parser/ir_gen/ir_generation.py | 5 ----- pythonbpf/vmlinux_parser/vmlinux_exports_handler.py | 2 -- 3 files changed, 8 deletions(-) diff --git a/pythonbpf/vmlinux_parser/import_detector.py b/pythonbpf/vmlinux_parser/import_detector.py index 9c86c8c7..b0da40ae 100644 --- a/pythonbpf/vmlinux_parser/import_detector.py +++ b/pythonbpf/vmlinux_parser/import_detector.py @@ -66,7 +66,6 @@ def detect_import_statement( ) logger.info(f"Total vmlinux imports detected: {len(vmlinux_imports)}") - # print(f"\n**************\n{vmlinux_imports}\n**************\n") return vmlinux_imports diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index fca17859..6a7088cd 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -72,7 +72,6 @@ def struct_processor(self, struct, processing_stack=None): dep_node_from_dependency, processing_stack ) else: - print(struct) raise RuntimeError( f"Warning: Dependency {dependency} not found in handler" ) @@ -87,7 +86,6 @@ def struct_processor(self, struct, processing_stack=None): members_dict = {} for field_name, field in struct.fields.items(): # Get the generated field name from our dictionary, or use field_name if not found - print(f"DEBUG: {struct.name}, {field_name}") if ( struct.name in self.generated_field_names and field_name in self.generated_field_names[struct.name] @@ -140,7 +138,6 @@ def gen_ir(self, struct, generated_debug_info): field_co_re_name, returned = self._struct_name_generator( struct, field, field_index ) - print(field_co_re_name) field_index += 1 globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name @@ -182,7 +179,6 @@ def gen_ir(self, struct, generated_debug_info): array_size = field.type_size containing_type = field.containing_type if containing_type.__module__ == "vmlinux": - print(struct) # Unwrap all pointer layers to get the base struct type base_containing_type = containing_type while hasattr(base_containing_type, "_type_"): @@ -202,7 +198,6 @@ def gen_ir(self, struct, generated_debug_info): # Look up the size using the base struct name containing_type_size = self.handler[base_struct_name].current_offset - print(f"GAY: {array_size}, {struct.name}, {field_name}") if array_size == 0: field_co_re_name, returned = self._struct_name_generator( struct, field, field_index, True, 0, containing_type_size diff --git a/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py b/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py index 62c03278..f641e802 100644 --- a/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py +++ b/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py @@ -98,11 +98,9 @@ def handle_vmlinux_struct_field( python_type.__name__, field_name ) builder.function.args[0].type = ir.PointerType(ir.IntType(8)) - print(builder.function.args[0]) field_ptr = self.load_ctx_field( builder, builder.function.args[0], globvar_ir ) - print(field_ptr) # Return pointer to field and field type return field_ptr, field_data else: From 144d9b0ab43279fc6d61f954fb4c38b7f6991fa2 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Thu, 20 Nov 2025 17:24:02 +0530 Subject: [PATCH 11/24] change c-file test structure --- tests/c-form/Makefile | 13 ++++++++----- tests/c-form/requests.bpf.c | 15 +++++++++++++++ .../vmlinux/requests2.py | 0 3 files changed, 23 insertions(+), 5 deletions(-) create mode 100644 tests/c-form/requests.bpf.c rename tests/{failing_tests => passing_tests}/vmlinux/requests2.py (100%) diff --git a/tests/c-form/Makefile b/tests/c-form/Makefile index 64ff9006..a34debac 100644 --- a/tests/c-form/Makefile +++ b/tests/c-form/Makefile @@ -1,19 +1,22 @@ BPF_CLANG := clang -CFLAGS := -O0 -emit-llvm -target bpf -c +CFLAGS := -emit-llvm -target bpf -c SRC := $(wildcard *.bpf.c) LL := $(SRC:.bpf.c=.bpf.ll) OBJ := $(SRC:.bpf.c=.bpf.o) - +LL0 := $(SRC:.bpf.c=.bpf.o0.ll) .PHONY: all clean -all: $(LL) $(OBJ) +all: $(LL) $(OBJ) $(LL0) %.bpf.o: %.bpf.c $(BPF_CLANG) -O2 -g -target bpf -c $< -o $@ %.bpf.ll: %.bpf.c - $(BPF_CLANG) $(CFLAGS) -g -S $< -o $@ + $(BPF_CLANG) $(CFLAGS) -O2 -g -S $< -o $@ + +%.bpf.o0.ll: %.bpf.c + $(BPF_CLANG) $(CFLAGS) -O0 -g -S $< -o $@ clean: - rm -f $(LL) $(OBJ) + rm -f $(LL) $(OBJ) $(LL0) diff --git a/tests/c-form/requests.bpf.c b/tests/c-form/requests.bpf.c new file mode 100644 index 00000000..0e14e98f --- /dev/null +++ b/tests/c-form/requests.bpf.c @@ -0,0 +1,15 @@ +#include "vmlinux.h" +#include +#include + +char LICENSE[] SEC("license") = "GPL"; + +SEC("kprobe/blk_mq_start_request") +int example(struct pt_regs *ctx) +{ + struct request *req = (struct request *)(ctx->di); + u32 data_len = req->__data_len; + bpf_printk("data length %u\n", data_len); + + return 0; +} diff --git a/tests/failing_tests/vmlinux/requests2.py b/tests/passing_tests/vmlinux/requests2.py similarity index 100% rename from tests/failing_tests/vmlinux/requests2.py rename to tests/passing_tests/vmlinux/requests2.py From fde8eab7757ad04489be8213362ea4992b7c70b6 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Fri, 21 Nov 2025 16:02:54 +0530 Subject: [PATCH 12/24] allow allocation pass on vmlinux cast --- pythonbpf/allocation_pass.py | 13 +++++++++++++ pythonbpf/type_deducer.py | 2 -- pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py | 2 +- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/pythonbpf/allocation_pass.py b/pythonbpf/allocation_pass.py index b5fa37c0..a0967394 100644 --- a/pythonbpf/allocation_pass.py +++ b/pythonbpf/allocation_pass.py @@ -118,6 +118,19 @@ def _allocate_for_call( local_sym_tab[var_name] = LocalSymbol(var, struct_info.ir_type, call_type) logger.info(f"Pre-allocated {var_name} for struct {call_type}") + elif VmlinuxHandlerRegistry.is_vmlinux_struct(call_type): + # When calling struct_name(pointer), we're doing a cast, not construction + # So we allocate as a pointer (i64) not as the actual struct + ir_type = ir.IntType(64) # Pointer type + var = builder.alloca(ir_type, name=var_name) + var.align = 8 + local_sym_tab[var_name] = LocalSymbol( + var, ir_type, VmlinuxHandlerRegistry.get_struct_type(call_type) + ) + logger.info( + f"Pre-allocated {var_name} for vmlinux struct pointer cast to {call_type}" + ) + else: logger.warning(f"Unknown call type for allocation: {call_type}") diff --git a/pythonbpf/type_deducer.py b/pythonbpf/type_deducer.py index fd589ae0..a6834a9b 100644 --- a/pythonbpf/type_deducer.py +++ b/pythonbpf/type_deducer.py @@ -16,8 +16,6 @@ "c_long": ir.IntType(64), "c_ulong": ir.IntType(64), "c_longlong": ir.IntType(64), - "c_uint": ir.IntType(32), - "c_int": ir.IntType(32), # Not so sure about this one "str": ir.PointerType(ir.IntType(8)), } diff --git a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py index eb7636f5..7ee187be 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py +++ b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py @@ -126,7 +126,7 @@ def _get_field_debug_type( # If not found, create a forward declaration # This will be completed when the actual struct is processed - logger.warning( + logger.info( f"Forward declaration created for {struct_name} in {parent_struct.name}" ) forward_type = generator.create_struct_type([], 0, is_distinct=True) From 25394059a6b8be39f03bbccee254f85da91ceba7 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Fri, 21 Nov 2025 20:11:35 +0530 Subject: [PATCH 13/24] allow casting --- pythonbpf/allocation_pass.py | 15 +++--- pythonbpf/assign_pass.py | 5 ++ pythonbpf/expr/expr_pass.py | 68 +++++++++++++++++++++++++ tests/c-form/requests.bpf.c | 7 ++- tests/failing_tests/vmlinux/requests.py | 9 ++-- 5 files changed, 91 insertions(+), 13 deletions(-) diff --git a/pythonbpf/allocation_pass.py b/pythonbpf/allocation_pass.py index a0967394..56c039ff 100644 --- a/pythonbpf/allocation_pass.py +++ b/pythonbpf/allocation_pass.py @@ -121,11 +121,10 @@ def _allocate_for_call( elif VmlinuxHandlerRegistry.is_vmlinux_struct(call_type): # When calling struct_name(pointer), we're doing a cast, not construction # So we allocate as a pointer (i64) not as the actual struct - ir_type = ir.IntType(64) # Pointer type - var = builder.alloca(ir_type, name=var_name) + var = builder.alloca(ir.PointerType(), name=var_name) var.align = 8 local_sym_tab[var_name] = LocalSymbol( - var, ir_type, VmlinuxHandlerRegistry.get_struct_type(call_type) + var, ir.PointerType(), VmlinuxHandlerRegistry.get_struct_type(call_type) ) logger.info( f"Pre-allocated {var_name} for vmlinux struct pointer cast to {call_type}" @@ -340,11 +339,11 @@ def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_ field_ir, field = field_type # TODO: For now, we only support integer type allocations. # This always assumes first argument of function to be the context struct - base_ptr = builder.function.args[0] - local_sym_tab[ - struct_var - ].var = base_ptr # This is repurposing of var to store the pointer of the base type - local_sym_tab[struct_var].ir_type = field_ir + # base_ptr = builder.function.args[0] + # local_sym_tab[ + # struct_var + # ].var = base_ptr # This is repurposing of var to store the pointer of the base type + # local_sym_tab[struct_var].ir_type = field_ir # Determine the actual IR type based on the field's type actual_ir_type = None diff --git a/pythonbpf/assign_pass.py b/pythonbpf/assign_pass.py index 0bd48c61..fc842383 100644 --- a/pythonbpf/assign_pass.py +++ b/pythonbpf/assign_pass.py @@ -1,5 +1,7 @@ import ast import logging +from inspect import isclass + from llvmlite import ir from pythonbpf.expr import eval_expr from pythonbpf.helper import emit_probe_read_kernel_str_call @@ -150,6 +152,9 @@ def handle_variable_assignment( val, val_type = val_result logger.info(f"Evaluated value for {var_name}: {val} of type {val_type}, {var_type}") if val_type != var_type: + # if isclass(val_type) and (val_type.__module__ == "vmlinux"): + # logger.info("Handling typecast to vmlinux struct") + # print(val_type, var_type) if isinstance(val_type, Field): logger.info("Handling assignment to struct field") # Special handling for struct_xdp_md i32 fields that are zero-extended to i64 diff --git a/pythonbpf/expr/expr_pass.py b/pythonbpf/expr/expr_pass.py index a9eab987..335a7649 100644 --- a/pythonbpf/expr/expr_pass.py +++ b/pythonbpf/expr/expr_pass.py @@ -524,6 +524,64 @@ def _handle_boolean_op( logger.error(f"Unsupported boolean operator: {type(expr.op).__name__}") return None +# ============================================================================ +# VMLinux casting +# ============================================================================ + +def _handle_vmlinux_cast( + func, + module, + builder, + expr, + local_sym_tab, + map_sym_tab, + structs_sym_tab=None, +): + # handle expressions such as struct_request(ctx.di) where struct_request is a vmlinux + # struct and ctx.di is a pointer to a struct but is actually represented as a c_uint64 + # which needs to be cast to a pointer. This is also a field of another vmlinux struct + """Handle vmlinux struct cast expressions like struct_request(ctx.di).""" + if len(expr.args) != 1: + logger.info("vmlinux struct cast takes exactly one argument") + return None + + # Get the struct name + struct_name = expr.func.id + + # Evaluate the argument (e.g., ctx.di which is a c_uint64) + arg_result = eval_expr( + func, + module, + builder, + expr.args[0], + local_sym_tab, + map_sym_tab, + structs_sym_tab, + ) + + if arg_result is None: + logger.info("Failed to evaluate argument to vmlinux struct cast") + return None + + arg_val, arg_type = arg_result + # Get the vmlinux struct type + vmlinux_struct_type = VmlinuxHandlerRegistry.get_struct_type(struct_name) + if vmlinux_struct_type is None: + logger.error(f"Failed to get vmlinux struct type for {struct_name}") + return None + # Cast the integer/value to a pointer to the struct + # If arg_val is an integer type, we need to inttoptr it + ptr_type = ir.PointerType() + #TODO: add a integer check here later + if ctypes_to_ir(arg_type.type.__name__): + # Cast integer to pointer + casted_ptr = builder.inttoptr(arg_val, ptr_type) + else: + logger.error(f"Unsupported type for vmlinux cast: {arg_type}") + return None + + return casted_ptr, vmlinux_struct_type + # ============================================================================ # Expression Dispatcher @@ -545,6 +603,16 @@ def eval_expr( elif isinstance(expr, ast.Constant): return _handle_constant_expr(module, builder, expr) elif isinstance(expr, ast.Call): + if isinstance(expr.func, ast.Name) and VmlinuxHandlerRegistry.is_vmlinux_struct(expr.func.id): + return _handle_vmlinux_cast( + func, + module, + builder, + expr, + local_sym_tab, + map_sym_tab, + structs_sym_tab, + ) if isinstance(expr.func, ast.Name) and expr.func.id == "deref": return _handle_deref_call(expr, local_sym_tab, builder) diff --git a/tests/c-form/requests.bpf.c b/tests/c-form/requests.bpf.c index 0e14e98f..55b12397 100644 --- a/tests/c-form/requests.bpf.c +++ b/tests/c-form/requests.bpf.c @@ -1,15 +1,18 @@ #include "vmlinux.h" #include #include +#include char LICENSE[] SEC("license") = "GPL"; SEC("kprobe/blk_mq_start_request") int example(struct pt_regs *ctx) { + u64 a = ctx->r15; struct request *req = (struct request *)(ctx->di); - u32 data_len = req->__data_len; - bpf_printk("data length %u\n", data_len); + unsigned int something_ns = BPF_CORE_READ(req, timeout); + unsigned int data_len = BPF_CORE_READ(req, __data_len); + bpf_printk("data length %lld %ld %ld\n", data_len, something_ns, a); return 0; } diff --git a/tests/failing_tests/vmlinux/requests.py b/tests/failing_tests/vmlinux/requests.py index bab809f6..a32636ed 100644 --- a/tests/failing_tests/vmlinux/requests.py +++ b/tests/failing_tests/vmlinux/requests.py @@ -1,5 +1,5 @@ from vmlinux import struct_request, struct_pt_regs -from pythonbpf import bpf, section, bpfglobal, compile_to_ir +from pythonbpf import bpf, section, bpfglobal, compile_to_ir, compile import logging from ctypes import c_int64 @@ -7,9 +7,11 @@ @bpf @section("kprobe/blk_mq_start_request") def example(ctx: struct_pt_regs) -> c_int64: + a = ctx.r15 req = struct_request(ctx.di) - c = req.__data_len - print(f"data length {c}") + d = req.__data_len + c = req.timeout + print(f"data length {d} and {c} and {a}") return c_int64(0) @@ -20,3 +22,4 @@ def LICENSE() -> str: compile_to_ir("requests.py", "requests.ll", loglevel=logging.INFO) +compile() From 9ee821c7f611ef95c2891ab6fa78525e5abd42b7 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Fri, 21 Nov 2025 20:21:24 +0530 Subject: [PATCH 14/24] make pointer allocation feasible but subverting LLC --- pythonbpf/allocation_pass.py | 4 ++-- pythonbpf/expr/expr_pass.py | 26 +++++++++++++++----------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/pythonbpf/allocation_pass.py b/pythonbpf/allocation_pass.py index 56c039ff..31ebf6de 100644 --- a/pythonbpf/allocation_pass.py +++ b/pythonbpf/allocation_pass.py @@ -121,10 +121,10 @@ def _allocate_for_call( elif VmlinuxHandlerRegistry.is_vmlinux_struct(call_type): # When calling struct_name(pointer), we're doing a cast, not construction # So we allocate as a pointer (i64) not as the actual struct - var = builder.alloca(ir.PointerType(), name=var_name) + var = builder.alloca(ir.IntType(64), name=var_name) var.align = 8 local_sym_tab[var_name] = LocalSymbol( - var, ir.PointerType(), VmlinuxHandlerRegistry.get_struct_type(call_type) + var, ir.IntType(64), VmlinuxHandlerRegistry.get_struct_type(call_type) ) logger.info( f"Pre-allocated {var_name} for vmlinux struct pointer cast to {call_type}" diff --git a/pythonbpf/expr/expr_pass.py b/pythonbpf/expr/expr_pass.py index 335a7649..3bfb0a70 100644 --- a/pythonbpf/expr/expr_pass.py +++ b/pythonbpf/expr/expr_pass.py @@ -524,20 +524,22 @@ def _handle_boolean_op( logger.error(f"Unsupported boolean operator: {type(expr.op).__name__}") return None + # ============================================================================ # VMLinux casting # ============================================================================ + def _handle_vmlinux_cast( - func, - module, - builder, - expr, - local_sym_tab, - map_sym_tab, - structs_sym_tab=None, + func, + module, + builder, + expr, + local_sym_tab, + map_sym_tab, + structs_sym_tab=None, ): - # handle expressions such as struct_request(ctx.di) where struct_request is a vmlinux + # handle expressions such as struct_request(ctx.di) where struct_request is a vmlinux # struct and ctx.di is a pointer to a struct but is actually represented as a c_uint64 # which needs to be cast to a pointer. This is also a field of another vmlinux struct """Handle vmlinux struct cast expressions like struct_request(ctx.di).""" @@ -572,7 +574,7 @@ def _handle_vmlinux_cast( # Cast the integer/value to a pointer to the struct # If arg_val is an integer type, we need to inttoptr it ptr_type = ir.PointerType() - #TODO: add a integer check here later + # TODO: add a integer check here later if ctypes_to_ir(arg_type.type.__name__): # Cast integer to pointer casted_ptr = builder.inttoptr(arg_val, ptr_type) @@ -603,8 +605,10 @@ def eval_expr( elif isinstance(expr, ast.Constant): return _handle_constant_expr(module, builder, expr) elif isinstance(expr, ast.Call): - if isinstance(expr.func, ast.Name) and VmlinuxHandlerRegistry.is_vmlinux_struct(expr.func.id): - return _handle_vmlinux_cast( + if isinstance(expr.func, ast.Name) and VmlinuxHandlerRegistry.is_vmlinux_struct( + expr.func.id + ): + return _handle_vmlinux_cast( func, module, builder, From 11850d16d30111914898abcd984ff5ec4f1b8380 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Fri, 21 Nov 2025 20:50:40 +0530 Subject: [PATCH 15/24] field check in allocation pass --- pythonbpf/allocation_pass.py | 13 +++---------- pythonbpf/expr/expr_pass.py | 13 +++++++++++-- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/pythonbpf/allocation_pass.py b/pythonbpf/allocation_pass.py index 31ebf6de..9b453119 100644 --- a/pythonbpf/allocation_pass.py +++ b/pythonbpf/allocation_pass.py @@ -337,13 +337,6 @@ def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_ VmlinuxHandlerRegistry.get_field_type(vmlinux_struct_name, field_name) ) field_ir, field = field_type - # TODO: For now, we only support integer type allocations. - # This always assumes first argument of function to be the context struct - # base_ptr = builder.function.args[0] - # local_sym_tab[ - # struct_var - # ].var = base_ptr # This is repurposing of var to store the pointer of the base type - # local_sym_tab[struct_var].ir_type = field_ir # Determine the actual IR type based on the field's type actual_ir_type = None @@ -398,12 +391,12 @@ def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_ ) actual_ir_type = ir.IntType(64) - # Allocate with the actual IR type, not the GlobalVariable + # Allocate with the actual IR type var = _allocate_with_type(builder, var_name, actual_ir_type) - local_sym_tab[var_name] = LocalSymbol(var, actual_ir_type, field) + local_sym_tab[var_name] = LocalSymbol(var, actual_ir_type, field) # <-- Store Field metadata logger.info( - f"Pre-allocated {var_name} from vmlinux struct {vmlinux_struct_name}.{field_name}" + f"Pre-allocated {var_name} as {actual_ir_type} from vmlinux struct {vmlinux_struct_name}.{field_name}" ) return else: diff --git a/pythonbpf/expr/expr_pass.py b/pythonbpf/expr/expr_pass.py index 3bfb0a70..ee2b9f49 100644 --- a/pythonbpf/expr/expr_pass.py +++ b/pythonbpf/expr/expr_pass.py @@ -14,6 +14,7 @@ ) from pythonbpf.vmlinux_parser.assignment_info import Field from .vmlinux_registry import VmlinuxHandlerRegistry +from ..vmlinux_parser.dependency_node import Field logger: Logger = logging.getLogger(__name__) @@ -89,8 +90,16 @@ def _handle_attribute_expr( return vmlinux_result else: raise RuntimeError("Vmlinux struct did not process successfully") - metadata = structs_sym_tab[var_metadata] - if attr_name in metadata.fields: + + elif isinstance(var_metadata, Field): + logger.error( + f"Cannot access field '{attr_name}' on already-loaded field value '{var_name}'" + ) + return None + + # Regular user-defined struct + metadata = structs_sym_tab.get(var_metadata) + if metadata and attr_name in metadata.fields: gep = metadata.gep(builder, var_ptr, attr_name) val = builder.load(gep) field_type = metadata.field_type(attr_name) From 99321c7669a120dd614bce4003cdc5c5c25c168a Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Fri, 21 Nov 2025 23:01:08 +0530 Subject: [PATCH 16/24] add a failing C test --- pythonbpf/type_deducer.py | 1 + tests/c-form/requests2.bpf.c | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 tests/c-form/requests2.bpf.c diff --git a/pythonbpf/type_deducer.py b/pythonbpf/type_deducer.py index a6834a9b..74abc0d2 100644 --- a/pythonbpf/type_deducer.py +++ b/pythonbpf/type_deducer.py @@ -16,6 +16,7 @@ "c_long": ir.IntType(64), "c_ulong": ir.IntType(64), "c_longlong": ir.IntType(64), + "c_uint": ir.IntType(32), # Not so sure about this one "str": ir.PointerType(ir.IntType(8)), } diff --git a/tests/c-form/requests2.bpf.c b/tests/c-form/requests2.bpf.c new file mode 100644 index 00000000..c0cbf9f2 --- /dev/null +++ b/tests/c-form/requests2.bpf.c @@ -0,0 +1,18 @@ +#include "vmlinux.h" +#include +#include +#include + +char LICENSE[] SEC("license") = "GPL"; + +SEC("kprobe/blk_mq_start_request") +int example(struct pt_regs *ctx) +{ + u64 a = ctx->r15; + struct request *req = (struct request *)(ctx->di); + unsigned int something_ns = req->timeout; + unsigned int data_len = req->__data_len; + bpf_printk("data length %lld %ld %ld\n", data_len, something_ns, a); + + return 0; +} From 377fa4041deeab4284662dd26a8cac00fa9e0326 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Sat, 22 Nov 2025 00:36:59 +0530 Subject: [PATCH 17/24] add regular struct field access handling in vmlinux_registry.py --- .../vmlinux_parser/vmlinux_exports_handler.py | 118 ++++++++++++++++-- tests/failing_tests/vmlinux/requests.py | 3 +- 2 files changed, 112 insertions(+), 9 deletions(-) diff --git a/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py b/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py index 30f30589..085610ad 100644 --- a/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py +++ b/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py @@ -94,17 +94,119 @@ def handle_vmlinux_struct_field( f"Attempting to access field {field_name} of possible vmlinux struct {struct_var_name}" ) python_type: type = var_info.metadata - struct_name = python_type.__name__ - globvar_ir, field_data = self.get_field_type(struct_name, field_name) - builder.function.args[0].type = ir.PointerType(ir.IntType(8)) - field_ptr = self.load_ctx_field( - builder, builder.function.args[0], globvar_ir, field_data, struct_name - ) - # Return pointer to field and field type - return field_ptr, field_data + # Check if this is a context field (ctx) or a cast struct + is_context_field = var_info.var is None + + if is_context_field: + # Handle context field access (original behavior) + struct_name = python_type.__name__ + globvar_ir, field_data = self.get_field_type(struct_name, field_name) + builder.function.args[0].type = ir.PointerType(ir.IntType(8)) + field_ptr = self.load_ctx_field( + builder, builder.function.args[0], globvar_ir, field_data, struct_name + ) + return field_ptr, field_data + else: + # Handle cast struct field access + struct_name = python_type.__name__ + globvar_ir, field_data = self.get_field_type(struct_name, field_name) + + # Handle cast struct field access (use standard GEP) + # Load the struct pointer from the local variable + struct_ptr = builder.load(var_info.var) + + # Use standard GEP for non-context struct field access + field_value = self.load_struct_field( + builder, struct_ptr, globvar_ir, field_data, struct_name + ) + # Return field value and field type + return field_value, field_data else: raise RuntimeError("Variable accessed not found in symbol table") + @staticmethod + def load_struct_field(builder, struct_ptr_int, offset_global, field_data, struct_name=None): + """ + Generate LLVM IR to load a field from a regular (non-context) struct using standard GEP. + + Args: + builder: llvmlite IRBuilder instance + struct_ptr_int: The struct pointer as an i64 value (already loaded from alloca) + offset_global: Global variable containing the field offset (i64) + field_data: contains data about the field + struct_name: Name of the struct being accessed (optional) + Returns: + The loaded value + """ + + # Load the offset value + offset = builder.load(offset_global) + + # Convert i64 to pointer type (BPF stores pointers as i64) + i8_ptr_type = ir.PointerType(ir.IntType(8)) + struct_ptr = builder.inttoptr(struct_ptr_int, i8_ptr_type) + + # GEP with offset to get field pointer + field_ptr = builder.gep( + struct_ptr, + [offset], + inbounds=False, + ) + + # Determine the appropriate IR type based on field information + int_width = 64 # Default to 64-bit + needs_zext = False + + if field_data is not None: + # Try to determine the size from field metadata + if field_data.type.__module__ == ctypes.__name__: + try: + field_size_bytes = ctypes.sizeof(field_data.type) + field_size_bits = field_size_bytes * 8 + + if field_size_bits in [8, 16, 32, 64]: + int_width = field_size_bits + logger.info(f"Determined field size: {int_width} bits") + + # Special handling for struct_xdp_md i32 fields + if struct_name == "struct_xdp_md" and int_width == 32: + needs_zext = True + logger.info( + "struct_xdp_md i32 field detected, will zero-extend to i64" + ) + else: + logger.warning( + f"Unusual field size {field_size_bits} bits, using default 64" + ) + except Exception as e: + logger.warning( + f"Could not determine field size: {e}, using default 64" + ) + + elif field_data.type.__module__ == "vmlinux": + # For pointers to structs or complex vmlinux types + if field_data.ctype_complex_type is not None and issubclass( + field_data.ctype_complex_type, ctypes._Pointer + ): + int_width = 64 # Pointers are always 64-bit + logger.info("Field is a pointer type, using 64 bits") + else: + logger.warning("Complex vmlinux field type, using default 64 bits") + + # Bitcast to appropriate pointer type based on determined width + ptr_type = ir.PointerType(ir.IntType(int_width)) + typed_ptr = builder.bitcast(field_ptr, ptr_type) + + # Load the value + value = builder.load(typed_ptr) + + # Zero-extend i32 to i64 if needed + if needs_zext: + value = builder.zext(value, ir.IntType(64)) + logger.info("Zero-extended i32 value to i64") + + return value + @staticmethod def load_ctx_field(builder, ctx_arg, offset_global, field_data, struct_name=None): """ diff --git a/tests/failing_tests/vmlinux/requests.py b/tests/failing_tests/vmlinux/requests.py index a32636ed..3e4c242f 100644 --- a/tests/failing_tests/vmlinux/requests.py +++ b/tests/failing_tests/vmlinux/requests.py @@ -10,8 +10,9 @@ def example(ctx: struct_pt_regs) -> c_int64: a = ctx.r15 req = struct_request(ctx.di) d = req.__data_len + b = ctx.r12 c = req.timeout - print(f"data length {d} and {c} and {a}") + print(f"data length {d} and {c} and {a} and {b}") return c_int64(0) From a42a75179d9108554c9466b3e8001ff05768be31 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Sat, 22 Nov 2025 00:37:39 +0530 Subject: [PATCH 18/24] format chore --- pythonbpf/allocation_pass.py | 4 +++- pythonbpf/assign_pass.py | 1 - pythonbpf/expr/expr_pass.py | 1 - pythonbpf/vmlinux_parser/vmlinux_exports_handler.py | 12 +++++++++--- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/pythonbpf/allocation_pass.py b/pythonbpf/allocation_pass.py index 9b453119..4fc6f5e4 100644 --- a/pythonbpf/allocation_pass.py +++ b/pythonbpf/allocation_pass.py @@ -393,7 +393,9 @@ def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_ # Allocate with the actual IR type var = _allocate_with_type(builder, var_name, actual_ir_type) - local_sym_tab[var_name] = LocalSymbol(var, actual_ir_type, field) # <-- Store Field metadata + local_sym_tab[var_name] = LocalSymbol( + var, actual_ir_type, field + ) # <-- Store Field metadata logger.info( f"Pre-allocated {var_name} as {actual_ir_type} from vmlinux struct {vmlinux_struct_name}.{field_name}" diff --git a/pythonbpf/assign_pass.py b/pythonbpf/assign_pass.py index fc842383..dff89a9e 100644 --- a/pythonbpf/assign_pass.py +++ b/pythonbpf/assign_pass.py @@ -1,6 +1,5 @@ import ast import logging -from inspect import isclass from llvmlite import ir from pythonbpf.expr import eval_expr diff --git a/pythonbpf/expr/expr_pass.py b/pythonbpf/expr/expr_pass.py index ee2b9f49..c510c969 100644 --- a/pythonbpf/expr/expr_pass.py +++ b/pythonbpf/expr/expr_pass.py @@ -12,7 +12,6 @@ get_base_type_and_depth, deref_to_depth, ) -from pythonbpf.vmlinux_parser.assignment_info import Field from .vmlinux_registry import VmlinuxHandlerRegistry from ..vmlinux_parser.dependency_node import Field diff --git a/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py b/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py index 085610ad..cda02bf5 100644 --- a/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py +++ b/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py @@ -103,7 +103,11 @@ def handle_vmlinux_struct_field( globvar_ir, field_data = self.get_field_type(struct_name, field_name) builder.function.args[0].type = ir.PointerType(ir.IntType(8)) field_ptr = self.load_ctx_field( - builder, builder.function.args[0], globvar_ir, field_data, struct_name + builder, + builder.function.args[0], + globvar_ir, + field_data, + struct_name, ) return field_ptr, field_data else: @@ -125,7 +129,9 @@ def handle_vmlinux_struct_field( raise RuntimeError("Variable accessed not found in symbol table") @staticmethod - def load_struct_field(builder, struct_ptr_int, offset_global, field_data, struct_name=None): + def load_struct_field( + builder, struct_ptr_int, offset_global, field_data, struct_name=None + ): """ Generate LLVM IR to load a field from a regular (non-context) struct using standard GEP. @@ -186,7 +192,7 @@ def load_struct_field(builder, struct_ptr_int, offset_global, field_data, struct elif field_data.type.__module__ == "vmlinux": # For pointers to structs or complex vmlinux types if field_data.ctype_complex_type is not None and issubclass( - field_data.ctype_complex_type, ctypes._Pointer + field_data.ctype_complex_type, ctypes._Pointer ): int_width = 64 # Pointers are always 64-bit logger.info("Field is a pointer type, using 64 bits") From 84507b8b9803e567d30214a67e6ba0b4cf4554f0 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Sat, 22 Nov 2025 00:57:12 +0530 Subject: [PATCH 19/24] add btf probe read kernel helper --- pythonbpf/helper/bpf_helper_handler.py | 70 ++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/pythonbpf/helper/bpf_helper_handler.py b/pythonbpf/helper/bpf_helper_handler.py index ba35cc45..f52e87a9 100644 --- a/pythonbpf/helper/bpf_helper_handler.py +++ b/pythonbpf/helper/bpf_helper_handler.py @@ -34,6 +34,7 @@ class BPFHelperID(Enum): BPF_PERF_EVENT_OUTPUT = 25 BPF_GET_STACK = 67 BPF_PROBE_READ_KERNEL_STR = 115 + BPF_PROBE_READ_KERNEL = 113 BPF_RINGBUF_OUTPUT = 130 BPF_RINGBUF_RESERVE = 131 BPF_RINGBUF_SUBMIT = 132 @@ -574,6 +575,75 @@ def bpf_probe_read_kernel_str_emitter( return result, ir.IntType(64) +def emit_probe_read_kernel_call(builder, dst_ptr, dst_size, src_ptr): + """Emit LLVM IR call to bpf_probe_read_kernel""" + + fn_type = ir.FunctionType( + ir.IntType(64), + [ir.PointerType(), ir.IntType(32), ir.PointerType()], + var_arg=False, + ) + fn_ptr = builder.inttoptr( + ir.Constant(ir.IntType(64), BPFHelperID.BPF_PROBE_READ_KERNEL.value), + ir.PointerType(fn_type), + ) + + result = builder.call( + fn_ptr, + [ + builder.bitcast(dst_ptr, ir.PointerType()), + ir.Constant(ir.IntType(32), dst_size), + builder.bitcast(src_ptr, ir.PointerType()), + ], + tail=False, + ) + + logger.info(f"Emitted bpf_probe_read_kernel (size={dst_size})") + return result + + +@HelperHandlerRegistry.register( + "probe_read_kernel", + param_types=[ + ir.PointerType(ir.IntType(8)), + ir.PointerType(ir.IntType(8)), + ], + return_type=ir.IntType(64), +) +def bpf_probe_read_kernel_emitter( + call, + map_ptr, + module, + builder, + func, + local_sym_tab=None, + struct_sym_tab=None, + map_sym_tab=None, +): + """Emit LLVM IR for bpf_probe_read_kernel helper.""" + + if len(call.args) != 2: + raise ValueError( + f"probe_read_kernel expects 2 args (dst, src), got {len(call.args)}" + ) + + # Get destination buffer (char array -> i8*) + dst_ptr, dst_size = get_or_create_ptr_from_arg( + func, module, call.args[0], builder, local_sym_tab, map_sym_tab, struct_sym_tab + ) + + # Get source pointer (evaluate expression) + src_ptr, src_type = get_ptr_from_arg( + call.args[1], func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab + ) + + # Emit the helper call + result = emit_probe_read_kernel_call(builder, dst_ptr, dst_size, src_ptr) + + logger.info(f"Emitted bpf_probe_read_kernel (size={dst_size})") + return result, ir.IntType(64) + + @HelperHandlerRegistry.register( "random", param_types=[], From 6f25c554a97cc84d37b59a7d3b4a5ac92878ee6f Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Sat, 22 Nov 2025 01:47:25 +0530 Subject: [PATCH 20/24] fix CO-RE read for cast structs --- pythonbpf/assign_pass.py | 24 ++++++++++--- pythonbpf/helper/__init__.py | 3 +- .../vmlinux_parser/vmlinux_exports_handler.py | 36 +++++++++++++------ tests/failing_tests/vmlinux/requests.py | 3 +- 4 files changed, 50 insertions(+), 16 deletions(-) diff --git a/pythonbpf/assign_pass.py b/pythonbpf/assign_pass.py index dff89a9e..0d69a043 100644 --- a/pythonbpf/assign_pass.py +++ b/pythonbpf/assign_pass.py @@ -1,5 +1,6 @@ import ast import logging +from inspect import isclass from llvmlite import ir from pythonbpf.expr import eval_expr @@ -149,11 +150,26 @@ def handle_variable_assignment( return False val, val_type = val_result - logger.info(f"Evaluated value for {var_name}: {val} of type {val_type}, {var_type}") + logger.info(f"Evaluated value for {var_name}: {val} of type {val_type}, expected {var_type}") + if val_type != var_type: - # if isclass(val_type) and (val_type.__module__ == "vmlinux"): - # logger.info("Handling typecast to vmlinux struct") - # print(val_type, var_type) + # Handle vmlinux struct pointers - they're represented as Python classes but are i64 pointers + if isclass(val_type) and (val_type.__module__ == "vmlinux"): + logger.info("Handling vmlinux struct pointer assignment") + # vmlinux struct pointers: val is a pointer, need to convert to i64 + if isinstance(var_type, ir.IntType) and var_type.width == 64: + # Convert pointer to i64 using ptrtoint + if isinstance(val.type, ir.PointerType): + val = builder.ptrtoint(val, ir.IntType(64)) + logger.info(f"Converted vmlinux struct pointer to i64 using ptrtoint") + builder.store(val, var_ptr) + logger.info(f"Assigned vmlinux struct pointer to {var_name} (i64)") + return True + else: + logger.error( + f"Type mismatch: vmlinux struct pointer requires i64, got {var_type}" + ) + return False if isinstance(val_type, Field): logger.info("Handling assignment to struct field") # Special handling for struct_xdp_md i32 fields that are zero-extended to i64 diff --git a/pythonbpf/helper/__init__.py b/pythonbpf/helper/__init__.py index 6d38e791..e92828ac 100644 --- a/pythonbpf/helper/__init__.py +++ b/pythonbpf/helper/__init__.py @@ -1,6 +1,6 @@ from .helper_registry import HelperHandlerRegistry from .helper_utils import reset_scratch_pool -from .bpf_helper_handler import handle_helper_call, emit_probe_read_kernel_str_call +from .bpf_helper_handler import handle_helper_call, emit_probe_read_kernel_str_call, emit_probe_read_kernel_call from .helpers import ( ktime, pid, @@ -74,6 +74,7 @@ def helper_call_handler( "reset_scratch_pool", "handle_helper_call", "emit_probe_read_kernel_str_call", + "emit_probe_read_kernel_call", "ktime", "pid", "deref", diff --git a/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py b/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py index cda02bf5..aa6f52de 100644 --- a/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py +++ b/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py @@ -115,11 +115,11 @@ def handle_vmlinux_struct_field( struct_name = python_type.__name__ globvar_ir, field_data = self.get_field_type(struct_name, field_name) - # Handle cast struct field access (use standard GEP) + # Handle cast struct field access (use bpf_probe_read_kernel) # Load the struct pointer from the local variable struct_ptr = builder.load(var_info.var) - # Use standard GEP for non-context struct field access + # Use bpf_probe_read_kernel for non-context struct field access field_value = self.load_struct_field( builder, struct_ptr, globvar_ir, field_data, struct_name ) @@ -133,7 +133,7 @@ def load_struct_field( builder, struct_ptr_int, offset_global, field_data, struct_name=None ): """ - Generate LLVM IR to load a field from a regular (non-context) struct using standard GEP. + Generate LLVM IR to load a field from a regular (non-context) struct using bpf_probe_read_kernel. Args: builder: llvmlite IRBuilder instance @@ -159,7 +159,8 @@ def load_struct_field( inbounds=False, ) - # Determine the appropriate IR type based on field information + # Determine the appropriate field size based on field information + field_size_bytes = 8 # Default to 8 bytes (64-bit) int_width = 64 # Default to 64-bit needs_zext = False @@ -172,7 +173,7 @@ def load_struct_field( if field_size_bits in [8, 16, 32, 64]: int_width = field_size_bits - logger.info(f"Determined field size: {int_width} bits") + logger.info(f"Determined field size: {int_width} bits ({field_size_bytes} bytes)") # Special handling for struct_xdp_md i32 fields if struct_name == "struct_xdp_md" and int_width == 32: @@ -195,16 +196,31 @@ def load_struct_field( field_data.ctype_complex_type, ctypes._Pointer ): int_width = 64 # Pointers are always 64-bit + field_size_bytes = 8 logger.info("Field is a pointer type, using 64 bits") else: logger.warning("Complex vmlinux field type, using default 64 bits") - # Bitcast to appropriate pointer type based on determined width - ptr_type = ir.PointerType(ir.IntType(int_width)) - typed_ptr = builder.bitcast(field_ptr, ptr_type) + # Allocate local storage for the field value + local_storage = builder.alloca(ir.IntType(int_width)) + local_storage_i8_ptr = builder.bitcast(local_storage, i8_ptr_type) + + # Use bpf_probe_read_kernel to safely read the field + # This generates: + # %gep = getelementptr i8, ptr %struct_ptr, i64 %offset (already done above as field_ptr) + # %passed = tail call ptr @llvm.bpf.passthrough.p0.p0(i32 2, ptr %gep) + # %result = call i64 inttoptr (i64 113 to ptr)(ptr %local_storage, i32 %size, ptr %passed) + from pythonbpf.helper import emit_probe_read_kernel_call + + result = emit_probe_read_kernel_call( + builder, + local_storage_i8_ptr, + field_size_bytes, + field_ptr + ) - # Load the value - value = builder.load(typed_ptr) + # Load the value from local storage + value = builder.load(local_storage) # Zero-extend i32 to i64 if needed if needs_zext: diff --git a/tests/failing_tests/vmlinux/requests.py b/tests/failing_tests/vmlinux/requests.py index 3e4c242f..bb7fb9d9 100644 --- a/tests/failing_tests/vmlinux/requests.py +++ b/tests/failing_tests/vmlinux/requests.py @@ -12,7 +12,8 @@ def example(ctx: struct_pt_regs) -> c_int64: d = req.__data_len b = ctx.r12 c = req.timeout - print(f"data length {d} and {c} and {a} and {b}") + print(f"data length {d} and {c} and {a}") + print(f"ctx arg {b}") return c_int64(0) From 2b3635fe20d748b1188d4cc006336e8551b3a498 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Sat, 22 Nov 2025 01:48:44 +0530 Subject: [PATCH 21/24] format chore --- pythonbpf/assign_pass.py | 8 ++++++-- pythonbpf/helper/__init__.py | 6 +++++- pythonbpf/vmlinux_parser/vmlinux_exports_handler.py | 11 +++++------ 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/pythonbpf/assign_pass.py b/pythonbpf/assign_pass.py index 0d69a043..5d73cf3e 100644 --- a/pythonbpf/assign_pass.py +++ b/pythonbpf/assign_pass.py @@ -150,7 +150,9 @@ def handle_variable_assignment( return False val, val_type = val_result - logger.info(f"Evaluated value for {var_name}: {val} of type {val_type}, expected {var_type}") + logger.info( + f"Evaluated value for {var_name}: {val} of type {val_type}, expected {var_type}" + ) if val_type != var_type: # Handle vmlinux struct pointers - they're represented as Python classes but are i64 pointers @@ -161,7 +163,9 @@ def handle_variable_assignment( # Convert pointer to i64 using ptrtoint if isinstance(val.type, ir.PointerType): val = builder.ptrtoint(val, ir.IntType(64)) - logger.info(f"Converted vmlinux struct pointer to i64 using ptrtoint") + logger.info( + "Converted vmlinux struct pointer to i64 using ptrtoint" + ) builder.store(val, var_ptr) logger.info(f"Assigned vmlinux struct pointer to {var_name} (i64)") return True diff --git a/pythonbpf/helper/__init__.py b/pythonbpf/helper/__init__.py index e92828ac..17306355 100644 --- a/pythonbpf/helper/__init__.py +++ b/pythonbpf/helper/__init__.py @@ -1,6 +1,10 @@ from .helper_registry import HelperHandlerRegistry from .helper_utils import reset_scratch_pool -from .bpf_helper_handler import handle_helper_call, emit_probe_read_kernel_str_call, emit_probe_read_kernel_call +from .bpf_helper_handler import ( + handle_helper_call, + emit_probe_read_kernel_str_call, + emit_probe_read_kernel_call, +) from .helpers import ( ktime, pid, diff --git a/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py b/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py index aa6f52de..c26cac9e 100644 --- a/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py +++ b/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py @@ -173,7 +173,9 @@ def load_struct_field( if field_size_bits in [8, 16, 32, 64]: int_width = field_size_bits - logger.info(f"Determined field size: {int_width} bits ({field_size_bytes} bytes)") + logger.info( + f"Determined field size: {int_width} bits ({field_size_bytes} bytes)" + ) # Special handling for struct_xdp_md i32 fields if struct_name == "struct_xdp_md" and int_width == 32: @@ -212,11 +214,8 @@ def load_struct_field( # %result = call i64 inttoptr (i64 113 to ptr)(ptr %local_storage, i32 %size, ptr %passed) from pythonbpf.helper import emit_probe_read_kernel_call - result = emit_probe_read_kernel_call( - builder, - local_storage_i8_ptr, - field_size_bytes, - field_ptr + emit_probe_read_kernel_call( + builder, local_storage_i8_ptr, field_size_bytes, field_ptr ) # Load the value from local storage From a91c3158ad58f1f975e8c83c1c455b8f9e48db7a Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Sat, 22 Nov 2025 12:34:55 +0530 Subject: [PATCH 22/24] sort fields in debug info by offset order --- pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py index 7ee187be..c4f5642c 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py +++ b/pythonbpf/vmlinux_parser/ir_gen/debug_info_gen.py @@ -42,7 +42,10 @@ def debug_info_generation( # Process all fields and create members for the struct members = [] - for field_name, field in struct.fields.items(): + + sorted_fields = sorted(struct.fields.items(), key=lambda item: item[1].offset) + + for field_name, field in sorted_fields: try: # Get appropriate debug type for this field field_type = _get_field_debug_type( @@ -97,7 +100,9 @@ def _get_field_debug_type( # Handle function pointer types (CFUNCTYPE) if callable(field.ctype_complex_type): # Function pointers are represented as void pointers - logger.info(f"Field {field_name} is a function pointer, using void pointer") + logger.warning( + f"Field {field_name} is a function pointer, using void pointer" + ) void_ptr = generator.create_pointer_type(None, 64) return void_ptr, 64 elif issubclass(field.ctype_complex_type, ctypes.Array): From 081ee5cb4c9e79851fcb7f8a3d9dd3bfa259e570 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Sat, 22 Nov 2025 13:19:55 +0530 Subject: [PATCH 23/24] move requests.py to passing tests --- tests/{failing_tests => passing_tests}/vmlinux/requests.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{failing_tests => passing_tests}/vmlinux/requests.py (100%) diff --git a/tests/failing_tests/vmlinux/requests.py b/tests/passing_tests/vmlinux/requests.py similarity index 100% rename from tests/failing_tests/vmlinux/requests.py rename to tests/passing_tests/vmlinux/requests.py From a2de15fb1eb0b37227502253fab1881d7d267928 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Sat, 22 Nov 2025 13:36:21 +0530 Subject: [PATCH 24/24] add c_int to type_deducer.py --- pythonbpf/type_deducer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pythonbpf/type_deducer.py b/pythonbpf/type_deducer.py index 74abc0d2..fd589ae0 100644 --- a/pythonbpf/type_deducer.py +++ b/pythonbpf/type_deducer.py @@ -17,6 +17,7 @@ "c_ulong": ir.IntType(64), "c_longlong": ir.IntType(64), "c_uint": ir.IntType(32), + "c_int": ir.IntType(32), # Not so sure about this one "str": ir.PointerType(ir.IntType(8)), }