From e636fcaea788ecec6b1e69f54ee2c45eb48584d8 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Mon, 20 Oct 2025 04:21:42 +0530 Subject: [PATCH 1/8] add assignment info class family and change how assignments are handled --- pythonbpf/vmlinux_parser/assignment_info.py | 34 +++++++++++++++++++ pythonbpf/vmlinux_parser/import_detector.py | 26 +++++++++++--- .../vmlinux/simple_struct_test.py | 1 - 3 files changed, 55 insertions(+), 6 deletions(-) create mode 100644 pythonbpf/vmlinux_parser/assignment_info.py diff --git a/pythonbpf/vmlinux_parser/assignment_info.py b/pythonbpf/vmlinux_parser/assignment_info.py new file mode 100644 index 00000000..d3665c03 --- /dev/null +++ b/pythonbpf/vmlinux_parser/assignment_info.py @@ -0,0 +1,34 @@ +from enum import Enum, auto +from typing import Any, Callable, Dict, List, Optional, TypedDict +from dataclasses import dataclass + +from pythonbpf.vmlinux_parser.dependency_node import Field + + +@dataclass +class AssignmentType(Enum): + CONSTANT = auto() + STRUCT = auto() + ARRAY = auto() # probably won't be used + FUNCTION_POINTER = auto() + POINTER = auto() # again, probably won't be used + +@dataclass +class FunctionSignature(TypedDict): + return_type: str + param_types: List[str] + varargs: bool + + +# The name of the assignment will be in the dict that uses this class +@dataclass +class AssignmentInfo(TypedDict): + value_type: AssignmentType + python_type: type + value: Optional[Any] + pointer_level: Optional[int] + signature: Optional[FunctionSignature] # For function pointers + # The key of the dict is the name of the field. + # Value is a tuple that contains the global variable representing that field + # along with all the information about that field as a Field type. + members: Optional[Dict[str, tuple[str, Field]]] # For structs. 
diff --git a/pythonbpf/vmlinux_parser/import_detector.py b/pythonbpf/vmlinux_parser/import_detector.py index 972b1ff2..965bca86 100644 --- a/pythonbpf/vmlinux_parser/import_detector.py +++ b/pythonbpf/vmlinux_parser/import_detector.py @@ -4,6 +4,7 @@ import importlib import inspect +from .assignment_info import AssignmentInfo, AssignmentType from .dependency_handler import DependencyHandler from .ir_gen import IRGenerator from .class_handler import process_vmlinux_class @@ -82,7 +83,7 @@ def vmlinux_proc(tree: ast.AST, module): # initialise dependency handler handler = DependencyHandler() # initialise assignment dictionary of name to type - assignments: dict[str, tuple[type, Any]] = {} + assignments: dict[str, AssignmentInfo] = {} if not import_statements: logger.info("No vmlinux imports found") @@ -132,16 +133,31 @@ def vmlinux_proc(tree: ast.AST, module): return assignments -def process_vmlinux_assign(node, module, assignments: dict[str, tuple[type, Any]]): - # Check if this is a simple assignment with a constant value +def process_vmlinux_assign(node, module, assignments: dict[str, AssignmentInfo]): + """Process assignments from vmlinux module.""" + # Only handle single-target assignments if len(node.targets) == 1 and isinstance(node.targets[0], ast.Name): target_name = node.targets[0].id + + # Handle constant value assignments if isinstance(node.value, ast.Constant): - assignments[target_name] = (type(node.value.value), node.value.value) + # Fixed: using proper TypedDict creation syntax with named arguments + assignments[target_name] = AssignmentInfo( + value_type=AssignmentType.CONSTANT, + python_type=type(node.value.value), + value=node.value.value, + pointer_level=None, + signature=None, + members=None + ) logger.info( f"Added assignment: {target_name} = {node.value.value!r} of type {type(node.value.value)}" ) + + # Handle other assignment types that we may need to support else: - raise ValueError(f"Unsupported assignment type for {target_name}") + 
logger.warning( + f"Unsupported assignment type for {target_name}: {ast.dump(node.value)}" + ) else: raise ValueError("Not a simple assignment") diff --git a/tests/passing_tests/vmlinux/simple_struct_test.py b/tests/passing_tests/vmlinux/simple_struct_test.py index f47076f2..43e5c9e5 100644 --- a/tests/passing_tests/vmlinux/simple_struct_test.py +++ b/tests/passing_tests/vmlinux/simple_struct_test.py @@ -27,4 +27,3 @@ def LICENSE() -> str: compile_to_ir("simple_struct_test.py", "simple_struct_test.ll") -compile() From 8da50b7068f3979e7b8fc3c593f9f6c80866e83e Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Mon, 20 Oct 2025 04:31:23 +0530 Subject: [PATCH 2/8] float assignments to class_handler.py --- pythonbpf/vmlinux_parser/assignment_info.py | 3 ++- pythonbpf/vmlinux_parser/class_handler.py | 19 +++++++++++++++---- pythonbpf/vmlinux_parser/import_detector.py | 7 +++---- .../vmlinux/simple_struct_test.py | 2 +- 4 files changed, 21 insertions(+), 10 deletions(-) diff --git a/pythonbpf/vmlinux_parser/assignment_info.py b/pythonbpf/vmlinux_parser/assignment_info.py index d3665c03..435a7ad1 100644 --- a/pythonbpf/vmlinux_parser/assignment_info.py +++ b/pythonbpf/vmlinux_parser/assignment_info.py @@ -1,5 +1,5 @@ from enum import Enum, auto -from typing import Any, Callable, Dict, List, Optional, TypedDict +from typing import Any, Dict, List, Optional, TypedDict from dataclasses import dataclass from pythonbpf.vmlinux_parser.dependency_node import Field @@ -13,6 +13,7 @@ class AssignmentType(Enum): FUNCTION_POINTER = auto() POINTER = auto() # again, probably won't be used + @dataclass class FunctionSignature(TypedDict): return_type: str diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index 108fa9fc..2adf76da 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -1,6 +1,8 @@ import logging from functools import lru_cache import importlib + +from .assignment_info 
import AssignmentInfo from .dependency_handler import DependencyHandler from .dependency_node import DependencyNode import ctypes @@ -15,17 +17,26 @@ def get_module_symbols(module_name: str): return [name for name in dir(imported_module)], imported_module -def process_vmlinux_class(node, llvm_module, handler: DependencyHandler): +def process_vmlinux_class( + node, + llvm_module, + handler: DependencyHandler, + assignments: dict[str, AssignmentInfo], +): symbols_in_module, imported_module = get_module_symbols("vmlinux") if node.name in symbols_in_module: vmlinux_type = getattr(imported_module, node.name) - process_vmlinux_post_ast(vmlinux_type, llvm_module, handler) + process_vmlinux_post_ast(vmlinux_type, llvm_module, handler, assignments) else: raise ImportError(f"{node.name} not in vmlinux") def process_vmlinux_post_ast( - elem_type_class, llvm_handler, handler: DependencyHandler, processing_stack=None + elem_type_class, + llvm_handler, + handler: DependencyHandler, + assignments: dict[str, AssignmentInfo], + processing_stack=None, ): # Initialize processing stack on first call if processing_stack is None: @@ -46,7 +57,7 @@ def process_vmlinux_post_ast( logger.debug(f"Node {current_symbol_name} already processed and ready") return True - # XXX:Check it's use. It's probably not being used. + # XXX:Check its use. It's probably not being used. 
if current_symbol_name in processing_stack: logger.debug( f"Dependency already in processing stack for {current_symbol_name}, skipping" diff --git a/pythonbpf/vmlinux_parser/import_detector.py b/pythonbpf/vmlinux_parser/import_detector.py index 965bca86..d8bd78f9 100644 --- a/pythonbpf/vmlinux_parser/import_detector.py +++ b/pythonbpf/vmlinux_parser/import_detector.py @@ -1,6 +1,5 @@ import ast import logging -from typing import List, Tuple, Any import importlib import inspect @@ -12,7 +11,7 @@ logger = logging.getLogger(__name__) -def detect_import_statement(tree: ast.AST) -> List[Tuple[str, ast.ImportFrom]]: +def detect_import_statement(tree: ast.AST) -> list[tuple[str, ast.ImportFrom]]: """ Parse AST and detect import statements from vmlinux. @@ -113,7 +112,7 @@ def vmlinux_proc(tree: ast.AST, module): isinstance(mod_node, ast.ClassDef) and mod_node.name == imported_name ): - process_vmlinux_class(mod_node, module, handler) + process_vmlinux_class(mod_node, module, handler, assignments) found = True break if isinstance(mod_node, ast.Assign): @@ -148,7 +147,7 @@ def process_vmlinux_assign(node, module, assignments: dict[str, AssignmentInfo]) value=node.value.value, pointer_level=None, signature=None, - members=None + members=None, ) logger.info( f"Added assignment: {target_name} = {node.value.value!r} of type {type(node.value.value)}" diff --git a/tests/passing_tests/vmlinux/simple_struct_test.py b/tests/passing_tests/vmlinux/simple_struct_test.py index 43e5c9e5..c9390c84 100644 --- a/tests/passing_tests/vmlinux/simple_struct_test.py +++ b/tests/passing_tests/vmlinux/simple_struct_test.py @@ -1,4 +1,4 @@ -from pythonbpf import bpf, section, bpfglobal, compile_to_ir, compile +from pythonbpf import bpf, section, bpfglobal, compile_to_ir from vmlinux import TASK_COMM_LEN # noqa: F401 from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401 From eee212795f4f5c0a70624c41dae786840afcf780 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Mon, 20 Oct 2025 
04:41:00 +0530 Subject: [PATCH 3/8] add assignment dict handling to class_handler.py --- pythonbpf/codegen.py | 4 +-- pythonbpf/vmlinux_parser/class_handler.py | 33 ++++++++++++++++++++--- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/pythonbpf/codegen.py b/pythonbpf/codegen.py index 078adf72..beac470c 100644 --- a/pythonbpf/codegen.py +++ b/pythonbpf/codegen.py @@ -55,11 +55,11 @@ def processor(source_code, filename, module): for func_node in bpf_chunks: logger.info(f"Found BPF function/struct: {func_node.name}") - vmlinux_proc(tree, module) + vmlinux_symtab = vmlinux_proc(tree, module) populate_global_symbol_table(tree, module) license_processing(tree, module) globals_processing(tree, module) - + print("DEBUG:", vmlinux_symtab) structs_sym_tab = structs_proc(tree, module, bpf_chunks) map_sym_tab = maps_proc(tree, module, bpf_chunks) func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab) diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index 2adf76da..58168b37 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -2,9 +2,9 @@ from functools import lru_cache import importlib -from .assignment_info import AssignmentInfo +from .assignment_info import AssignmentInfo, AssignmentType from .dependency_handler import DependencyHandler -from .dependency_node import DependencyNode +from .dependency_node import DependencyNode, Field import ctypes from typing import Optional, Any, Dict @@ -103,12 +103,21 @@ def process_vmlinux_post_ast( else: raise TypeError("Could not get required class and definition") + # Create a members dictionary for AssignmentInfo + members_dict: Dict[str, tuple[str, Field]] = {} + logger.debug(f"Extracted fields for {current_symbol_name}: {field_table}") for elem in field_table.items(): elem_name, elem_temp_list = elem [elem_type, elem_bitfield_size] = elem_temp_list local_module_name = getattr(elem_type, "__module__", 
None) new_dep_node.add_field(elem_name, elem_type, ready=False) + + # Store field reference for struct assignment info + field_ref = new_dep_node.get_field(elem_name) + if field_ref: + members_dict[elem_name] = (elem_name, field_ref) + if local_module_name == ctypes.__name__: # TODO: need to process pointer to ctype and also CFUNCTYPES here recursively. Current processing is a single dereference new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size) @@ -220,6 +229,7 @@ def process_vmlinux_post_ast( containing_type, llvm_handler, handler, + assignments, # Pass assignments to recursive call processing_stack, ) new_dep_node.set_field_ready(elem_name, True) @@ -237,7 +247,11 @@ def process_vmlinux_post_ast( else str(elem_type) ) process_vmlinux_post_ast( - elem_type, llvm_handler, handler, processing_stack + elem_type, + llvm_handler, + handler, + assignments, + processing_stack, ) new_dep_node.set_field_ready(elem_name, True) else: @@ -245,10 +259,21 @@ def process_vmlinux_post_ast( f"{elem_name} with type {elem_type} from module {module_name} not supported in recursive resolver" ) + # Add struct to assignments dictionary + assignments[current_symbol_name] = AssignmentInfo( + value_type=AssignmentType.STRUCT, + python_type=elem_type_class, + value=None, + pointer_level=None, + signature=None, + members=members_dict, + ) + logger.info(f"Added struct assignment info for {current_symbol_name}") + else: raise ImportError("UNSUPPORTED Module") - logging.info( + logger.info( f"{current_symbol_name} processed and handler readiness {handler.is_ready}" ) return True From a20643f3a72ab9dbca10c2d9858994ee303d89ba Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Mon, 20 Oct 2025 18:41:59 +0530 Subject: [PATCH 4/8] move assignment tablegen to ir_generation.py --- pythonbpf/vmlinux_parser/class_handler.py | 28 ++----------------- pythonbpf/vmlinux_parser/import_detector.py | 4 +-- .../vmlinux_parser/ir_gen/ir_generation.py | 28 ++++++++++++++++++- 3 files changed,
31 insertions(+), 29 deletions(-) diff --git a/pythonbpf/vmlinux_parser/class_handler.py b/pythonbpf/vmlinux_parser/class_handler.py index 58168b37..a508ff75 100644 --- a/pythonbpf/vmlinux_parser/class_handler.py +++ b/pythonbpf/vmlinux_parser/class_handler.py @@ -2,9 +2,8 @@ from functools import lru_cache import importlib -from .assignment_info import AssignmentInfo, AssignmentType from .dependency_handler import DependencyHandler -from .dependency_node import DependencyNode, Field +from .dependency_node import DependencyNode import ctypes from typing import Optional, Any, Dict @@ -21,12 +20,11 @@ def process_vmlinux_class( node, llvm_module, handler: DependencyHandler, - assignments: dict[str, AssignmentInfo], ): symbols_in_module, imported_module = get_module_symbols("vmlinux") if node.name in symbols_in_module: vmlinux_type = getattr(imported_module, node.name) - process_vmlinux_post_ast(vmlinux_type, llvm_module, handler, assignments) + process_vmlinux_post_ast(vmlinux_type, llvm_module, handler) else: raise ImportError(f"{node.name} not in vmlinux") @@ -35,7 +33,6 @@ def process_vmlinux_post_ast( elem_type_class, llvm_handler, handler: DependencyHandler, - assignments: dict[str, AssignmentInfo], processing_stack=None, ): # Initialize processing stack on first call @@ -103,9 +100,6 @@ def process_vmlinux_post_ast( else: raise TypeError("Could not get required class and definition") - # Create a members dictionary for AssignmentInfo - members_dict: Dict[str, tuple[str, Field]] = {} - logger.debug(f"Extracted fields for {current_symbol_name}: {field_table}") for elem in field_table.items(): elem_name, elem_temp_list = elem @@ -113,11 +107,6 @@ def process_vmlinux_post_ast( local_module_name = getattr(elem_type, "__module__", None) new_dep_node.add_field(elem_name, elem_type, ready=False) - # Store field reference for struct assignment info - field_ref = new_dep_node.get_field(elem_name) - if field_ref: - members_dict[elem_name] = (elem_name, field_ref) - if 
local_module_name == ctypes.__name__: # TODO: need to process pointer to ctype and also CFUNCTYPES here recursively. Current processing is a single dereference new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size) @@ -229,7 +218,6 @@ def process_vmlinux_post_ast( containing_type, llvm_handler, handler, - assignments, # Pass assignments to recursive call processing_stack, ) new_dep_node.set_field_ready(elem_name, True) @@ -250,7 +238,6 @@ def process_vmlinux_post_ast( elem_type, llvm_handler, handler, - assignments, processing_stack, ) new_dep_node.set_field_ready(elem_name, True) @@ -259,17 +246,6 @@ def process_vmlinux_post_ast( f"{elem_name} with type {elem_type} from module {module_name} not supported in recursive resolver" ) - # Add struct to assignments dictionary - assignments[current_symbol_name] = AssignmentInfo( - value_type=AssignmentType.STRUCT, - python_type=elem_type_class, - value=None, - pointer_level=None, - signature=None, - members=members_dict, - ) - logger.info(f"Added struct assignment info for {current_symbol_name}") - else: raise ImportError("UNSUPPORTED Module") diff --git a/pythonbpf/vmlinux_parser/import_detector.py b/pythonbpf/vmlinux_parser/import_detector.py index d8bd78f9..6df7a980 100644 --- a/pythonbpf/vmlinux_parser/import_detector.py +++ b/pythonbpf/vmlinux_parser/import_detector.py @@ -112,7 +112,7 @@ def vmlinux_proc(tree: ast.AST, module): isinstance(mod_node, ast.ClassDef) and mod_node.name == imported_name ): - process_vmlinux_class(mod_node, module, handler, assignments) + process_vmlinux_class(mod_node, module, handler) found = True break if isinstance(mod_node, ast.Assign): @@ -128,7 +128,7 @@ def vmlinux_proc(tree: ast.AST, module): f"{imported_name} not found as ClassDef or Assign in vmlinux" ) - IRGenerator(module, handler) + IRGenerator(module, handler, assignments) return assignments diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index 
cacd2e71..bd0adfaa 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -1,5 +1,8 @@ import ctypes import logging + +from ..dependency_node import Field +from ..assignment_info import AssignmentInfo, AssignmentType from ..dependency_handler import DependencyHandler from .debug_info_gen import debug_info_generation from ..dependency_node import DependencyNode @@ -10,11 +13,13 @@ class IRGenerator: # get the assignments dict and add this stuff to it. - def __init__(self, llvm_module, handler: DependencyHandler, assignment=None): + def __init__(self, llvm_module, handler: DependencyHandler, assignments): self.llvm_module = llvm_module self.handler: DependencyHandler = handler self.generated: list[str] = [] self.generated_debug_info: list = [] + self.generated_field_names: dict[Field, str] = {} + self.assignments: dict[str, AssignmentInfo] = assignments if not handler.is_ready: raise ImportError( "Semantic analysis of vmlinux imports failed. 
Cannot generate IR" @@ -67,6 +72,24 @@ def struct_processor(self, struct, processing_stack=None): f"Warning: Dependency {dependency} not found in handler" ) + # Fill the assignments dictionary with struct information + if struct.name not in self.assignments: + # Create a members dictionary for AssignmentInfo + members_dict = {} + for field_name, field in struct.fields.items(): + members_dict[field_name] = (self.generated_field_names[field], field) + + # Add struct to assignments dictionary + self.assignments[struct.name] = AssignmentInfo( + value_type=AssignmentType.STRUCT, + python_type=struct.ctype_struct, + value=None, + pointer_level=None, + signature=None, + members=members_dict, + ) + logger.info(f"Added struct assignment info for {struct.name}") + # Actual processor logic here after dependencies are resolved self.generated_debug_info.append( (struct, self.gen_ir(struct, self.generated_debug_info)) @@ -98,6 +121,7 @@ def gen_ir(self, struct, generated_debug_info): field_co_re_name = self._struct_name_generator( struct, field, field_index, True, i, containing_type_size ) + self.generated_field_names[field] = field_co_re_name globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name ) @@ -115,6 +139,7 @@ def gen_ir(self, struct, generated_debug_info): field_co_re_name = self._struct_name_generator( struct, field, field_index, True, i, containing_type_size ) + self.generated_field_names[field] = field_co_re_name globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name ) @@ -125,6 +150,7 @@ def gen_ir(self, struct, generated_debug_info): field_co_re_name = self._struct_name_generator( struct, field, field_index ) + self.generated_field_names[field] = field_co_re_name field_index += 1 globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name From c9363e62a9a0d017a2972323d068bdce33e150de Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Mon, 20 Oct 2025 21:27:18 +0530 Subject: 
[PATCH 5/8] move field name generation to assignments dict --- pythonbpf/vmlinux_parser/dependency_node.py | 38 +++++++++++++ .../vmlinux_parser/ir_gen/ir_generation.py | 57 +++++++++++++++---- 2 files changed, 85 insertions(+), 10 deletions(-) diff --git a/pythonbpf/vmlinux_parser/dependency_node.py b/pythonbpf/vmlinux_parser/dependency_node.py index e266761b..5055b325 100644 --- a/pythonbpf/vmlinux_parser/dependency_node.py +++ b/pythonbpf/vmlinux_parser/dependency_node.py @@ -18,6 +18,44 @@ class Field: value: Any = None ready: bool = False + def __hash__(self): + """ + Create a hash based on the immutable attributes that define this field's identity. + This allows Field objects to be used as dictionary keys. + """ + # Use a tuple of the fields that uniquely identify this field + identity = ( + self.name, + id(self.type), # Use id for non-hashable types + id(self.ctype_complex_type) if self.ctype_complex_type else None, + id(self.containing_type) if self.containing_type else None, + self.type_size, + self.bitfield_size, + self.offset, + self.value if self.value else None, + ) + return hash(identity) + + def __eq__(self, other): + """ + Define equality consistent with the hash function. + Two fields are equal if they have the same name, type, and offset. 
+ """ + # DO ther change here + if not isinstance(other, Field): + return False + + return ( + self.name == other.name + and self.type is other.type + and self.ctype_complex_type is other.ctype_complex_type + and self.containing_type is other.containing_type + and self.type_size == other.type_size + and self.bitfield_size == other.bitfield_size + and self.offset == other.offset + and self.value == other.value + ) + def set_ready(self, is_ready: bool = True) -> None: """Set the readiness state of this field.""" self.ready = is_ready diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index bd0adfaa..52fd4bbc 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -1,7 +1,6 @@ import ctypes import logging -from ..dependency_node import Field from ..assignment_info import AssignmentInfo, AssignmentType from ..dependency_handler import DependencyHandler from .debug_info_gen import debug_info_generation @@ -18,7 +17,8 @@ def __init__(self, llvm_module, handler: DependencyHandler, assignments): self.handler: DependencyHandler = handler self.generated: list[str] = [] self.generated_debug_info: list = [] - self.generated_field_names: dict[Field, str] = {} + # Use struct_name and field_name as key instead of Field object + self.generated_field_names: dict[str, dict[str, str]] = {} self.assignments: dict[str, AssignmentInfo] = assignments if not handler.is_ready: raise ImportError( @@ -72,12 +72,30 @@ def struct_processor(self, struct, processing_stack=None): f"Warning: Dependency {dependency} not found in handler" ) + # Generate IR first to populate field names + self.generated_debug_info.append( + (struct, self.gen_ir(struct, self.generated_debug_info)) + ) + # Fill the assignments dictionary with struct information if struct.name not in self.assignments: # Create a members dictionary for AssignmentInfo members_dict = {} for field_name, field in 
struct.fields.items(): - members_dict[field_name] = (self.generated_field_names[field], field) + # Get the generated field name from our dictionary, or use field_name if not found + if ( + struct.name in self.generated_field_names + and field_name in self.generated_field_names[struct.name] + ): + field_co_re_name = self.generated_field_names[struct.name][ + field_name + ] + members_dict[field_name] = (field_co_re_name, field) + else: + raise ValueError( + f"llvm global name not found for struct field {field_name}" + ) + # members_dict[field_name] = (field_name, field) # Add struct to assignments dictionary self.assignments[struct.name] = AssignmentInfo( @@ -90,10 +108,6 @@ def struct_processor(self, struct, processing_stack=None): ) logger.info(f"Added struct assignment info for {struct.name}") - # Actual processor logic here after dependencies are resolved - self.generated_debug_info.append( - (struct, self.gen_ir(struct, self.generated_debug_info)) - ) self.generated.append(struct.name) finally: @@ -108,6 +122,11 @@ def gen_ir(self, struct, generated_debug_info): struct, self.llvm_module, generated_debug_info ) field_index = 0 + + # Make sure the struct has an entry in our field names dictionary + if struct.name not in self.generated_field_names: + self.generated_field_names[struct.name] = {} + for field_name, field in struct.fields.items(): # does not take arrays and similar types into consideration yet. 
if field.ctype_complex_type is not None and issubclass( @@ -117,11 +136,27 @@ def gen_ir(self, struct, generated_debug_info): containing_type = field.containing_type if containing_type.__module__ == ctypes.__name__: containing_type_size = ctypes.sizeof(containing_type) + if array_size == 0: + field_co_re_name = self._struct_name_generator( + struct, field, field_index, True, 0, containing_type_size + ) + self.generated_field_names[struct.name][field_name] = ( + field_co_re_name + ) + globvar = ir.GlobalVariable( + self.llvm_module, ir.IntType(64), name=field_co_re_name + ) + globvar.linkage = "external" + globvar.set_metadata("llvm.preserve.access.index", debug_info) + field_index += 1 + continue for i in range(0, array_size): field_co_re_name = self._struct_name_generator( struct, field, field_index, True, i, containing_type_size ) - self.generated_field_names[field] = field_co_re_name + self.generated_field_names[struct.name][field_name] = ( + field_co_re_name + ) globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name ) @@ -139,7 +174,9 @@ def gen_ir(self, struct, generated_debug_info): field_co_re_name = self._struct_name_generator( struct, field, field_index, True, i, containing_type_size ) - self.generated_field_names[field] = field_co_re_name + self.generated_field_names[struct.name][field_name] = ( + field_co_re_name + ) globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name ) @@ -150,7 +187,7 @@ def gen_ir(self, struct, generated_debug_info): field_co_re_name = self._struct_name_generator( struct, field, field_index ) - self.generated_field_names[field] = field_co_re_name + self.generated_field_names[struct.name][field_name] = field_co_re_name field_index += 1 globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name From 3b323132f04d409e3889ced6c862533ba4e92a87 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Mon, 20 Oct 2025 21:29:04 +0530 Subject: [PATCH 6/8] change 
equality condition --- pythonbpf/vmlinux_parser/dependency_node.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/pythonbpf/vmlinux_parser/dependency_node.py b/pythonbpf/vmlinux_parser/dependency_node.py index 5055b325..dd413ad4 100644 --- a/pythonbpf/vmlinux_parser/dependency_node.py +++ b/pythonbpf/vmlinux_parser/dependency_node.py @@ -39,22 +39,9 @@ def __hash__(self): def __eq__(self, other): """ Define equality consistent with the hash function. - Two fields are equal if they have the same name, type, and offset. + Two fields are equal only if they are the same object """ - # DO ther change here - if not isinstance(other, Field): - return False - - return ( - self.name == other.name - and self.type is other.type - and self.ctype_complex_type is other.ctype_complex_type - and self.containing_type is other.containing_type - and self.type_size == other.type_size - and self.bitfield_size == other.bitfield_size - and self.offset == other.offset - and self.value == other.value - ) + return self is other def set_ready(self, is_ready: bool = True) -> None: """Set the readiness state of this field.""" From 56a2fbaf5b4c0a39cdcb144e3472ea13d368c8f6 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Mon, 20 Oct 2025 21:36:46 +0530 Subject: [PATCH 7/8] change globvar string to real global variable --- pythonbpf/vmlinux_parser/assignment_info.py | 3 ++- .../vmlinux_parser/ir_gen/ir_generation.py | 26 +++++++++---------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/pythonbpf/vmlinux_parser/assignment_info.py b/pythonbpf/vmlinux_parser/assignment_info.py index 435a7ad1..465432d5 100644 --- a/pythonbpf/vmlinux_parser/assignment_info.py +++ b/pythonbpf/vmlinux_parser/assignment_info.py @@ -1,6 +1,7 @@ from enum import Enum, auto from typing import Any, Dict, List, Optional, TypedDict from dataclasses import dataclass +import llvmlite.ir as ir from pythonbpf.vmlinux_parser.dependency_node import Field @@ -32,4 +33,4 @@
class AssignmentInfo(TypedDict): # The key of the dict is the name of the field. # Value is a tuple that contains the global variable representing that field # along with all the information about that field as a Field type. - members: Optional[Dict[str, tuple[str, Field]]] # For structs. + members: Optional[Dict[str, tuple[ir.GlobalVariable, Field]]] # For structs. diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index 52fd4bbc..949b4b69 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -18,7 +18,7 @@ def __init__(self, llvm_module, handler: DependencyHandler, assignments): self.generated: list[str] = [] self.generated_debug_info: list = [] # Use struct_name and field_name as key instead of Field object - self.generated_field_names: dict[str, dict[str, str]] = {} + self.generated_field_names: dict[str, dict[str, ir.GlobalVariable]] = {} self.assignments: dict[str, AssignmentInfo] = assignments if not handler.is_ready: raise ImportError( @@ -87,10 +87,10 @@ def struct_processor(self, struct, processing_stack=None): struct.name in self.generated_field_names and field_name in self.generated_field_names[struct.name] ): - field_co_re_name = self.generated_field_names[struct.name][ + field_global_variable = self.generated_field_names[struct.name][ field_name ] - members_dict[field_name] = (field_co_re_name, field) + members_dict[field_name] = (field_global_variable, field) else: raise ValueError( f"llvm global name not found for struct field {field_name}" @@ -140,28 +140,28 @@ def gen_ir(self, struct, generated_debug_info): field_co_re_name = self._struct_name_generator( struct, field, field_index, True, 0, containing_type_size ) - self.generated_field_names[struct.name][field_name] = ( - field_co_re_name - ) globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name ) globvar.linkage = "external" 
globvar.set_metadata("llvm.preserve.access.index", debug_info) + self.generated_field_names[struct.name][field_name] = ( + globvar + ) field_index += 1 continue for i in range(0, array_size): field_co_re_name = self._struct_name_generator( struct, field, field_index, True, i, containing_type_size ) - self.generated_field_names[struct.name][field_name] = ( - field_co_re_name - ) globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name ) globvar.linkage = "external" globvar.set_metadata("llvm.preserve.access.index", debug_info) + self.generated_field_names[struct.name][field_name] = ( + globvar + ) field_index += 1 elif field.type_size is not None: array_size = field.type_size @@ -174,26 +174,26 @@ def gen_ir(self, struct, generated_debug_info): field_co_re_name = self._struct_name_generator( struct, field, field_index, True, i, containing_type_size ) - self.generated_field_names[struct.name][field_name] = ( - field_co_re_name - ) globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name ) globvar.linkage = "external" globvar.set_metadata("llvm.preserve.access.index", debug_info) + self.generated_field_names[struct.name][field_name] = ( + globvar + ) field_index += 1 else: field_co_re_name = self._struct_name_generator( struct, field, field_index ) - self.generated_field_names[struct.name][field_name] = field_co_re_name field_index += 1 globvar = ir.GlobalVariable( self.llvm_module, ir.IntType(64), name=field_co_re_name ) globvar.linkage = "external" globvar.set_metadata("llvm.preserve.access.index", debug_info) + self.generated_field_names[struct.name][field_name] = globvar return debug_info def _struct_name_generator( From 76d0dbfbf4711729b54ee2f76794d39c6b0de894 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Mon, 20 Oct 2025 21:36:54 +0530 Subject: [PATCH 8/8] change globvar string to real global variable --- pythonbpf/vmlinux_parser/ir_gen/ir_generation.py | 12 +++--------- 1 file changed, 3 
insertions(+), 9 deletions(-) diff --git a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py index 949b4b69..960671e1 100644 --- a/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py +++ b/pythonbpf/vmlinux_parser/ir_gen/ir_generation.py @@ -145,9 +145,7 @@ def gen_ir(self, struct, generated_debug_info): ) globvar.linkage = "external" globvar.set_metadata("llvm.preserve.access.index", debug_info) - self.generated_field_names[struct.name][field_name] = ( - globvar - ) + self.generated_field_names[struct.name][field_name] = globvar field_index += 1 continue for i in range(0, array_size): @@ -159,9 +157,7 @@ def gen_ir(self, struct, generated_debug_info): ) globvar.linkage = "external" globvar.set_metadata("llvm.preserve.access.index", debug_info) - self.generated_field_names[struct.name][field_name] = ( - globvar - ) + self.generated_field_names[struct.name][field_name] = globvar field_index += 1 elif field.type_size is not None: array_size = field.type_size @@ -179,9 +175,7 @@ def gen_ir(self, struct, generated_debug_info): ) globvar.linkage = "external" globvar.set_metadata("llvm.preserve.access.index", debug_info) - self.generated_field_names[struct.name][field_name] = ( - globvar - ) + self.generated_field_names[struct.name][field_name] = globvar field_index += 1 else: field_co_re_name = self._struct_name_generator(