Skip to content

Commit dd734ea

Browse files
Merge pull request #56 from pythonbpf/vmlinux-ir-gen
Adds IR and debug info generation capabilities for vmlinux imported structs
2 parents 9a60dd8 + 71d005b commit dd734ea

File tree

7 files changed

+260
-43
lines changed

7 files changed

+260
-43
lines changed

pythonbpf/codegen.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,22 @@
1919
import tempfile
2020
from logging import Logger
2121
import logging
22+
import re
2223

2324
logger: Logger = logging.getLogger(__name__)
2425

2526
VERSION = "v0.1.4"
2627

2728

29+
def finalize_module(original_str: str) -> str:
    """Return the textual IR with ``"btf_ama"`` appended to CO-RE field globals.

    After all IR generation is complete, the ``btf_ama`` attribute is monkey
    patched onto the module text: every line declaring an external ``i64``
    global whose quoted name starts with ``llvm.`` and contains a ``:``, and
    which carries ``!llvm.preserve.access.index`` metadata, gets a trailing
    ``"btf_ama"`` string attribute.

    The substitution is idempotent: a declaration that is already followed by
    ``"btf_ama"`` is left untouched, so finalizing the same text twice does
    not duplicate the attribute.

    Args:
        original_str: The module rendered as LLVM IR text.

    Returns:
        The IR text with the attribute added to each matching declaration.
    """
    pattern = (
        r'(@"llvm\.[^"]+:[^"]*" = external global i64, '
        r"!llvm\.preserve\.access\.index ![0-9]+)"
        # Do not match when the metadata id continues (which would let the
        # regex backtrack into a shorter id) or when the attribute is
        # already present.
        r'(?![0-9]| "btf_ama")'
    )
    replacement = r'\1 "btf_ama"'
    return re.sub(pattern, replacement, original_str)
36+
37+
2838
def find_bpf_chunks(tree):
2939
"""Find all functions decorated with @bpf in the AST."""
3040
bpf_functions = []
@@ -121,10 +131,12 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO):
121131

122132
module.add_named_metadata("llvm.ident", [f"PythonBPF {VERSION}"])
123133

134+
module_string = finalize_module(str(module))
135+
124136
logger.info(f"IR written to {output}")
125137
with open(output, "w") as f:
126138
f.write(f'source_filename = "{filename}"\n')
127-
f.write(str(module))
139+
f.write(module_string)
128140
f.write("\n")
129141

130142
return output

pythonbpf/vmlinux_parser/class_handler.py

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@ def process_vmlinux_post_ast(
6060
pass
6161
else:
6262
new_dep_node = DependencyNode(name=current_symbol_name)
63+
64+
# elem_type_class is the actual vmlinux struct/class
65+
new_dep_node.set_ctype_struct(elem_type_class)
66+
6367
handler.add_node(new_dep_node)
6468
class_obj = getattr(imported_module, current_symbol_name)
6569
# Inspect the class fields
@@ -71,9 +75,6 @@ def process_vmlinux_post_ast(
7175
if len(field_elem) == 2:
7276
field_name, field_type = field_elem
7377
elif len(field_elem) == 3:
74-
raise NotImplementedError(
75-
"Bitfields are not supported in the current version"
76-
)
7778
field_name, field_type, bitfield_size = field_elem
7879
field_table[field_name] = [field_type, bitfield_size]
7980
elif hasattr(class_obj, "__annotations__"):
@@ -144,15 +145,35 @@ def process_vmlinux_post_ast(
144145
)
145146
new_dep_node.set_field_type(elem_name, elem_type)
146147
if containing_type.__module__ == "vmlinux":
147-
process_vmlinux_post_ast(
148-
containing_type, llvm_handler, handler, processing_stack
149-
)
150-
size_of_containing_type = (
151-
handler[containing_type.__name__]
152-
).__sizeof__()
153-
new_dep_node.set_field_ready(
154-
elem_name, True, size_of_containing_type
148+
containing_type_name = (
149+
containing_type.__name__
150+
if hasattr(containing_type, "__name__")
151+
else str(containing_type)
155152
)
153+
154+
# Check for self-reference or already processed
155+
if containing_type_name == current_symbol_name:
156+
# Self-referential pointer
157+
logger.debug(
158+
f"Self-referential pointer in {current_symbol_name}.{elem_name}"
159+
)
160+
new_dep_node.set_field_ready(elem_name, True)
161+
elif handler.has_node(containing_type_name):
162+
# Already processed
163+
logger.debug(
164+
f"Reusing already processed {containing_type_name}"
165+
)
166+
new_dep_node.set_field_ready(elem_name, True)
167+
else:
168+
# Not processed yet: record the dependency, then process it recursively
169+
new_dep_node.add_dependent(containing_type_name)
170+
process_vmlinux_post_ast(
171+
containing_type,
172+
llvm_handler,
173+
handler,
174+
processing_stack,
175+
)
176+
new_dep_node.set_field_ready(elem_name, True)
156177
elif containing_type.__module__ == ctypes.__name__:
157178
logger.debug(f"Processing ctype internal{containing_type}")
158179
new_dep_node.set_field_ready(elem_name, True)
@@ -169,12 +190,7 @@ def process_vmlinux_post_ast(
169190
process_vmlinux_post_ast(
170191
elem_type, llvm_handler, handler, processing_stack
171192
)
172-
size_of_containing_type = (
173-
handler[elem_type.__name__]
174-
).__sizeof__()
175-
new_dep_node.set_field_ready(
176-
elem_name, True, size_of_containing_type
177-
)
193+
new_dep_node.set_field_ready(elem_name, True)
178194
else:
179195
raise ValueError(
180196
f"{elem_name} with type {elem_type} from module {module_name} not supported in recursive resolver"

pythonbpf/vmlinux_parser/dependency_handler.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,3 +167,7 @@ def __getitem__(self, name: str) -> DependencyNode:
167167
if name not in self._nodes:
168168
raise KeyError(f"No node with name '{name}' found")
169169
return self._nodes[name]
170+
171+
@property
def nodes(self):
    """Expose the handler's internal name-to-node mapping.

    The mapping itself is returned (``self._nodes``), not a copy.
    """
    return self._nodes

pythonbpf/vmlinux_parser/dependency_node.py

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ class DependencyNode:
116116
fields: Dict[str, Field] = field(default_factory=dict)
117117
_ready_cache: Optional[bool] = field(default=None, repr=False)
118118
current_offset: int = 0
119+
ctype_struct: Optional[Any] = field(default=None, repr=False)
119120

120121
def add_field(
121122
self,
@@ -146,7 +147,14 @@ def add_field(
146147
# Invalidate readiness cache
147148
self._ready_cache = None
148149

150+
def set_ctype_struct(self, ctype_struct: Any) -> None:
    """Attach the ctypes structure that backs this node.

    Once set, ``__sizeof__`` reports ``ctypes.sizeof`` of this structure and
    ``set_field_ready`` takes field offsets from it instead of accumulating
    them manually.
    """
    self.ctype_struct = ctype_struct
153+
149154
def __sizeof__(self):
    """Return the size of the structure this node describes.

    Without an attached ctypes structure the manually accumulated
    ``current_offset`` is reported; otherwise ``ctypes.sizeof`` of the
    attached structure is used.
    """
    if self.ctype_struct is None:
        return self.current_offset
    return ctypes.sizeof(self.ctype_struct)
151159

152160
def get_field(self, name: str) -> Field:
@@ -226,8 +234,22 @@ def set_field_ready(
226234
raise KeyError(f"Field '{name}' does not exist in node '{self.name}'")
227235

228236
self.fields[name].set_ready(is_ready)
229-
self.fields[name].set_offset(self.current_offset)
230-
self.current_offset += self._calculate_size(name, size_of_containing_type)
237+
238+
# Use ctypes built-in offset if available
239+
if self.ctype_struct is not None:
240+
try:
241+
self.fields[name].set_offset(getattr(self.ctype_struct, name).offset)
242+
except AttributeError:
243+
# Fallback to manual calculation if field not found in ctype_struct
244+
self.fields[name].set_offset(self.current_offset)
245+
self.current_offset += self._calculate_size(
246+
name, size_of_containing_type
247+
)
248+
else:
249+
# Manual offset calculation when no ctype_struct is available
250+
self.fields[name].set_offset(self.current_offset)
251+
self.current_offset += self._calculate_size(name, size_of_containing_type)
252+
231253
# Invalidate readiness cache
232254
self._ready_cache = None
233255

@@ -275,8 +297,28 @@ def _calculate_size(
275297
raise NotImplementedError(
276298
"This subclass of ctype not supported yet"
277299
)
300+
elif processing_field.type_size is not None:
301+
# Handle vmlinux types with type_size but no ctype_complex_type
302+
# This means it's a direct vmlinux struct field (not array/pointer wrapped)
303+
# The type_size should already contain the full size of the struct
304+
# But if there's a containing_type from vmlinux, we need that size
305+
if processing_field.containing_type is not None:
306+
if processing_field.containing_type.__module__ == "vmlinux":
307+
# For vmlinux containing types, we need the pre-calculated size
308+
if size_of_containing_type is not None:
309+
return size_of_containing_type * processing_field.type_size
310+
else:
311+
raise RuntimeError(
312+
f"Field {name}: vmlinux containing_type requires size_of_containing_type"
313+
)
314+
else:
315+
raise ModuleNotFoundError(
316+
f"Containing type module {processing_field.containing_type.__module__} not supported"
317+
)
318+
else:
319+
raise RuntimeError("Wrong type found with no containing type")
278320
else:
279-
# search up pre-created stuff and get size
321+
# No ctype_complex_type and no type_size, must rely on size_of_containing_type
280322
if size_of_containing_type is None:
281323
raise RuntimeError(
282324
f"Size of containing type {size_of_containing_type} is None"
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from pythonbpf.debuginfo import DebugInfoGenerator
2+
3+
4+
def debug_info_generation(struct, llvm_module):
    """Create sample debug info for ``struct`` and return its global-variable entry.

    This is placeholder generation: the struct type is created with an empty
    member list and a hard-coded size of ``64 * 4`` (presumably bits -- TODO
    confirm against ``DebugInfoGenerator.create_struct_type``); the real
    fields of ``struct`` are not described yet.

    Args:
        struct: Object whose ``name`` is used for the debug-info global.
        llvm_module: Module handed to ``DebugInfoGenerator``.

    Returns:
        Whatever ``create_global_var_debug_info`` returns for the placeholder type.
    """
    di_generator = DebugInfoGenerator(llvm_module)

    placeholder_type = di_generator.create_struct_type([], 64 * 4, is_distinct=True)

    return di_generator.create_global_var_debug_info(
        struct.name, placeholder_type, is_local=False
    )
Lines changed: 144 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
1+
import ctypes
12
import logging
2-
from pythonbpf.vmlinux_parser.dependency_handler import DependencyHandler
3+
from ..dependency_handler import DependencyHandler
4+
from .debug_info_gen import debug_info_generation
5+
from ..dependency_node import DependencyNode
6+
import llvmlite.ir as ir
37

48
logger = logging.getLogger(__name__)
59

610

711
class IRGenerator:
8-
def __init__(self, module, handler: DependencyHandler):
9-
self.module = module
12+
# get the assignments dict and add this stuff to it.
13+
def __init__(self, llvm_module, handler: DependencyHandler, assignment=None):
14+
self.llvm_module = llvm_module
1015
self.handler: DependencyHandler = handler
1116
self.generated: list[str] = []
1217
if not handler.is_ready:
@@ -15,22 +20,142 @@ def __init__(self, module, handler: DependencyHandler):
1520
)
1621
for struct in handler:
1722
self.struct_processor(struct)
18-
print()
19-
20-
def struct_processor(self, struct):
21-
if struct.name not in self.generated:
22-
print(f"IR generating for {struct.name}")
23-
print(f"Struct is {struct}")
24-
for dependency in struct.depends_on:
25-
if dependency not in self.generated:
26-
dep_node_from_dependency = self.handler[dependency]
27-
self.struct_processor(dep_node_from_dependency)
28-
self.generated.append(dependency)
29-
# write actual processor logic here after assuming all dependencies are resolved
30-
# this part cannot yet resolve circular dependencies. Gets stuck on an infinite loop during that.
23+
24+
def struct_processor(self, struct, processing_stack=None):
    """Generate IR for ``struct`` after generating IR for its dependencies.

    Args:
        struct: Node exposing ``name`` and ``depends_on``.
        processing_stack: Names of the structs currently being processed
            higher up the recursion; a fresh set is created on the first call.

    Raises:
        RuntimeError: If a dependency name is not present in the handler.
    """
    if processing_stack is None:
        processing_stack = set()

    # Nothing to do for a struct whose IR already exists.
    if struct.name in self.generated:
        return

    # Seeing a name that is still on the stack means a dependency cycle.
    # The recursion is cut here; the struct is completed by the call that
    # put it on the stack.
    if struct.name in processing_stack:
        logger.info(
            f"Circular dependency detected for {struct.name}, skipping recursive processing"
        )
        return

    logger.info(f"IR generating for {struct.name}")

    processing_stack.add(struct.name)
    try:
        if struct.depends_on is not None:
            for dependency in struct.depends_on:
                if dependency in self.generated:
                    continue
                if dependency not in self.handler.nodes:
                    raise RuntimeError(
                        f"Warning: Dependency {dependency} not found in handler"
                    )
                # Share the stack so cycles are detected across the recursion.
                self.struct_processor(self.handler[dependency], processing_stack)

        # All dependencies are handled; emit this struct and remember it.
        self.gen_ir(struct)
        self.generated.append(struct.name)
    finally:
        # Always unwind, including when a dependency raised.
        processing_stack.discard(struct.name)
76+
77+
def gen_ir(self, struct):
    """Declare the CO-RE field-access globals for the fields of ``struct``.

    For each field an external ``i64`` global is declared, named by
    ``_struct_name_generator`` and tagged with ``llvm.preserve.access.index``
    metadata:

    * ``ctypes.Array`` fields whose element type comes from ``ctypes`` get one
      global per element, spaced by ``ctypes.sizeof`` of the element type.
    * Other fields with a ``type_size`` whose containing type comes from
      ``vmlinux`` get one global per element, spaced by the size of the
      already-registered dependency node.
    * Fields without a ``type_size`` get a single global.

    Fields in the first two groups whose element type comes from any other
    module get no global, but still consume a field index.

    The element size of a vmlinux type is taken from the node's
    ``__sizeof__()`` rather than ``current_offset``: when a node has a ctypes
    structure attached, ``set_field_ready`` takes offsets from ctypes and does
    not advance ``current_offset``, so it would yield 0 and every element
    would get the same offset.

    Args:
        struct: Dependency node whose ``fields`` mapping is walked in order.
    """
    # TODO: the btf_ama attribute is added by monkey patching at the end of
    # compilation; once llvmlite accepts our issue this should become a
    # regular attribute set on the global at creation time.
    # Currently every possible field access is generated for CO-RE.
    debug_info = debug_info_generation(struct, self.llvm_module)

    # NOTE(review): one index per field, whether or not a global is emitted.
    # This was written from a view with indentation stripped -- confirm it
    # matches the previous placement of ``field_index += 1``.
    for field_index, field in enumerate(struct.fields.values()):
        complex_type = field.ctype_complex_type
        if complex_type is not None and issubclass(complex_type, ctypes.Array):
            element_type = field.containing_type
            if element_type.__module__ != ctypes.__name__:
                continue
            element_size = ctypes.sizeof(element_type)
        elif field.type_size is not None:
            element_type = field.containing_type
            if element_type.__module__ != "vmlinux":
                continue
            # Call the dunder directly: sys.getsizeof() may add GC overhead
            # to the value and is meant for Python object sizes.
            element_size = self.handler[element_type.__name__].__sizeof__()
        else:
            self._declare_co_re_global(
                self._struct_name_generator(struct, field, field_index),
                debug_info,
            )
            continue

        for element_index in range(field.type_size):
            self._declare_co_re_global(
                self._struct_name_generator(
                    struct, field, field_index, True, element_index, element_size
                ),
                debug_info,
            )

def _declare_co_re_global(self, name, debug_info):
    """Declare one external i64 global carrying ``llvm.preserve.access.index``."""
    globvar = ir.GlobalVariable(self.llvm_module, ir.IntType(64), name=name)
    globvar.linkage = "external"
    globvar.set_metadata("llvm.preserve.access.index", debug_info)
129+
130+
def _struct_name_generator(
    self,
    struct: "DependencyNode",
    field,
    field_index: int,
    is_indexed: bool = False,
    index: int = 0,
    containing_type_size: int = 0,
) -> str:
    """Build the global name that encodes one CO-RE field access.

    The name has the form ``llvm.<type>:0:<byte offset>$0:<field index>``,
    with ``:<index>`` appended for array elements. ``<type>`` is
    ``struct.name`` without its ``struct_`` prefix.

    Args:
        struct: Node providing ``name``.
        field: Field providing ``offset``.
        field_index: Position of the field within the struct.
        is_indexed: Whether the access targets an array element.
        index: Element index; only used when ``is_indexed`` is true.
        containing_type_size: Size of one element; only used when
            ``is_indexed`` is true.

    Returns:
        The generated global name.

    Raises:
        TypeError: For a non-indexed access when ``struct.name`` does not
            start with ``struct_``.
    """
    type_name = struct.name.removeprefix("struct_")

    if is_indexed:
        # Indexed accesses are not restricted to "struct_" names.
        element_offset = field.offset + index * containing_type_size
        return f"llvm.{type_name}:0:{element_offset}$0:{field_index}:{index}"

    if struct.name.startswith("struct_"):
        return f"llvm.{type_name}:0:{field.offset}$0:{field_index}"

    # Report through the module logger instead of stdout. The node is logged
    # directly, so a failed handler lookup cannot mask the TypeError below.
    logger.error("Cannot generate a CO-RE name for %s", struct)
    raise TypeError(
        "Name generation cannot occur due to type name not starting with struct"
    )

0 commit comments

Comments
 (0)