Skip to content

Commit 079ceaa

Browse files
Merge pull request #57 from pythonbpf/vmlinux-debug-info
Add debug info handling to vmlinux * Does not add support for recursive ctypes pointer based resolution * Still does not support unions and function pointers. * Has the mechanism to build for function pointers added.
2 parents 51a1be0 + 328b792 commit 079ceaa

File tree

6 files changed

+283
-16
lines changed

6 files changed

+283
-16
lines changed

pythonbpf/debuginfo/debug_info_generator.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,20 @@ def create_array_type(self, base_type: Any, count: int) -> Any:
8181
},
8282
)
8383

84+
def create_array_type_vmlinux(self, type_info: Any, count: int) -> Any:
    """Build array debug info from a pre-sized vmlinux element description.

    Args:
        type_info: Pair ``(base_type, size)``. The first item becomes the
            array's ``baseType`` and the second is stored verbatim as the
            array's ``size`` attribute.
        count: Element count recorded in the array's ``DISubrange``.

    Returns:
        The ``DICompositeType`` node added to the module.
    """
    element_type, total_size = type_info
    bounds = self.module.add_debug_info("DISubrange", {"count": count})
    array_attrs = {
        "tag": dc.DW_TAG_array_type,
        "baseType": element_type,
        "size": total_size,
        "elements": [bounds],
    }
    return self.module.add_debug_info("DICompositeType", array_attrs)
97+
8498
@staticmethod
8599
def _compute_array_size(base_type: Any, count: int) -> int:
86100
# Extract size from base_type if possible
@@ -101,6 +115,23 @@ def create_struct_member(self, name: str, base_type: Any, offset: int) -> Any:
101115
},
102116
)
103117

118+
def create_struct_member_vmlinux(
    self, name: str, base_type_with_size: Any, offset: int
) -> Any:
    """Build member debug info for a vmlinux struct field.

    Args:
        name: Field name stored on the member node.
        base_type_with_size: Pair ``(base_type, size)``. The first item is
            the member's ``baseType`` and the second is stored verbatim as
            its ``size`` attribute.
        offset: Value stored as the member's ``offset`` attribute.

    Returns:
        The ``DIDerivedType`` node added to the module.
    """
    member_type, member_size = base_type_with_size
    member_attrs = {
        "tag": dc.DW_TAG_member,
        "name": name,
        "file": self.module._file_metadata,
        "baseType": member_type,
        "size": member_size,
        "offset": offset,
    }
    return self.module.add_debug_info("DIDerivedType", member_attrs)
134+
104135
def create_struct_type(
105136
self, members: List[Any], size: int, is_distinct: bool
106137
) -> Any:
@@ -116,6 +147,22 @@ def create_struct_type(
116147
is_distinct=is_distinct,
117148
)
118149

150+
def create_struct_type_with_name(
    self, name: str, members: List[Any], size: int, is_distinct: bool
) -> Any:
    """Build a named structure type from already-created member nodes.

    Args:
        name: Name stored on the structure type node.
        members: Member debug-info nodes used as the type's ``elements``.
        size: Value stored as the type's ``size`` attribute.
        is_distinct: Forwarded to ``add_debug_info`` as ``is_distinct``.

    Returns:
        The ``DICompositeType`` node added to the module.
    """
    struct_attrs = {
        "name": name,
        "tag": dc.DW_TAG_structure_type,
        "file": self.module._file_metadata,
        "size": size,
        "elements": members,
    }
    return self.module.add_debug_info(
        "DICompositeType", struct_attrs, is_distinct=is_distinct
    )
165+
119166
def create_global_var_debug_info(
120167
self, name: str, var_type: Any, is_local: bool = False
121168
) -> Any:

pythonbpf/vmlinux_parser/class_handler.py

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,45 @@ def process_vmlinux_post_ast(
9999
local_module_name = getattr(elem_type, "__module__", None)
100100
new_dep_node.add_field(elem_name, elem_type, ready=False)
101101
if local_module_name == ctypes.__name__:
102+
# TODO: need to process pointer to ctype and also CFUNCTYPES here recursively. Current processing is a single dereference
102103
new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size)
103-
new_dep_node.set_field_ready(elem_name, is_ready=True)
104-
logger.debug(
105-
f"Field {elem_name} is direct ctypes type: {elem_type}"
106-
)
104+
105+
# Process pointer to ctype
106+
if isinstance(elem_type, type) and issubclass(
107+
elem_type, ctypes._Pointer
108+
):
109+
# Get the pointed-to type
110+
pointed_type = elem_type._type_
111+
logger.debug(f"Found pointer to type: {pointed_type}")
112+
new_dep_node.set_field_containing_type(elem_name, pointed_type)
113+
new_dep_node.set_field_ctype_complex_type(
114+
elem_name, ctypes._Pointer
115+
)
116+
new_dep_node.set_field_ready(elem_name, is_ready=True)
117+
118+
# Process function pointers (CFUNCTYPE)
119+
elif hasattr(elem_type, "_restype_") and hasattr(
120+
elem_type, "_argtypes_"
121+
):
122+
# This is a CFUNCTYPE or similar
123+
logger.info(
124+
f"Function pointer detected for {elem_name} with return type {elem_type._restype_} and arguments {elem_type._argtypes_}"
125+
)
126+
# Set the field as ready but mark it with special handling
127+
new_dep_node.set_field_ctype_complex_type(
128+
elem_name, ctypes.CFUNCTYPE
129+
)
130+
new_dep_node.set_field_ready(elem_name, is_ready=True)
131+
logger.warning(
132+
"Blindly processing CFUNCTYPE ctypes to ensure compilation. Unsupported"
133+
)
134+
135+
else:
136+
# Regular ctype
137+
new_dep_node.set_field_ready(elem_name, is_ready=True)
138+
logger.debug(
139+
f"Field {elem_name} is direct ctypes type: {elem_type}"
140+
)
107141
elif local_module_name == "vmlinux":
108142
new_dep_node.set_field_bitfield_size(elem_name, elem_bitfield_size)
109143
logger.debug(
@@ -127,6 +161,10 @@ def process_vmlinux_post_ast(
127161
ctype_complex_type = ctypes.Array
128162
elif issubclass(elem_type, ctypes._Pointer):
129163
ctype_complex_type = ctypes._Pointer
164+
else:
165+
raise ImportError(
166+
"Non Array and Pointer type ctype imports not supported in current version"
167+
)
130168
else:
131169
raise TypeError("Unsupported ctypes subclass")
132170
else:
Lines changed: 154 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,161 @@
1-
from pythonbpf.debuginfo import DebugInfoGenerator
1+
from pythonbpf.debuginfo import DebugInfoGenerator, dwarf_constants as dc
2+
from ..dependency_node import DependencyNode
3+
import ctypes
4+
import logging
5+
from typing import List, Any, Tuple
26

7+
logger = logging.getLogger(__name__)
38

4-
def debug_info_generation(struct, llvm_module):
9+
10+
def debug_info_generation(
    struct: DependencyNode,
    llvm_module,
    generated_debug_info: List[Tuple[DependencyNode, Any]],
) -> Any:
    """
    Generate DWARF debug information for a struct defined in a DependencyNode.

    Args:
        struct: The dependency node containing struct information
        llvm_module: The LLVM module to add debug info to
        generated_debug_info: List of tuples (struct, debug_info) to track generated debug info

    Returns:
        The struct type debug info for ``struct``. If an entry with the same
        name is already present in ``generated_debug_info``, that entry's
        debug info is returned and nothing new is emitted.

    Raises:
        ValueError: If ``struct.name`` does not start with ``"struct_"``
            (unions are not supported in the current version).
    """
    # Set up debug info generator
    generator = DebugInfoGenerator(llvm_module)

    # Check if debug info for this struct has already been generated
    for existing_struct, debug_info in generated_debug_info:
        if existing_struct.name == struct.name:
            return debug_info

    # Reject unsupported nodes before any member metadata is added to the
    # module, so a failure does not leave orphaned member nodes behind.
    if not struct.name.startswith("struct_"):
        raise ValueError("Unions are not supported in the current version")
    struct_name = struct.name.removeprefix("struct_")

    # Process all fields and create members for the struct
    members = []
    for field_name, field in struct.fields.items():
        # Get appropriate debug type for this field
        field_type = _get_field_debug_type(
            field_name, field, generator, struct, generated_debug_info
        )
        # Create struct member with proper offset (field.offset is scaled by 8
        # to match the bit-based sizes used for the member types)
        member = generator.create_struct_member_vmlinux(
            field_name, field_type, field.offset * 8
        )
        members.append(member)

    # Create struct type with all members
    struct_type = generator.create_struct_type_with_name(
        struct_name, members, struct.__sizeof__() * 8, is_distinct=True
    )

    return struct_type
57+
58+
59+
def _get_field_debug_type(
    field_name: str,
    field,
    generator: DebugInfoGenerator,
    parent_struct: DependencyNode,
    generated_debug_info: List[Tuple[DependencyNode, Any]],
) -> tuple[Any, int]:
    """
    Determine the appropriate debug type for a field based on its Python/ctypes type.

    Args:
        field_name: Name of the field
        field: Field object containing type information
        generator: DebugInfoGenerator instance
        parent_struct: The parent struct containing this field
        generated_debug_info: List of already generated debug info

    Returns:
        A tuple ``(debug_type, size)`` where ``size`` is the size in bits that
        is recorded on the struct member for this field
    """
    complex_type = field.ctype_complex_type

    # Handle complex types (arrays, pointers). The marker is only passed to
    # issubclass() when it is a class: function-pointer fields are tagged with
    # ctypes.CFUNCTYPE, which is a factory function, and issubclass() raises
    # TypeError for non-class arguments.
    if isinstance(complex_type, type):
        if issubclass(complex_type, ctypes.Array):
            # Handle array types; field.type_size is used as the element count
            element_type, base_type_size = _get_basic_debug_type(
                field.containing_type, generator
            )
            array_size = field.type_size * base_type_size
            return (
                generator.create_array_type_vmlinux(
                    (element_type, array_size), field.type_size
                ),
                array_size,
            )
        if issubclass(complex_type, ctypes._Pointer):
            # Handle pointer types
            pointee_type, _ = _get_basic_debug_type(field.containing_type, generator)
            return generator.create_pointer_type(pointee_type), 64
    elif complex_type is not None:
        # Non-class marker (e.g. ctypes.CFUNCTYPE): not supported yet, so fall
        # through to the generic handling below instead of crashing.
        logger.warning(
            f"Unsupported complex type marker for field {field_name}; "
            "using basic type handling"
        )

    # Handle other vmlinux types (nested structs)
    if field.type.__module__ == "vmlinux":
        # If it's a struct from vmlinux, check if we've already generated debug info for it
        struct_name = field.type.__name__

        # Look for existing debug info in the list
        for existing_struct, debug_info in generated_debug_info:
            if existing_struct.name == struct_name:
                # Use existing debug info. The size is scaled by 8 so it is in
                # bits like every other size returned here, matching how the
                # struct's own size is computed in debug_info_generation.
                return debug_info, existing_struct.__sizeof__() * 8

        # If not found, create a forward declaration
        # This will be completed when the actual struct is processed
        logger.warning("Forward declaration in struct created")
        forward_type = generator.create_struct_type([], 0, is_distinct=True)
        return forward_type, 0

    # Handle basic C types
    return _get_basic_debug_type(field.type, generator)
113+
114+
115+
def _get_basic_debug_type(ctype, generator: DebugInfoGenerator) -> tuple[Any, int]:
    """
    Map a ctypes type to a DWARF debug type.

    Args:
        ctype: A ctypes type or Python type
        generator: DebugInfoGenerator instance

    Returns:
        A tuple ``(debug_type, size_in_bits)``. Types that are not recognised
        fall back to the generator's 64-bit unsigned type.
    """
    # Map ctypes to debug info types. The order of the checks is significant:
    # several ctypes names are aliases of each other on a given platform, and
    # the first matching branch decides the reported type name.
    if ctype == ctypes.c_char or ctype == ctypes.c_byte:
        return generator.get_basic_type("char", 8, dc.DW_ATE_signed_char), 8
    elif ctype == ctypes.c_ubyte or ctype == ctypes.c_uint8:
        return generator.get_basic_type("unsigned char", 8, dc.DW_ATE_unsigned_char), 8
    elif ctype == ctypes.c_short or ctype == ctypes.c_int16:
        return generator.get_basic_type("short", 16, dc.DW_ATE_signed), 16
    elif ctype == ctypes.c_ushort or ctype == ctypes.c_uint16:
        return generator.get_basic_type("unsigned short", 16, dc.DW_ATE_unsigned), 16
    elif ctype == ctypes.c_int or ctype == ctypes.c_int32:
        return generator.get_basic_type("int", 32, dc.DW_ATE_signed), 32
    elif ctype == ctypes.c_uint or ctype == ctypes.c_uint32:
        return generator.get_basic_type("unsigned int", 32, dc.DW_ATE_unsigned), 32
    elif ctype == ctypes.c_long:
        return generator.get_basic_type("long", 64, dc.DW_ATE_signed), 64
    elif ctype == ctypes.c_ulong:
        return generator.get_basic_type("unsigned long", 64, dc.DW_ATE_unsigned), 64
    elif ctype == ctypes.c_longlong or ctype == ctypes.c_int64:
        return generator.get_basic_type("long long", 64, dc.DW_ATE_signed), 64
    elif ctype == ctypes.c_ulonglong or ctype == ctypes.c_uint64:
        return generator.get_basic_type(
            "unsigned long long", 64, dc.DW_ATE_unsigned
        ), 64
    elif ctype == ctypes.c_float:
        return generator.get_basic_type("float", 32, dc.DW_ATE_float), 32
    elif ctype == ctypes.c_double:
        return generator.get_basic_type("double", 64, dc.DW_ATE_float), 64
    elif ctype == ctypes.c_bool:
        return generator.get_basic_type("bool", 8, dc.DW_ATE_boolean), 8
    elif ctype == ctypes.c_char_p:
        # The pointee must be the debug type itself (not a (type, size) pair),
        # and the result must be a (type, size) pair like every other branch.
        char_type = generator.get_basic_type("char", 8, dc.DW_ATE_signed_char)
        return generator.create_pointer_type(char_type), 64
    elif ctype == ctypes.c_void_p:
        return generator.create_pointer_type(None), 64
    else:
        return generator.get_uint64_type(), 64

pythonbpf/vmlinux_parser/ir_gen/ir_generation.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ def __init__(self, llvm_module, handler: DependencyHandler, assignment=None):
1414
self.llvm_module = llvm_module
1515
self.handler: DependencyHandler = handler
1616
self.generated: list[str] = []
17+
self.generated_debug_info: list = []
1718
if not handler.is_ready:
1819
raise ImportError(
1920
"Semantic analysis of vmlinux imports failed. Cannot generate IR"
@@ -67,18 +68,22 @@ def struct_processor(self, struct, processing_stack=None):
6768
)
6869

6970
# Actual processor logic here after dependencies are resolved
70-
self.gen_ir(struct)
71+
self.generated_debug_info.append(
72+
(struct, self.gen_ir(struct, self.generated_debug_info))
73+
)
7174
self.generated.append(struct.name)
7275

7376
finally:
7477
# Remove from processing stack after we're done
7578
processing_stack.discard(struct.name)
7679

77-
def gen_ir(self, struct):
80+
def gen_ir(self, struct, generated_debug_info):
7881
# TODO: we add the btf_ama attribute by monkey patching in the end of compilation, but once llvmlite
7982
# accepts our issue, we will resort to normal accessed attribute based attribute addition
8083
# currently we generate all possible field accesses for CO-RE and put into the assignment table
81-
debug_info = debug_info_generation(struct, self.llvm_module)
84+
debug_info = debug_info_generation(
85+
struct, self.llvm_module, generated_debug_info
86+
)
8287
field_index = 0
8388
for field_name, field in struct.fields.items():
8489
# does not take arrays and similar types into consideration yet.
@@ -126,6 +131,7 @@ def gen_ir(self, struct):
126131
)
127132
globvar.linkage = "external"
128133
globvar.set_metadata("llvm.preserve.access.index", debug_info)
134+
return debug_info
129135

130136
def _struct_name_generator(
131137
self,

tests/c-form/ex7.bpf.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ struct {
1919
SEC("tp/syscalls/sys_enter_setuid")
2020
int handle_setuid_entry(struct trace_event_raw_sys_enter *ctx) {
2121
struct event data = {};
22-
22+
struct blk_integrity_iter it = {};
2323
// Extract UID from the syscall arguments
2424
data.uid = (unsigned int)ctx->args[0];
2525
data.ts = bpf_ktime_get_ns();
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
from pythonbpf import bpf, section, bpfglobal, compile_to_ir, compile
2+
from vmlinux import TASK_COMM_LEN # noqa: F401
3+
from vmlinux import struct_trace_event_raw_sys_enter # noqa: F401
4+
5+
# from vmlinux import struct_uinput_device
6+
# from vmlinux import struct_blk_integrity_iter
7+
from ctypes import c_int64
8+
9+
10+
# Instructions to how to run this program
11+
# 1. Install PythonBPF: pip install pythonbpf
12+
# 2. Run the program: python examples/simple_struct_test.py
13+
# 3. Run the program with sudo: sudo tools/check.sh run examples/simple_struct_test.o
14+
# 4. Attach object file to any network device with something like ./check.sh run examples/simple_struct_test.o tailscale0
15+
# 5. send traffic through the device and observe effects
16+
# Example program placed in the "tracepoint/syscalls/sys_enter_execve" section:
# it prints a greeting and returns c_int64(0).
# NOTE(review): presumably @bpf marks the function for compilation by pythonbpf
# and ctx is the tracepoint's raw sys_enter record from vmlinux - confirm.
@bpf
17+
@section("tracepoint/syscalls/sys_enter_execve")
18+
def hello_world(ctx: struct_trace_event_raw_sys_enter) -> c_int64:
19+
print("Hello, World!")
20+
return c_int64(0)
21+
22+
23+
# Returns the license string "GPL".
# NOTE(review): presumably @bpfglobal exposes this value as the compiled
# object's license global - confirm against pythonbpf.
@bpf
24+
@bpfglobal
25+
def LICENSE() -> str:
26+
return "GPL"
27+
28+
29+
# Runs at import time: first compile_to_ir() is called with this script's file
# name and a ".ll" output name, then compile() is called with no arguments.
# NOTE(review): presumably compile() builds the final object file - confirm.
compile_to_ir("simple_struct_test.py", "simple_struct_test.ll")
30+
compile()

0 commit comments

Comments
 (0)