From c423cc647da374e40955659d050626e847a3a964 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Wed, 8 Oct 2025 00:45:30 +0530 Subject: [PATCH 1/5] add vmlinux.py transpiler from experiment repository Signed-off-by: varun-r-mallya --- tools/vmlinux-gen.py | 256 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 256 insertions(+) create mode 100755 tools/vmlinux-gen.py diff --git a/tools/vmlinux-gen.py b/tools/vmlinux-gen.py new file mode 100755 index 00000000..6fbc975e --- /dev/null +++ b/tools/vmlinux-gen.py @@ -0,0 +1,256 @@ +#!/usr/bin/env python3 +""" +BTF to Python ctypes Converter +Converts Linux kernel BTF (BPF Type Format) to Python ctypes definitions. + +This tool automates the process of: +1. Dumping BTF from vmlinux +2. Preprocessing enum definitions +3. Running C preprocessor +4. Converting to Python ctypes using clang2py +5. Post-processing the output + +Requirements: +- bpftool +- clang +- ctypeslib2 (pip install ctypeslib2) +""" + +import argparse +import os +import re +import subprocess +import sys +import tempfile + + +class BTFConverter: + def __init__(self, btf_source="/sys/kernel/btf/vmlinux", output_file="vmlinux.py", + keep_intermediate=False, verbose=False): + self.btf_source = btf_source + self.output_file = output_file + self.keep_intermediate = keep_intermediate + self.verbose = verbose + self.temp_dir = tempfile.mkdtemp() if not keep_intermediate else "." 
+ + def log(self, message): + """Print message if verbose mode is enabled.""" + if self.verbose: + print(f"[*] {message}") + + def run_command(self, cmd, description): + """Run a shell command and handle errors.""" + self.log(f"{description}...") + try: + result = subprocess.run( + cmd, + shell=True, + check=True, + capture_output=True, + text=True + ) + if self.verbose and result.stdout: + print(result.stdout) + return result + except subprocess.CalledProcessError as e: + print(f"Error during {description}:", file=sys.stderr) + print(e.stderr, file=sys.stderr) + sys.exit(1) + + def step1_dump_btf(self): + """Step 1: Dump BTF from vmlinux.""" + vmlinux_h = os.path.join(self.temp_dir, "vmlinux.h") + cmd = f"bpftool btf dump file {self.btf_source} format c > {vmlinux_h}" + self.run_command(cmd, "Dumping BTF from vmlinux") + return vmlinux_h + + def step2_preprocess_enums(self, input_file): + """Step 1.5: Preprocess enum definitions.""" + self.log("Preprocessing enum definitions...") + + with open(input_file, 'r') as f: + original_code = f.read() + + # Extract anonymous enums + enums = re.findall( + r'(? 
{output_file}" + self.run_command(cmd, "Running C preprocessor") + return output_file + + def step4_convert_to_ctypes(self, input_file): + """Step 3: Convert to Python ctypes using clang2py.""" + output_file = os.path.join(self.temp_dir, "vmlinux_raw.py") + cmd = ( + f"clang2py {input_file} -o {output_file} " + f"--clang-args=\"-fno-ms-extensions -I/usr/include -I/usr/include/linux\"" + ) + self.run_command(cmd, "Converting to Python ctypes") + return output_file + + def step5_postprocess(self, input_file): + """Step 4: Post-process the generated Python file.""" + self.log("Post-processing Python ctypes definitions...") + + with open(input_file, "r") as f: + data = f.read() + + # Remove lines like ('_45', ctypes.c_int64, 0) + data = re.sub(r"\('_[0-9]+',\s*ctypes\.[a-zA-Z0-9_]+,\s*0\),?\s*\n?", "", data) + + # Replace ('_20', ctypes.c_uint64, 64) → ('_20', ctypes.c_uint64) + data = re.sub(r"\('(_[0-9]+)',\s*(ctypes\.[a-zA-Z0-9_]+),\s*[0-9]+\)", r"('\1', \2)", data) + + # Replace ('_20', ctypes.c_char, 8) with ('_20', ctypes.c_uint8, 8) + data = re.sub( + r"(ctypes\.c_char)(\s*,\s*\d+\))", + r"ctypes.c_uint8\2", + data + ) + + # Remove ctypes. 
prefix from invalid entries + invalid_ctypes = ["bpf_iter_state", "_cache_type", "fs_context_purpose"] + for name in invalid_ctypes: + data = re.sub(rf"\bctypes\.{name}\b", name, data) + + with open(self.output_file, "w") as f: + f.write(data) + + self.log(f"Saved final output to {self.output_file}") + + def cleanup(self): + """Remove temporary files if not keeping them.""" + if not self.keep_intermediate and self.temp_dir != ".": + self.log(f"Cleaning up temporary directory: {self.temp_dir}") + import shutil + shutil.rmtree(self.temp_dir, ignore_errors=True) + + def convert(self): + """Run the complete conversion pipeline.""" + try: + self.log("Starting BTF to Python ctypes conversion...") + + # Check dependencies + self.check_dependencies() + + # Run conversion pipeline + vmlinux_h = self.step1_dump_btf() + vmlinux_processed_h = self.step2_preprocess_enums(vmlinux_h) + vmlinux_i = self.step3_run_preprocessor(vmlinux_processed_h) + vmlinux_raw_py = self.step4_convert_to_ctypes(vmlinux_i) + self.step5_postprocess(vmlinux_raw_py) + + print(f"\n✓ Conversion complete! 
Output saved to: {self.output_file}") + + except Exception as e: + print(f"\n✗ Error during conversion: {e}", file=sys.stderr) + sys.exit(1) + finally: + self.cleanup() + + def check_dependencies(self): + """Check if required tools are available.""" + self.log("Checking dependencies...") + + dependencies = { + "bpftool": "bpftool --version", + "clang": "clang --version", + "clang2py": "clang2py --version" + } + + missing = [] + for tool, cmd in dependencies.items(): + try: + subprocess.run( + cmd, + shell=True, + check=True, + capture_output=True + ) + except subprocess.CalledProcessError: + missing.append(tool) + + if missing: + print("Error: Missing required dependencies:", file=sys.stderr) + for tool in missing: + print(f" - {tool}", file=sys.stderr) + if "clang2py" in missing: + print("\nInstall ctypeslib2: pip install ctypeslib2", file=sys.stderr) + sys.exit(1) + + +def main(): + parser = argparse.ArgumentParser( + description="Convert Linux kernel BTF to Python ctypes definitions", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + %(prog)s + %(prog)s -o kernel_types.py + %(prog)s --btf-source /sys/kernel/btf/custom_module -k -v + """ + ) + + parser.add_argument( + "--btf-source", + default="/sys/kernel/btf/vmlinux", + help="Path to BTF source (default: /sys/kernel/btf/vmlinux)" + ) + + parser.add_argument( + "-o", "--output", + default="vmlinux.py", + help="Output Python file (default: vmlinux.py)" + ) + + parser.add_argument( + "-k", "--keep-intermediate", + action="store_true", + help="Keep intermediate files (vmlinux.h, vmlinux_processed.h, etc.)" + ) + + parser.add_argument( + "-v", "--verbose", + action="store_true", + help="Enable verbose output" + ) + + args = parser.parse_args() + + converter = BTFConverter( + btf_source=args.btf_source, + output_file=args.output, + keep_intermediate=args.keep_intermediate, + verbose=args.verbose + ) + + converter.convert() + + +if __name__ == "__main__": + main() From 
a27360482be0386ee1a44dcae85f8063aa68c918 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Wed, 8 Oct 2025 05:15:29 +0530 Subject: [PATCH 2/5] complete vmlinux transpiler. TODO: struct kioctx in the x86_64 vmlinux.h contains anonymous nested structs that do not transpile cleanly, so an extra rule has been added that extracts the anonymous structs of only that struct into external named definitions. Fix this in the future. --- examples/kprobes.py | 27 +++++++++ tests/c-form/kprobe.bpf.c | 19 ++++++ tools/vmlinux-gen.py | 121 ++++++++++++++++++++++++++++++++++++-- 3 files changed, 163 insertions(+), 4 deletions(-) create mode 100644 examples/kprobes.py create mode 100644 tests/c-form/kprobe.bpf.c diff --git a/examples/kprobes.py b/examples/kprobes.py new file mode 100644 index 00000000..3796d03f --- /dev/null +++ b/examples/kprobes.py @@ -0,0 +1,27 @@ +from pythonbpf import bpf, section, bpfglobal, BPF +from ctypes import c_void_p, c_int64 + + +@bpf +@section("kretprobe/do_unlinkat") +def hello_world(ctx: c_void_p) -> c_int64: + print("Hello, World!") + return c_int64(0) + +@bpf +@section("kprobe/do_unlinkat") +def hello_world(ctx: c_void_p) -> c_int64: + print("Hello, World!") + return c_int64(0) + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +b = BPF() +b.load_and_attach() +while True: + print("running") +# Now cat /sys/kernel/debug/tracing/trace_pipe to see results of unlink kprobe. 
diff --git a/tests/c-form/kprobe.bpf.c b/tests/c-form/kprobe.bpf.c new file mode 100644 index 00000000..d2d588d1 --- /dev/null +++ b/tests/c-form/kprobe.bpf.c @@ -0,0 +1,19 @@ +#include "vmlinux.h" +#include +#include + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; + +SEC("kprobe/do_unlinkat") +int kprobe_execve(struct pt_regs *ctx) +{ + bpf_printk("unlinkat created"); + return 0; +} + +SEC("kretprobe/do_unlinkat") +int kretprobe_execve(struct pt_regs *ctx) +{ + bpf_printk("unlinkat returned\n"); + return 0; +} diff --git a/tools/vmlinux-gen.py b/tools/vmlinux-gen.py index 6fbc975e..5bd913ac 100755 --- a/tools/vmlinux-gen.py +++ b/tools/vmlinux-gen.py @@ -6,9 +6,10 @@ This tool automates the process of: 1. Dumping BTF from vmlinux 2. Preprocessing enum definitions -3. Running C preprocessor -4. Converting to Python ctypes using clang2py -5. Post-processing the output +3. Processing struct kioctx to extract anonymous nested structs +4. Running C preprocessor +5. Converting to Python ctypes using clang2py +6. Post-processing the output Requirements: - bpftool @@ -96,6 +97,115 @@ def step2_preprocess_enums(self, input_file): return output_file + def step2_5_process_kioctx(self, input_file): + #TODO: this is a very bad bug and design decision. A single struct has an issue mostly. 
+ """Step 2.5: Process struct kioctx to extract nested anonymous structs.""" + self.log("Processing struct kioctx nested structs...") + + with open(input_file, 'r') as f: + content = f.read() + + # Pattern to match struct kioctx with its full body (handles multiple nesting levels) + kioctx_pattern = r'struct\s+kioctx\s*\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\}\s*;' + + def process_kioctx_replacement(match): + full_struct = match.group(0) + self.log(f"Found struct kioctx, length: {len(full_struct)} chars") + + # Extract the struct body (everything between outermost { and }) + body_match = re.search(r'struct\s+kioctx\s*\{(.*)\}\s*;', full_struct, re.DOTALL) + if not body_match: + return full_struct + + body = body_match.group(1) + + # Find all anonymous structs within the body + # Pattern: struct { ... } followed by ; (not a member name) + anon_struct_pattern = r'struct\s*\{[^}]*\}' + + anon_structs = [] + anon_counter = 4 # Start from 4, counting down to 1 + + def replace_anonymous_struct(m): + nonlocal anon_counter + anon_struct_content = m.group(0) + + # Extract the body of the anonymous struct + anon_body_match = re.search(r'struct\s*\{(.*)\}', anon_struct_content, re.DOTALL) + if not anon_body_match: + return anon_struct_content + + anon_body = anon_body_match.group(1) + + # Create the named struct definition + anon_name = f"__anon{anon_counter}" + member_name = f"a{anon_counter}" + + # Store the struct definition + anon_structs.append(f"struct {anon_name} {{{anon_body}}};") + + anon_counter -= 1 + + # Return the member declaration + return f"struct {anon_name} {member_name}" + + # Process the body, finding and replacing anonymous structs + # We need to be careful to only match anonymous structs followed by ; + processed_body = body + + # Find all occurrences and process them + pattern_with_semicolon = r'struct\s*\{([^}]*)\}\s*;' + matches = list(re.finditer(pattern_with_semicolon, body, re.DOTALL)) + + if not matches: + self.log("No anonymous structs found in 
kioctx") + return full_struct + + self.log(f"Found {len(matches)} anonymous struct(s)") + + # Process in reverse order to maintain string positions + for match in reversed(matches): + anon_struct_content = match.group(1) + start_pos = match.start() + end_pos = match.end() + + # Create the named struct definition + anon_name = f"__anon{anon_counter}" + member_name = f"a{anon_counter}" + + # Store the struct definition + anon_structs.insert(0, f"struct {anon_name} {{{anon_struct_content}}};") + + # Replace in the body + replacement = f"struct {anon_name} {member_name};" + processed_body = processed_body[:start_pos] + replacement + processed_body[end_pos:] + + anon_counter -= 1 + + # Rebuild the complete definition + if anon_structs: + # Prepend the anonymous struct definitions + anon_definitions = '\n'.join(anon_structs) + '\n\n' + new_struct = f"struct kioctx {{{processed_body}}};" + return anon_definitions + new_struct + else: + return full_struct + + # Apply the transformation + processed_content = re.sub( + kioctx_pattern, + process_kioctx_replacement, + content, + flags=re.DOTALL + ) + + output_file = os.path.join(self.temp_dir, "vmlinux_kioctx_processed.h") + with open(output_file, 'w') as f: + f.write(processed_content) + + self.log(f"Saved kioctx-processed output to {output_file}") + return output_file + def step3_run_preprocessor(self, input_file): """Step 2: Run C preprocessor.""" output_file = os.path.join(self.temp_dir, "vmlinux.i") @@ -161,7 +271,8 @@ def convert(self): # Run conversion pipeline vmlinux_h = self.step1_dump_btf() vmlinux_processed_h = self.step2_preprocess_enums(vmlinux_h) - vmlinux_i = self.step3_run_preprocessor(vmlinux_processed_h) + vmlinux_kioctx_h = self.step2_5_process_kioctx(vmlinux_processed_h) + vmlinux_i = self.step3_run_preprocessor(vmlinux_kioctx_h) vmlinux_raw_py = self.step4_convert_to_ctypes(vmlinux_i) self.step5_postprocess(vmlinux_raw_py) @@ -169,6 +280,8 @@ def convert(self): except Exception as e: print(f"\n✗ Error 
during conversion: {e}", file=sys.stderr) + import traceback + traceback.print_exc() sys.exit(1) finally: self.cleanup() From a3443ab1d5f169d9cdef3e156095c1550e883e59 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Wed, 8 Oct 2025 05:16:36 +0530 Subject: [PATCH 3/5] format chore Signed-off-by: varun-r-mallya --- examples/kprobes.py | 2 +- pythonbpf/functions_pass.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/kprobes.py b/examples/kprobes.py index 3796d03f..bb0a6784 100644 --- a/examples/kprobes.py +++ b/examples/kprobes.py @@ -10,7 +10,7 @@ def hello_world(ctx: c_void_p) -> c_int64: @bpf @section("kprobe/do_unlinkat") -def hello_world(ctx: c_void_p) -> c_int64: +def hello_world2(ctx: c_void_p) -> c_int64: print("Hello, World!") return c_int64(0) diff --git a/pythonbpf/functions_pass.py b/pythonbpf/functions_pass.py index d1ea5265..c4b0fb6f 100644 --- a/pythonbpf/functions_pass.py +++ b/pythonbpf/functions_pass.py @@ -410,7 +410,7 @@ def process_stmt( raise ValueError("Failed to evaluate return expression") if val[1] != ret_type: raise ValueError( - "Return type mismatch: expected " f"{ret_type}, got {val[1]}" + f"Return type mismatch: expected {ret_type}, got {val[1]}" ) builder.ret(val[0]) did_return = True @@ -420,8 +420,7 @@ def process_stmt( val = builder.load(var) if val.type != ret_type: raise ValueError( - "Return type mismatch: expected" - f"{ret_type}, got {val.type}" + f"Return type mismatch: expected{ret_type}, got {val.type}" ) builder.ret(val) did_return = True From 976af290af2c416e42b9215082d045ee8bba92ba Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Wed, 8 Oct 2025 05:17:59 +0530 Subject: [PATCH 4/5] Revert "format chore" This reverts commit a3443ab1d5f169d9cdef3e156095c1550e883e59. 
--- examples/kprobes.py | 2 +- pythonbpf/functions_pass.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/kprobes.py b/examples/kprobes.py index bb0a6784..3796d03f 100644 --- a/examples/kprobes.py +++ b/examples/kprobes.py @@ -10,7 +10,7 @@ def hello_world(ctx: c_void_p) -> c_int64: @bpf @section("kprobe/do_unlinkat") -def hello_world2(ctx: c_void_p) -> c_int64: +def hello_world(ctx: c_void_p) -> c_int64: print("Hello, World!") return c_int64(0) diff --git a/pythonbpf/functions_pass.py b/pythonbpf/functions_pass.py index c4b0fb6f..d1ea5265 100644 --- a/pythonbpf/functions_pass.py +++ b/pythonbpf/functions_pass.py @@ -410,7 +410,7 @@ def process_stmt( raise ValueError("Failed to evaluate return expression") if val[1] != ret_type: raise ValueError( - f"Return type mismatch: expected {ret_type}, got {val[1]}" + "Return type mismatch: expected " f"{ret_type}, got {val[1]}" ) builder.ret(val[0]) did_return = True @@ -420,7 +420,8 @@ def process_stmt( val = builder.load(var) if val.type != ret_type: raise ValueError( - f"Return type mismatch: expected{ret_type}, got {val.type}" + "Return type mismatch: expected" + f"{ret_type}, got {val.type}" ) builder.ret(val) did_return = True From 8a69e05ee294b4304b96257752df07a0a3e24594 Mon Sep 17 00:00:00 2001 From: varun-r-mallya Date: Wed, 8 Oct 2025 05:18:49 +0530 Subject: [PATCH 5/5] fix duplicate variable in example Signed-off-by: varun-r-mallya --- examples/kprobes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/kprobes.py b/examples/kprobes.py index 3796d03f..bb0a6784 100644 --- a/examples/kprobes.py +++ b/examples/kprobes.py @@ -10,7 +10,7 @@ def hello_world(ctx: c_void_p) -> c_int64: @bpf @section("kprobe/do_unlinkat") -def hello_world(ctx: c_void_p) -> c_int64: +def hello_world2(ctx: c_void_p) -> c_int64: print("Hello, World!") return c_int64(0)