diff --git a/BCC-Examples/disksnoop.ipynb b/BCC-Examples/disksnoop.ipynb new file mode 100644 index 00000000..3e993d37 --- /dev/null +++ b/BCC-Examples/disksnoop.ipynb @@ -0,0 +1,122 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "c3520e58-e50f-4bc1-8f9d-a6fecbf6e9f0", + "metadata": {}, + "outputs": [], + "source": [ + "from vmlinux import struct_request, struct_pt_regs\n", + "from pythonbpf import bpf, section, bpfglobal, map, BPF\n", + "from pythonbpf.helper import ktime\n", + "from pythonbpf.maps import HashMap\n", + "from ctypes import c_int64, c_uint64, c_int32\n", + "\n", + "REQ_WRITE = 1\n", + "\n", + "\n", + "@bpf\n", + "@map\n", + "def start() -> HashMap:\n", + " return HashMap(key=c_uint64, value=c_uint64, max_entries=10240)\n", + "\n", + "\n", + "@bpf\n", + "@section(\"kprobe/blk_mq_end_request\")\n", + "def trace_completion(ctx: struct_pt_regs) -> c_int64:\n", + " # Get request pointer from first argument\n", + " req_ptr = ctx.di\n", + " req = struct_request(ctx.di)\n", + " # Print: data_len, cmd_flags, latency_us\n", + " data_len = req.__data_len\n", + " cmd_flags = req.cmd_flags\n", + " # Lookup start timestamp\n", + " req_tsp = start.lookup(req_ptr)\n", + " if req_tsp:\n", + " # Calculate delta in nanoseconds\n", + " delta = ktime() - req_tsp\n", + "\n", + " # Convert to microseconds for printing\n", + " delta_us = delta // 1000\n", + "\n", + " print(f\"{data_len} {cmd_flags:x} {delta_us}\\n\")\n", + "\n", + " # Delete the entry\n", + " start.delete(req_ptr)\n", + "\n", + " return c_int64(0)\n", + "\n", + "\n", + "@bpf\n", + "@section(\"kprobe/blk_mq_start_request\")\n", + "def trace_start(ctx1: struct_pt_regs) -> c_int32:\n", + " req = ctx1.di\n", + " ts = ktime()\n", + " start.update(req, ts)\n", + " return c_int32(0)\n", + "\n", + "\n", + "@bpf\n", + "@bpfglobal\n", + "def LICENSE() -> str:\n", + " return \"GPL\"\n", + "\n", + "\n", + "b = BPF()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97040f73-98e0-4993-94c6-125d1b42d931", + "metadata": {}, + "outputs": [], + "source": [ + "b.load()\n", + "b.attach_all()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1bd4f51-fa25-42e1-877c-e48a2605189f", + "metadata": {}, + "outputs": [], + "source": [ + "from pythonbpf import trace_pipe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96b4b59b-b0db-4952-9534-7a714f685089", + "metadata": {}, + "outputs": [], + "source": [ + "trace_pipe()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/BCC-Examples/disksnoop.py b/BCC-Examples/disksnoop.py new file mode 100644 index 00000000..4ef4d247 --- /dev/null +++ b/BCC-Examples/disksnoop.py @@ -0,0 +1,61 @@ +from vmlinux import struct_request, struct_pt_regs +from pythonbpf import bpf, section, bpfglobal, compile_to_ir, compile, map +from pythonbpf.helper import ktime +from pythonbpf.maps import HashMap +import logging +from ctypes import c_int64, c_uint64, c_int32 + +# Constants +REQ_WRITE = 1 # from include/linux/blk_types.h + + +@bpf +@map +def start() -> HashMap: + return HashMap(key=c_uint64, value=c_uint64, max_entries=10240) + + +@bpf +@section("kprobe/blk_mq_end_request") +def trace_completion(ctx: struct_pt_regs) -> c_int64: + # Get request pointer from first argument + req_ptr = ctx.di + req = struct_request(ctx.di) + # Print: data_len, cmd_flags, latency_us + data_len = req.__data_len + cmd_flags = req.cmd_flags + # Lookup start timestamp + req_tsp = start.lookup(req_ptr) + if req_tsp: + # Calculate delta in nanoseconds + delta = ktime() - req_tsp + + # Convert to microseconds for printing + delta_us = delta // 1000 + + print(f"{data_len} {cmd_flags:x} {delta_us}\n") + + # Delete the entry + start.delete(req_ptr) + + return c_int64(0) + + +@bpf +@section("kprobe/blk_mq_start_request") +def trace_start(ctx1: struct_pt_regs) -> c_int32: + req = ctx1.di + ts = ktime() + start.update(req, ts) + return c_int32(0) + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +if __name__ == "__main__": + compile_to_ir("disksnoop.py", "disksnoop.ll", loglevel=logging.INFO) + compile() diff --git a/pythonbpf/allocation_pass.py b/pythonbpf/allocation_pass.py index fc6f21eb..db6f0bac 100644 --- a/pythonbpf/allocation_pass.py +++ b/pythonbpf/allocation_pass.py @@ -114,9 +114,22 @@ def _allocate_for_call( # Struct constructors elif call_type in structs_sym_tab: struct_info = structs_sym_tab[call_type] - var = builder.alloca(struct_info.ir_type, name=var_name) - local_sym_tab[var_name] = LocalSymbol(var, struct_info.ir_type, call_type) - logger.info(f"Pre-allocated {var_name} for struct {call_type}") + if len(rval.args) == 0: + # Zero-arg constructor: allocate the struct itself + var = builder.alloca(struct_info.ir_type, name=var_name) + local_sym_tab[var_name] = LocalSymbol( + var, struct_info.ir_type, call_type + ) + logger.info(f"Pre-allocated {var_name} for struct {call_type}") + else: + # Pointer cast: allocate as pointer to struct + ptr_type = ir.PointerType(struct_info.ir_type) + var = builder.alloca(ptr_type, name=var_name) + var.align = 8 + local_sym_tab[var_name] = LocalSymbol(var, ptr_type, call_type) + logger.info( + f"Pre-allocated {var_name} for struct pointer cast to {call_type}" + ) elif VmlinuxHandlerRegistry.is_vmlinux_struct(call_type): # When calling struct_name(pointer), we're doing a cast, not construction @@ -371,6 +384,7 @@ def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_ f"Could not determine size for ctypes field {field_name}: {e}" ) actual_ir_type = ir.IntType(64) + field_size_bits = 64 # Check if it's a nested vmlinux struct or complex type elif field.type.__module__ == "vmlinux": @@ -379,23 +393,34 @@ def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_ field.ctype_complex_type, ctypes._Pointer ): actual_ir_type = ir.IntType(64) # Pointer is always 64-bit + field_size_bits = 64 # For embedded structs, this is more complex - might need different handling else: logger.warning( f"Field {field_name} is a nested vmlinux struct, using i64 for now" ) actual_ir_type = ir.IntType(64) + field_size_bits = 64 else: logger.warning( f"Unknown field type module {field.type.__module__} for {field_name}" ) actual_ir_type = ir.IntType(64) + field_size_bits = 64 + + # Pre-allocate the tmp storage used by load_struct_field (so we don't alloca inside handler) + tmp_name = f"{struct_var}_{field_name}_tmp" + tmp_ir_type = ir.IntType(field_size_bits) + tmp_var = builder.alloca(tmp_ir_type, name=tmp_name) + tmp_var.align = tmp_ir_type.width // 8 + local_sym_tab[tmp_name] = LocalSymbol(tmp_var, tmp_ir_type) + logger.info( + f"Pre-allocated temp {tmp_name} (i{field_size_bits}) for vmlinux field read {vmlinux_struct_name}.{field_name}" + ) - # Allocate with the actual IR type + # Allocate with the actual IR type for the destination var var = _allocate_with_type(builder, var_name, actual_ir_type) - local_sym_tab[var_name] = LocalSymbol( - var, actual_ir_type, field - ) # <-- Store Field metadata + local_sym_tab[var_name] = LocalSymbol(var, actual_ir_type, field) logger.info( f"Pre-allocated {var_name} as {actual_ir_type} from vmlinux struct {vmlinux_struct_name}.{field_name}" diff --git a/pythonbpf/assign_pass.py b/pythonbpf/assign_pass.py index 5d73cf3e..412af932 100644 --- a/pythonbpf/assign_pass.py +++ b/pythonbpf/assign_pass.py @@ -174,6 +174,23 @@ def handle_variable_assignment( f"Type mismatch: vmlinux struct pointer requires i64, got {var_type}" ) return False + # Handle user-defined struct pointer casts + # val_type is a string (struct name), var_type is a pointer to the struct + if isinstance(val_type, str) and val_type in structs_sym_tab: + struct_info = structs_sym_tab[val_type] + expected_ptr_type = ir.PointerType(struct_info.ir_type) + + # Check if var_type matches the expected pointer type + if isinstance(var_type, ir.PointerType) and var_type == expected_ptr_type: + # val is already the correct pointer type from inttoptr/bitcast + builder.store(val, var_ptr) + logger.info(f"Assigned user-defined struct pointer cast to {var_name}") + return True + else: + logger.error( + f"Type mismatch: user-defined struct pointer cast requires pointer type, got {var_type}" + ) + return False if isinstance(val_type, Field): logger.info("Handling assignment to struct field") # Special handling for struct_xdp_md i32 fields that are zero-extended to i64 diff --git a/pythonbpf/expr/expr_pass.py b/pythonbpf/expr/expr_pass.py index 489de9ff..9f3bfa43 100644 --- a/pythonbpf/expr/expr_pass.py +++ b/pythonbpf/expr/expr_pass.py @@ -241,7 +241,11 @@ def get_operand_value( var_type = var.type base_type, depth = get_base_type_and_depth(var_type) logger.info(f"var is {var}, base_type is {base_type}, depth is {depth}") - val = deref_to_depth(func, builder, var, depth) + if depth == 1: + val = builder.load(var) + return val + else: + val = deref_to_depth(func, builder, var, depth) return val else: # Check if it's a vmlinux enum/constant @@ -618,7 +622,7 @@ def _handle_boolean_op( # ============================================================================ -# VMLinux casting +# Struct casting (including vmlinux struct casting) # ============================================================================ @@ -666,15 +670,83 @@ def _handle_vmlinux_cast( # Cast the integer/value to a pointer to the struct # If arg_val is an integer type, we need to inttoptr it ptr_type = ir.PointerType() - # TODO: add a integer check here later - if ctypes_to_ir(arg_type.type.__name__): - # Cast integer to pointer + # TODO: add a field value type check here + # print(arg_type) + if isinstance(arg_type, Field): + if ctypes_to_ir(arg_type.type.__name__): + # Cast integer to pointer + casted_ptr = builder.inttoptr(arg_val, ptr_type) + else: + logger.error(f"Unsupported type for vmlinux cast: {arg_type}") + return None + else: casted_ptr = builder.inttoptr(arg_val, ptr_type) + + return casted_ptr, vmlinux_struct_type + + +def _handle_user_defined_struct_cast( + func, + module, + builder, + expr, + local_sym_tab, + map_sym_tab, + structs_sym_tab, +): + """Handle user-defined struct cast expressions like iphdr(nh). + + This casts a pointer/integer value to a pointer to the user-defined struct, + similar to how vmlinux struct casts work but for user-defined @struct types. + """ + if len(expr.args) != 1: + logger.info("User-defined struct cast takes exactly one argument") + return None + + # Get the struct name + struct_name = expr.func.id + + if struct_name not in structs_sym_tab: + logger.error(f"Struct {struct_name} not found in structs_sym_tab") + return None + + struct_info = structs_sym_tab[struct_name] + + # Evaluate the argument (e.g., + # an address/pointer value) + arg_result = eval_expr( + func, + module, + builder, + expr.args[0], + local_sym_tab, + map_sym_tab, + structs_sym_tab, + ) + + if arg_result is None: + logger.info("Failed to evaluate argument to user-defined struct cast") + return None + + arg_val, arg_type = arg_result + + # Cast the integer/pointer value to a pointer to the struct type + # The struct pointer type is a pointer to the struct's IR type + struct_ptr_type = ir.PointerType(struct_info.ir_type) + + # If arg_val is an integer type (like i64), convert to pointer using inttoptr + if isinstance(arg_val.type, ir.IntType): + casted_ptr = builder.inttoptr(arg_val, struct_ptr_type) + logger.info(f"Cast integer to pointer for struct {struct_name}") + elif isinstance(arg_val.type, ir.PointerType): + # If already a pointer, bitcast to the struct pointer type + casted_ptr = builder.bitcast(arg_val, struct_ptr_type) + logger.info(f"Bitcast pointer to struct pointer for {struct_name}") else: - logger.error(f"Unsupported type for vmlinux cast: {arg_type}") + logger.error(f"Unsupported type for user-defined struct cast: {arg_val.type}") return None - return casted_ptr, vmlinux_struct_type + return casted_ptr, struct_name # ============================================================================ @@ -722,6 +794,16 @@ def eval_expr( map_sym_tab, structs_sym_tab, ) + if isinstance(expr.func, ast.Name) and (expr.func.id in structs_sym_tab): + return _handle_user_defined_struct_cast( + func, + module, + builder, + expr, + local_sym_tab, + map_sym_tab, + structs_sym_tab, + ) result = CallHandlerRegistry.handle_call( expr, module, builder, func, local_sym_tab, map_sym_tab, structs_sym_tab diff --git a/pythonbpf/type_deducer.py b/pythonbpf/type_deducer.py index fd589ae0..2e4c77f4 100644 --- a/pythonbpf/type_deducer.py +++ b/pythonbpf/type_deducer.py @@ -18,6 +18,10 @@ "c_longlong": ir.IntType(64), "c_uint": ir.IntType(32), "c_int": ir.IntType(32), + "c_ushort": ir.IntType(16), + "c_short": ir.IntType(16), + "c_ubyte": ir.IntType(8), + "c_byte": ir.IntType(8), # Not so sure about this one "str": ir.PointerType(ir.IntType(8)), } diff --git a/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py b/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py index c26cac9e..3ab07cbd 100644 --- a/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py +++ b/pythonbpf/vmlinux_parser/vmlinux_exports_handler.py @@ -77,7 +77,7 @@ def handle_vmlinux_enum(self, name): return None def get_vmlinux_enum_value(self, name): - """Handle vmlinux enum constants by returning LLVM IR constants""" + """Handle vmlinux.enum constants by returning LLVM IR constants""" if self.is_vmlinux_enum(name): value = self.vmlinux_symtab[name].value logger.info(f"The value of vmlinux enum {name} = {value}") @@ -119,9 +119,18 @@ def handle_vmlinux_struct_field( # Load the struct pointer from the local variable struct_ptr = builder.load(var_info.var) + # Determine the preallocated tmp name that assignment pass should have created + tmp_name = f"{struct_var_name}_{field_name}_tmp" + # Use bpf_probe_read_kernel for non-context struct field access field_value = self.load_struct_field( - builder, struct_ptr, globvar_ir, field_data, struct_name + builder, + struct_ptr, + globvar_ir, + field_data, + struct_name, + local_sym_tab, + tmp_name, ) # Return field value and field type return field_value, field_data @@ -130,7 +139,13 @@ def handle_vmlinux_struct_field( @staticmethod def load_struct_field( - builder, struct_ptr_int, offset_global, field_data, struct_name=None + builder, + struct_ptr_int, + offset_global, + field_data, + struct_name=None, + local_sym_tab=None, + tmp_name: str | None = None, ): """ Generate LLVM IR to load a field from a regular (non-context) struct using bpf_probe_read_kernel. @@ -141,6 +156,8 @@ def load_struct_field( offset_global: Global variable containing the field offset (i64) field_data: contains data about the field struct_name: Name of the struct being accessed (optional) + local_sym_tab: symbol table (optional) - used to locate preallocated tmp storage + tmp_name: name of the preallocated temporary storage to use (preferred) Returns: The loaded value """ @@ -203,9 +220,18 @@ def load_struct_field( else: logger.warning("Complex vmlinux field type, using default 64 bits") - # Allocate local storage for the field value - local_storage = builder.alloca(ir.IntType(int_width)) - local_storage_i8_ptr = builder.bitcast(local_storage, i8_ptr_type) + # Use preallocated temporary storage if provided by allocation pass + + local_storage_i8_ptr = None + if tmp_name and local_sym_tab and tmp_name in local_sym_tab: + # Expect the tmp to be an alloca created during allocation pass + tmp_alloca = local_sym_tab[tmp_name].var + local_storage_i8_ptr = builder.bitcast(tmp_alloca, i8_ptr_type) + else: + # Fallback: allocate inline (not ideal, but preserves behavior) + local_storage = builder.alloca(ir.IntType(int_width)) + local_storage_i8_ptr = builder.bitcast(local_storage, i8_ptr_type) + logger.warning(f"Temp storage '{tmp_name}' not found. Allocating inline") # Use bpf_probe_read_kernel to safely read the field # This generates: @@ -219,7 +245,9 @@ def load_struct_field( ) # Load the value from local storage - value = builder.load(local_storage) + value = builder.load( + builder.bitcast(local_storage_i8_ptr, ir.PointerType(ir.IntType(int_width))) + ) # Zero-extend i32 to i64 if needed if needs_zext: diff --git a/tests/c-form/disksnoop.bpf.c b/tests/c-form/disksnoop.bpf.c new file mode 100644 index 00000000..ca6b3df8 --- /dev/null +++ b/tests/c-form/disksnoop.bpf.c @@ -0,0 +1,66 @@ +// disksnoop.bpf.c +// eBPF program (compile with: clang -O2 -g -target bpf -c disksnoop.bpf.c -o disksnoop.bpf.o) + +#include "vmlinux.h" +#include +#include + +char LICENSE[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u64); + __type(value, __u64); + __uint(max_entries, 10240); +} start_map SEC(".maps"); + +/* kprobe: record start timestamp keyed by request pointer */ +SEC("kprobe/blk_mq_start_request") +int trace_start(struct pt_regs *ctx) +{ + /* request * is first arg */ + __u64 reqp = (__u64)(ctx->di); + __u64 ts = bpf_ktime_get_ns(); + + bpf_map_update_elem(&start_map, &reqp, &ts, BPF_ANY); + +// /* optional debug: + bpf_printk("start: req=%llu ts=%llu\n", reqp, ts); +// */ + return 0; +} + +/* completion: compute latency and print data_len, cmd_flags, latency_us */ +SEC("kprobe/blk_mq_end_request") +int trace_completion(struct pt_regs *ctx) +{ + __u64 reqp = (__u64)(ctx->di); + __u64 *tsp; + __u64 now_ns; + __u64 delta_ns; + __u64 delta_us = 0; + bpf_printk("%lld", reqp); + tsp = bpf_map_lookup_elem(&start_map, &reqp); + if (!tsp) + return 0; + + now_ns = bpf_ktime_get_ns(); + delta_ns = now_ns - *tsp; + delta_us = delta_ns / 1000; + + /* read request fields using CO-RE; needs vmlinux.h/BTF */ + __u32 data_len = 0; + __u32 cmd_flags = 0; + + /* __data_len is usually a 32/64-bit; use CORE read to be safe */ + data_len = ( __u32 ) BPF_CORE_READ((struct request *)reqp, __data_len); + cmd_flags = ( __u32 ) BPF_CORE_READ((struct request *)reqp, cmd_flags); + + /* print: " " */ + bpf_printk("%u %x %llu\n", data_len, cmd_flags, delta_us); + + /* remove from map */ + bpf_map_delete_elem(&start_map, &reqp); + + return 0; +} diff --git a/tests/c-form/xdp_test.bpf.c b/tests/c-form/xdp_test.bpf.c new file mode 100644 index 00000000..e553c37b --- /dev/null +++ b/tests/c-form/xdp_test.bpf.c @@ -0,0 +1,36 @@ +#include "vmlinux.h" +#include +#include +#include +#include + +struct fake_iphdr { + unsigned short useless; + unsigned short tot_len; + unsigned short id; + unsigned short frag_off; + unsigned char ttl; + unsigned char protocol; + unsigned short check; + unsigned int saddr; + unsigned int daddr; +}; + +SEC("xdp") +int xdp_prog(struct xdp_md *ctx) { + unsigned long data = ctx->data; + unsigned long data_end = ctx->data_end; + + if (data + sizeof(struct ethhdr) + sizeof(struct fake_iphdr) <= data_end) { + struct fake_iphdr *iph = (void *)data + sizeof(struct ethhdr); + + bpf_printk("%d", iph->saddr); + + return XDP_PASS; + } else { + return XDP_ABORTED; + } + struct task_struct * a = btf_bpf_get_current_task_btf(); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tests/failing_tests/xdp/xdp_test_1.py b/tests/failing_tests/xdp/xdp_test_1.py new file mode 100644 index 00000000..302a6174 --- /dev/null +++ b/tests/failing_tests/xdp/xdp_test_1.py @@ -0,0 +1,46 @@ +from vmlinux import XDP_PASS, XDP_ABORTED +from vmlinux import ( + struct_xdp_md, +) +from pythonbpf import bpf, section, bpfglobal, compile, compile_to_ir, struct +from ctypes import c_int64, c_ubyte, c_ushort, c_uint32, c_void_p + + +@bpf +@struct +class iphdr: + useless: c_ushort + tot_len: c_ushort + id: c_ushort + frag_off: c_ushort + ttl: c_ubyte + protocol: c_ubyte + check: c_ushort + saddr: c_uint32 + daddr: c_uint32 + + +@bpf +@section("xdp") +def ip_detector(ctx: struct_xdp_md) -> c_int64: + data = c_void_p(ctx.data) + data_end = c_void_p(ctx.data_end) + if data + 34 < data_end: + hdr = data + 14 + iph = iphdr(hdr) + addr = iph.saddr + print(f"ipaddress: {addr}") + else: + return c_int64(XDP_ABORTED) + + return c_int64(XDP_PASS) + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +compile_to_ir("xdp_test_1.py", "xdp_test_1.ll") +compile() diff --git a/tests/passing_tests/assign/ptr_to_char_array.py b/tests/passing_tests/assign/ptr_to_char_array.py index 6a090be2..90daae75 100644 --- a/tests/passing_tests/assign/ptr_to_char_array.py +++ b/tests/passing_tests/assign/ptr_to_char_array.py @@ -1,4 +1,4 @@ -from pythonbpf import bpf, struct, section, bpfglobal +from pythonbpf import bpf, struct, section, bpfglobal, compile from pythonbpf.helper import comm from ctypes import c_void_p, c_int64 @@ -26,3 +26,6 @@ def hello(ctx: c_void_p) -> c_int64: @bpfglobal def LICENSE() -> str: return "GPL" + + +compile()