From 18d62d605a62eb5768b3b7f4115065cfeccd4716 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Mon, 13 Oct 2025 20:11:27 +0530 Subject: [PATCH 01/38] Add hello_world BCC example --- BCC-Examples/hello_world.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 BCC-Examples/hello_world.py diff --git a/BCC-Examples/hello_world.py b/BCC-Examples/hello_world.py new file mode 100644 index 00000000..447cc1b6 --- /dev/null +++ b/BCC-Examples/hello_world.py @@ -0,0 +1,18 @@ +from pythonbpf import bpf, section, bpfglobal, compile +from ctypes import c_void_p, c_int64 + + +@bpf +@section("tracepoint/syscalls/sys_enter_clone") +def hello_world(ctx: c_void_p) -> c_int64: + print("Hello, World!") + return c_int64(0) + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +compile() From edc33733d9bbf00e85fb81db96961b3392903338 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Tue, 14 Oct 2025 03:51:43 +0530 Subject: [PATCH 02/38] Add trace_pipe utility --- BCC-Examples/hello_world.py | 8 ++++++-- BCC-Examples/sys_sync.py | 19 +++++++++++++++++++ pythonbpf/__init__.py | 2 ++ pythonbpf/utils.py | 9 +++++++++ 4 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 BCC-Examples/sys_sync.py create mode 100644 pythonbpf/utils.py diff --git a/BCC-Examples/hello_world.py b/BCC-Examples/hello_world.py index 447cc1b6..a44753c7 100644 --- a/BCC-Examples/hello_world.py +++ b/BCC-Examples/hello_world.py @@ -1,4 +1,4 @@ -from pythonbpf import bpf, section, bpfglobal, compile +from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe from ctypes import c_void_p, c_int64 @@ -15,4 +15,8 @@ def LICENSE() -> str: return "GPL" -compile() +# compile() +b = BPF() +b.load_and_attach() + +trace_pipe() diff --git a/BCC-Examples/sys_sync.py b/BCC-Examples/sys_sync.py new file mode 100644 index 00000000..1d872eb7 --- /dev/null +++ b/BCC-Examples/sys_sync.py @@ -0,0 +1,19 @@ +from pythonbpf import bpf, section, bpfglobal, compile +from 
ctypes import c_void_p, c_int64 + + +@bpf +@section("tracepoint/syscalls/sys_enter_sync") +def hello_world(ctx: c_void_p) -> c_int64: + print("sys_sync() called") + return c_int64(0) + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +compile() +print("Tracing sys_sync()... Ctrl-C to end.") diff --git a/pythonbpf/__init__.py b/pythonbpf/__init__.py index 022af1ba..3eda1262 100644 --- a/pythonbpf/__init__.py +++ b/pythonbpf/__init__.py @@ -1,5 +1,6 @@ from .decorators import bpf, map, section, bpfglobal, struct from .codegen import compile_to_ir, compile, BPF +from .utils import trace_pipe __all__ = [ "bpf", @@ -10,4 +11,5 @@ "compile_to_ir", "compile", "BPF", + "trace_pipe", ] diff --git a/pythonbpf/utils.py b/pythonbpf/utils.py new file mode 100644 index 00000000..8664d806 --- /dev/null +++ b/pythonbpf/utils.py @@ -0,0 +1,9 @@ +import subprocess + + +def trace_pipe(): + """Util to read from the trace pipe.""" + try: + subprocess.run(["cat", "/sys/kernel/tracing/trace_pipe"]) + except KeyboardInterrupt: + print("Tracing stopped.") From 37d1e1b1433e10bf38d00101d774e20c313a0b6b Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Tue, 14 Oct 2025 03:54:02 +0530 Subject: [PATCH 03/38] Add sys_sync BCC example --- BCC-Examples/sys_sync.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/BCC-Examples/sys_sync.py b/BCC-Examples/sys_sync.py index 1d872eb7..7749caec 100644 --- a/BCC-Examples/sys_sync.py +++ b/BCC-Examples/sys_sync.py @@ -1,4 +1,4 @@ -from pythonbpf import bpf, section, bpfglobal, compile +from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe from ctypes import c_void_p, c_int64 @@ -15,5 +15,6 @@ def LICENSE() -> str: return "GPL" -compile() +BPF().load_and_attach() print("Tracing sys_sync()... 
Ctrl-C to end.") +trace_pipe() From 263402d137ee061e9583b0569eb7f6c46bc92abc Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Tue, 14 Oct 2025 04:22:17 +0530 Subject: [PATCH 04/38] Add trace_fields --- pythonbpf/__init__.py | 3 ++- pythonbpf/utils.py | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/pythonbpf/__init__.py b/pythonbpf/__init__.py index 3eda1262..5f963394 100644 --- a/pythonbpf/__init__.py +++ b/pythonbpf/__init__.py @@ -1,6 +1,6 @@ from .decorators import bpf, map, section, bpfglobal, struct from .codegen import compile_to_ir, compile, BPF -from .utils import trace_pipe +from .utils import trace_pipe, trace_fields __all__ = [ "bpf", @@ -12,4 +12,5 @@ "compile", "BPF", "trace_pipe", + "trace_fields", ] diff --git a/pythonbpf/utils.py b/pythonbpf/utils.py index 8664d806..8dbc5e56 100644 --- a/pythonbpf/utils.py +++ b/pythonbpf/utils.py @@ -1,4 +1,9 @@ import subprocess +import re + +TRACE_PATTERN = re.compile( + rb"^(.{1,16}?)-(\d+)\s+\[(\d+)\]\s+([a-zA-Z.]+)\s+([0-9.]+):\s+.*?:\s+(.*)$" +) def trace_pipe(): @@ -7,3 +12,20 @@ def trace_pipe(): subprocess.run(["cat", "/sys/kernel/tracing/trace_pipe"]) except KeyboardInterrupt: print("Tracing stopped.") + + +def trace_fields(): + """Parse one line from trace_pipe into fields.""" + with open("/sys/kernel/tracing/trace_pipe", "rb", buffering=0) as f: + while True: + line = f.readline().rstrip() + + if not line or line.startswith(b"CPU:"): + continue + + match = TRACE_PATTERN.match(line) + if not match: + raise ValueError("Cannot parse trace line") + + task, pid, cpu, flags, ts, msg = match.groups() + return (task.strip(), int(pid), int(cpu), flags, float(ts), msg) From 903654daffbb0fc80f54aa9e1452ae6cde0d5737 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Tue, 14 Oct 2025 04:42:12 +0530 Subject: [PATCH 05/38] Add hello_fields BCC Example --- BCC-Examples/hello_fields.py | 33 ++++++++++++++++++++++++++ BCC-Examples/hello_world.py | 1 - 
pythonbpf/utils.py | 45 ++++++++++++++++++++++++++++-------- 3 files changed, 68 insertions(+), 11 deletions(-) create mode 100644 BCC-Examples/hello_fields.py diff --git a/BCC-Examples/hello_fields.py b/BCC-Examples/hello_fields.py new file mode 100644 index 00000000..3a52fc11 --- /dev/null +++ b/BCC-Examples/hello_fields.py @@ -0,0 +1,33 @@ +from pythonbpf import bpf, section, bpfglobal, BPF, trace_fields +from ctypes import c_void_p, c_int64 + + +@bpf +@section("tracepoint/syscalls/sys_enter_clone") +def hello_world(ctx: c_void_p) -> c_int64: + print("Hello, World!") + return c_int64(0) + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +# compile +b = BPF() +b.load_and_attach() + +# header +print(f"{'TIME(s)':<18} {'COMM':<16} {'PID':<6} {'MESSAGE'}") + +# format output +while True: + try: + (task, pid, cpu, flags, ts, msg) = trace_fields() + except ValueError: + continue + except KeyboardInterrupt: + exit() + print(f"{ts:<18} {task:<16} {pid:<6} {msg}") diff --git a/BCC-Examples/hello_world.py b/BCC-Examples/hello_world.py index a44753c7..ff51731e 100644 --- a/BCC-Examples/hello_world.py +++ b/BCC-Examples/hello_world.py @@ -15,7 +15,6 @@ def LICENSE() -> str: return "GPL" -# compile() b = BPF() b.load_and_attach() diff --git a/pythonbpf/utils.py b/pythonbpf/utils.py index 8dbc5e56..47e99132 100644 --- a/pythonbpf/utils.py +++ b/pythonbpf/utils.py @@ -1,9 +1,4 @@ import subprocess -import re - -TRACE_PATTERN = re.compile( - rb"^(.{1,16}?)-(\d+)\s+\[(\d+)\]\s+([a-zA-Z.]+)\s+([0-9.]+):\s+.*?:\s+(.*)$" -) def trace_pipe(): @@ -20,12 +15,42 @@ def trace_fields(): while True: line = f.readline().rstrip() - if not line or line.startswith(b"CPU:"): + if not line: + continue + + # Skip lost event lines + if line.startswith(b"CPU:"): continue - match = TRACE_PATTERN.match(line) - if not match: + # Parse BCC-style: first 16 bytes = task + task = line[:16].lstrip().decode("utf-8") + line = line[17:] # Skip past task field and space + + # Find the colon that 
ends "pid cpu flags timestamp" + ts_end = line.find(b":") + if ts_end == -1: raise ValueError("Cannot parse trace line") - task, pid, cpu, flags, ts, msg = match.groups() - return (task.strip(), int(pid), int(cpu), flags, float(ts), msg) + # Split "pid [cpu] flags timestamp" + try: + parts = line[:ts_end].split() + if len(parts) < 4: + raise ValueError("Not enough fields") + + pid = int(parts[0]) + cpu = parts[1][1:-1] # Remove brackets from [cpu] + cpu = int(cpu) + flags = parts[2] + ts = float(parts[3]) + except (ValueError, IndexError): + raise ValueError("Cannot parse trace line") + + # Get message: skip ": symbol:" part + line = line[ts_end + 1 :] # Skip first ":" + sym_end = line.find(b":") + if sym_end != -1: + msg = line[sym_end + 2 :].decode("utf-8") # Skip ": " after symbol + else: + msg = line.lstrip().decode("utf-8") + + return (task, pid, cpu, flags, ts, msg) From d7329ad3d75169171a14a842bab84cd97f41e623 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Tue, 14 Oct 2025 16:07:55 +0530 Subject: [PATCH 06/38] Add BCC sync_timing example --- BCC-Examples/sync_timing.py | 52 +++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 BCC-Examples/sync_timing.py diff --git a/BCC-Examples/sync_timing.py b/BCC-Examples/sync_timing.py new file mode 100644 index 00000000..37d6a036 --- /dev/null +++ b/BCC-Examples/sync_timing.py @@ -0,0 +1,52 @@ +from pythonbpf import bpf, map, section, bpfglobal, BPF, trace_fields +from pythonbpf.helper import ktime +from pythonbpf.maps import HashMap + +from ctypes import c_void_p, c_int64 + + +@bpf +@map +def last() -> HashMap: + return HashMap(key=c_int64, value=c_int64, max_entries=1) + + +@bpf +@section("tracepoint/syscalls/sys_enter_sync") +def do_trace(ctx: c_void_p) -> c_int64: + key = 0 + tsp = last.lookup(key) + if tsp: + delta = ktime() - tsp + if delta < 1000000000: + time_ms = delta // 1000000 + print(f"{time_ms}") + last.delete(key) + else: + last.update(key, ktime()) + return 
c_int64(0) + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +# compile +b = BPF() +b.load_and_attach() + +print("Tracing for quick sync's... Ctrl-C to end") + +# format output +start = 0 +while True: + try: + task, pid, cpu, flags, ts, ms = trace_fields() + if start == 0: + start = ts + ts -= start + print(f"At time {ts} s: Multiple syncs detected, last {ms} ms ago") + except KeyboardInterrupt: + exit() From b676a5ebb480130a92974f5d6cbf2759d04f0453 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Tue, 14 Oct 2025 17:05:05 +0530 Subject: [PATCH 07/38] Fix return in BCC-Examples --- BCC-Examples/hello_fields.py | 2 +- BCC-Examples/hello_world.py | 2 +- BCC-Examples/sync_timing.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/BCC-Examples/hello_fields.py b/BCC-Examples/hello_fields.py index 3a52fc11..8a2464a7 100644 --- a/BCC-Examples/hello_fields.py +++ b/BCC-Examples/hello_fields.py @@ -6,7 +6,7 @@ @section("tracepoint/syscalls/sys_enter_clone") def hello_world(ctx: c_void_p) -> c_int64: print("Hello, World!") - return c_int64(0) + return 0 # type: ignore [return-value] @bpf diff --git a/BCC-Examples/hello_world.py b/BCC-Examples/hello_world.py index ff51731e..4b5647b8 100644 --- a/BCC-Examples/hello_world.py +++ b/BCC-Examples/hello_world.py @@ -6,7 +6,7 @@ @section("tracepoint/syscalls/sys_enter_clone") def hello_world(ctx: c_void_p) -> c_int64: print("Hello, World!") - return c_int64(0) + return 0 # type: ignore [return-value] @bpf diff --git a/BCC-Examples/sync_timing.py b/BCC-Examples/sync_timing.py index 37d6a036..448caba4 100644 --- a/BCC-Examples/sync_timing.py +++ b/BCC-Examples/sync_timing.py @@ -24,7 +24,7 @@ def do_trace(ctx: c_void_p) -> c_int64: last.delete(key) else: last.update(key, ktime()) - return c_int64(0) + return 0 # type: ignore [return-value] @bpf From 4a79f9b9b2649f49c6c725c1eab24d38cd105f36 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Tue, 14 Oct 2025 18:50:46 +0530 Subject: 
[PATCH 08/38] Add sync_count BCC-Example --- BCC-Examples/sync_count.py | 59 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 BCC-Examples/sync_count.py diff --git a/BCC-Examples/sync_count.py b/BCC-Examples/sync_count.py new file mode 100644 index 00000000..bbea0226 --- /dev/null +++ b/BCC-Examples/sync_count.py @@ -0,0 +1,59 @@ +from pythonbpf import bpf, map, section, bpfglobal, BPF, trace_fields +from pythonbpf.helper import ktime +from pythonbpf.maps import HashMap + +from ctypes import c_void_p, c_int64 + + +@bpf +@map +def last() -> HashMap: + return HashMap(key=c_int64, value=c_int64, max_entries=2) + + +@bpf +@section("tracepoint/syscalls/sys_enter_sync") +def do_trace(ctx: c_void_p) -> c_int64: + ts_key = 0 + cnt_key = 1 + tsp = last.lookup(ts_key) + cntp = last.lookup(cnt_key) + if not cntp: + last.update(cnt_key, 0) + cntp = last.lookup(cnt_key) + if tsp: + delta = ktime() - tsp + if delta < 1000000000: + time_ms = delta // 1000000 + print(f"{time_ms} {cntp}") + last.delete(ts_key) + else: + last.update(ts_key, ktime()) + last.update(cnt_key, cntp + 1) + return 0 # type: ignore [return-value] + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +# compile +b = BPF() +b.load_and_attach() + +print("Tracing for quick sync's... Ctrl-C to end") + +# format output +start = 0 +while True: + try: + task, pid, cpu, flags, ts, msg = trace_fields() + if start == 0: + start = ts + ts -= start + ms, cnt = msg.split() + print(f"At time {ts} s: Multiple syncs detected, last {ms} ms ago. 
Count {cnt}") + except KeyboardInterrupt: + exit() From dd3fc74d0965d7d849e6a3f1876e4d4c5750450b Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Tue, 14 Oct 2025 23:06:43 +0530 Subject: [PATCH 09/38] Add support for tuple style multi-assignment in allocation_pass --- pythonbpf/allocation_pass.py | 81 +++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 34 deletions(-) diff --git a/pythonbpf/allocation_pass.py b/pythonbpf/allocation_pass.py index 9d824841..1bcf6e4a 100644 --- a/pythonbpf/allocation_pass.py +++ b/pythonbpf/allocation_pass.py @@ -25,41 +25,54 @@ def __iter__(self): def handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab): """Handle memory allocation for assignment statements.""" - # Validate assignment - if len(stmt.targets) != 1: - logger.warning("Multi-target assignment not supported, skipping allocation") - return - - target = stmt.targets[0] - - # Skip non-name targets (e.g., struct field assignments) - if isinstance(target, ast.Attribute): - logger.debug(f"Struct field assignment to {target.attr}, no allocation needed") - return - - if not isinstance(target, ast.Name): - logger.warning(f"Unsupported assignment target type: {type(target).__name__}") - return - - var_name = target.id - rval = stmt.value - - # Skip if already allocated - if var_name in local_sym_tab: - logger.debug(f"Variable {var_name} already allocated, skipping") - return - - # Determine type and allocate based on rval - if isinstance(rval, ast.Call): - _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab) - elif isinstance(rval, ast.Constant): - _allocate_for_constant(builder, var_name, rval, local_sym_tab) - elif isinstance(rval, ast.BinOp): - _allocate_for_binop(builder, var_name, local_sym_tab) + logger.info(f"Handling assignment for allocation: {ast.dump(stmt)}") + + # NOTE: Support multi-target assignments (e.g.: a, b = 1, 2) + if isinstance(stmt.targets[0], ast.Tuple): + if not isinstance(stmt.value, 
ast.Tuple): + logger.warning("Mismatched multi-target assignment, skipping allocation") + return + targets = stmt.targets[0].elts + rvals = stmt.value.elts + if len(targets) != len(rvals): + logger.warning("Mismatched multi-target assignment, skipping allocation") + return else: - logger.warning( - f"Unsupported assignment value type for {var_name}: {type(rval).__name__}" - ) + targets = stmt.targets + rvals = [stmt.value] + + for target, rval in zip(targets, rvals): + # Skip non-name targets (e.g., struct field assignments) + if isinstance(target, ast.Attribute): + logger.debug( + f"Struct field assignment to {target.attr}, no allocation needed" + ) + continue + + if not isinstance(target, ast.Name): + logger.warning( + f"Unsupported assignment target type: {type(target).__name__}" + ) + continue + + var_name = target.id + + # Skip if already allocated + if var_name in local_sym_tab: + logger.debug(f"Variable {var_name} already allocated, skipping") + continue + + # Determine type and allocate based on rval + if isinstance(rval, ast.Call): + _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab) + elif isinstance(rval, ast.Constant): + _allocate_for_constant(builder, var_name, rval, local_sym_tab) + elif isinstance(rval, ast.BinOp): + _allocate_for_binop(builder, var_name, local_sym_tab) + else: + logger.warning( + f"Unsupported assignment value type for {var_name}: {type(rval).__name__}" + ) def _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab): From c56928bc8a7c2524dc2464383ae39d7e9b860282 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Tue, 14 Oct 2025 23:30:59 +0530 Subject: [PATCH 10/38] Add create_targets_and_rvals, use it in handle_assign to enable tuple assignment --- pythonbpf/allocation_pass.py | 27 +++++---- pythonbpf/functions/functions_pass.py | 79 +++++++++++++-------------- 2 files changed, 54 insertions(+), 52 deletions(-) diff --git a/pythonbpf/allocation_pass.py 
b/pythonbpf/allocation_pass.py index 1bcf6e4a..5bc2f3a7 100644 --- a/pythonbpf/allocation_pass.py +++ b/pythonbpf/allocation_pass.py @@ -22,24 +22,27 @@ def __iter__(self): yield self.metadata -def handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab): - """Handle memory allocation for assignment statements.""" - - logger.info(f"Handling assignment for allocation: {ast.dump(stmt)}") - - # NOTE: Support multi-target assignments (e.g.: a, b = 1, 2) +def create_targets_and_rvals(stmt): + """Create lists of targets and right-hand values from an assignment statement.""" if isinstance(stmt.targets[0], ast.Tuple): if not isinstance(stmt.value, ast.Tuple): logger.warning("Mismatched multi-target assignment, skipping allocation") return - targets = stmt.targets[0].elts - rvals = stmt.value.elts + targets, rvals = stmt.targets[0].elts, stmt.value.elts if len(targets) != len(rvals): - logger.warning("Mismatched multi-target assignment, skipping allocation") + logger.warning("length of LHS != length of RHS, skipping allocation") return - else: - targets = stmt.targets - rvals = [stmt.value] + return targets, rvals + return stmt.targets, [stmt.value] + + +def handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab): + """Handle memory allocation for assignment statements.""" + + logger.info(f"Handling assignment for allocation: {ast.dump(stmt)}") + + # NOTE: Support multi-target assignments (e.g.: a, b = 1, 2) + targets, rvals = create_targets_and_rvals(stmt) for target, rval in zip(targets, rvals): # Skip non-name targets (e.g., struct field assignments) diff --git a/pythonbpf/functions/functions_pass.py b/pythonbpf/functions/functions_pass.py index 8d0bce1e..5836ce0d 100644 --- a/pythonbpf/functions/functions_pass.py +++ b/pythonbpf/functions/functions_pass.py @@ -12,7 +12,11 @@ handle_variable_assignment, handle_struct_field_assignment, ) -from pythonbpf.allocation_pass import handle_assign_allocation, allocate_temp_pool +from 
pythonbpf.allocation_pass import ( + handle_assign_allocation, + allocate_temp_pool, + create_targets_and_rvals, +) from .return_utils import handle_none_return, handle_xdp_return, is_xdp_name from .function_metadata import get_probe_string, is_global_function, infer_return_type @@ -140,48 +144,43 @@ def handle_assign( ): """Handle assignment statements in the function body.""" - # TODO: Support this later - # GH #37 - if len(stmt.targets) != 1: - logger.error("Multi-target assignment is not supported for now") - return - - target = stmt.targets[0] - rval = stmt.value + # NOTE: Support multi-target assignments (e.g.: a, b = 1, 2) + targets, rvals = create_targets_and_rvals(stmt) - if isinstance(target, ast.Name): - # NOTE: Simple variable assignment case: x = 5 - var_name = target.id - result = handle_variable_assignment( - func, - module, - builder, - var_name, - rval, - local_sym_tab, - map_sym_tab, - structs_sym_tab, - ) - if not result: - logger.error(f"Failed to handle assignment to {var_name}") - return + for target, rval in zip(targets, rvals): + if isinstance(target, ast.Name): + # NOTE: Simple variable assignment case: x = 5 + var_name = target.id + result = handle_variable_assignment( + func, + module, + builder, + var_name, + rval, + local_sym_tab, + map_sym_tab, + structs_sym_tab, + ) + if not result: + logger.error(f"Failed to handle assignment to {var_name}") + continue - if isinstance(target, ast.Attribute): - # NOTE: Struct field assignment case: pkt.field = value - handle_struct_field_assignment( - func, - module, - builder, - target, - rval, - local_sym_tab, - map_sym_tab, - structs_sym_tab, - ) - return + if isinstance(target, ast.Attribute): + # NOTE: Struct field assignment case: pkt.field = value + handle_struct_field_assignment( + func, + module, + builder, + target, + rval, + local_sym_tab, + map_sym_tab, + structs_sym_tab, + ) + continue - # Unsupported target type - logger.error(f"Unsupported assignment target: {ast.dump(target)}") + # 
Unsupported target type + logger.error(f"Unsupported assignment target: {ast.dump(target)}") def handle_cond( From 0a1557e318c34b3ae287c0ea5b32e526c9c6a191 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Tue, 14 Oct 2025 23:33:54 +0530 Subject: [PATCH 11/38] Add sync_count BCC example to use tuple-like assignment --- BCC-Examples/sync_count.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/BCC-Examples/sync_count.py b/BCC-Examples/sync_count.py index bbea0226..51790b94 100644 --- a/BCC-Examples/sync_count.py +++ b/BCC-Examples/sync_count.py @@ -14,10 +14,8 @@ def last() -> HashMap: @bpf @section("tracepoint/syscalls/sys_enter_sync") def do_trace(ctx: c_void_p) -> c_int64: - ts_key = 0 - cnt_key = 1 - tsp = last.lookup(ts_key) - cntp = last.lookup(cnt_key) + ts_key, cnt_key = 0, 1 + tsp, cntp = last.lookup(ts_key), last.lookup(cnt_key) if not cntp: last.update(cnt_key, 0) cntp = last.lookup(cnt_key) From b105c70b383267b3471ac748a25a59dc2f2b9482 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Wed, 15 Oct 2025 12:35:59 +0530 Subject: [PATCH 12/38] Add hello_perf_output BCC-Example skeleton --- BCC-Examples/hello_perf_output.py | 40 +++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 BCC-Examples/hello_perf_output.py diff --git a/BCC-Examples/hello_perf_output.py b/BCC-Examples/hello_perf_output.py new file mode 100644 index 00000000..3b41d793 --- /dev/null +++ b/BCC-Examples/hello_perf_output.py @@ -0,0 +1,40 @@ +from pythonbpf import bpf, map, struct, section, bpfglobal, compile +from pythonbpf.helper import ktime, pid +from pythonbpf.maps import PerfEventArray + +from ctypes import c_void_p, c_int64, c_uint64 + + +@bpf +@struct +class data_t: + pid: c_uint64 + ts: c_uint64 + comm: str(16) # type: ignore [valid-type] + + +@bpf +@map +def events() -> PerfEventArray: + return PerfEventArray(key_size=c_int64, value_size=c_int64) + + +@bpf +@section("tracepoint/syscalls/sys_enter_clone") +def 
hello(ctx: c_void_p) -> c_int64: + dataobj = data_t() + strobj = "hellohellohello" + dataobj.pid, dataobj.ts = pid(), ktime() + # get_curr_comm(dataobj.comm) + print(f"clone called at {dataobj.ts} by pid {dataobj.pid}, comm {strobj}") + events.output(dataobj) + return 0 # type: ignore [return-value] + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +compile() From a0d954b20bc12dce823c7a1349f6670cbcbb7dc4 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Wed, 15 Oct 2025 12:47:30 +0530 Subject: [PATCH 13/38] Register bpf_get_current_comm_emitter for comm --- pythonbpf/helper/bpf_helper_handler.py | 18 ++++++++++++++++++ pythonbpf/helper/helpers.py | 9 ++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/pythonbpf/helper/bpf_helper_handler.py b/pythonbpf/helper/bpf_helper_handler.py index 6e296148..e82dfc6c 100644 --- a/pythonbpf/helper/bpf_helper_handler.py +++ b/pythonbpf/helper/bpf_helper_handler.py @@ -23,6 +23,7 @@ class BPFHelperID(Enum): BPF_KTIME_GET_NS = 5 BPF_PRINTK = 6 BPF_GET_CURRENT_PID_TGID = 14 + BPF_GET_CURRENT_COMM = 16 BPF_PERF_EVENT_OUTPUT = 25 @@ -234,6 +235,23 @@ def bpf_map_delete_elem_emitter( return result, None +@HelperHandlerRegistry.register("comm") +def bpf_get_current_comm_emitter( + call, + map_ptr, + module, + builder, + func, + local_sym_tab=None, + struct_sym_tab=None, + map_sym_tab=None, +): + """ + Emit LLVM IR for bpf_get_current_comm helper function call. 
+ """ + pass # Not implemented yet + + @HelperHandlerRegistry.register("pid") def bpf_get_current_pid_tgid_emitter( call, diff --git a/pythonbpf/helper/helpers.py b/pythonbpf/helper/helpers.py index 1a84599b..1861e679 100644 --- a/pythonbpf/helper/helpers.py +++ b/pythonbpf/helper/helpers.py @@ -2,19 +2,26 @@ def ktime(): + """get current ktime""" return ctypes.c_int64(0) def pid(): + """get current process id""" return ctypes.c_int32(0) def deref(ptr): - "dereference a pointer" + """dereference a pointer""" result = ctypes.cast(ptr, ctypes.POINTER(ctypes.c_void_p)).contents.value return result if result is not None else 0 +def comm(buf): + """get current process command name""" + return ctypes.c_int64(0) + + XDP_ABORTED = ctypes.c_int64(0) XDP_DROP = ctypes.c_int64(1) XDP_PASS = ctypes.c_int64(2) From 13a804f7ac99fabc4770540842917b790652ba6f Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Wed, 15 Oct 2025 14:03:09 +0530 Subject: [PATCH 14/38] Implement bpf_get_current_comm_emitter --- BCC-Examples/hello_perf_output.py | 4 +- pythonbpf/helper/__init__.py | 3 +- pythonbpf/helper/bpf_helper_handler.py | 43 +++++++++++++++++++- pythonbpf/helper/helper_utils.py | 54 ++++++++++++++++++++++++++ 4 files changed, 100 insertions(+), 4 deletions(-) diff --git a/BCC-Examples/hello_perf_output.py b/BCC-Examples/hello_perf_output.py index 3b41d793..03da1edd 100644 --- a/BCC-Examples/hello_perf_output.py +++ b/BCC-Examples/hello_perf_output.py @@ -1,5 +1,5 @@ from pythonbpf import bpf, map, struct, section, bpfglobal, compile -from pythonbpf.helper import ktime, pid +from pythonbpf.helper import ktime, pid, comm from pythonbpf.maps import PerfEventArray from ctypes import c_void_p, c_int64, c_uint64 @@ -25,7 +25,7 @@ def hello(ctx: c_void_p) -> c_int64: dataobj = data_t() strobj = "hellohellohello" dataobj.pid, dataobj.ts = pid(), ktime() - # get_curr_comm(dataobj.comm) + comm(dataobj.comm) print(f"clone called at {dataobj.ts} by pid {dataobj.pid}, comm {strobj}") 
events.output(dataobj) return 0 # type: ignore [return-value] diff --git a/pythonbpf/helper/__init__.py b/pythonbpf/helper/__init__.py index 9f301b2a..26f792b5 100644 --- a/pythonbpf/helper/__init__.py +++ b/pythonbpf/helper/__init__.py @@ -1,7 +1,7 @@ from .helper_registry import HelperHandlerRegistry from .helper_utils import reset_scratch_pool from .bpf_helper_handler import handle_helper_call -from .helpers import ktime, pid, deref, XDP_DROP, XDP_PASS +from .helpers import ktime, pid, deref, comm, XDP_DROP, XDP_PASS # Register the helper handler with expr module @@ -62,6 +62,7 @@ def helper_call_handler( "ktime", "pid", "deref", + "comm", "XDP_DROP", "XDP_PASS", ] diff --git a/pythonbpf/helper/bpf_helper_handler.py b/pythonbpf/helper/bpf_helper_handler.py index e82dfc6c..1c96cb4b 100644 --- a/pythonbpf/helper/bpf_helper_handler.py +++ b/pythonbpf/helper/bpf_helper_handler.py @@ -7,6 +7,7 @@ get_or_create_ptr_from_arg, get_flags_val, get_data_ptr_and_size, + get_buffer_ptr_and_size, ) from .printk_formatter import simple_string_print, handle_fstring_print @@ -248,8 +249,48 @@ def bpf_get_current_comm_emitter( ): """ Emit LLVM IR for bpf_get_current_comm helper function call. 
+ + Accepts: comm(dataobj.field) or comm(my_buffer) """ - pass # Not implemented yet + if not call.args or len(call.args) != 1: + raise ValueError( + f"comm expects exactly one argument (buffer), got {len(call.args)}" + ) + + buf_arg = call.args[0] + + # Extract buffer pointer and size + buf_ptr, buf_size = get_buffer_ptr_and_size( + buf_arg, builder, local_sym_tab, struct_sym_tab + ) + + # Validate it's a char array + if not isinstance( + buf_ptr.type.pointee, ir.ArrayType + ) or buf_ptr.type.pointee.element != ir.IntType(8): + raise ValueError( + f"comm expects a char array buffer, got {buf_ptr.type.pointee}" + ) + + # Cast to void* and call helper + buf_void_ptr = builder.bitcast(buf_ptr, ir.PointerType()) + + fn_type = ir.FunctionType( + ir.IntType(64), + [ir.PointerType(), ir.IntType(32)], + var_arg=False, + ) + fn_ptr = builder.inttoptr( + ir.Constant(ir.IntType(64), BPFHelperID.BPF_GET_CURRENT_COMM.value), + ir.PointerType(fn_type), + ) + + result = builder.call( + fn_ptr, [buf_void_ptr, ir.Constant(ir.IntType(32), buf_size)], tail=False + ) + + logger.info(f"Emitted bpf_get_current_comm with {buf_size} byte buffer") + return result, None @HelperHandlerRegistry.register("pid") diff --git a/pythonbpf/helper/helper_utils.py b/pythonbpf/helper/helper_utils.py index 4b04464c..cf89c302 100644 --- a/pythonbpf/helper/helper_utils.py +++ b/pythonbpf/helper/helper_utils.py @@ -136,3 +136,57 @@ def get_data_ptr_and_size(data_arg, local_sym_tab, struct_sym_tab): raise NotImplementedError( "Only simple object names are supported as data in perf event output." 
) + + +def get_buffer_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab): + """Extract buffer pointer and size from either a struct field or variable.""" + + # Case 1: Struct field (obj.field) + if isinstance(buf_arg, ast.Attribute): + if not isinstance(buf_arg.value, ast.Name): + raise ValueError( + "Only simple struct field access supported (e.g., obj.field)" + ) + + struct_name = buf_arg.value.id + field_name = buf_arg.attr + + # Lookup struct + if not local_sym_tab or struct_name not in local_sym_tab: + raise ValueError(f"Struct '{struct_name}' not found") + + struct_type = local_sym_tab[struct_name].metadata + if not struct_sym_tab or struct_type not in struct_sym_tab: + raise ValueError(f"Struct type '{struct_type}' not found") + + struct_info = struct_sym_tab[struct_type] + + # Get field pointer and type + struct_ptr = local_sym_tab[struct_name].var + field_ptr = struct_info.gep(builder, struct_ptr, field_name) + field_type = struct_info.field_type(field_name) + + if not isinstance(field_type, ir.ArrayType): + raise ValueError(f"Field '{field_name}' must be an array type") + + return field_ptr, field_type.count + + # Case 2: Variable name + elif isinstance(buf_arg, ast.Name): + var_name = buf_arg.id + + if not local_sym_tab or var_name not in local_sym_tab: + raise ValueError(f"Variable '{var_name}' not found") + + var_ptr = local_sym_tab[var_name].var + var_type = local_sym_tab[var_name].ir_type + + if not isinstance(var_type, ir.ArrayType): + raise ValueError(f"Variable '{var_name}' must be an array type") + + return var_ptr, var_type.count + + else: + raise ValueError( + "comm expects either a struct field (obj.field) or variable name" + ) From fb480639a51a18f0d943fa740a2cfe699d4fbfc9 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Wed, 15 Oct 2025 17:27:43 +0530 Subject: [PATCH 15/38] Make char array struct fields work --- BCC-Examples/hello_perf_output.py | 11 ++- pythonbpf/helper/printk_formatter.py | 137 
++++++++++++++++++++------- 2 files changed, 108 insertions(+), 40 deletions(-) diff --git a/BCC-Examples/hello_perf_output.py b/BCC-Examples/hello_perf_output.py index 03da1edd..57ec0987 100644 --- a/BCC-Examples/hello_perf_output.py +++ b/BCC-Examples/hello_perf_output.py @@ -1,4 +1,4 @@ -from pythonbpf import bpf, map, struct, section, bpfglobal, compile +from pythonbpf import bpf, map, struct, section, bpfglobal, BPF, trace_pipe from pythonbpf.helper import ktime, pid, comm from pythonbpf.maps import PerfEventArray @@ -23,10 +23,9 @@ def events() -> PerfEventArray: @section("tracepoint/syscalls/sys_enter_clone") def hello(ctx: c_void_p) -> c_int64: dataobj = data_t() - strobj = "hellohellohello" dataobj.pid, dataobj.ts = pid(), ktime() comm(dataobj.comm) - print(f"clone called at {dataobj.ts} by pid {dataobj.pid}, comm {strobj}") + print(f"clone called at {dataobj.ts} by pid {dataobj.pid}, comm {dataobj.comm}") events.output(dataobj) return 0 # type: ignore [return-value] @@ -37,4 +36,8 @@ def LICENSE() -> str: return "GPL" -compile() +# compile +BPF().load_and_attach() + +print("Tracing clone()... 
Ctrl-C to end") +trace_pipe() diff --git a/pythonbpf/helper/printk_formatter.py b/pythonbpf/helper/printk_formatter.py index e0cd669f..cee069f6 100644 --- a/pythonbpf/helper/printk_formatter.py +++ b/pythonbpf/helper/printk_formatter.py @@ -173,6 +173,15 @@ def _populate_fval(ftype, node, fmt_parts, exprs): raise NotImplementedError( f"Unsupported pointer target type in f-string: {target}" ) + elif isinstance(ftype, ir.ArrayType): + if isinstance(ftype.element, ir.IntType) and ftype.element.width == 8: + # Char array + fmt_parts.append("%s") + exprs.append(node) + else: + raise NotImplementedError( + f"Unsupported array element type in f-string: {ftype.element}" + ) else: raise NotImplementedError(f"Unsupported field type in f-string: {ftype}") @@ -197,44 +206,100 @@ def _create_format_string_global(fmt_str, func, module, builder): def _prepare_expr_args(expr, func, module, builder, local_sym_tab, struct_sym_tab): """Evaluate and prepare an expression to use as an arg for bpf_printk.""" - val, _ = eval_expr( - func, - module, - builder, - expr, - local_sym_tab, - None, - struct_sym_tab, + + # Special case: struct field char array needs pointer to first element + char_array_ptr = _get_struct_char_array_ptr( + expr, builder, local_sym_tab, struct_sym_tab ) + if char_array_ptr: + return char_array_ptr - if val: - if isinstance(val.type, ir.PointerType): - target, depth = get_base_type_and_depth(val.type) - if isinstance(target, ir.IntType): - if target.width >= 32: - val = deref_to_depth(func, builder, val, depth) - val = builder.sext(val, ir.IntType(64)) - elif target.width == 8 and depth == 1: - # NOTE: i8* is string, no need to deref - pass + # Regular expression evaluation + val, _ = eval_expr(func, module, builder, expr, local_sym_tab, None, struct_sym_tab) - else: - logger.warning( - "Only int and ptr supported in bpf_printk args. Others default to 0." 
- ) - val = ir.Constant(ir.IntType(64), 0) - elif isinstance(val.type, ir.IntType): - if val.type.width < 64: - val = builder.sext(val, ir.IntType(64)) - else: - logger.warning( - "Only int and ptr supported in bpf_printk args. Others default to 0." - ) - val = ir.Constant(ir.IntType(64), 0) - return val + if not val: + logger.warning("Failed to evaluate expression for bpf_printk, defaulting to 0") + return ir.Constant(ir.IntType(64), 0) + + # Convert value to bpf_printk compatible type + if isinstance(val.type, ir.PointerType): + return _handle_pointer_arg(val, func, builder) + elif isinstance(val.type, ir.IntType): + return _handle_int_arg(val, builder) else: - logger.warning( - "Failed to evaluate expression for bpf_printk argument. " - "It will be converted to 0." - ) + logger.warning(f"Unsupported type {val.type} in bpf_printk, defaulting to 0") return ir.Constant(ir.IntType(64), 0) + + +def _get_struct_char_array_ptr(expr, builder, local_sym_tab, struct_sym_tab): + """Get pointer to first element of char array in struct field, or None.""" + if not (isinstance(expr, ast.Attribute) and isinstance(expr.value, ast.Name)): + return None + + var_name = expr.value.id + field_name = expr.attr + + # Check if it's a valid struct field + if not ( + local_sym_tab + and var_name in local_sym_tab + and struct_sym_tab + and local_sym_tab[var_name].metadata in struct_sym_tab + ): + return None + + struct_type = local_sym_tab[var_name].metadata + struct_info = struct_sym_tab[struct_type] + + if field_name not in struct_info.fields: + return None + + field_type = struct_info.field_type(field_name) + + # Check if it's a char array + is_char_array = ( + isinstance(field_type, ir.ArrayType) + and isinstance(field_type.element, ir.IntType) + and field_type.element.width == 8 + ) + + if not is_char_array: + return None + + # Get field pointer and GEP to first element: [N x i8]* -> i8* + struct_ptr = local_sym_tab[var_name].var + field_ptr = struct_info.gep(builder, struct_ptr, 
field_name) + + return builder.gep( + field_ptr, + [ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)], + inbounds=True, + ) + + +def _handle_pointer_arg(val, func, builder): + """Convert pointer type for bpf_printk.""" + target, depth = get_base_type_and_depth(val.type) + + if not isinstance(target, ir.IntType): + logger.warning("Only int pointers supported in bpf_printk, defaulting to 0") + return ir.Constant(ir.IntType(64), 0) + + # i8* is string - use as-is + if target.width == 8 and depth == 1: + return val + + # Integer pointers: dereference and sign-extend to i64 + if target.width >= 32: + val = deref_to_depth(func, builder, val, depth) + return builder.sext(val, ir.IntType(64)) + + logger.warning("Unsupported pointer width in bpf_printk, defaulting to 0") + return ir.Constant(ir.IntType(64), 0) + + +def _handle_int_arg(val, builder): + """Convert integer type for bpf_printk (sign-extend to i64).""" + if val.type.width < 64: + return builder.sext(val, ir.IntType(64)) + return val From 81f72a76984c30a5ebe20548ac7c2bb264549bed Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Wed, 15 Oct 2025 18:10:04 +0530 Subject: [PATCH 16/38] Support var-to-var and var-to-struct-fld allocations --- pythonbpf/allocation_pass.py | 97 ++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/pythonbpf/allocation_pass.py b/pythonbpf/allocation_pass.py index 5bc2f3a7..17f53953 100644 --- a/pythonbpf/allocation_pass.py +++ b/pythonbpf/allocation_pass.py @@ -72,6 +72,14 @@ def handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab): _allocate_for_constant(builder, var_name, rval, local_sym_tab) elif isinstance(rval, ast.BinOp): _allocate_for_binop(builder, var_name, local_sym_tab) + elif isinstance(rval, ast.Name): + # Variable-to-variable assignment (b = a) + _allocate_for_name(builder, var_name, rval, local_sym_tab) + elif isinstance(rval, ast.Attribute): + # Struct field-to-variable assignment (a = dat.fld) + 
_allocate_for_attribute( + builder, var_name, rval, local_sym_tab, structs_sym_tab + ) else: logger.warning( f"Unsupported assignment value type for {var_name}: {type(rval).__name__}" @@ -192,3 +200,92 @@ def allocate_temp_pool(builder, max_temps, local_sym_tab): temp_var = builder.alloca(ir.IntType(64), name=temp_name) temp_var.align = 8 local_sym_tab[temp_name] = LocalSymbol(temp_var, ir.IntType(64)) + + +def _allocate_for_name(builder, var_name, rval, local_sym_tab): + """Allocate memory for variable-to-variable assignment (b = a).""" + source_var = rval.id + + if source_var not in local_sym_tab: + logger.error(f"Source variable '{source_var}' not found in symbol table") + return + + # Get type from source variable + source_type = local_sym_tab[source_var].ir_type + source_metadata = local_sym_tab[source_var].metadata + + # Allocate with same type + var = builder.alloca(source_type, name=var_name) + + # Set alignment based on type + if isinstance(source_type, ir.IntType): + var.align = source_type.width // 8 + elif isinstance(source_type, ir.PointerType): + var.align = 8 + elif isinstance(source_type, ir.ArrayType): + var.align = ( + source_type.element.width // 8 + if isinstance(source_type.element, ir.IntType) + else 1 + ) + else: + var.align = 8 # Default alignment + + local_sym_tab[var_name] = LocalSymbol(var, source_type, source_metadata) + logger.info( + f"Pre-allocated {var_name} from variable {source_var} with type {source_type}" + ) + + +def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_tab): + """Allocate memory for struct field-to-variable assignment (a = dat.fld).""" + if not isinstance(rval.value, ast.Name): + logger.warning( + f"Complex attribute access not supported for allocation of {var_name}" + ) + return + + struct_var = rval.value.id + field_name = rval.attr + + # Validate struct exists + if struct_var not in local_sym_tab: + logger.error(f"Struct variable '{struct_var}' not found in symbol table") + return + + 
struct_type = local_sym_tab[struct_var].metadata + if not struct_type or struct_type not in structs_sym_tab: + logger.error(f"Struct type '{struct_type}' not found in struct symbol table") + return + + struct_info = structs_sym_tab[struct_type] + + # Validate field exists + if field_name not in struct_info.fields: + logger.error(f"Field '{field_name}' not found in struct '{struct_type}'") + return + + # Get field type + field_type = struct_info.field_type(field_name) + + # Allocate with field's type + var = builder.alloca(field_type, name=var_name) + + # Set alignment based on type + if isinstance(field_type, ir.IntType): + var.align = field_type.width // 8 + elif isinstance(field_type, ir.PointerType): + var.align = 8 + elif isinstance(field_type, ir.ArrayType): + var.align = ( + field_type.element.width // 8 + if isinstance(field_type.element, ir.IntType) + else 1 + ) + else: + var.align = 8 # Default alignment + + local_sym_tab[var_name] = LocalSymbol(var, field_type) + logger.info( + f"Pre-allocated {var_name} from {struct_var}.{field_name} with type {field_type}" + ) From fd630293f729e310d7bb679042ad4226cd197816 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Wed, 15 Oct 2025 18:14:13 +0530 Subject: [PATCH 17/38] Remove duplicate alignment logic from allocation_pass --- pythonbpf/allocation_pass.py | 80 +++++++++++++++--------------------- 1 file changed, 32 insertions(+), 48 deletions(-) diff --git a/pythonbpf/allocation_pass.py b/pythonbpf/allocation_pass.py index 17f53953..289d30f2 100644 --- a/pythonbpf/allocation_pass.py +++ b/pythonbpf/allocation_pass.py @@ -210,82 +210,66 @@ def _allocate_for_name(builder, var_name, rval, local_sym_tab): logger.error(f"Source variable '{source_var}' not found in symbol table") return - # Get type from source variable - source_type = local_sym_tab[source_var].ir_type - source_metadata = local_sym_tab[source_var].metadata + # Get type and metadata from source variable + source_symbol = local_sym_tab[source_var] 
- # Allocate with same type - var = builder.alloca(source_type, name=var_name) - - # Set alignment based on type - if isinstance(source_type, ir.IntType): - var.align = source_type.width // 8 - elif isinstance(source_type, ir.PointerType): - var.align = 8 - elif isinstance(source_type, ir.ArrayType): - var.align = ( - source_type.element.width // 8 - if isinstance(source_type.element, ir.IntType) - else 1 - ) - else: - var.align = 8 # Default alignment + # Allocate with same type and alignment + var = _allocate_with_type(builder, var_name, source_symbol.ir_type) + local_sym_tab[var_name] = LocalSymbol( + var, source_symbol.ir_type, source_symbol.metadata + ) - local_sym_tab[var_name] = LocalSymbol(var, source_type, source_metadata) logger.info( - f"Pre-allocated {var_name} from variable {source_var} with type {source_type}" + f"Pre-allocated {var_name} from {source_var} with type {source_symbol.ir_type}" ) def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, structs_sym_tab): """Allocate memory for struct field-to-variable assignment (a = dat.fld).""" if not isinstance(rval.value, ast.Name): - logger.warning( - f"Complex attribute access not supported for allocation of {var_name}" - ) + logger.warning(f"Complex attribute access not supported for {var_name}") return struct_var = rval.value.id field_name = rval.attr - # Validate struct exists + # Validate struct and field if struct_var not in local_sym_tab: - logger.error(f"Struct variable '{struct_var}' not found in symbol table") + logger.error(f"Struct variable '{struct_var}' not found") return struct_type = local_sym_tab[struct_var].metadata if not struct_type or struct_type not in structs_sym_tab: - logger.error(f"Struct type '{struct_type}' not found in struct symbol table") + logger.error(f"Struct type '{struct_type}' not found") return struct_info = structs_sym_tab[struct_type] - - # Validate field exists if field_name not in struct_info.fields: logger.error(f"Field '{field_name}' not found in 
struct '{struct_type}'") return - # Get field type + # Allocate with field's type and alignment field_type = struct_info.field_type(field_name) - - # Allocate with field's type - var = builder.alloca(field_type, name=var_name) - - # Set alignment based on type - if isinstance(field_type, ir.IntType): - var.align = field_type.width // 8 - elif isinstance(field_type, ir.PointerType): - var.align = 8 - elif isinstance(field_type, ir.ArrayType): - var.align = ( - field_type.element.width // 8 - if isinstance(field_type.element, ir.IntType) - else 1 - ) - else: - var.align = 8 # Default alignment - + var = _allocate_with_type(builder, var_name, field_type) local_sym_tab[var_name] = LocalSymbol(var, field_type) + logger.info( f"Pre-allocated {var_name} from {struct_var}.{field_name} with type {field_type}" ) + + +def _allocate_with_type(builder, var_name, ir_type): + """Allocate variable with appropriate alignment for type.""" + var = builder.alloca(ir_type, name=var_name) + var.align = _get_alignment(ir_type) + return var + + +def _get_alignment(ir_type): + """Get appropriate alignment for IR type.""" + if isinstance(ir_type, ir.IntType): + return ir_type.width // 8 + elif isinstance(ir_type, ir.ArrayType) and isinstance(ir_type.element, ir.IntType): + return ir_type.element.width // 8 + else: + return 8 # Default: pointer size From 9fc3c85b754d72c44349a1c9a729aa8b8af0271b Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Wed, 15 Oct 2025 20:02:18 +0530 Subject: [PATCH 18/38] Add struct-field to named-var assignment of char arrays --- pythonbpf/allocation_pass.py | 20 +++++++++--- pythonbpf/assign_pass.py | 60 ++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 4 deletions(-) diff --git a/pythonbpf/allocation_pass.py b/pythonbpf/allocation_pass.py index 289d30f2..36b0a754 100644 --- a/pythonbpf/allocation_pass.py +++ b/pythonbpf/allocation_pass.py @@ -248,13 +248,25 @@ def _allocate_for_attribute(builder, var_name, rval, local_sym_tab, 
structs_sym_ logger.error(f"Field '{field_name}' not found in struct '{struct_type}'") return - # Allocate with field's type and alignment + # Get field type field_type = struct_info.field_type(field_name) - var = _allocate_with_type(builder, var_name, field_type) - local_sym_tab[var_name] = LocalSymbol(var, field_type) + + # Special case: char array -> allocate as i8* pointer instead + if ( + isinstance(field_type, ir.ArrayType) + and isinstance(field_type.element, ir.IntType) + and field_type.element.width == 8 + ): + alloc_type = ir.PointerType(ir.IntType(8)) + logger.info(f"Allocating {var_name} as i8* (pointer to char array)") + else: + alloc_type = field_type + + var = _allocate_with_type(builder, var_name, alloc_type) + local_sym_tab[var_name] = LocalSymbol(var, alloc_type) logger.info( - f"Pre-allocated {var_name} from {struct_var}.{field_name} with type {field_type}" + f"Pre-allocated {var_name} from {struct_var}.{field_name} with type {alloc_type}" ) diff --git a/pythonbpf/assign_pass.py b/pythonbpf/assign_pass.py index ab091415..a7cd52b0 100644 --- a/pythonbpf/assign_pass.py +++ b/pythonbpf/assign_pass.py @@ -71,6 +71,17 @@ def handle_variable_assignment( logger.info(f"Initialized struct {struct_name} for variable {var_name}") return True + # Special case: struct field char array -> pointer + # Handle this before eval_expr to get the pointer, not the value + if isinstance(rval, ast.Attribute) and isinstance(rval.value, ast.Name): + converted_val = _try_convert_char_array_to_ptr( + rval, var_type, builder, local_sym_tab, structs_sym_tab + ) + if converted_val is not None: + builder.store(converted_val, var_ptr) + logger.info(f"Assigned char array pointer to {var_name}") + return True + val_result = eval_expr( func, module, builder, rval, local_sym_tab, map_sym_tab, structs_sym_tab ) @@ -106,3 +117,52 @@ def handle_variable_assignment( builder.store(val, var_ptr) logger.info(f"Assigned value to variable {var_name}") return True + + +def 
_try_convert_char_array_to_ptr( + rval, var_type, builder, local_sym_tab, structs_sym_tab +): + """Try to convert char array field to i8* pointer""" + # Only convert if target is i8* + if not ( + isinstance(var_type, ir.PointerType) + and isinstance(var_type.pointee, ir.IntType) + and var_type.pointee.width == 8 + ): + return None + + struct_var = rval.value.id + field_name = rval.attr + + # Validate struct + if struct_var not in local_sym_tab: + return None + + struct_type = local_sym_tab[struct_var].metadata + if not struct_type or struct_type not in structs_sym_tab: + return None + + struct_info = structs_sym_tab[struct_type] + if field_name not in struct_info.fields: + return None + + field_type = struct_info.field_type(field_name) + + # Check if it's a char array + if not ( + isinstance(field_type, ir.ArrayType) + and isinstance(field_type.element, ir.IntType) + and field_type.element.width == 8 + ): + return None + + # Get pointer to struct field + struct_ptr = local_sym_tab[struct_var].var + field_ptr = struct_info.gep(builder, struct_ptr, field_name) + + # GEP to first element: [N x i8]* -> i8* + return builder.gep( + field_ptr, + [ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)], + inbounds=True, + ) From 009b11aca64b54d29748137a913fd38e304ff440 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Wed, 15 Oct 2025 23:52:15 +0530 Subject: [PATCH 19/38] Implement bpf_probe_read_kernel_str helper, Allow i8* to i8 ArrayType conversion --- pythonbpf/assign_pass.py | 74 ++++++++++++++++++++--- pythonbpf/helper/__init__.py | 6 +- pythonbpf/helper/bpf_helper_handler.py | 65 ++++++++++++++++++++ pythonbpf/helper/helper_utils.py | 84 ++++++++++++++++++++++++++ pythonbpf/helper/helpers.py | 5 ++ 5 files changed, 223 insertions(+), 11 deletions(-) diff --git a/pythonbpf/assign_pass.py b/pythonbpf/assign_pass.py index a7cd52b0..e0ef2db2 100644 --- a/pythonbpf/assign_pass.py +++ b/pythonbpf/assign_pass.py @@ -2,6 +2,7 @@ import logging from llvmlite 
import ir from pythonbpf.expr import eval_expr +from pythonbpf.helper import emit_probe_read_kernel_str_call logger = logging.getLogger(__name__) @@ -27,27 +28,82 @@ def handle_struct_field_assignment( # Get field pointer and evaluate value field_ptr = struct_info.gep(builder, local_sym_tab[var_name].var, field_name) - val = eval_expr( + field_type = struct_info.field_type(field_name) + val_result = eval_expr( func, module, builder, rval, local_sym_tab, map_sym_tab, structs_sym_tab ) - if val is None: + if val_result is None: logger.error(f"Failed to evaluate value for {var_name}.{field_name}") return - # TODO: Handle string assignment to char array (not a priority) - field_type = struct_info.field_type(field_name) - if isinstance(field_type, ir.ArrayType) and val[1] == ir.PointerType(ir.IntType(8)): - logger.warning( - f"String to char array assignment not implemented for {var_name}.{field_name}" + val, val_type = val_result + + # Special case: i8* string to [N x i8] char array + if _is_char_array(field_type) and _is_i8_ptr(val_type): + _copy_string_to_char_array( + func, + module, + builder, + val, + field_ptr, + field_type, + local_sym_tab, + map_sym_tab, + structs_sym_tab, ) + logger.info(f"Copied string to char array {var_name}.{field_name}") return - # Store the value - builder.store(val[0], field_ptr) + # Regular assignment + builder.store(val, field_ptr) logger.info(f"Assigned to struct field {var_name}.{field_name}") +def _copy_string_to_char_array( + func, + module, + builder, + src_ptr, + dst_ptr, + array_type, + local_sym_tab, + map_sym_tab, + struct_sym_tab, +): + """Copy string (i8*) to char array ([N x i8]) using bpf_probe_read_kernel_str""" + + array_size = array_type.count + + # Get pointer to first element: [N x i8]* -> i8* + dst_i8_ptr = builder.gep( + dst_ptr, + [ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)], + inbounds=True, + ) + + # Use the shared emitter function + emit_probe_read_kernel_str_call(builder, dst_i8_ptr, 
array_size, src_ptr) + + +def _is_char_array(ir_type): + """Check if type is [N x i8].""" + return ( + isinstance(ir_type, ir.ArrayType) + and isinstance(ir_type.element, ir.IntType) + and ir_type.element.width == 8 + ) + + +def _is_i8_ptr(ir_type): + """Check if type is i8*.""" + return ( + isinstance(ir_type, ir.PointerType) + and isinstance(ir_type.pointee, ir.IntType) + and ir_type.pointee.width == 8 + ) + + def handle_variable_assignment( func, module, builder, var_name, rval, local_sym_tab, map_sym_tab, structs_sym_tab ): diff --git a/pythonbpf/helper/__init__.py b/pythonbpf/helper/__init__.py index 26f792b5..2f9c3473 100644 --- a/pythonbpf/helper/__init__.py +++ b/pythonbpf/helper/__init__.py @@ -1,7 +1,7 @@ from .helper_registry import HelperHandlerRegistry from .helper_utils import reset_scratch_pool -from .bpf_helper_handler import handle_helper_call -from .helpers import ktime, pid, deref, comm, XDP_DROP, XDP_PASS +from .bpf_helper_handler import handle_helper_call, emit_probe_read_kernel_str_call +from .helpers import ktime, pid, deref, comm, probe_read_str, XDP_DROP, XDP_PASS # Register the helper handler with expr module @@ -59,10 +59,12 @@ def helper_call_handler( "HelperHandlerRegistry", "reset_scratch_pool", "handle_helper_call", + "emit_probe_read_kernel_str_call", "ktime", "pid", "deref", "comm", + "probe_read_str", "XDP_DROP", "XDP_PASS", ] diff --git a/pythonbpf/helper/bpf_helper_handler.py b/pythonbpf/helper/bpf_helper_handler.py index 1c96cb4b..78686778 100644 --- a/pythonbpf/helper/bpf_helper_handler.py +++ b/pythonbpf/helper/bpf_helper_handler.py @@ -8,6 +8,8 @@ get_flags_val, get_data_ptr_and_size, get_buffer_ptr_and_size, + get_char_array_ptr_and_size, + get_ptr_from_arg, ) from .printk_formatter import simple_string_print, handle_fstring_print @@ -26,6 +28,7 @@ class BPFHelperID(Enum): BPF_GET_CURRENT_PID_TGID = 14 BPF_GET_CURRENT_COMM = 16 BPF_PERF_EVENT_OUTPUT = 25 + BPF_PROBE_READ_KERNEL_STR = 115 
@HelperHandlerRegistry.register("ktime") @@ -368,6 +371,68 @@ def bpf_perf_event_output_handler( return result, None +def emit_probe_read_kernel_str_call(builder, dst_ptr, dst_size, src_ptr): + """Emit LLVM IR call to bpf_probe_read_kernel_str""" + + fn_type = ir.FunctionType( + ir.IntType(64), + [ir.PointerType(), ir.IntType(32), ir.PointerType()], + var_arg=False, + ) + fn_ptr = builder.inttoptr( + ir.Constant(ir.IntType(64), BPFHelperID.BPF_PROBE_READ_KERNEL_STR.value), + ir.PointerType(fn_type), + ) + + result = builder.call( + fn_ptr, + [ + builder.bitcast(dst_ptr, ir.PointerType()), + ir.Constant(ir.IntType(32), dst_size), + builder.bitcast(src_ptr, ir.PointerType()), + ], + tail=False, + ) + + logger.info(f"Emitted bpf_probe_read_kernel_str (size={dst_size})") + return result + + +@HelperHandlerRegistry.register("probe_read_str") +def bpf_probe_read_kernel_str_emitter( + call, + map_ptr, + module, + builder, + func, + local_sym_tab=None, + struct_sym_tab=None, + map_sym_tab=None, +): + """Emit LLVM IR for bpf_probe_read_kernel_str helper.""" + + if len(call.args) != 2: + raise ValueError( + f"probe_read_str expects 2 args (dst, src), got {len(call.args)}" + ) + + # Get destination buffer (char array -> i8*) + dst_ptr, dst_size = get_char_array_ptr_and_size( + call.args[0], builder, local_sym_tab, struct_sym_tab + ) + + # Get source pointer (evaluate expression) + src_ptr, src_type = get_ptr_from_arg( + call.args[1], func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab + ) + + # Emit the helper call + result = emit_probe_read_kernel_str_call(builder, dst_ptr, dst_size, src_ptr) + + logger.info(f"Emitted bpf_probe_read_kernel_str (size={dst_size})") + return result, ir.IntType(64) + + def handle_helper_call( call, module, diff --git a/pythonbpf/helper/helper_utils.py b/pythonbpf/helper/helper_utils.py index cf89c302..7f3fdbe1 100644 --- a/pythonbpf/helper/helper_utils.py +++ b/pythonbpf/helper/helper_utils.py @@ -4,6 +4,7 @@ from llvmlite import 
ir from pythonbpf.expr import ( get_operand_value, + eval_expr, ) logger = logging.getLogger(__name__) @@ -190,3 +191,86 @@ def get_buffer_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab): raise ValueError( "comm expects either a struct field (obj.field) or variable name" ) + + +def get_char_array_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab): + """Get pointer to char array and its size.""" + + # Struct field: obj.field + if isinstance(buf_arg, ast.Attribute) and isinstance(buf_arg.value, ast.Name): + var_name = buf_arg.value.id + field_name = buf_arg.attr + + if not (local_sym_tab and var_name in local_sym_tab): + raise ValueError(f"Variable '{var_name}' not found") + + struct_type = local_sym_tab[var_name].metadata + if not (struct_sym_tab and struct_type in struct_sym_tab): + raise ValueError(f"Struct type '{struct_type}' not found") + + struct_info = struct_sym_tab[struct_type] + if field_name not in struct_info.fields: + raise ValueError(f"Field '{field_name}' not found") + + field_type = struct_info.field_type(field_name) + if not _is_char_array(field_type): + raise ValueError("Expected char array field") + + struct_ptr = local_sym_tab[var_name].var + field_ptr = struct_info.gep(builder, struct_ptr, field_name) + + # GEP to first element: [N x i8]* -> i8* + buf_ptr = builder.gep( + field_ptr, + [ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)], + inbounds=True, + ) + return buf_ptr, field_type.count + + elif isinstance(buf_arg, ast.Name): + # NOTE: We shouldn't be doing this as we can't get size info + var_name = buf_arg.id + if not (local_sym_tab and var_name in local_sym_tab): + raise ValueError(f"Variable '{var_name}' not found") + + var_ptr = local_sym_tab[var_name].var + var_type = local_sym_tab[var_name].ir_type + + if not isinstance(var_type, ir.PointerType) and not isinstance( + var_type.pointee, ir.IntType(8) + ): + raise ValueError("Expected str ptr variable") + + return var_ptr, 256 # Size unknown for str 
ptr, using 256 as default + + else: + raise ValueError("Expected struct field or variable name") + + +def _is_char_array(ir_type): + """Check if IR type is [N x i8].""" + return ( + isinstance(ir_type, ir.ArrayType) + and isinstance(ir_type.element, ir.IntType) + and ir_type.element.width == 8 + ) + + +def get_ptr_from_arg( + arg, func, module, builder, local_sym_tab, map_sym_tab, struct_sym_tab +): + """Evaluate argument and return pointer value""" + + result = eval_expr( + func, module, builder, arg, local_sym_tab, map_sym_tab, struct_sym_tab + ) + + if not result: + raise ValueError("Failed to evaluate argument") + + val, val_type = result + + if not isinstance(val_type, ir.PointerType): + raise ValueError(f"Expected pointer type, got {val_type}") + + return val, val_type diff --git a/pythonbpf/helper/helpers.py b/pythonbpf/helper/helpers.py index 1861e679..cb1a8e12 100644 --- a/pythonbpf/helper/helpers.py +++ b/pythonbpf/helper/helpers.py @@ -22,6 +22,11 @@ def comm(buf): return ctypes.c_int64(0) +def probe_read_str(dst, src): + """Safely read a null-terminated string from kernel memory""" + return ctypes.c_int64(0) + + XDP_ABORTED = ctypes.c_int64(0) XDP_DROP = ctypes.c_int64(1) XDP_PASS = ctypes.c_int64(2) From c143739a040302033393198c3f60b75ffb5e57ba Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Thu, 16 Oct 2025 23:21:55 +0530 Subject: [PATCH 20/38] Add passing test struct_field_to_var_str for strings --- .../assign/struct_field_to_var_str.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 tests/passing_tests/assign/struct_field_to_var_str.py diff --git a/tests/passing_tests/assign/struct_field_to_var_str.py b/tests/passing_tests/assign/struct_field_to_var_str.py new file mode 100644 index 00000000..1374b7c9 --- /dev/null +++ b/tests/passing_tests/assign/struct_field_to_var_str.py @@ -0,0 +1,26 @@ +from pythonbpf import bpf, struct, section, bpfglobal +from pythonbpf.helper import comm + +from ctypes import c_void_p, 
c_int64 + + +@bpf +@struct +class data_t: + comm: str(16) # type: ignore [valid-type] + + +@bpf +@section("tracepoint/syscalls/sys_enter_clone") +def hello(ctx: c_void_p) -> c_int64: + dataobj = data_t() + comm(dataobj.comm) + strobj = dataobj.comm + print(f"clone called by comm {strobj}") + return 0 + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" From fc55b7ecaa88889479aa1528d66f39f15511a285 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Thu, 16 Oct 2025 23:42:03 +0530 Subject: [PATCH 21/38] Add passing ptr_to_char_array test for strings --- .../passing_tests/assign/ptr_to_char_array.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 tests/passing_tests/assign/ptr_to_char_array.py diff --git a/tests/passing_tests/assign/ptr_to_char_array.py b/tests/passing_tests/assign/ptr_to_char_array.py new file mode 100644 index 00000000..6a090be2 --- /dev/null +++ b/tests/passing_tests/assign/ptr_to_char_array.py @@ -0,0 +1,28 @@ +from pythonbpf import bpf, struct, section, bpfglobal +from pythonbpf.helper import comm + +from ctypes import c_void_p, c_int64 + + +@bpf +@struct +class data_t: + comm: str(16) # type: ignore [valid-type] + copp: str(16) # type: ignore [valid-type] + + +@bpf +@section("tracepoint/syscalls/sys_enter_clone") +def hello(ctx: c_void_p) -> c_int64: + dataobj = data_t() + comm(dataobj.comm) + strobj = dataobj.comm + dataobj.copp = strobj + print(f"clone called by comm {dataobj.copp}") + return 0 + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" From 60737d9894ac3a8fcc6f8e5d5ca83ad4f5fdde9b Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Fri, 17 Oct 2025 03:25:15 +0530 Subject: [PATCH 22/38] Improve error handling in compile, pass structs_sym_tab and maps_sym_tab to BpfProgram --- pythonbpf/codegen.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/pythonbpf/codegen.py b/pythonbpf/codegen.py index 5db9f880..16e4bf21 100644 --- 
a/pythonbpf/codegen.py +++ b/pythonbpf/codegen.py @@ -55,6 +55,7 @@ def processor(source_code, filename, module): func_proc(tree, module, bpf_chunks, map_sym_tab, structs_sym_tab) globals_list_creation(tree, module) + return structs_sym_tab, map_sym_tab def compile_to_ir(filename: str, output: str, loglevel=logging.INFO): @@ -80,7 +81,7 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO): True, ) - processor(source, filename, module) + structs_sym_tab, maps_sym_tab = processor(source, filename, module) wchar_size = module.add_metadata( [ @@ -127,7 +128,7 @@ def compile_to_ir(filename: str, output: str, loglevel=logging.INFO): f.write(str(module)) f.write("\n") - return output + return output, structs_sym_tab, maps_sym_tab def _run_llc(ll_file, obj_file): @@ -165,15 +166,14 @@ def compile(loglevel=logging.INFO) -> bool: ll_file = Path("/tmp") / caller_file.with_suffix(".ll").name o_file = caller_file.with_suffix(".o") - success = True - success = ( - compile_to_ir(str(caller_file), str(ll_file), loglevel=loglevel) and success - ) + compile_to_ir(str(caller_file), str(ll_file), loglevel=loglevel) - success = _run_llc(ll_file, o_file) and success + if not _run_llc(ll_file, o_file): + logger.error("Compilation to object file failed.") + return False logger.info(f"Object written to {o_file}") - return success + return True def BPF(loglevel=logging.INFO) -> BpfProgram: @@ -189,7 +189,11 @@ def BPF(loglevel=logging.INFO) -> BpfProgram: f.write(src) f.flush() source = f.name - compile_to_ir(source, str(inter.name), loglevel=loglevel) + _, structs_sym_tab, maps_sym_tab = compile_to_ir( + source, str(inter.name), loglevel=loglevel + ) _run_llc(str(inter.name), str(obj_file.name)) - return BpfProgram(str(obj_file.name)) + return BpfProgram( + str(obj_file.name), structs=structs_sym_tab, maps=maps_sym_tab + ) From 8c976e46ae45e39e233615734685b043a7c69170 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Mon, 20 Oct 2025 04:00:30 +0530 Subject: 
[PATCH 23/38] Fix loglevel and pylibbpf import in codegen --- pythonbpf/codegen.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pythonbpf/codegen.py b/pythonbpf/codegen.py index 16e4bf21..d33d8668 100644 --- a/pythonbpf/codegen.py +++ b/pythonbpf/codegen.py @@ -15,7 +15,7 @@ import subprocess import inspect from pathlib import Path -from pylibbpf import BpfProgram +from pylibbpf import BpfObject import tempfile from logging import Logger import logging @@ -158,7 +158,7 @@ def _run_llc(ll_file, obj_file): return False -def compile(loglevel=logging.INFO) -> bool: +def compile(loglevel=logging.WARNING) -> bool: # Look one level up the stack to the caller of this function caller_frame = inspect.stack()[1] caller_file = Path(caller_frame.filename).resolve() @@ -166,7 +166,9 @@ def compile(loglevel=logging.INFO) -> bool: ll_file = Path("/tmp") / caller_file.with_suffix(".ll").name o_file = caller_file.with_suffix(".o") - compile_to_ir(str(caller_file), str(ll_file), loglevel=loglevel) + _, structs_sym_tab, maps_sym_tab = compile_to_ir( + str(caller_file), str(ll_file), loglevel=loglevel + ) if not _run_llc(ll_file, o_file): logger.error("Compilation to object file failed.") @@ -176,7 +178,7 @@ def compile(loglevel=logging.INFO) -> bool: return True -def BPF(loglevel=logging.INFO) -> BpfProgram: +def BPF(loglevel=logging.WARNING) -> BpfObject: caller_frame = inspect.stack()[1] src = inspect.getsource(caller_frame.frame) with tempfile.NamedTemporaryFile( @@ -194,6 +196,4 @@ def BPF(loglevel=logging.INFO) -> BpfProgram: ) _run_llc(str(inter.name), str(obj_file.name)) - return BpfProgram( - str(obj_file.name), structs=structs_sym_tab, maps=maps_sym_tab - ) + return BpfObject(str(obj_file.name), structs=structs_sym_tab) From 5bba8dce12ce404a1fd0ed753db2fe200bbc35ce Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Mon, 20 Oct 2025 04:02:34 +0530 Subject: [PATCH 24/38] Complete hello_perf_output BCC example --- 
BCC-Examples/hello_perf_output.py | 32 ++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/BCC-Examples/hello_perf_output.py b/BCC-Examples/hello_perf_output.py index 57ec0987..40f14cca 100644 --- a/BCC-Examples/hello_perf_output.py +++ b/BCC-Examples/hello_perf_output.py @@ -1,7 +1,6 @@ -from pythonbpf import bpf, map, struct, section, bpfglobal, BPF, trace_pipe +from pythonbpf import bpf, map, struct, section, bpfglobal, BPF from pythonbpf.helper import ktime, pid, comm from pythonbpf.maps import PerfEventArray - from ctypes import c_void_p, c_int64, c_uint64 @@ -25,7 +24,6 @@ def hello(ctx: c_void_p) -> c_int64: dataobj = data_t() dataobj.pid, dataobj.ts = pid(), ktime() comm(dataobj.comm) - print(f"clone called at {dataobj.ts} by pid {dataobj.pid}, comm {dataobj.comm}") events.output(dataobj) return 0 # type: ignore [return-value] @@ -36,8 +34,28 @@ def LICENSE() -> str: return "GPL" -# compile -BPF().load_and_attach() +# Compile and load +b = BPF() +b.load() +attached = b.attach_all() + +start = 0 + + +def callback(cpu, event): + global start + if start == 0: + start = event.ts + ts = (event.ts - start) / 1e9 + print(f"[CPU {cpu}] PID: {event.pid}, TS: {ts}, COMM: {event.comm.decode()}") + + +perf = b["events"].open_perf_buffer(callback, struct_name="data_t") +print("Starting to poll... (Ctrl+C to stop)") +print("Try running: fork() or clone() system calls to trigger events") -print("Tracing clone()... 
Ctrl-C to end") -trace_pipe() +try: + while True: + b["events"].poll(1000) +except KeyboardInterrupt: + print("Stopping...") From aa85d0e0eff9401ca82f749979bab55f645b79b7 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Mon, 20 Oct 2025 14:58:50 +0530 Subject: [PATCH 25/38] Remove unnecessary attached var in hello_perf_output --- BCC-Examples/hello_perf_output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BCC-Examples/hello_perf_output.py b/BCC-Examples/hello_perf_output.py index 40f14cca..9de89b91 100644 --- a/BCC-Examples/hello_perf_output.py +++ b/BCC-Examples/hello_perf_output.py @@ -37,7 +37,7 @@ def LICENSE() -> str: # Compile and load b = BPF() b.load() -attached = b.attach_all() +b.attach_all() start = 0 From dd9411b7b9976a2050206bb75e9c2cba14c24371 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Mon, 20 Oct 2025 14:59:13 +0530 Subject: [PATCH 26/38] Fix userspace calling in hello_world --- BCC-Examples/hello_world.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/BCC-Examples/hello_world.py b/BCC-Examples/hello_world.py index 4b5647b8..520f84c4 100644 --- a/BCC-Examples/hello_world.py +++ b/BCC-Examples/hello_world.py @@ -15,7 +15,9 @@ def LICENSE() -> str: return "GPL" +# Compile and load b = BPF() -b.load_and_attach() +b.load() +b.attach_all() trace_pipe() From 54c97e648b3940c2849edc30d4f58feaf95cc7cc Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Mon, 20 Oct 2025 15:02:31 +0530 Subject: [PATCH 27/38] Fix userspace calling in hello_fields --- BCC-Examples/hello_fields.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/BCC-Examples/hello_fields.py b/BCC-Examples/hello_fields.py index 8a2464a7..46156c5c 100644 --- a/BCC-Examples/hello_fields.py +++ b/BCC-Examples/hello_fields.py @@ -15,9 +15,10 @@ def LICENSE() -> str: return "GPL" -# compile +# Compile and load b = BPF() -b.load_and_attach() +b.load() +b.attach_all() # header print(f"{'TIME(s)':<18} 
{'COMM':<16} {'PID':<6} {'MESSAGE'}") From 610cbe82a81a66f5d479f1afbbdc6678aae8268e Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Mon, 20 Oct 2025 15:04:40 +0530 Subject: [PATCH 28/38] Fix userspace calling in sync_count --- BCC-Examples/sync_count.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/BCC-Examples/sync_count.py b/BCC-Examples/sync_count.py index 51790b94..4eead08e 100644 --- a/BCC-Examples/sync_count.py +++ b/BCC-Examples/sync_count.py @@ -37,9 +37,10 @@ def LICENSE() -> str: return "GPL" -# compile +# Compile and load b = BPF() -b.load_and_attach() +b.load() +b.attach_all() print("Tracing for quick sync's... Ctrl-C to end") From 32736204479c2b8486c44883f6e2a3b1e2d6749e Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Mon, 20 Oct 2025 15:08:27 +0530 Subject: [PATCH 29/38] Fix userspace calling in sync_timing --- BCC-Examples/sync_timing.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/BCC-Examples/sync_timing.py b/BCC-Examples/sync_timing.py index 448caba4..aba494d8 100644 --- a/BCC-Examples/sync_timing.py +++ b/BCC-Examples/sync_timing.py @@ -33,9 +33,10 @@ def LICENSE() -> str: return "GPL" -# compile +# Compile and load b = BPF() -b.load_and_attach() +b.load() +b.attach_all() print("Tracing for quick sync's... Ctrl-C to end") From 174095973b5411ebdeda0974e886a2147cdaacae Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Mon, 20 Oct 2025 15:10:12 +0530 Subject: [PATCH 30/38] Fix userspace calling in sys_sync --- BCC-Examples/sys_sync.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/BCC-Examples/sys_sync.py b/BCC-Examples/sys_sync.py index 7749caec..4d7314a4 100644 --- a/BCC-Examples/sys_sync.py +++ b/BCC-Examples/sys_sync.py @@ -15,6 +15,9 @@ def LICENSE() -> str: return "GPL" -BPF().load_and_attach() +# Compile and load +b = BPF() +b.load() +b.attach_all() print("Tracing sys_sync()... 
Ctrl-C to end.") trace_pipe() From d0fecbc03c7c69b201712729114708470c659a1f Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Mon, 20 Oct 2025 15:43:57 +0530 Subject: [PATCH 31/38] Add sync_perf_output BCC example --- BCC-Examples/sync_perf_output.py | 78 ++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 BCC-Examples/sync_perf_output.py diff --git a/BCC-Examples/sync_perf_output.py b/BCC-Examples/sync_perf_output.py new file mode 100644 index 00000000..4b2447af --- /dev/null +++ b/BCC-Examples/sync_perf_output.py @@ -0,0 +1,78 @@ +from pythonbpf import bpf, map, struct, section, bpfglobal, BPF +from pythonbpf.helper import ktime +from pythonbpf.maps import HashMap +from pythonbpf.maps import PerfEventArray +from ctypes import c_void_p, c_int64 + + +@bpf +@struct +class data_t: + ts: c_int64 + ms: c_int64 + + +@bpf +@map +def events() -> PerfEventArray: + return PerfEventArray(key_size=c_int64, value_size=c_int64) + + +@bpf +@map +def last() -> HashMap: + return HashMap(key=c_int64, value=c_int64, max_entries=1) + + +@bpf +@section("tracepoint/syscalls/sys_enter_sync") +def do_trace(ctx: c_void_p) -> c_int64: + dat, dat.ts, key = data_t(), ktime(), 0 + tsp = last.lookup(key) + if tsp: + delta = ktime() - tsp + if delta < 1000000000: + dat.ms = delta // 1000000 + events.output(dat) + last.delete(key) + else: + last.update(key, ktime()) + return 0 # type: ignore [return-value] + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +# Compile and load +b = BPF() +b.load() +b.attach_all() + +print("Tracing for quick sync's... Ctrl-C to end") + +# format output +start = 0 + + +def callback(cpu, event): + global start + if start == 0: + start = event.ts + event.ts -= start + print( + f"At time {event.ts / 1e9} s: Multiple sync detected, Last sync: {event.ms} ms ago" + ) + + +perf = b["events"].open_perf_buffer(callback, struct_name="data_t") +print("Starting to poll... 
(Ctrl+C to stop)") +print("Try running: fork() or clone() system calls to trigger events") + +try: + while True: + b["events"].poll(1000) +except KeyboardInterrupt: + print("Stopping...") From c07707a9ad57d40e0e71f06665131081499b45e2 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Tue, 21 Oct 2025 03:56:04 +0530 Subject: [PATCH 32/38] Add vfsreadlat.py BCC example --- BCC-Examples/hello_perf_output.py | 6 +- BCC-Examples/vfsreadlat.py | 127 ++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+), 3 deletions(-) create mode 100644 BCC-Examples/vfsreadlat.py diff --git a/BCC-Examples/hello_perf_output.py b/BCC-Examples/hello_perf_output.py index 9de89b91..2bc195d7 100644 --- a/BCC-Examples/hello_perf_output.py +++ b/BCC-Examples/hello_perf_output.py @@ -1,14 +1,14 @@ from pythonbpf import bpf, map, struct, section, bpfglobal, BPF from pythonbpf.helper import ktime, pid, comm from pythonbpf.maps import PerfEventArray -from ctypes import c_void_p, c_int64, c_uint64 +from ctypes import c_void_p, c_int64 @bpf @struct class data_t: - pid: c_uint64 - ts: c_uint64 + pid: c_int64 + ts: c_int64 comm: str(16) # type: ignore [valid-type] diff --git a/BCC-Examples/vfsreadlat.py b/BCC-Examples/vfsreadlat.py new file mode 100644 index 00000000..9dce0ea3 --- /dev/null +++ b/BCC-Examples/vfsreadlat.py @@ -0,0 +1,127 @@ +from pythonbpf import bpf, map, struct, section, bpfglobal, BPF +from pythonbpf.helper import ktime, pid +from pythonbpf.maps import HashMap, PerfEventArray +from ctypes import c_void_p, c_uint64 +import matplotlib.pyplot as plt +import numpy as np + + +@bpf +@struct +class latency_event: + pid: c_uint64 + delta_us: c_uint64 # Latency in microseconds + + +@bpf +@map +def start() -> HashMap: + return HashMap(key=c_uint64, value=c_uint64, max_entries=10240) + + +@bpf +@map +def events() -> PerfEventArray: + return PerfEventArray(key_size=c_uint64, value_size=c_uint64) + + +@bpf +@section("kprobe/vfs_read") +def do_entry(ctx: c_void_p) -> c_uint64: + 
p, ts = pid(), ktime() + start.update(p, ts) + return 0 # type: ignore [return-value] + + +@bpf +@section("kretprobe/vfs_read") +def do_return(ctx: c_void_p) -> c_uint64: + p = pid() + tsp = start.lookup(p) + + if tsp: + delta_ns = ktime() - tsp + + # Only track if latency > 1 microsecond + if delta_ns > 1000: + evt = latency_event() + evt.pid, evt.delta_us = p, delta_ns // 1000 + events.output(evt) + + start.delete(p) + + return 0 # type: ignore [return-value] + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +# Load BPF +print("Loading BPF program...") +b = BPF() +b.load() +b.attach_all() + +# Collect latencies +latencies = [] + + +def callback(cpu, event): + latencies.append(event.delta_us) + + +b["events"].open_perf_buffer(callback, struct_name="latency_event") + +print("Tracing vfs_read latency... Hit Ctrl-C to end.") + +try: + while True: + b["events"].poll(1000) + if len(latencies) > 0 and len(latencies) % 1000 == 0: + print(f"Collected {len(latencies)} samples...") + +except KeyboardInterrupt: + print(f"Collected {len(latencies)} samples. 
Generating histogram...") + +# Create histogram with matplotlib +if latencies: + # Use log scale for better visualization + log_latencies = np.log2(latencies) + + plt.figure(figsize=(12, 6)) + + # Plot 1: Linear histogram + plt.subplot(1, 2, 1) + plt.hist(latencies, bins=50, edgecolor="black", alpha=0.7) + plt.xlabel("Latency (microseconds)") + plt.ylabel("Count") + plt.title("VFS Read Latency Distribution (Linear)") + plt.grid(True, alpha=0.3) + + # Plot 2: Log2 histogram (like BCC) + plt.subplot(1, 2, 2) + plt.hist(log_latencies, bins=50, edgecolor="black", alpha=0.7, color="orange") + plt.xlabel("log2(Latency in µs)") + plt.ylabel("Count") + plt.title("VFS Read Latency Distribution (Log2)") + plt.grid(True, alpha=0.3) + + # Add statistics + print("Statistics:") + print(f" Count: {len(latencies)}") + print(f" Min: {min(latencies)} µs") + print(f" Max: {max(latencies)} µs") + print(f" Mean: {np.mean(latencies):.2f} µs") + print(f" Median: {np.median(latencies):.2f} µs") + print(f" P95: {np.percentile(latencies, 95):.2f} µs") + print(f" P99: {np.percentile(latencies, 99):.2f} µs") + + plt.tight_layout() + plt.savefig("vfs_read_latency.png", dpi=150) + print("Histogram saved to vfs_read_latency.png") + plt.show() +else: + print("No samples collected!") From e98d5684ead7e4fd14f988c2e51e6bfb0b2c1852 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Tue, 21 Oct 2025 04:31:23 +0530 Subject: [PATCH 33/38] Add enhanced live vfsreadlat.py monitor BCC example with rich library --- BCC-Examples/vfsreadlat_rich.py | 178 ++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100644 BCC-Examples/vfsreadlat_rich.py diff --git a/BCC-Examples/vfsreadlat_rich.py b/BCC-Examples/vfsreadlat_rich.py new file mode 100644 index 00000000..a8660acf --- /dev/null +++ b/BCC-Examples/vfsreadlat_rich.py @@ -0,0 +1,178 @@ +from pythonbpf import bpf, map, struct, section, bpfglobal, BPF +from pythonbpf.helper import ktime, pid +from pythonbpf.maps import HashMap, 
PerfEventArray +from ctypes import c_void_p, c_uint64 + +from rich.console import Console +from rich.live import Live +from rich.table import Table +from rich.panel import Panel +from rich.layout import Layout +import numpy as np +import threading +import time +from collections import Counter + +# ==================== BPF Setup ==================== + + +@bpf +@struct +class latency_event: + pid: c_uint64 + delta_us: c_uint64 + + +@bpf +@map +def start() -> HashMap: + return HashMap(key=c_uint64, value=c_uint64, max_entries=10240) + + +@bpf +@map +def events() -> PerfEventArray: + return PerfEventArray(key_size=c_uint64, value_size=c_uint64) + + +@bpf +@section("kprobe/vfs_read") +def do_entry(ctx: c_void_p) -> c_uint64: + p, ts = pid(), ktime() + start.update(p, ts) + return 0 # type: ignore [return-value] + + +@bpf +@section("kretprobe/vfs_read") +def do_return(ctx: c_void_p) -> c_uint64: + p = pid() + tsp = start.lookup(p) + + if tsp: + delta_ns = ktime() - tsp + + if delta_ns > 1000: + evt = latency_event() + evt.pid, evt.delta_us = p, delta_ns // 1000 + events.output(evt) + + start.delete(p) + + return 0 # type: ignore [return-value] + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +console = Console() +console.print("[bold green]Loading BPF program...[/]") + +b = BPF() +b.load() +b.attach_all() + +# ==================== Data Collection ==================== + +all_latencies = [] +histogram_buckets = Counter() # type: ignore [var-annotated] + + +def callback(cpu, event): + all_latencies.append(event.delta_us) + # Create log2 bucket + bucket = int(np.floor(np.log2(event.delta_us + 1))) + histogram_buckets[bucket] += 1 + + +b["events"].open_perf_buffer(callback, struct_name="latency_event") + + +def poll_events(): + while True: + b["events"].poll(100) + + +poll_thread = threading.Thread(target=poll_events, daemon=True) +poll_thread.start() + +# ==================== Live Display ==================== + + +def generate_display(): + layout = Layout() + 
layout.split_column( + Layout(name="header", size=3), + Layout(name="stats", size=8), + Layout(name="histogram", size=20), + ) + + # Header + layout["header"].update( + Panel("[bold cyan]🔥 VFS Read Latency Monitor[/]", style="bold white on blue") + ) + + # Stats + if len(all_latencies) > 0: + lats = np.array(all_latencies) + stats_table = Table(show_header=False, box=None, padding=(0, 2)) + stats_table.add_column(style="bold cyan") + stats_table.add_column(style="bold yellow") + + stats_table.add_row("📊 Total Samples:", f"{len(lats):,}") + stats_table.add_row("⚡ Mean Latency:", f"{np.mean(lats):.2f} µs") + stats_table.add_row("📉 Min Latency:", f"{np.min(lats):.2f} µs") + stats_table.add_row("📈 Max Latency:", f"{np.max(lats):.2f} µs") + stats_table.add_row("🎯 P95 Latency:", f"{np.percentile(lats, 95):.2f} µs") + stats_table.add_row("🔥 P99 Latency:", f"{np.percentile(lats, 99):.2f} µs") + + layout["stats"].update( + Panel(stats_table, title="Statistics", border_style="green") + ) + else: + layout["stats"].update( + Panel("[yellow]Waiting for data...[/]", border_style="yellow") + ) + + # Histogram + if histogram_buckets: + hist_table = Table(title="Latency Distribution", box=None) + hist_table.add_column("Range", style="cyan", no_wrap=True) + hist_table.add_column("Count", justify="right", style="yellow") + hist_table.add_column("Distribution", style="green") + + max_count = max(histogram_buckets.values()) + + for bucket in sorted(histogram_buckets.keys()): + count = histogram_buckets[bucket] + lower = 2**bucket + upper = 2 ** (bucket + 1) + + # Create bar + bar_width = int((count / max_count) * 40) + bar = "█" * bar_width + + hist_table.add_row( + f"{lower:5d}-{upper:5d} µs", + f"{count:6d}", + f"[green]{bar}[/] {count / len(all_latencies) * 100:.1f}%", + ) + + layout["histogram"].update(Panel(hist_table, border_style="green")) + + return layout + + +try: + with Live(generate_display(), refresh_per_second=2, console=console) as live: + while True: + time.sleep(0.5) + 
live.update(generate_display()) +except KeyboardInterrupt: + console.print("\n[bold red]Stopping...[/]") + + if all_latencies: + console.print(f"\n[bold green]✅ Collected {len(all_latencies):,} samples[/]") From 798f07986a9fd62e107b69c71d77002f5556596f Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Tue, 21 Oct 2025 05:36:59 +0530 Subject: [PATCH 34/38] Add enhanced vfsreadlat BCC example with live plotly and dash graphs on browser --- BCC-Examples/vfsreadlat_plotly/bpf_program.py | 101 +++++++ BCC-Examples/vfsreadlat_plotly/dashboard.py | 282 ++++++++++++++++++ .../vfsreadlat_plotly/data_collector.py | 96 ++++++ 3 files changed, 479 insertions(+) create mode 100644 BCC-Examples/vfsreadlat_plotly/bpf_program.py create mode 100644 BCC-Examples/vfsreadlat_plotly/dashboard.py create mode 100644 BCC-Examples/vfsreadlat_plotly/data_collector.py diff --git a/BCC-Examples/vfsreadlat_plotly/bpf_program.py b/BCC-Examples/vfsreadlat_plotly/bpf_program.py new file mode 100644 index 00000000..41c87cc7 --- /dev/null +++ b/BCC-Examples/vfsreadlat_plotly/bpf_program.py @@ -0,0 +1,101 @@ +"""BPF program for tracing VFS read latency.""" + +from pythonbpf import bpf, map, struct, section, bpfglobal, BPF +from pythonbpf.helper import ktime, pid +from pythonbpf.maps import HashMap, PerfEventArray +from ctypes import c_void_p, c_uint64 +import argparse +from data_collector import LatencyCollector +from dashboard import LatencyDashboard + + +@bpf +@struct +class latency_event: + pid: c_uint64 + delta_us: c_uint64 + + +@bpf +@map +def start() -> HashMap: + """Map to store start timestamps by PID.""" + return HashMap(key=c_uint64, value=c_uint64, max_entries=10240) + + +@bpf +@map +def events() -> PerfEventArray: + """Perf event array for sending latency events to userspace.""" + return PerfEventArray(key_size=c_uint64, value_size=c_uint64) + + +@bpf +@section("kprobe/vfs_read") +def do_entry(ctx: c_void_p) -> c_uint64: + """Record start time when vfs_read is called.""" + p, ts 
= pid(), ktime() + start.update(p, ts) + return 0 # type: ignore [return-value] + + +@bpf +@section("kretprobe/vfs_read") +def do_return(ctx: c_void_p) -> c_uint64: + """Calculate and record latency when vfs_read returns.""" + p = pid() + tsp = start.lookup(p) + + if tsp: + delta_ns = ktime() - tsp + + # Only track latencies > 1 microsecond + if delta_ns > 1000: + evt = latency_event() + evt.pid, evt.delta_us = p, delta_ns // 1000 + events.output(evt) + + start.delete(p) + + return 0 # type: ignore [return-value] + + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + + +def parse_args(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Monitor VFS read latency with live dashboard" + ) + parser.add_argument( + "--host", default="0.0.0.0", help="Dashboard host (default: 0.0.0.0)" + ) + parser.add_argument( + "--port", type=int, default=8050, help="Dashboard port (default: 8050)" + ) + parser.add_argument( + "--buffer", type=int, default=10000, help="Recent data buffer size" + ) + return parser.parse_args() + + +args = parse_args() + +# Load BPF program +print("Loading BPF program...") +b = BPF() +b.load() +b.attach_all() +print("✅ BPF program loaded and attached") + +# Setup data collector +collector = LatencyCollector(b, buffer_size=args.buffer) +collector.start() + +# Create and run dashboard +dashboard = LatencyDashboard(collector) +dashboard.run(host=args.host, port=args.port) diff --git a/BCC-Examples/vfsreadlat_plotly/dashboard.py b/BCC-Examples/vfsreadlat_plotly/dashboard.py new file mode 100644 index 00000000..de430400 --- /dev/null +++ b/BCC-Examples/vfsreadlat_plotly/dashboard.py @@ -0,0 +1,282 @@ +"""Plotly Dash dashboard for visualizing latency data.""" + +import dash +from dash import dcc, html +from dash.dependencies import Input, Output +import plotly.graph_objects as go +from plotly.subplots import make_subplots +import numpy as np + + +class LatencyDashboard: + """Interactive dashboard for latency 
visualization.""" + + def __init__(self, collector, title: str = "VFS Read Latency Monitor"): + self.collector = collector + self.app = dash.Dash(__name__) + self.app.title = title + self._setup_layout() + self._setup_callbacks() + + def _setup_layout(self): + """Create dashboard layout.""" + self.app.layout = html.Div( + [ + html.H1( + "🔥 VFS Read Latency Dashboard", + style={ + "textAlign": "center", + "color": "#2c3e50", + "marginBottom": 20, + }, + ), + # Stats cards + html.Div( + [ + self._create_stat_card( + "total-samples", "📊 Total Samples", "#3498db" + ), + self._create_stat_card( + "mean-latency", "⚡ Mean Latency", "#e74c3c" + ), + self._create_stat_card( + "p99-latency", "🔥 P99 Latency", "#f39c12" + ), + ], + style={ + "display": "flex", + "justifyContent": "space-around", + "marginBottom": 30, + }, + ), + # Graphs - ✅ Make sure these IDs match the callback outputs + dcc.Graph(id="dual-histogram", style={"height": "450px"}), + dcc.Graph(id="log2-buckets", style={"height": "350px"}), + dcc.Graph(id="timeseries-graph", style={"height": "300px"}), + # Auto-update + dcc.Interval(id="interval-component", interval=1000, n_intervals=0), + ], + style={"padding": 20, "fontFamily": "Arial, sans-serif"}, + ) + + def _create_stat_card(self, id_name: str, title: str, color: str): + """Create a statistics card.""" + return html.Div( + [ + html.H3(title, style={"color": color}), + html.H2(id=id_name, style={"fontSize": 48, "color": "#2c3e50"}), + ], + className="stat-box", + style={ + "background": "white", + "padding": 20, + "borderRadius": 10, + "boxShadow": "0 4px 6px rgba(0,0,0,0.1)", + "textAlign": "center", + "flex": 1, + "margin": "0 10px", + }, + ) + + def _setup_callbacks(self): + """Setup dashboard callbacks.""" + + @self.app.callback( + [ + Output("total-samples", "children"), + Output("mean-latency", "children"), + Output("p99-latency", "children"), + Output("dual-histogram", "figure"), # ✅ Match layout IDs + Output("log2-buckets", "figure"), # ✅ Match 
layout IDs + Output("timeseries-graph", "figure"), # ✅ Match layout IDs + ], + [Input("interval-component", "n_intervals")], + ) + def update_dashboard(n): + stats = self.collector.get_stats() + + if stats.total == 0: + return self._empty_state() + + return ( + f"{stats.total:,}", + f"{stats.mean:.1f} µs", + f"{stats.p99:.1f} µs", + self._create_dual_histogram(), + self._create_log2_buckets(), + self._create_timeseries(), + ) + + def _empty_state(self): + """Return empty state for dashboard.""" + empty_fig = go.Figure() + empty_fig.update_layout( + title="Waiting for data... Generate some disk I/O!", template="plotly_white" + ) + # ✅ Return 6 values (3 stats + 3 figures) + return "0", "0 µs", "0 µs", empty_fig, empty_fig, empty_fig + + def _create_dual_histogram(self) -> go.Figure: + """Create side-by-side linear and log2 histograms.""" + latencies = self.collector.get_all_latencies() + + # Create subplots + fig = make_subplots( + rows=1, + cols=2, + subplot_titles=("Linear Scale", "Log2 Scale"), + horizontal_spacing=0.12, + ) + + # Linear histogram + fig.add_trace( + go.Histogram( + x=latencies, + nbinsx=50, + marker_color="rgb(55, 83, 109)", + opacity=0.75, + name="Linear", + ), + row=1, + col=1, + ) + + # Log2 histogram + log2_latencies = np.log2(latencies + 1) # +1 to avoid log2(0) + fig.add_trace( + go.Histogram( + x=log2_latencies, + nbinsx=30, + marker_color="rgb(243, 156, 18)", + opacity=0.75, + name="Log2", + ), + row=1, + col=2, + ) + + # Update axes + fig.update_xaxes(title_text="Latency (µs)", row=1, col=1) + fig.update_xaxes(title_text="log2(Latency in µs)", row=1, col=2) + fig.update_yaxes(title_text="Count", row=1, col=1) + fig.update_yaxes(title_text="Count", row=1, col=2) + + fig.update_layout( + title_text="📊 Latency Distribution (Linear vs Log2)", + template="plotly_white", + showlegend=False, + height=450, + ) + + return fig + + def _create_log2_buckets(self) -> go.Figure: + """Create bar chart of log2 buckets (like BCC histogram).""" + buckets 
= self.collector.get_histogram_buckets() + + if not buckets: + fig = go.Figure() + fig.update_layout( + title="🔥 Log2 Histogram - Waiting for data...", template="plotly_white" + ) + return fig + + # Sort buckets + sorted_buckets = sorted(buckets.keys()) + counts = [buckets[b] for b in sorted_buckets] + + # Create labels (e.g., "8-16µs", "16-32µs") + labels = [] + hover_text = [] + for bucket in sorted_buckets: + lower = 2**bucket + upper = 2 ** (bucket + 1) + labels.append(f"{lower}-{upper}") + + # Calculate percentage + total = sum(counts) + pct = (buckets[bucket] / total) * 100 if total > 0 else 0 + hover_text.append( + f"Range: {lower}-{upper} µs
" + f"Count: {buckets[bucket]:,}
" + f"Percentage: {pct:.2f}%" + ) + + # Create bar chart + fig = go.Figure() + + fig.add_trace( + go.Bar( + x=labels, + y=counts, + marker=dict( + color=counts, + colorscale="YlOrRd", + showscale=True, + colorbar=dict(title="Count"), + ), + text=counts, + textposition="outside", + hovertext=hover_text, + hoverinfo="text", + ) + ) + + fig.update_layout( + title="🔥 Log2 Histogram (BCC-style buckets)", + xaxis_title="Latency Range (µs)", + yaxis_title="Count", + template="plotly_white", + height=350, + xaxis=dict(tickangle=-45), + ) + + return fig + + def _create_timeseries(self) -> go.Figure: + """Create time series figure.""" + recent = self.collector.get_recent_latencies() + + if not recent: + fig = go.Figure() + fig.update_layout( + title="⏱️ Real-time Latency - Waiting for data...", + template="plotly_white", + ) + return fig + + times = [d["time"] for d in recent] + lats = [d["latency"] for d in recent] + + fig = go.Figure() + fig.add_trace( + go.Scatter( + x=times, + y=lats, + mode="lines", + line=dict(color="rgb(231, 76, 60)", width=2), + fill="tozeroy", + fillcolor="rgba(231, 76, 60, 0.2)", + ) + ) + + fig.update_layout( + title="⏱️ Real-time Latency (Last 10,000 samples)", + xaxis_title="Time (seconds)", + yaxis_title="Latency (µs)", + template="plotly_white", + height=300, + ) + + return fig + + def run(self, host: str = "0.0.0.0", port: int = 8050, debug: bool = False): + """Run the dashboard server.""" + print(f"\n{'=' * 60}") + print(f"🚀 Dashboard running at: http://{host}:{port}") + print(" Access from your browser to see live graphs") + print( + " Generate disk I/O to see data: dd if=/dev/zero of=/tmp/test bs=1M count=100" + ) + print(f"{'=' * 60}\n") + self.app.run(debug=debug, host=host, port=port) diff --git a/BCC-Examples/vfsreadlat_plotly/data_collector.py b/BCC-Examples/vfsreadlat_plotly/data_collector.py new file mode 100644 index 00000000..711e2f8a --- /dev/null +++ b/BCC-Examples/vfsreadlat_plotly/data_collector.py @@ -0,0 +1,96 @@ +"""Data 
collection and management.""" + +import threading +import time +import numpy as np +from collections import deque +from dataclasses import dataclass +from typing import List, Dict + + +@dataclass +class LatencyStats: + """Statistics computed from latency data.""" + + total: int = 0 + mean: float = 0.0 + median: float = 0.0 + min: float = 0.0 + max: float = 0.0 + p95: float = 0.0 + p99: float = 0.0 + + @classmethod + def from_array(cls, data: np.ndarray) -> "LatencyStats": + """Compute stats from numpy array.""" + if len(data) == 0: + return cls() + + return cls( + total=len(data), + mean=float(np.mean(data)), + median=float(np.median(data)), + min=float(np.min(data)), + max=float(np.max(data)), + p95=float(np.percentile(data, 95)), + p99=float(np.percentile(data, 99)), + ) + + +class LatencyCollector: + """Collects and manages latency data from BPF.""" + + def __init__(self, bpf_object, buffer_size: int = 10000): + self.bpf = bpf_object + self.all_latencies: List[float] = [] + self.recent_latencies = deque(maxlen=buffer_size) # type: ignore [var-annotated] + self.start_time = time.time() + self._lock = threading.Lock() + self._poll_thread = None + + def callback(self, cpu: int, event): + """Callback for BPF events.""" + with self._lock: + self.all_latencies.append(event.delta_us) + self.recent_latencies.append( + {"time": time.time() - self.start_time, "latency": event.delta_us} + ) + + def start(self): + """Start collecting data.""" + self.bpf["events"].open_perf_buffer(self.callback, struct_name="latency_event") + + def poll_loop(): + while True: + self.bpf["events"].poll(100) + + self._poll_thread = threading.Thread(target=poll_loop, daemon=True) + self._poll_thread.start() + print("✅ Data collection started") + + def get_all_latencies(self) -> np.ndarray: + """Get all latencies as numpy array.""" + with self._lock: + return np.array(self.all_latencies) if self.all_latencies else np.array([]) + + def get_recent_latencies(self) -> List[Dict]: + """Get recent 
latencies with timestamps.""" + with self._lock: + return list(self.recent_latencies) + + def get_stats(self) -> LatencyStats: + """Compute current statistics.""" + return LatencyStats.from_array(self.get_all_latencies()) + + def get_histogram_buckets(self) -> Dict[int, int]: + """Get log2 histogram buckets.""" + latencies = self.get_all_latencies() + if len(latencies) == 0: + return {} + + log_buckets = np.floor(np.log2(latencies + 1)).astype(int) + buckets = {} # type: ignore [var-annotated] + for bucket in log_buckets: + buckets[bucket] = buckets.get(bucket, 0) + 1 + + return buckets From f9494c870bc5c4fcd277dd776a76d7bd8729416a Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Wed, 22 Oct 2025 04:01:45 +0530 Subject: [PATCH 35/38] Fix logical fallacy in get_char_array_ptr_and_size --- pythonbpf/helper/helper_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pythonbpf/helper/helper_utils.py b/pythonbpf/helper/helper_utils.py index 7f3fdbe1..fdfd4524 100644 --- a/pythonbpf/helper/helper_utils.py +++ b/pythonbpf/helper/helper_utils.py @@ -236,7 +236,7 @@ def get_char_array_ptr_and_size(buf_arg, builder, local_sym_tab, struct_sym_tab) var_ptr = local_sym_tab[var_name].var var_type = local_sym_tab[var_name].ir_type - if not isinstance(var_type, ir.PointerType) and not isinstance( + if not isinstance(var_type, ir.PointerType) or not isinstance( var_type.pointee, ir.IntType(8) ): raise ValueError("Expected str ptr variable") From f4d903d4b505a088e1dde0c03bd950794db1ef3d Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Wed, 22 Oct 2025 04:06:22 +0530 Subject: [PATCH 36/38] Fix create_targets_and_rvals early returns --- pythonbpf/allocation_pass.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pythonbpf/allocation_pass.py b/pythonbpf/allocation_pass.py index 36b0a754..6833795d 100644 --- a/pythonbpf/allocation_pass.py +++ b/pythonbpf/allocation_pass.py @@ -27,11 +27,11 @@ def 
create_targets_and_rvals(stmt): if isinstance(stmt.targets[0], ast.Tuple): if not isinstance(stmt.value, ast.Tuple): logger.warning("Mismatched multi-target assignment, skipping allocation") - return + return [], [] targets, rvals = stmt.targets[0].elts, stmt.value.elts if len(targets) != len(rvals): logger.warning("length of LHS != length of RHS, skipping allocation") - return + return [], [] return targets, rvals return stmt.targets, [stmt.value] From 77c0d131beb5b09c6af4f6a067f294be5dd30dac Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Wed, 22 Oct 2025 04:09:18 +0530 Subject: [PATCH 37/38] Add permission error handling in trace_pipe --- pythonbpf/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pythonbpf/utils.py b/pythonbpf/utils.py index 47e99132..1540801f 100644 --- a/pythonbpf/utils.py +++ b/pythonbpf/utils.py @@ -7,6 +7,8 @@ def trace_pipe(): subprocess.run(["cat", "/sys/kernel/tracing/trace_pipe"]) except KeyboardInterrupt: print("Tracing stopped.") + except (FileNotFoundError, PermissionError) as e: + print(f"Error accessing trace_pipe: {e}. 
Try running as root.") def trace_fields(): From 37af7d2e20df791f3cb030b169d07b2bfd88c396 Mon Sep 17 00:00:00 2001 From: Pragyansh Chaturvedi Date: Wed, 22 Oct 2025 04:12:42 +0530 Subject: [PATCH 38/38] Janitorial fix format --- pythonbpf/allocation_pass.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pythonbpf/allocation_pass.py b/pythonbpf/allocation_pass.py index 22c457dd..49c787f4 100644 --- a/pythonbpf/allocation_pass.py +++ b/pythonbpf/allocation_pass.py @@ -65,11 +65,11 @@ def handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab): if var_name in local_sym_tab: logger.debug(f"Variable {var_name} already allocated, skipping") continue - + # When allocating a variable, check if it's a vmlinux struct type - if isinstance(stmt.value, ast.Name) and VmlinuxHandlerRegistry.is_vmlinux_struct( - stmt.value.id - ): + if isinstance( + stmt.value, ast.Name + ) and VmlinuxHandlerRegistry.is_vmlinux_struct(stmt.value.id): # Handle vmlinux struct allocation # This requires more implementation print(stmt.value) @@ -95,6 +95,7 @@ def handle_assign_allocation(builder, stmt, local_sym_tab, structs_sym_tab): f"Unsupported assignment value type for {var_name}: {type(rval).__name__}" ) + def _allocate_for_call(builder, var_name, rval, local_sym_tab, structs_sym_tab): """Allocate memory for variable assigned from a call."""